CR7CAD committed on
Commit
fc55093
·
verified ·
1 Parent(s): 156a16b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +765 -432
app.py CHANGED
@@ -1,520 +1,853 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import re
4
- import json
5
- import nltk
6
- from nltk.corpus import stopwords
7
- from nltk.tokenize import word_tokenize
8
- import torch
9
- from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
10
- import time
11
  import os
12
- import docx2txt
13
  import io
 
14
  import docx
 
 
 
 
 
 
 
 
 
15
 
16
- # Set page title and configuration
17
  st.set_page_config(
18
  page_title="Resume-Job Fit Analyzer",
19
- page_icon="📊",
20
- layout="wide"
21
  )
22
 
23
- # Download NLTK resources if needed
24
- @st.cache_resource
25
- def download_nltk_resources():
26
- try:
27
- nltk.data.find('tokenizers/punkt')
28
- nltk.data.find('corpora/stopwords')
29
- except LookupError:
30
- nltk.download('punkt')
31
- nltk.download('stopwords')
32
- return stopwords.words('english')
33
-
34
- stop_words = download_nltk_resources()
35
 
36
- # Load models
37
- @st.cache_resource
 
 
38
  def load_models():
39
- """Load and cache the NLP models"""
40
- models = {}
41
-
42
- # Use BART for resume parsing
43
- models['parser'] = pipeline(
44
- "text2text-generation",
45
- model="facebook/bart-base", # This would be the fine-tuned model in production
46
- device=0 if torch.cuda.is_available() else -1
47
- )
48
-
49
- # Use Qwen for evaluation
50
- models['evaluator'] = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
51
- models['evaluator_tokenizer'] = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
52
-
53
- return models
 
 
 
 
54
 
55
- # Read resume file
56
- def read_resume_file(uploaded_file):
57
- """Extract text from uploaded resume file"""
58
- file_extension = os.path.splitext(uploaded_file.name)[1].lower()
59
-
60
- if file_extension == ".txt":
61
- # Text file
62
- text = uploaded_file.read().decode('utf-8')
63
- return text
64
-
65
- elif file_extension == ".docx":
66
- # Modern Word document
 
 
 
 
 
67
  try:
68
- text = docx2txt.process(uploaded_file)
69
- return text
70
  except Exception as e:
71
- st.error(f"Error reading DOCX file: {str(e)}")
72
- return None
73
-
74
- elif file_extension == ".doc":
75
- # Legacy Word document - this is more complex
76
  try:
77
- # For .doc files, we'll return a warning that the conversion might not be perfect
78
- st.warning("Note: .doc files might not convert perfectly. For best results, upload .docx or .txt files.")
79
-
80
- # Save the uploaded file temporarily
81
- with open("temp_file.doc", "wb") as f:
82
- f.write(uploaded_file.getbuffer())
83
-
84
- # Use an external converter - this is a placeholder
85
- # In a real implementation, you might want to use antiword, textract or similar
86
- # Here we'll try using docx2txt as a fallback but it might not work well
87
  try:
88
- text = docx2txt.process("temp_file.doc")
89
- except:
90
- # If that fails, try a very basic approach
91
- with open("temp_file.doc", "rb") as f:
92
- content = f.read()
93
- text = content.decode('utf-8', errors='ignore')
94
- # Try to extract readable text by removing binary parts
95
- text = re.sub(r'[^\x20-\x7E\n\r\t]', '', text)
96
-
97
- # Clean up
98
- if os.path.exists("temp_file.doc"):
99
- os.remove("temp_file.doc")
100
-
101
- return text
102
  except Exception as e:
103
- st.error(f"Error reading DOC file: {str(e)}")
104
- return None
105
-
 
 
 
106
  else:
107
- st.error(f"Unsupported file format: {file_extension}")
108
- return None
 
 
109
 
110
- # Extract skills from text
111
- def extract_skills(text, skill_keywords):
112
- """Extract skills from text based on a predefined list of skills"""
113
- found_skills = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  text_lower = text.lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
- for skill in skill_keywords:
117
- # Create a regular expression pattern for whole word matching
118
- pattern = r'\b' + re.escape(skill.lower()) + r'\b'
119
- if re.search(pattern, text_lower):
120
- found_skills.append(skill)
121
-
122
- return list(set(found_skills))
123
-
124
- # Parse resume
125
- def parse_resume(resume_text, models):
126
- """Extract structured information from resume text"""
127
- # In production, this would use the fine-tuned BART model
128
- # For now, we'll implement a simple rule-based parser
129
-
130
- # Clean the text
131
- clean_text = re.sub(r'\s+', ' ', resume_text).strip()
132
-
133
- # Extract common skill keywords (this would be a more extensive list in production)
134
- tech_skills = [
135
- "Python", "Java", "C++", "JavaScript", "TypeScript", "Go", "Rust", "SQL",
136
- "React", "Angular", "Vue", "Node.js", "Django", "Flask", "Spring",
137
- "TensorFlow", "PyTorch", "Scikit-learn", "Machine Learning", "Deep Learning", "NLP",
138
- "AWS", "Azure", "GCP", "Docker", "Kubernetes", "CI/CD", "Jenkins", "GitHub Actions",
139
- "REST API", "GraphQL", "Microservices", "Serverless"
 
140
  ]
141
 
142
- soft_skills = [
143
- "Leadership", "Communication", "Teamwork", "Problem-solving", "Critical thinking",
144
- "Time management", "Adaptability", "Creativity", "Collaboration", "Presentation"
145
  ]
146
 
147
- # Extract skills
148
- found_tech_skills = extract_skills(clean_text, tech_skills)
149
- found_soft_skills = extract_skills(clean_text, soft_skills)
150
 
151
- # Extract experience using regex patterns (simplified)
152
- experience_pattern = r'(?:Experience|EXPERIENCE|Work Experience|WORK EXPERIENCE).*?(?:Education|EDUCATION|Skills|SKILLS|$)'
153
- experience_match = re.search(experience_pattern, clean_text, re.DOTALL)
154
- experience_text = experience_match.group(0) if experience_match else ""
 
 
 
 
 
 
155
 
156
- # Extract education using regex patterns (simplified)
157
- education_pattern = r'(?:Education|EDUCATION).*?(?:Skills|SKILLS|Experience|EXPERIENCE|$)'
158
- education_match = re.search(education_pattern, clean_text, re.DOTALL)
159
- education_text = education_match.group(0) if education_match else ""
160
 
161
- # Estimate years of experience (simplified)
162
- years_exp = 0
163
- year_patterns = [
164
- r'(\d{4})\s*-\s*(?:present|current|now|2023|2024|2025)',
165
- r'(\d{4})\s*-\s*(\d{4})'
166
- ]
 
 
 
 
 
 
 
 
 
167
 
168
- for pattern in year_patterns:
169
- matches = re.findall(pattern, clean_text, re.IGNORECASE)
170
- for match in matches:
171
- if isinstance(match, tuple):
172
- start_year = int(match[0])
173
- end_year = int(match[1]) if match[1].isdigit() else 2025
174
- years_exp += (end_year - start_year)
175
- else:
176
- start_year = int(match)
177
- years_exp += (2025 - start_year)
178
-
179
- # Cap reasonable years
180
- years_exp = min(years_exp, 30)
181
-
182
- # Create structured data
183
- structured_data = {
184
- "skills": {
185
- "technical": found_tech_skills,
186
- "soft": found_soft_skills
187
- },
188
- "experience": {
189
- "years": years_exp,
190
- "summary": experience_text[:300] + "..." if len(experience_text) > 300 else experience_text
191
- },
192
- "education": education_text[:300] + "..." if len(education_text) > 300 else education_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  }
194
 
195
- return structured_data
196
-
197
- # Parse job description
198
- def parse_job_description(job_text):
199
- """Extract key requirements from job description"""
200
- # Clean the text
201
- clean_text = re.sub(r'\s+', ' ', job_text).strip()
202
-
203
- # Extract common skill keywords (same as resume parser)
204
- tech_skills = [
205
- "Python", "Java", "C++", "JavaScript", "TypeScript", "Go", "Rust", "SQL",
206
- "React", "Angular", "Vue", "Node.js", "Django", "Flask", "Spring",
207
- "TensorFlow", "PyTorch", "Scikit-learn", "Machine Learning", "Deep Learning", "NLP",
208
- "AWS", "Azure", "GCP", "Docker", "Kubernetes", "CI/CD", "Jenkins", "GitHub Actions",
209
- "REST API", "GraphQL", "Microservices", "Serverless"
210
- ]
211
 
212
- soft_skills = [
213
- "Leadership", "Communication", "Teamwork", "Problem-solving", "Critical thinking",
214
- "Time management", "Adaptability", "Creativity", "Collaboration", "Presentation"
 
 
215
  ]
216
 
217
- # Extract skills
218
- required_tech_skills = extract_skills(clean_text, tech_skills)
219
- required_soft_skills = extract_skills(clean_text, soft_skills)
 
 
 
 
 
220
 
221
- # Extract years of experience requirement (simplified)
222
  exp_patterns = [
223
- r'(\d+)\+?\s*(?:years|yrs|yr)(?:\s*of)?\s*(?:experience|exp)',
224
- r'(?:experience|exp)(?:\s*of)?\s*(\d+)\+?\s*(?:years|yrs|yr)'
225
  ]
226
 
227
- required_years = 0
228
  for pattern in exp_patterns:
229
- matches = re.findall(pattern, clean_text, re.IGNORECASE)
230
- if matches:
231
- # Take the highest mentioned years
232
- required_years = max([int(y) for y in matches if y.isdigit()] + [required_years])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
- # Extract job title
235
- title_pattern = r'^(.*?)(?:\n|$)'
236
- title_match = re.search(title_pattern, clean_text)
237
- job_title = title_match.group(1).strip() if title_match else "Not specified"
 
238
 
239
- # Create structured data
240
- structured_data = {
241
  "title": job_title,
242
- "requirements": {
243
- "technical_skills": required_tech_skills,
244
- "soft_skills": required_soft_skills,
245
- "years_experience": required_years
246
- },
247
- "full_text": job_text
248
  }
249
 
250
- return structured_data
251
 
252
- # Calculate match score
253
- def calculate_match_score(resume_data, job_data):
254
- """Calculate how well the resume matches the job description"""
255
- scores = {}
 
 
 
 
256
 
257
- # Calculate skill match percentage
258
- required_tech_skills = set(job_data["requirements"]["technical_skills"])
259
- candidate_tech_skills = set(resume_data["skills"]["technical"])
260
 
261
- required_soft_skills = set(job_data["requirements"]["soft_skills"])
262
- candidate_soft_skills = set(resume_data["skills"]["soft"])
 
263
 
264
- if required_tech_skills:
265
- tech_match = len(candidate_tech_skills.intersection(required_tech_skills)) / len(required_tech_skills)
266
- scores["technical_skills"] = {
267
- "score": int(tech_match * 100),
268
- "matched": list(candidate_tech_skills.intersection(required_tech_skills)),
269
- "missing": list(required_tech_skills - candidate_tech_skills)
270
- }
271
- else:
272
- scores["technical_skills"] = {"score": 0, "matched": [], "missing": []}
273
-
274
- if required_soft_skills:
275
- soft_match = len(candidate_soft_skills.intersection(required_soft_skills)) / len(required_soft_skills)
276
- scores["soft_skills"] = {
277
- "score": int(soft_match * 100),
278
- "matched": list(candidate_soft_skills.intersection(required_soft_skills)),
279
- "missing": list(required_soft_skills - candidate_soft_skills)
280
- }
281
- else:
282
- scores["soft_skills"] = {"score": 0, "matched": [], "missing": []}
283
 
284
- # Experience match
285
- required_years = job_data["requirements"]["years_experience"]
286
- candidate_years = resume_data["experience"]["years"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
- if required_years > 0:
289
- if candidate_years >= required_years:
290
- exp_score = 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  else:
292
- exp_score = int((candidate_years / required_years) * 100)
 
293
 
294
- scores["experience"] = {
295
- "score": exp_score,
296
- "candidate_years": candidate_years,
297
- "required_years": required_years
298
- }
299
- else:
300
- scores["experience"] = {
301
- "score": 100 if candidate_years > 0 else 50,
302
- "candidate_years": candidate_years,
303
- "required_years": "Not specified"
304
  }
305
 
306
- # Calculate overall score (weighted)
307
- tech_weight = 0.6
308
- soft_weight = 0.2
309
- exp_weight = 0.2
310
 
311
- overall_score = (
312
- scores["technical_skills"]["score"] * tech_weight +
313
- scores["soft_skills"]["score"] * soft_weight +
314
- scores["experience"]["score"] * exp_weight
315
- )
 
316
 
317
- scores["overall"] = int(overall_score)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
 
319
- return scores
320
-
321
- # Generate expert assessment using Qwen
322
- def generate_assessment(resume_data, job_data, match_scores, models):
323
- """Generate an expert assessment using Qwen model"""
324
- # Prepare context
325
- job_title = job_data["title"]
326
- matched_skills = match_scores["technical_skills"]["matched"]
327
- missing_skills = match_scores["technical_skills"]["missing"]
328
- experience_match = match_scores["experience"]
329
- overall_score = match_scores["overall"]
 
 
 
 
 
330
 
331
- # Determine fit classification
332
- fit_status = "FIT" if overall_score >= 70 else "NOT FIT"
 
 
 
333
 
334
- # Create prompt for Qwen
335
- prompt = f"""
336
- <|im_start|>system
337
- You are an expert resume evaluator. Analyze how well a candidate fits a job posting and provide professional feedback.
338
- <|im_end|>
339
 
340
- <|im_start|>user
341
- Evaluate this candidate for a {job_title} position.
342
 
343
- Overall match score: {overall_score}%
344
- Technical skills match: {match_scores["technical_skills"]["score"]}%
345
- Soft skills match: {match_scores["soft_skills"]["score"]}%
346
- Experience match: {experience_match["score"]}%
347
 
348
- Candidate has: {experience_match["candidate_years"]} years of experience
349
- Position requires: {experience_match["required_years"]} years of experience
 
 
 
 
350
 
351
- Matched technical skills: {", ".join(matched_skills) if matched_skills else "None"}
352
- Missing technical skills: {", ".join(missing_skills) if missing_skills else "None"}
353
 
354
- Create a professional assessment of this candidate. First state whether they are a FIT or NOT FIT for the position, then explain why with specific strengths and development areas.
355
- <|im_end|>
 
 
 
 
356
 
357
- <|im_start|>assistant
358
- """
359
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  try:
361
- # Generate the assessment using Qwen
362
- tokenizer = models['evaluator_tokenizer']
363
- qwen_model = models['evaluator']
364
-
365
- inputs = tokenizer(prompt, return_tensors="pt")
366
- outputs = qwen_model.generate(
367
- inputs.input_ids,
368
- max_new_tokens=512,
369
  do_sample=True,
370
- temperature=0.7,
371
- top_p=0.9
372
  )
 
 
 
 
 
 
373
 
374
- assessment = tokenizer.decode(outputs[0], skip_special_tokens=True)
375
-
376
- # Extract the assistant's response
377
- if "<|im_start|>assistant" in assessment:
378
- assessment = assessment.split("<|im_start|>assistant")[-1]
379
-
380
- # Clean up any remaining markers
381
- assessment = re.sub(r'<\|im_(start|end)\|>', '', assessment)
382
- assessment = assessment.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
 
384
- # If no assessment was generated, create a fallback
385
- if not assessment or len(assessment) < 50:
386
- assessment = generate_fallback_assessment(resume_data, job_data, match_scores, fit_status)
387
  except Exception as e:
388
- st.error(f"Error generating assessment: {str(e)}")
389
- assessment = generate_fallback_assessment(resume_data, job_data, match_scores, fit_status)
390
-
391
- return assessment, fit_status
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
 
393
  # Generate fallback assessment
394
- def generate_fallback_assessment(resume_data, job_data, match_scores, fit_status):
395
  """Generate a fallback assessment if the model fails"""
396
- job_title = job_data["title"]
397
- matched_skills = match_scores["technical_skills"]["matched"]
398
- missing_skills = match_scores["technical_skills"]["missing"]
399
- overall_score = match_scores["overall"]
400
 
401
  if fit_status == "FIT":
402
- assessment = f"""FIT: This candidate demonstrates a strong alignment with the {job_title} position, achieving an overall match score of {overall_score}%. Their proficiency in {', '.join(matched_skills) if matched_skills else 'relevant skills'} positions them well to contribute effectively from the start. The candidate's experience level is suitable for the role's requirements. To maximize their success, they could consider developing expertise in {', '.join(missing_skills) if missing_skills else 'additional specialized areas relevant to this role'}.
403
  """
404
  else:
405
- assessment = f"""NOT FIT: This candidate currently shows limited alignment with the {job_title} position, with an overall match score of {overall_score}%. While they demonstrate some relevant capabilities in {', '.join(matched_skills) if matched_skills else 'a few areas'}, they would need to develop expertise in critical areas such as {', '.join(missing_skills) if missing_skills else 'key technical requirements for this position'}. The candidate may become more competitive for this role by focusing on these skill gaps and gaining more relevant experience.
406
  """
407
 
408
  return assessment
409
 
410
- # Create the main header and interface
 
 
411
  st.title("Resume-Job Fit Analyzer")
412
- st.markdown("### Evaluate how well a resume matches a job description")
 
 
 
 
 
 
 
413
 
414
  # Resume upload
415
- st.subheader("Resume")
416
- uploaded_file = st.file_uploader("Upload Resume (.doc, .docx, .txt)", type=["doc", "docx", "txt"])
417
 
418
  # Job description input
419
- st.subheader("Job Description")
420
- job_description = st.text_area("Paste job description here", height=200,
421
- placeholder="Paste the job description here...")
422
-
423
- # Display resume text if file is uploaded
424
- resume_text = None
425
- if uploaded_file is not None:
426
- resume_text = read_resume_file(uploaded_file)
427
- if resume_text:
428
- with st.expander("View Resume Text"):
429
- st.text(resume_text[:1000] + ("..." if len(resume_text) > 1000 else ""))
430
-
431
- # Analysis button
432
- analyze_button = st.button("Analyze Match", type="primary")
433
-
434
- # Main analysis logic
435
- if analyze_button:
436
- if not resume_text or not job_description:
437
- st.error("Please upload a resume file and provide a job description.")
438
  else:
439
- with st.spinner("Analyzing resume and job match..."):
440
- # Record start time
441
- start_time = time.time()
442
-
443
- # Load models (uses caching so only loads once)
444
- models = load_models()
445
-
446
- # Parse resume and job description
447
- resume_data = parse_resume(resume_text, models)
448
- job_data = parse_job_description(job_description)
449
-
450
- # Calculate match score
451
- match_scores = calculate_match_score(resume_data, job_data)
452
-
453
- # Generate assessment
454
- assessment, fit_status = generate_assessment(resume_data, job_data, match_scores, models)
455
-
456
- # Calculate execution time
457
- execution_time = time.time() - start_time
458
-
459
- # Display results
460
- st.success(f"Analysis complete in {execution_time:.2f} seconds")
461
-
462
- # Display fit status prominently
463
- st.markdown(f"## Overall Result: {fit_status}")
464
-
465
- # Display match score
466
- st.subheader("Match Score")
467
- score_col1, score_col2, score_col3 = st.columns(3)
468
-
469
- with score_col1:
470
- st.metric("Overall Match", f"{match_scores['overall']}%")
471
-
472
- with score_col2:
473
- st.metric("Technical Skills", f"{match_scores['technical_skills']['score']}%")
474
-
475
- with score_col3:
476
- st.metric("Experience Match", f"{match_scores['experience']['score']}%")
477
-
478
- # Show skills breakdown
479
- st.subheader("Skills Breakdown")
480
- skill_col1, skill_col2 = st.columns(2)
481
-
482
- with skill_col1:
483
- st.markdown("##### Matched Skills")
484
- if match_scores["technical_skills"]["matched"]:
485
- for skill in match_scores["technical_skills"]["matched"]:
486
- st.markdown(f"✅ {skill}")
487
- else:
488
- st.markdown("No matched skills found")
489
-
490
- with skill_col2:
491
- st.markdown("##### Missing Skills")
492
- if match_scores["technical_skills"]["missing"]:
493
- for skill in match_scores["technical_skills"]["missing"]:
494
- st.markdown(f"❌ {skill}")
495
- else:
496
- st.markdown("No missing skills detected")
497
-
498
- # Show experience comparison
499
- st.subheader("Experience")
500
- exp_col1, exp_col2 = st.columns(2)
501
-
502
- with exp_col1:
503
- st.markdown(f"**Required**: {job_data['requirements']['years_experience']} years")
504
-
505
- with exp_col2:
506
- st.markdown(f"**Candidate has**: {resume_data['experience']['years']} years")
507
-
508
- # Display detailed assessment
509
- st.subheader("Expert Assessment")
510
- st.markdown(assessment)
511
-
512
- # Show parsed data (expandable)
513
- with st.expander("View Parsed Data"):
514
- col1, col2 = st.columns(2)
515
- with col1:
516
- st.subheader("Resume Data")
517
- st.json(resume_data)
518
- with col2:
519
- st.subheader("Job Requirements")
520
- st.json(job_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
 
2
  import io
3
+ import streamlit as st
4
  import docx
5
+ import docx2txt
6
+ import tempfile
7
+ import time
8
+ import re
9
+ import math
10
+ import concurrent.futures
11
+ import pandas as pd
12
+ from functools import lru_cache
13
+ from transformers import pipeline
14
 
15
+ # Set page title and hide sidebar
16
  st.set_page_config(
17
  page_title="Resume-Job Fit Analyzer",
18
+ initial_sidebar_state="collapsed"
 
19
  )
20
 
21
+ # Hide sidebar completely with custom CSS
22
+ st.markdown("""
23
+ <style>
24
+ [data-testid="collapsedControl"] {display: none;}
25
+ section[data-testid="stSidebar"] {display: none;}
26
+ </style>
27
+ """, unsafe_allow_html=True)
 
 
 
 
 
28
 
29
+ #####################################
30
+ # Preload Models
31
+ #####################################
32
+ @st.cache_resource(show_spinner=True)
33
  def load_models():
34
+ """Load models at startup"""
35
+ with st.spinner("Loading AI models... This may take a minute on first run."):
36
+ models = {}
37
+ # Use bart-base for summarization
38
+ models['summarizer'] = pipeline(
39
+ "summarization",
40
+ model="facebook/bart-base",
41
+ max_length=100,
42
+ truncation=True
43
+ )
44
+
45
+ # Load model for evaluation
46
+ models['evaluator'] = pipeline(
47
+ "text2text-generation",
48
+ model="Qwen/Qwen2.5-0.5B-Instruct",
49
+ max_length=300
50
+ )
51
+
52
+ return models
53
 
54
+ # Preload models immediately when app starts
55
+ models = load_models()
56
+
57
+ #####################################
58
+ # Function: Extract Text from File
59
+ #####################################
60
+ @st.cache_data(show_spinner=False)
61
+ def extract_text_from_file(file_obj):
62
+ """
63
+ Extract text from .docx and .doc files.
64
+ Returns the extracted text or an error message if extraction fails.
65
+ """
66
+ filename = file_obj.name
67
+ ext = os.path.splitext(filename)[1].lower()
68
+ text = ""
69
+
70
+ if ext == ".docx":
71
  try:
72
+ document = docx.Document(file_obj)
73
+ text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
74
  except Exception as e:
75
+ text = f"Error processing DOCX file: {e}"
76
+ elif ext == ".doc":
 
 
 
77
  try:
78
+ # For .doc files, we need to save to a temp file
79
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
80
+ temp_file.write(file_obj.getvalue())
81
+ temp_path = temp_file.name
82
+
83
+ # Use docx2txt which is generally faster
 
 
 
 
84
  try:
85
+ text = docx2txt.process(temp_path)
86
+ except Exception:
87
+ text = "Could not process .doc file. Please convert to .docx format."
88
+
89
+ # Clean up temp file
90
+ os.unlink(temp_path)
 
 
 
 
 
 
 
 
91
  except Exception as e:
92
+ text = f"Error processing DOC file: {e}"
93
+ elif ext == ".txt":
94
+ try:
95
+ text = file_obj.getvalue().decode("utf-8")
96
+ except Exception as e:
97
+ text = f"Error processing TXT file: {e}"
98
  else:
99
+ text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
100
+
101
+ # Limit text size for faster processing
102
+ return text[:15000] if text else text
103
 
104
+ #####################################
105
+ # Functions for Information Extraction
106
+ #####################################
107
+
108
+ # Cache the extraction functions to avoid reprocessing
109
+ @lru_cache(maxsize=32)
110
+ def extract_name(text_start):
111
+ """Extract candidate name from the beginning of resume text"""
112
+ # Only use the first 500 characters to speed up processing
113
+ lines = text_start.split('\n')
114
+
115
+ # Check first few non-empty lines for potential names
116
+ potential_name_lines = [line.strip() for line in lines[:5] if line.strip()]
117
+
118
+ if potential_name_lines:
119
+ # First line is often the name if it's short and doesn't contain common headers
120
+ first_line = potential_name_lines[0]
121
+ if 5 <= len(first_line) <= 40 and not any(x in first_line.lower() for x in ["resume", "cv", "curriculum", "vitae", "profile"]):
122
+ return first_line
123
+
124
+ # Look for lines that might contain a name
125
+ for line in potential_name_lines[:3]:
126
+ if len(line.split()) <= 4 and not any(x in line.lower() for x in ["address", "phone", "email", "resume", "cv"]):
127
+ return line
128
+
129
+ return "Unknown (please extract from resume)"
130
+
131
+ def extract_age(text):
132
+ """Extract candidate age from resume text"""
133
+ # Simplified: just check a few common patterns
134
+ age_patterns = [
135
+ r'age:?\s*(\d{1,2})',
136
+ r'(\d{1,2})\s*years\s*old',
137
+ ]
138
+
139
  text_lower = text.lower()
140
+ for pattern in age_patterns:
141
+ matches = re.search(pattern, text_lower)
142
+ if matches:
143
+ return matches.group(1)
144
+
145
+ return "Not specified"
146
+
147
+ def extract_industry(text, base_summary):
148
+ """Extract expected job industry from resume"""
149
+ # Simplified industry keywords focused on the most common ones
150
+ industry_keywords = {
151
+ "technology": ["software", "programming", "developer", "IT", "tech", "computer"],
152
+ "finance": ["banking", "financial", "accounting", "finance", "analyst"],
153
+ "healthcare": ["medical", "health", "hospital", "clinical", "nurse", "doctor"],
154
+ "education": ["teaching", "teacher", "professor", "education", "university"],
155
+ "marketing": ["marketing", "advertising", "digital marketing", "social media"],
156
+ "engineering": ["engineer", "engineering"],
157
+ "data science": ["data science", "machine learning", "AI", "analytics"],
158
+ "information systems": ["information systems", "ERP", "systems management"]
159
+ }
160
+
161
+ # Count occurrences of industry keywords - using the summary to speed up
162
+ combined_text = base_summary.lower()
163
+
164
+ counts = {}
165
+ for industry, keywords in industry_keywords.items():
166
+ counts[industry] = sum(combined_text.count(keyword.lower()) for keyword in keywords)
167
+
168
+ # Get the industry with the highest count
169
+ if counts:
170
+ likely_industry = max(counts.items(), key=lambda x: x[1])
171
+ if likely_industry[1] > 0:
172
+ return likely_industry[0].capitalize()
173
+
174
+ # Check for educational background that might indicate industry
175
+ degrees = ["computer science", "business", "engineering", "medicine", "education", "finance", "marketing"]
176
 
177
+ for degree in degrees:
178
+ if degree in combined_text:
179
+ return f"{degree.capitalize()}-related field"
180
+
181
+ return "Not clearly specified"
182
+
183
+ def extract_skills_and_work(text):
184
+ """Extract both skills and work experience at once to save processing time"""
185
+ # Common skill categories - reduced keyword list for speed
186
+ skill_categories = {
187
+ "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go"],
188
+ "Data Science": ["Machine Learning", "Data Analysis", "Statistics", "TensorFlow", "PyTorch", "AI", "Algorithms"],
189
+ "Database": ["SQL", "MySQL", "MongoDB", "Database", "NoSQL", "PostgreSQL"],
190
+ "Web Development": ["React", "Angular", "Node.js", "Frontend", "Backend", "Full-Stack"],
191
+ "Software Development": ["Agile", "Scrum", "Git", "DevOps", "Docker", "System Design"],
192
+ "Cloud": ["AWS", "Azure", "Google Cloud", "Cloud Computing"],
193
+ "Security": ["Cybersecurity", "Network Security", "Encryption", "Security"],
194
+ "Business": ["Project Management", "Business Analysis", "Leadership", "Teamwork"],
195
+ "Design": ["UX/UI", "User Experience", "Design Thinking", "Adobe"]
196
+ }
197
+
198
+ # Work experience extraction
199
+ work_headers = [
200
+ "work experience", "professional experience", "employment history",
201
+ "work history", "experience"
202
  ]
203
 
204
+ next_section_headers = [
205
+ "education", "skills", "certifications", "projects", "achievements"
 
206
  ]
207
 
208
+ # Process everything at once
209
+ lines = text.split('\n')
210
+ text_lower = text.lower()
211
 
212
+ # Skills extraction
213
+ found_skills = []
214
+ for category, skills in skill_categories.items():
215
+ category_skills = []
216
+ for skill in skills:
217
+ if skill.lower() in text_lower:
218
+ category_skills.append(skill)
219
+
220
+ if category_skills:
221
+ found_skills.append(f"{category}: {', '.join(category_skills)}")
222
 
223
+ # Work experience extraction - simplified approach
224
+ work_section = []
225
+ in_work_section = False
 
226
 
227
+ for idx, line in enumerate(lines):
228
+ line_lower = line.lower().strip()
229
+
230
+ # Start of work section
231
+ if not in_work_section:
232
+ if any(header in line_lower for header in work_headers):
233
+ in_work_section = True
234
+ continue
235
+ # End of work section
236
+ elif in_work_section:
237
+ if any(header in line_lower for header in next_section_headers):
238
+ break
239
+
240
+ if line.strip():
241
+ work_section.append(line.strip())
242
 
243
+ # Simplified work formatting
244
+ if not work_section:
245
+ work_experience = "Work experience not clearly identified"
246
+ else:
247
+ # Just take the first 5-7 lines of the work section as a summary
248
+ work_lines = []
249
+ company_count = 0
250
+ current_company = ""
251
+
252
+ for line in work_section:
253
+ # New company entry often has a date
254
+ if re.search(r'(19|20)\d{2}', line):
255
+ company_count += 1
256
+ if company_count <= 3: # Limit to 3 most recent positions
257
+ current_company = line
258
+ work_lines.append(f"**{line}**")
259
+ else:
260
+ break
261
+ elif company_count <= 3 and len(work_lines) < 10: # Limit total lines
262
+ work_lines.append(line)
263
+
264
+ work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
265
+
266
+ skills_formatted = "\n• " + "\n• ".join(found_skills) if found_skills else "No specific technical skills clearly identified"
267
+
268
+ return skills_formatted, work_experience
269
+
270
+ #####################################
271
+ # Function: Summarize Resume Text
272
+ #####################################
273
def summarize_resume_text(resume_text):
    """
    Build the structured candidate summary for a resume.

    The leading part of the resume is condensed with the pre-loaded
    summarizer model; name, age, industry and skills/work history are then
    extracted on a thread pool and assembled into a labelled text block.

    Returns a tuple ``(formatted_summary, execution_time)`` where
    ``execution_time`` is the elapsed wall-clock time in seconds.
    """
    started_at = time.time()

    # Only the first characters are summarized, for speed and to stay within
    # the model's input limit.
    max_input_length = 1024
    excerpt = resume_text[:max_input_length]
    summarizer_output = models['summarizer'](excerpt)
    base_summary = summarizer_output[0]['summary_text']

    # The extractors are independent of each other, so run them side by side.
    with concurrent.futures.ThreadPoolExecutor() as pool:
        pending = {
            "name": pool.submit(extract_name, resume_text[:500]),  # name sits near the top
            "age": pool.submit(extract_age, resume_text),
            "industry": pool.submit(extract_industry, resume_text, base_summary),
            "skills_work": pool.submit(extract_skills_and_work, resume_text),
        }

        name = pending["name"].result()
        age = pending["age"].result()
        industry = pending["industry"].result()
        skills, work_experience = pending["skills_work"].result()

    # Assemble the labelled summary block.
    formatted_summary = (
        f"Name: {name}\n"
        f"Age: {age}\n"
        f"Expected Job Industry: {industry}\n\n"
        f"Previous Work Experience: {work_experience}\n\n"
        f"Skills: {skills}"
    )

    elapsed = time.time() - started_at
    return formatted_summary, elapsed
310
+
311
+ #####################################
312
+ # Function: Extract Job Requirements
313
+ #####################################
314
def extract_job_requirements(job_description):
    """
    Extract key requirements and skills from a job description.

    Args:
        job_description: Free-text job posting. ``None`` or an empty string
            is treated as an empty posting.

    Returns:
        A dict with the keys:
            "title"            - best-guess job title, or "Not specified"
            "years_experience" - required years as an int (0 when not stated)
            "technical_skills" - known technical skills mentioned (no duplicates)
            "soft_skills"      - known soft skills mentioned
            "education"        - capitalized education phrases found
    """
    # Common technical skill categories to look for
    tech_skill_categories = {
        "programming_languages": ["Python", "Java", "C++", "JavaScript", "TypeScript", "Go", "Rust", "SQL", "Ruby", "PHP", "Swift", "Kotlin"],
        "web_technologies": ["React", "Angular", "Vue", "Node.js", "HTML", "CSS", "Django", "Flask", "Spring", "REST API", "GraphQL"],
        "data_tech": ["Machine Learning", "TensorFlow", "PyTorch", "Data Science", "AI", "Big Data", "Deep Learning", "NLP", "Computer Vision"],
        "cloud_devops": ["AWS", "Azure", "GCP", "Docker", "Kubernetes", "CI/CD", "Jenkins", "GitHub Actions", "Terraform", "Serverless"],
        "database": ["SQL", "MySQL", "PostgreSQL", "MongoDB", "Redis", "Elasticsearch", "DynamoDB", "Cassandra"],
    }

    # Common soft skills to look for
    soft_skills = ["Communication", "Leadership", "Teamwork", "Problem-solving", "Critical thinking", "Adaptability", "Creativity", "Time management"]

    # All matching is done on lower-cased text.
    clean_job_text = (job_description or "").lower()

    def mentions(term):
        # Look-arounds instead of \b: "\b" after a trailing "+" or "." only
        # matches when a word character follows, so "C++" could never be
        # found. For purely alphanumeric terms this is equivalent to \b...\b.
        pattern = r'(?<!\w)' + re.escape(term.lower()) + r'(?!\w)'
        return re.search(pattern, clean_job_text) is not None

    # Extract job title. Each entry is (pattern, index of the group holding
    # the title): in the "hiring ..." pattern group 1 is the verb and the
    # title is group 2.
    title_patterns = [
        (r'^([^:.\n]+?)(position|role|job|opening|vacancy)', 1),
        (r'^([^:.\n]+?)\n', 1),
        (r'(hiring|looking for(?: a| an)?|recruiting)(?: a| an)? ([^:.\n]+?)(:-|[.:]|\n|$)', 2),
    ]

    job_title = "Not specified"
    for pattern, title_group in title_patterns:
        title_match = re.search(pattern, clean_job_text)
        if not title_match:
            continue
        potential_title = title_match.group(title_group).strip()
        if 3 <= len(potential_title) <= 50:  # Reasonable title length
            job_title = potential_title.capitalize()
            break

    # Extract years of experience (singular "year"/"yr" accepted as well).
    exp_patterns = [
        r'(\d+)(?:\+)?\s*(?:years?|yrs?)(?:\s*of)?\s*(?:experience|exp)',
        r'experience\s*(?:of)?\s*(\d+)(?:\+)?\s*(?:years?|yrs?)',
    ]

    years_required = 0
    for pattern in exp_patterns:
        exp_match = re.search(pattern, clean_job_text)
        if exp_match:
            # (\d+) always parses as an int, so no exception handling is needed.
            years_required = int(exp_match.group(1))
            break

    # Extract technical skills. A skill listed in several categories
    # (e.g. "SQL") is reported once, in first-seen order.
    all_tech_skills = []
    for skills in tech_skill_categories.values():
        for skill in skills:
            if skill not in all_tech_skills and mentions(skill):
                all_tech_skills.append(skill)

    # Extract soft skills
    found_soft_skills = [skill for skill in soft_skills if mentions(skill)]

    # Extract educational requirements. The alternatives are wrapped in
    # look-arounds so short forms such as "bs"/"ms" do not match inside
    # ordinary words ("jobs", "systems").
    edu_patterns = [
        r"bachelor['’]?s degree|bs|b\.s\.",
        r"master['’]?s degree|ms|m\.s\.",
        r"phd|ph\.d\.|doctorate",
        r"mba|m\.b\.a\.",
    ]

    education_required = []
    for pattern in edu_patterns:
        edu_match = re.search(r'(?<!\w)(?:' + pattern + r')(?!\w)', clean_job_text)
        if edu_match:
            education_required.append(edu_match.group(0).capitalize())

    # Format the job requirements
    job_requirements = {
        "title": job_title,
        "years_experience": years_required,
        "technical_skills": all_tech_skills,
        "soft_skills": found_soft_skills,
        "education": education_required,
    }

    return job_requirements
409
 
410
+ #####################################
411
+ # Function: Analyze Job Fit
412
+ #####################################
413
def _keyword_in_text(keyword, text, whole_word):
    """Return True when keyword occurs in the already lower-cased text."""
    needle = keyword.lower()
    if not whole_word:
        return needle in text
    # Look-arounds rather than \b so terms ending in punctuation ("C++") match.
    return re.search(r'(?<!\w)' + re.escape(needle) + r'(?!\w)', text) is not None


def _estimate_experience_years(resume_summary):
    """Estimate years of experience from an explicit statement or from dated work history."""
    resume_lower = resume_summary.lower()
    year_patterns = [
        r'(\d+)\s*(?:\+)?\s*years?\s*(?:of)?\s*experience',
        r'experience\s*(?:of)?\s*(\d+)\s*(?:\+)?\s*years?',
    ]

    experience_years = 0
    for pattern in year_patterns:
        exp_match = re.search(pattern, resume_lower)
        if exp_match:
            experience_years = int(exp_match.group(1))
            break
    if experience_years:
        return experience_years

    # No explicit figure: add up the year ranges in the work experience section.
    work_exp_match = re.search(r'work experience:(.*?)(?=\n\n|$)', resume_summary, re.IGNORECASE | re.DOTALL)
    if not work_exp_match:
        return 0

    work_text = work_exp_match.group(1).lower()
    year_ranges = re.findall(r'(\d{4})\s*[-–—]\s*(\d{4}|present|current)', work_text)

    # "present"/"current" means the year the analysis is run.
    current_year = time.localtime().tm_year
    total_years = 0
    for start_text, end_text in year_ranges:
        end_year = int(end_text) if end_text.isdigit() else current_year
        # Clamp so a reversed or mistyped range cannot subtract experience.
        total_years += max(0, end_year - int(start_text))
    return total_years


def analyze_job_fit(resume_summary, job_description):
    """
    Analyze how well the candidate fits the job requirements with detailed category breakdowns.

    Args:
        resume_summary: Structured resume summary text.
        job_description: Free-text job posting.

    Returns:
        A tuple ``(assessment, match_percentage, category_details,
        job_requirements, execution_time)``:
            assessment        - text starting with "<FIT|NOT FIT>: This candidate"
            match_percentage  - overall score as an int clamped to 35..95
            category_details  - per-category dicts (raw_percentage,
                                adjusted_score, matching_keywords,
                                total_keywords, matches) plus an "experience"
                                entry (raw_percentage, adjusted_score,
                                candidate_years, required_years)
            job_requirements  - result of extract_job_requirements()
            execution_time    - elapsed seconds
    """
    start_time = time.time()

    # Extract job requirements
    job_requirements = extract_job_requirements(job_description)
    resume_lower = resume_summary.lower()

    # category -> (keywords, whole_word). Keywords taken from the job posting
    # are exact terms and are matched as whole words (so "Go" does not match
    # "google"); the generic lists are stems and keep substring matching so
    # inflected forms ("analyzed", "teams") still count.
    keyword_groups = {
        "technical_skills": (job_requirements["technical_skills"], True),
        "soft_skills": (job_requirements["soft_skills"], True),
        "education": (job_requirements["education"], True),
        "problem_solving": (["problem solving", "analytical", "critical thinking", "troubleshooting", "debugging",
                             "optimization", "solution", "resolve", "analyze"], False),
        "domain_knowledge": (["industry", "experience", "expertise", "knowledge", "familiar with", "understanding of"], False),
        "collaboration": (["team", "collaborate", "cooperation", "cross-functional", "communication", "stakeholder"], False),
    }

    # Category weights with descriptive labels
    category_weights = {
        "technical_skills": {"weight": 0.40, "label": "Technical Skills"},
        "soft_skills": {"weight": 0.15, "label": "Soft Skills"},
        "education": {"weight": 0.10, "label": "Education"},
        "problem_solving": {"weight": 0.15, "label": "Problem Solving"},
        "domain_knowledge": {"weight": 0.10, "label": "Domain Knowledge"},
        "collaboration": {"weight": 0.10, "label": "Collaboration"},
    }

    # Calculate category scores and store detailed information
    category_scores = {}
    category_details = {}
    found_skills = {}

    for category, (keywords, whole_word) in keyword_groups.items():
        category_matches = [kw for kw in keywords if _keyword_in_text(kw, resume_lower, whole_word)]
        found_skills[category] = category_matches

        matches = len(category_matches)
        total_keywords = len(keywords)

        # An empty keyword list yields 0 for every figure below.
        raw_percentage = int((matches / max(1, total_keywords)) * 100)

        if matches == 0:
            adjusted_score = 0.0
        else:
            # Logarithmic scaling (diminishing returns), capped below a perfect score.
            adjusted_score = min(0.95, math.log(matches + 1) / math.log(min(total_keywords, 8) + 1))

        category_scores[category] = adjusted_score
        category_details[category] = {
            "raw_percentage": raw_percentage,
            "adjusted_score": int(adjusted_score * 100),
            "matching_keywords": category_matches,
            "total_keywords": total_keywords,
            "matches": matches,
        }

    # Check for years of experience match
    years_required = job_requirements["years_experience"]
    experience_years = _estimate_experience_years(resume_summary)

    if years_required > 0:
        exp_score = min(1.0, experience_years / years_required)
    else:
        exp_score = 1.0  # If no specific years required, assume full match

    category_scores["experience"] = exp_score
    category_details["experience"] = {
        "raw_percentage": int(exp_score * 100),
        "adjusted_score": int(exp_score * 100),
        "candidate_years": experience_years,
        "required_years": years_required,
    }

    # Weighted keyword score, then blend in experience as a 20% modifier.
    weighted_score = sum(category_scores[cat] * info["weight"] for cat, info in category_weights.items())
    weighted_score = (weighted_score * 0.8) + (category_scores["experience"] * 0.2)

    # Apply final curve to keep scores in a realistic range
    match_percentage = min(95, max(35, int(weighted_score * 100)))

    # Determine fit/not fit status
    fit_status = "FIT" if match_percentage >= 70 else "NOT FIT"

    # Up to five distinct matched keywords, in a stable (first-seen) order.
    all_matching_skills = [kw for matched in found_skills.values() for kw in matched]
    top_skills = list(dict.fromkeys(all_matching_skills))[:5]
    skills_text = ", ".join(top_skills) if top_skills else "limited relevant skills"

    # Strongest and weakest weighted categories for more specific feedback
    categories_sorted = sorted(
        [(cat, category_details[cat]["adjusted_score"]) for cat in category_weights if cat in category_details],
        key=lambda item: item[1],
        reverse=True,
    )
    top_category = category_weights[categories_sorted[0][0]]["label"] if categories_sorted else "Technical Skills"
    weak_category = category_weights[categories_sorted[-1][0]]["label"] if categories_sorted else "Domain Knowledge"

    opening = f"{fit_status}: This candidate"

    # Create a prompt for the evaluation model
    prompt = (
        "\n"
        f"Generate a professional expert assessment for a job candidate applying for the position: {job_requirements['title']}.\n"
        f"Skills detected in candidate: {skills_text}.\n"
        f"Strongest area: {top_category} ({categories_sorted[0][1]}%).\n"
        f"Weakest area: {weak_category} ({categories_sorted[-1][1]}%).\n"
        f"Overall match: {match_percentage}%.\n"
        f"Fit status: {fit_status}\n"
        "\n"
        "Write an evaluative assessment that analyzes the candidate's fit for this position.\n"
        f"Start with \"{opening}\" and provide a professional evaluation of their fit.\n"
        "\n"
        f"{opening}"
    )

    def fallback():
        # Deterministic template used whenever the model output is unusable.
        return generate_fallback_assessment(
            resume_summary,
            job_requirements,
            match_percentage,
            top_skills,
            top_category,
            weak_category,
            fit_status,
        )

    # Phrases indicating the model repeated instructions instead of assessing.
    instruction_echoes = (
        "actionable advice",
        "include specific",
        "make an assessment",
        "evaluate their",
        "assess their",
        "provide specific areas",
    )

    best_assessment = None
    try:
        # Generate the assessment using the evaluation model
        assessment_results = models['evaluator'](
            prompt,
            max_length=300,
            do_sample=True,
            temperature=0.75,
            num_return_sequences=3,
        )

        prompt_echo = prompt.strip()
        for result in assessment_results:
            raw_text = result['generated_text'].strip()

            if raw_text.startswith(prompt_echo):
                # The model returned prompt + continuation. Keep only the
                # continuation; searching for the opening would otherwise hit
                # the quoted copy inside the instruction line.
                text = opening + raw_text[len(prompt_echo):]
            else:
                start_idx = raw_text.find(opening)
                if start_idx == -1:
                    continue
                text = raw_text[start_idx:]

            # Check if it's actually an assessment (not just instructions)
            if len(text) > 50 and not any(phrase in text.lower() for phrase in instruction_echoes):
                best_assessment = text
                break
    except Exception:
        # Best effort: any generation failure falls back to the template below.
        best_assessment = None

    assessment = best_assessment if best_assessment else fallback()

    # Final cleanup of instruction fragments the model may have repeated
    leak_patterns = (
        r'include specific actionable advice.*?improvement\.',
        r'make an assessment.*?resume\.',
        r'evaluate their technical skills.*?position\.',
        r'assess their strengths.*?contributions',
        r'provide specific areas.*?needed',
        r'give an overall.*?position',
    )
    for leak_pattern in leak_patterns:
        assessment = re.sub(leak_pattern, '', assessment, flags=re.DOTALL | re.IGNORECASE)

    # Clean up any double spaces, newlines, etc.
    assessment = re.sub(r'\s+', ' ', assessment).strip()

    # If cleaning removed too much text, use the fallback
    if len(assessment) < 50 or not assessment.startswith(opening):
        assessment = fallback().strip()

    # Make sure percentages are consistent
    # NOTE(review): the trailing \b only matches when a word character directly
    # follows "%", so this substitution rarely fires; kept as-is because
    # replacing every percentage would also overwrite per-category figures.
    assessment = re.sub(r'\b\d{1,2}%\b', f"{match_percentage}%", assessment)

    execution_time = time.time() - start_time

    return assessment, match_percentage, category_details, job_requirements, execution_time
701
 
702
  # Generate fallback assessment
703
def generate_fallback_assessment(resume_summary, job_requirements, match_percentage, top_skills, top_category, weak_category, fit_status):
    """
    Generate a fallback assessment if the model fails.

    Args:
        resume_summary: Unused; kept so existing callers keep working.
        job_requirements: Dict whose "title" entry names the position.
        match_percentage: Overall match score quoted in the text.
        top_skills: Matched skills to mention (may be empty or None).
        top_category: Label of the strongest category.
        weak_category: Label of the weakest category.
        fit_status: "FIT" selects the positive template; any other value
            selects the limited-alignment template.

    Returns:
        A single paragraph starting with "<fit_status>: This candidate",
        without leading or trailing whitespace.
    """
    job_title = job_requirements.get("title") or "Not specified"
    # Avoid the awkward phrase "the Not specified position" when no title was found.
    role = "this position" if job_title == "Not specified" else f"the {job_title} position"
    skills_text = ", ".join(top_skills) if top_skills else "relevant skills"

    if fit_status == "FIT":
        assessment = (
            f"{fit_status}: This candidate demonstrates strong alignment with {role}, "
            f"achieving an overall match score of {match_percentage}%. "
            f"Their proficiency in {skills_text} positions them well to contribute effectively, "
            f"with particular strength in {top_category}. "
            "The candidate's experience level is suitable for the role's requirements. "
            f"To maximize their success, they could consider developing expertise in {weak_category} "
            "to round out their skill set for this position."
        )
    else:
        assessment = (
            f"{fit_status}: This candidate currently shows limited alignment with {role}, "
            f"with an overall match score of {match_percentage}%. "
            f"While they demonstrate some capabilities in {top_category} and have experience with {skills_text}, "
            f"they would need to develop expertise in {weak_category} to be more competitive for this role. "
            "The candidate may become a stronger fit by focusing on these skill gaps and gaining more relevant "
            "experience in the key requirements for this position."
        )

    return assessment
716
 
717
+ #####################################
718
+ # Main Streamlit Interface
719
+ #####################################
720
st.title("Resume-Job Fit Analyzer")
st.markdown(
    "\n"
    "Upload your resume file in **.docx**, **.doc**, or **.txt** format and enter a job description to see how well you match with the job requirements. The app performs the following tasks:\n"
    "1. Extracts text from your resume.\n"
    "2. Uses AI to generate a structured candidate summary.\n"
    "3. Analyzes how well your profile fits the specific job requirements.\n"
)

# Resume upload
uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])

# Job description input
job_description = st.text_area("Enter Job Description", height=200, placeholder="Paste the job description here...")

# Display names for the score-breakdown table, keyed by category id.
CATEGORY_LABELS = {
    "technical_skills": "Technical Skills",
    "soft_skills": "Soft Skills",
    "education": "Education",
    "problem_solving": "Problem Solving",
    "domain_knowledge": "Domain Knowledge",
    "collaboration": "Collaboration",
    "experience": "Experience",
}

# The button is only rendered (and evaluated) once both inputs are present.
if uploaded_file is not None and job_description and st.button("Analyze Job Fit"):
    # Placeholders for the progress bar and the current-step message
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Step 1: Extract text
    status_text.text("Step 1/3: Extracting text from resume...")
    resume_text = extract_text_from_file(uploaded_file)
    progress_bar.progress(25)

    if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx, .doc, or .txt file.":
        # Extraction reports failures as message strings; show it and stop.
        status_text.empty()
        st.error(resume_text)
    elif not resume_text.strip():
        # Nothing to analyze; avoid passing blank text to the models.
        status_text.empty()
        st.error("No text could be extracted from the uploaded resume.")
    else:
        # Step 2: Generate summary
        status_text.text("Step 2/3: Analyzing resume and generating summary...")
        summary, summarization_time = summarize_resume_text(resume_text)
        progress_bar.progress(50)

        # Display summary
        st.subheader("Your Resume Summary")
        st.markdown(summary)
        st.info(f"Summary generated in {summarization_time:.2f} seconds")

        # Step 3: Generate job fit assessment
        status_text.text("Step 3/3: Evaluating job fit...")
        assessment, match_percentage, category_details, job_requirements, assessment_time = analyze_job_fit(summary, job_description)
        progress_bar.progress(100)

        # Clear status messages
        status_text.empty()

        # Display job fit results
        st.subheader("Job Fit Assessment")

        # Display match percentage with appropriate color and emoji
        if match_percentage >= 85:
            st.success(f"**Overall Job Match Score:** {match_percentage}% 🌟")
        elif match_percentage >= 70:
            st.success(f"**Overall Job Match Score:** {match_percentage}% ✅")
        elif match_percentage >= 50:
            st.warning(f"**Overall Job Match Score:** {match_percentage}% ⚠️")
        else:
            st.error(f"**Overall Job Match Score:** {match_percentage}% 🔍")

        # Add detailed score breakdown
        st.markdown("### Score Breakdown")

        # One table row per scored category
        breakdown_data = []
        for category, details in category_details.items():
            # Unknown categories fall back to a title-cased id instead of raising.
            label = CATEGORY_LABELS.get(category, category.replace("_", " ").title())

            if category == "experience":
                matching_info = f"{details['candidate_years']} years (Required: {details['required_years']} years)"
            else:
                matched = details.get("matching_keywords")
                matching_info = ", ".join(matched[:3]) if matched else "None detected"

            breakdown_data.append({
                "Category": label,
                "Score": f"{details['adjusted_score']}%",
                "Matching Items": matching_info,
            })

        # Using "Category" as the index is what removes the numeric index
        # column; resetting it afterwards would bring the numbers back.
        breakdown_df = pd.DataFrame(breakdown_data)
        st.table(breakdown_df.set_index("Category"))

        # Show a note about how scores are calculated
        with st.expander("How are these scores calculated?"):
            st.markdown(
                "- **Technical Skills** (40% of total): Evaluates programming languages, software tools, and technical requirements\n"
                "- **Soft Skills** (15% of total): Assesses communication, teamwork, and interpersonal abilities\n"
                "- **Education** (10% of total): Compares educational requirements with candidate's background\n"
                "- **Problem Solving** (15% of total): Measures analytical thinking and approach to challenges\n"
                "- **Domain Knowledge** (10% of total): Evaluates industry-specific experience and knowledge\n"
                "- **Collaboration** (10% of total): Assesses team skills and cross-functional collaboration\n"
                "- **Experience** (20% overall modifier): Years of relevant experience compared to job requirements\n"
                "\n"
                "Scores are calculated based on keyword matches in your resume, with diminishing returns applied (first few skills matter more than later ones).\n"
            )

        # Display assessment
        st.markdown("### Expert Assessment")
        st.markdown(assessment)

        st.info(f"Assessment completed in {assessment_time:.2f} seconds")

        # Add potential next steps based on the match percentage
        st.subheader("Recommended Next Steps")

        if match_percentage >= 80:
            st.markdown(
                "- Consider applying for this position as you appear to be a strong match\n"
                "- Prepare for technical interviews by focusing on your strongest skills\n"
                "- Review the job description again to prepare for specific interview questions\n"
            )
        elif match_percentage >= 60:
            st.markdown(
                "- Focus on strengthening your weaker areas before applying\n"
                "- Highlight your strongest skills and experience in your cover letter\n"
                "- Consider gaining additional experience or certifications in key required areas\n"
            )
        else:
            st.markdown(
                "- This position may not be the best fit for your current skills and experience\n"
                "- Consider roles that better align with your strengths\n"
                "- If you're set on this type of position, focus on developing skills in the areas mentioned in the job description\n"
            )