root committed on
Commit
c1efc08
·
1 Parent(s): fbc936b
Files changed (2) hide show
  1. app.py +828 -42
  2. requirements.txt +14 -5
app.py CHANGED
@@ -2,12 +2,34 @@ import streamlit as st
2
  import pdfplumber
3
  import io
4
  import spacy
5
- from transformers import pipeline
 
 
 
 
6
  import subprocess
7
  import sys
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  st.set_page_config(
10
- page_title="Resume Screener & Skill Extractor",
11
  page_icon="πŸ“„",
12
  layout="wide"
13
  )
@@ -27,27 +49,71 @@ def download_spacy_model():
27
  def load_models():
28
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
29
  nlp = download_spacy_model()
30
- return summarizer, nlp
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  # Initialize models
33
- summarizer, nlp = load_models()
34
 
35
  # Job descriptions and required skills
36
  job_descriptions = {
37
  "Software Engineer": {
38
  "skills": ["python", "java", "javascript", "sql", "algorithms", "data structures",
39
  "git", "cloud", "web development", "software development", "coding"],
40
- "description": "Looking for software engineers with strong programming skills and experience in software development."
 
 
 
 
 
 
 
41
  },
42
  "Interaction Designer": {
43
  "skills": ["ui", "ux", "user research", "wireframing", "prototyping", "figma",
44
  "sketch", "adobe", "design thinking", "interaction design"],
45
- "description": "Seeking interaction designers with expertise in user experience and interface design."
 
 
 
 
 
 
 
46
  },
47
  "Data Scientist": {
48
  "skills": ["python", "r", "statistics", "machine learning", "data analysis",
49
  "sql", "tensorflow", "pytorch", "pandas", "numpy"],
50
- "description": "Looking for data scientists with strong analytical and machine learning skills."
 
 
 
 
 
 
 
 
51
  }
52
  }
53
 
@@ -58,8 +124,312 @@ def extract_text_from_pdf(pdf_file):
58
  text += page.extract_text() or ""
59
  return text
60
 
61
- def analyze_resume(text, job_title):
62
- # Extract relevant skills
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  doc = nlp(text.lower())
64
  found_skills = []
65
  required_skills = job_descriptions[job_title]["skills"]
@@ -68,6 +438,22 @@ def analyze_resume(text, job_title):
68
  if skill in text.lower():
69
  found_skills.append(skill)
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # Generate summary
72
  chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
73
  summaries = []
@@ -75,17 +461,92 @@ def analyze_resume(text, job_title):
75
  summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
76
  summaries.append(summary)
77
 
78
- return found_skills, " ".join(summaries)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  # Streamlit UI
81
- st.title("πŸ“„ Resume Screener & Skill Extractor")
82
 
83
  # Add description
84
  st.markdown("""
85
- This app helps recruiters analyze resumes by:
86
- - Extracting relevant skills for specific job positions
87
- - Generating a concise summary of the candidate's background
88
- - Identifying skill gaps for the selected role
 
 
 
 
89
  """)
90
 
91
  # Create two columns
@@ -101,54 +562,379 @@ with col2:
101
 
102
  # Show job description
103
  if job_title:
104
- st.info(f"**Required Skills:**\n" +
105
  "\n".join([f"- {skill.title()}" for skill in job_descriptions[job_title]["skills"]]))
106
 
107
  if uploaded_file and job_title:
108
  try:
109
  # Show spinner while processing
110
- with st.spinner("Analyzing resume..."):
111
  # Extract text from PDF
112
  text = extract_text_from_pdf(uploaded_file)
113
 
114
  # Analyze resume
115
- found_skills, summary = analyze_resume(text, job_title)
 
 
 
 
116
 
117
  # Display results in tabs
118
- tab1, tab2, tab3 = st.tabs(["πŸ“Š Skills Match", "πŸ“ Resume Summary", "🎯 Skills Gap"])
 
 
 
 
 
 
 
119
 
120
  with tab1:
121
- # Display matched skills
122
- st.subheader("🎯 Matched Skills")
123
- if found_skills:
124
- for skill in found_skills:
125
- st.success(f"βœ… {skill.title()}")
126
-
127
- # Calculate match percentage
128
- match_percentage = len(found_skills) / len(job_descriptions[job_title]["skills"]) * 100
129
- st.metric("Skills Match", f"{match_percentage:.1f}%")
130
- else:
131
- st.warning("No direct skill matches found.")
132
-
133
- with tab2:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  # Display resume summary
135
  st.subheader("πŸ“ Resume Summary")
136
- st.write(summary)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
  with tab3:
139
- # Display missing skills
140
- st.subheader("πŸ“Œ Skills to Develop")
141
- missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
142
- if skill not in found_skills]
143
- if missing_skills:
144
- for skill in missing_skills:
145
- st.warning(f"βž– {skill.title()}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  else:
147
- st.success("Great! The candidate has all the required skills!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  except Exception as e:
150
  st.error(f"An error occurred while processing the resume: {str(e)}")
 
151
 
152
  # Add footer
153
  st.markdown("---")
154
- st.markdown("Made with ❀️ using Streamlit and Hugging Face")
 
2
  import pdfplumber
3
  import io
4
  import spacy
5
+ import re
6
+ import pandas as pd
7
+ import matplotlib.pyplot as plt
8
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
9
+ from sentence_transformers import SentenceTransformer, util
10
  import subprocess
11
  import sys
12
+ import torch
13
+ import nltk
14
+ from nltk.tokenize import word_tokenize
15
+ from datetime import datetime
16
+ import plotly.express as px
17
+ import plotly.graph_objects as go
18
+ import numpy as np
19
+ from collections import defaultdict
20
+
21
+ # Initialize NLTK
22
@st.cache_resource
def download_nltk_resources():
    """Make sure the NLTK tokenizer data needed by ``word_tokenize`` is present.

    Each resource is looked up locally first and only downloaded when the
    lookup raises ``LookupError``.  Both the legacy ``punkt`` package and the
    ``punkt_tab`` package are checked: recent NLTK releases load the latter,
    older releases load the former.  A failed download is not fatal here;
    ``nltk.download`` reports failure through its return value rather than an
    exception, and tokenization will raise a ``LookupError`` later if the data
    is still missing.
    """
    for resource in ("punkt", "punkt_tab"):
        try:
            nltk.data.find(f"tokenizers/{resource}")
        except LookupError:
            nltk.download(resource)
28
+
29
+ download_nltk_resources()
30
 
31
  st.set_page_config(
32
+ page_title="Comprehensive Resume Screener & Skill Extractor",
33
  page_icon="πŸ“„",
34
  layout="wide"
35
  )
 
49
  def load_models():
50
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
51
  nlp = download_spacy_model()
52
+
53
+ # Load sentence transformer for semantic matching
54
+ try:
55
+ sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
56
+ except Exception as e:
57
+ st.error(f"Failed to load sentence transformer: {str(e)}")
58
+ sentence_model = None
59
+
60
+ # Load Qwen3-8B model for career advice
61
+ try:
62
+ device = "cuda" if torch.cuda.is_available() else "cpu"
63
+ qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")
64
+ qwen_model = AutoModelForCausalLM.from_pretrained(
65
+ "Qwen/Qwen3-8B",
66
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32,
67
+ device_map="auto"
68
+ )
69
+ except Exception as e:
70
+ st.error(f"Failed to load Qwen3-8B model: {str(e)}")
71
+ qwen_tokenizer = None
72
+ qwen_model = None
73
+
74
+ return summarizer, nlp, sentence_model, qwen_tokenizer, qwen_model
75
 
76
  # Initialize models
77
+ summarizer, nlp, sentence_model, qwen_tokenizer, qwen_model = load_models()
78
 
79
  # Job descriptions and required skills
80
  job_descriptions = {
81
  "Software Engineer": {
82
  "skills": ["python", "java", "javascript", "sql", "algorithms", "data structures",
83
  "git", "cloud", "web development", "software development", "coding"],
84
+ "description": "Looking for software engineers with strong programming skills and experience in software development.",
85
+ "semantic_description": """
86
+ We're seeking a talented Software Engineer to design, develop, and maintain high-quality software solutions.
87
+ The ideal candidate has strong programming skills in languages like Python, Java, or JavaScript, and experience with
88
+ SQL databases. You should be proficient in algorithms, data structures, and version control systems like Git.
89
+ Experience with cloud platforms and web development frameworks is a plus. You'll be responsible for the full
90
+ software development lifecycle, from requirements gathering to deployment and maintenance.
91
+ """
92
  },
93
  "Interaction Designer": {
94
  "skills": ["ui", "ux", "user research", "wireframing", "prototyping", "figma",
95
  "sketch", "adobe", "design thinking", "interaction design"],
96
+ "description": "Seeking interaction designers with expertise in user experience and interface design.",
97
+ "semantic_description": """
98
+ We're looking for a creative Interaction Designer to craft intuitive and engaging user experiences.
99
+ You should have expertise in UI/UX design principles and methods, with a portfolio demonstrating your
100
+ ability to conduct user research, create wireframes, and develop interactive prototypes. Proficiency
101
+ with design tools like Figma, Sketch, and Adobe Creative Suite is required. You'll collaborate with
102
+ product managers and developers to iterate on designs based on user feedback and business requirements.
103
+ """
104
  },
105
  "Data Scientist": {
106
  "skills": ["python", "r", "statistics", "machine learning", "data analysis",
107
  "sql", "tensorflow", "pytorch", "pandas", "numpy"],
108
+ "description": "Looking for data scientists with strong analytical and machine learning skills.",
109
+ "semantic_description": """
110
+ We're seeking a skilled Data Scientist to extract insights from complex datasets and build predictive models.
111
+ The ideal candidate has strong programming skills in Python or R, expertise in statistical analysis, and
112
+ experience with machine learning algorithms. You should be proficient in SQL for data extraction and tools
113
+ like TensorFlow or PyTorch for deep learning. Experience with data manipulation libraries like Pandas and NumPy
114
+ is essential. You'll work on projects from exploratory data analysis to model deployment, collaborating with
115
+ stakeholders to solve business problems through data-driven approaches.
116
+ """
117
  }
118
  }
119
 
 
124
  text += page.extract_text() or ""
125
  return text
126
 
127
# Section headers that open the work-history part of a resume.
_WORK_HEADER_RE = re.compile(
    r"WORK EXPERIENCE|PROFESSIONAL EXPERIENCE|EMPLOYMENT HISTORY|EXPERIENCE",
    re.IGNORECASE,
)
# Section headers that are taken to close the work-history part.
_SECTION_END_RE = re.compile(r"EDUCATION|ACADEMIC|QUALIFICATIONS", re.IGNORECASE)

_RESUME_MONTH = (
    r"(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?"
    r"|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)"
)
# Only plausible calendar years, so phone numbers / zip codes are not dates.
_RESUME_YEAR = r"(?:19|20)\d{2}"
# "Month YYYY", "MM/YYYY" or a bare "YYYY".  No capturing groups: every match
# must yield the complete date text.
_RESUME_DATE_RE = re.compile(
    "|".join((
        rf"\b{_RESUME_MONTH}[,\s]+{_RESUME_YEAR}\b",
        rf"\b\d{{1,2}}/{_RESUME_YEAR}\b",
        rf"\b{_RESUME_YEAR}\b",
    )),
    re.IGNORECASE,
)
_JOB_TITLE_RE = re.compile(
    r"(Senior|Lead|Principal|Junior|Associate)?\s*"
    r"(Software Engineer|Developer|Designer|Analyst|Manager|Director|Consultant"
    r"|Specialist|Coordinator|Administrator)",
    re.IGNORECASE,
)


def extract_work_experience(text):
    """Extract job entries (company, title, dates) from resume text.

    The work-experience section is taken to start right after the first
    matching work-history header and to end at the first education-style
    header that follows it (or at the end of the text).  The section is split
    on blank lines and every paragraph of at least 30 characters becomes one
    entry.

    Args:
        text: Plain text of the resume.

    Returns:
        A list of dicts with the keys ``company``, ``title``, ``start_date``,
        ``end_date`` and ``description``.  ``start_date`` is ``"Unknown"``
        when the paragraph contains no date; ``end_date`` is ``"Present"``
        when fewer than two dates are found.  Returns ``[]`` when no
        work-history header is found.
    """
    header = _WORK_HEADER_RE.search(text)
    if header is None:
        return []

    section_start = header.end()
    # Search only *after* the work header: an earlier mention of "education"
    # (e.g. in a summary) must not hide the real section boundary.
    closing = _SECTION_END_RE.search(text, section_start)
    section_end = closing.start() if closing else len(text)
    section = text[section_start:section_end]

    job_entries = []
    for paragraph in re.split(r"\n\s*\n", section):
        body = paragraph.strip()
        # Short paragraphs are unlikely to be job entries.
        if len(body) < 30:
            continue

        # finditer + group(0) returns the full date text; findall would
        # return group contents instead if the pattern ever gained a group.
        dates = [match.group(0) for match in _RESUME_DATE_RE.finditer(body)]
        start_date = dates[0] if dates else "Unknown"
        end_date = dates[-1] if len(dates) > 1 else "Present"

        title_match = _JOB_TITLE_RE.search(body)
        # The optional prefix plus \s* can swallow leading whitespace.
        job_title = title_match.group(0).strip() if title_match else "Unknown Position"

        # Heuristic: the company is whatever remains on the first line of the
        # entry once the title, the dates and separator punctuation are removed.
        company = body.split("\n", 1)[0].replace(job_title, "")
        for date in dates:
            company = company.replace(date, "")
        company = re.sub(r"[,\.\|\-]", " ", company).strip() or "Unknown Company"

        job_entries.append({
            "company": company,
            "title": job_title,
            "start_date": start_date,
            "end_date": end_date,
            "description": paragraph,
        })

    return job_entries
203
+
204
# Phrases that raise a skill above the default "Basic" level, strongest first.
# Phrases such as "familiar with" need no entry: "Basic" is the fallback.
_PROFICIENCY_INDICATORS = (
    ("Advanced", ("expert in", "advanced", "extensive experience", "lead",
                  "architected", "designed", "5+ years", "4+ years")),
    ("Intermediate", ("experience with", "proficient in", "worked with",
                      "2-3 years", "2 years", "3 years")),
)


def estimate_skill_proficiency(text, skill, context_chars=100):
    """Estimate the proficiency level claimed for *skill* in *text*.

    Every whole-word mention of the skill is located (case-insensitively) and
    the text within ``context_chars`` characters on either side of each
    mention is scanned for indicator phrases.  The strongest level found
    around any mention wins.

    Args:
        text: Resume text to scan.
        skill: Skill name, e.g. ``"python"``.
        context_chars: Characters of context inspected on each side of a
            mention.

    Returns:
        ``"Advanced"``, ``"Intermediate"`` or ``"Basic"``; ``None`` when the
        skill is not mentioned as a whole word (so ``"java"`` is not found
        inside ``"javascript"``).
    """
    haystack = text.lower()
    needle = skill.lower().strip()
    if not needle:
        return None

    # Lookarounds rather than \b so skills ending in punctuation ("c++", "c#")
    # still match while substrings of longer words do not.
    mention_re = re.compile(rf"(?<![a-z0-9]){re.escape(needle)}(?![a-z0-9])")
    contexts = [
        haystack[max(0, match.start() - context_chars):match.end() + context_chars]
        for match in mention_re.finditer(haystack)
    ]
    if not contexts:
        return None

    for level, indicators in _PROFICIENCY_INDICATORS:
        if any(indicator in context for context in contexts for indicator in indicators):
            return level

    # Mentioned, but with no stronger indicator nearby.
    return "Basic"
241
+
242
# Seniority weight of words that may appear in a job title.
_SENIORITY_LEVELS = {
    "intern": 1,
    "junior": 2,
    "associate": 3,
    "developer": 4,
    "engineer": 4,
    "designer": 4,
    "analyst": 4,
    "senior": 6,
    "lead": 7,
    "manager": 7,
    "principal": 8,
    "director": 9,
    "vp": 10,
    "cto": 10,
    "cio": 10,
    "ceo": 10,
}


def _seniority_year(date_text):
    """Return the first four-digit number in *date_text* as an int, else None."""
    found = re.search(r"\d{4}", str(date_text or ""))
    return int(found.group(0)) if found else None


def calculate_seniority_score(job_entries):
    """Score seniority from job titles and accumulated years of experience.

    Args:
        job_entries: Iterable of dicts with ``title``, ``start_date`` and
            ``end_date`` keys (missing keys are treated as unknown).

    Returns:
        ``(score, total_years)``.  ``score`` is on a 1-10 scale, rounded to
        one decimal: ``0.6 *`` the highest title weight plus a 1-4 factor for
        the years of experience.  ``total_years`` is the sum of the per-job
        durations (overlapping jobs are counted twice).
    """
    total_years = 0
    highest_seniority = 0

    for job in job_entries:
        start_year = _seniority_year(job.get("start_date"))
        if start_year is not None:
            end_text = job.get("end_date")
            end_year = None if end_text == "Present" else _seniority_year(end_text)
            if end_year is None:
                # Ongoing role, or an end date without a year.
                end_year = datetime.now().year
            years = end_year - start_year
            if 0 <= years <= 30:  # Sanity check against mis-parsed dates
                total_years += years

        # Compare whole words only: a substring test would find "cto" inside
        # "director" and score every director as a C-level executive.
        title_words = set(re.findall(r"[a-z]+", str(job.get("title") or "").lower()))
        for level_title, score in _SENIORITY_LEVELS.items():
            if level_title in title_words and score > highest_seniority:
                highest_seniority = score

    # Years of experience factor: 0-2 years (1), 3-5 (2), 6-10 (3), 11+ (4)
    if total_years >= 11:
        years_factor = 4
    elif total_years >= 6:
        years_factor = 3
    elif total_years >= 3:
        years_factor = 2
    else:
        years_factor = 1

    # Final seniority score, clamped to the 1-10 scale
    seniority_score = min(10, max(1, (highest_seniority * 0.6) + (years_factor * 1.0)))

    return round(seniority_score, 1), total_years
306
+
307
# Phrases that are not fraudulent in themselves but are hard to verify.
_SUSPICIOUS_PHRASES = (
    "self-employed",
    "freelance",
    "consultant",
    "entrepreneur",
    "founder",
    "ceo of own company",
)


def _fraud_year(date_text):
    """Return the first four-digit number in *date_text* as an int, else None."""
    found = re.search(r"\d{4}", str(date_text or ""))
    return int(found.group(0)) if found else None


def _fraud_span(job):
    """Return ``(start_year, end_year)`` for a job entry, or None if undatable.

    An end date of ``"Present"`` (or one without a year) maps to the current year.
    """
    start_text = job.get("start_date", "Unknown")
    end_text = job.get("end_date", "Unknown")
    if "Unknown" in (start_text, end_text):
        return None
    start_year = _fraud_year(start_text)
    if start_year is None:
        return None
    end_year = None if end_text == "Present" else _fraud_year(end_text)
    if end_year is None:
        end_year = datetime.now().year
    return start_year, end_year


def detect_fraud_signals(text, job_entries):
    """Collect resume details that warrant manual verification.

    Three checks run, in this order:

    1. overlapping roles at different companies (year granularity);
    2. presence of hard-to-verify phrases such as "freelance";
    3. gaps of more than one year between dated roles.

    Args:
        text: Full resume text.
        job_entries: List of dicts with ``company``, ``start_date`` and
            ``end_date`` keys, in any order.

    Returns:
        A list of human-readable warning strings (empty when nothing is found).
    """
    fraud_signals = []
    dated_jobs = [(job, _fraud_span(job)) for job in job_entries]

    # 1. Impossible timelines (overlapping roles at different companies)
    for index, (job_a, span_a) in enumerate(dated_jobs):
        if span_a is None:
            continue
        for job_b, span_b in dated_jobs[index + 1:]:
            if span_b is None:
                continue
            company_a = job_a.get("company", "Unknown Company")
            company_b = job_b.get("company", "Unknown Company")
            if company_a == company_b:
                continue
            (a_start, a_end), (b_start, b_end) = span_a, span_b
            starts_inside = (a_start <= b_start < a_end) or (b_start <= a_start < b_end)
            overlap_years = min(a_end, b_end) - max(a_start, b_start)
            # Only years are compared, so "> 0.5" means at least one calendar year.
            if starts_inside and overlap_years > 0.5:
                fraud_signals.append(f"Potential timeline inconsistency: Overlapping roles at {company_a} and {company_b} for {overlap_years:.1f} years")

    # 2. Phrases that may be used to paper over gaps
    text_lower = text.lower()
    for phrase in _SUSPICIOUS_PHRASES:
        if phrase in text_lower:
            # Not all of these are fraudulent, but they warrant verification
            fraud_signals.append(f"Verification recommended: Contains '{phrase}' which may need additional verification")

    # 3. Unexplained employment gaps.  Walk the roles chronologically and
    #    track the latest end year seen, so the result does not depend on the
    #    order in which the caller supplies the entries.
    latest_end = None
    for start_year, end_year in sorted(span for _, span in dated_jobs if span is not None):
        if latest_end is not None and start_year - latest_end > 1:
            fraud_signals.append(f"Employment gap: {latest_end} to {start_year} ({start_year - latest_end} years)")
        latest_end = end_year if latest_end is None else max(latest_end, end_year)

    return fraud_signals
373
+
374
# Typical promotion ladders, keyed by a phrase expected in the current title.
_CAREER_PATHS = {
    "software engineer": ["Senior Software Engineer", "Lead Developer", "Software Architect", "Engineering Manager", "CTO"],
    "developer": ["Senior Developer", "Technical Lead", "Software Architect", "Development Manager", "CTO"],
    "designer": ["Senior Designer", "Lead Designer", "Design Manager", "Creative Director", "VP of Design"],
    "data scientist": ["Senior Data Scientist", "Lead Data Scientist", "Data Science Manager", "Director of Analytics", "Chief Data Officer"],
}
# Title words used to locate the candidate's current rung on a ladder.
_SENIORITY_MARKERS = ("senior", "lead", "manager", "director")


def predict_career_trajectory(job_entries, current_skills):
    """Suggest up to three logical next roles for the candidate.

    Args:
        job_entries: Job dicts with a ``title`` key; the first entry is taken
            as the current role.
        current_skills: Skills found in the resume.  Currently unused; kept
            for interface compatibility.

    Returns:
        A list of role names, or a single-item list holding an explanatory
        message when no ladder matches the current title or the candidate is
        already at the top of the ladder.
    """
    current_role = job_entries[0]["title"].lower() if job_entries else "unknown"

    ladder = next(
        (path for role_key, path in _CAREER_PATHS.items() if role_key in current_role),
        None,
    )
    if ladder is None:
        return ["Career path prediction requires more information"]

    held_markers = [marker for marker in _SENIORITY_MARKERS if marker in current_role]
    has_seniority = bool(held_markers) or any(word in current_role for word in ("vp", "chief"))

    if not has_seniority:
        # Every rung on a ladder is already a promotion, so an entry-level
        # title sits *below* index 0 and the first rung is the next step.
        current_index = -1
    else:
        # First rung sharing a seniority word with the current title; a
        # senior title that matches no rung is treated as holding rung 0.
        current_index = next(
            (index for index, role in enumerate(ladder)
             if any(marker in role.lower() for marker in held_markers)),
            0,
        )

    # Next potential roles (up to 3)
    next_roles = ladder[current_index + 1:current_index + 4]
    if not next_roles:
        next_roles = ["You're at a senior level in your career path. Consider lateral moves or industry specialization."]

    return next_roles
424
+
425
+ def analyze_resume(text, job_title, sentence_model):
426
+ # Extract work experience
427
+ job_entries = extract_work_experience(text)
428
+
429
+ # Sort job entries by start date (most recent first)
430
+ job_entries.sort(key=lambda x: "9999" if x["start_date"] == "Unknown" else x["start_date"], reverse=True)
431
+
432
+ # Extract relevant skills with basic keyword matching
433
  doc = nlp(text.lower())
434
  found_skills = []
435
  required_skills = job_descriptions[job_title]["skills"]
 
438
  if skill in text.lower():
439
  found_skills.append(skill)
440
 
441
+ # Determine skill proficiency levels
442
+ skill_proficiencies = {}
443
+ for skill in found_skills:
444
+ proficiency = estimate_skill_proficiency(text, skill)
445
+ if proficiency:
446
+ skill_proficiencies[skill] = proficiency
447
+
448
+ # Calculate seniority score
449
+ seniority_score, years_experience = calculate_seniority_score(job_entries)
450
+
451
+ # Detect fraud signals
452
+ fraud_signals = detect_fraud_signals(text, job_entries)
453
+
454
+ # Predict career trajectory
455
+ next_roles = predict_career_trajectory(job_entries, found_skills)
456
+
457
  # Generate summary
458
  chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
459
  summaries = []
 
461
  summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
462
  summaries.append(summary)
463
 
464
+ # Semantic matching with job description
465
+ semantic_score = 0
466
+ if sentence_model:
467
+ try:
468
+ resume_embedding = sentence_model.encode(text[:5000]) # Limit to first 5000 chars to avoid memory issues
469
+ job_embedding = sentence_model.encode(job_descriptions[job_title]["semantic_description"])
470
+ semantic_score = float(util.pytorch_cos_sim(resume_embedding, job_embedding)[0][0])
471
+ except Exception as e:
472
+ st.error(f"Error in semantic matching: {str(e)}")
473
+
474
+ return {
475
+ "found_skills": found_skills,
476
+ "skill_proficiencies": skill_proficiencies,
477
+ "summary": " ".join(summaries),
478
+ "job_entries": job_entries,
479
+ "seniority_score": seniority_score,
480
+ "years_experience": years_experience,
481
+ "fraud_signals": fraud_signals,
482
+ "next_roles": next_roles,
483
+ "semantic_score": semantic_score
484
+ }
485
+
486
+ def generate_career_advice(resume_text, job_title, analysis_results):
487
+ if qwen_model is None or qwen_tokenizer is None:
488
+ return "Career advice model not available. Please check the model installation."
489
+
490
+ # Get missing skills
491
+ missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
492
+ if skill not in analysis_results["found_skills"]]
493
+
494
+ # Create a prompt for the model
495
+ prompt = f"""
496
+ You are a professional career advisor. Based on the resume and the target job position,
497
+ provide personalized advice on skills to develop and suggest projects that would help the candidate
498
+ become a better fit for the position.
499
+
500
+ Resume summary: {analysis_results["summary"]}
501
+
502
+ Target position: {job_title}
503
+
504
+ Job requirements: {job_descriptions[job_title]["description"]}
505
+
506
+ Skills the candidate has: {', '.join([f"{skill} ({analysis_results['skill_proficiencies'].get(skill, 'Basic')})" for skill in analysis_results["found_skills"]])}
507
+
508
+ Skills the candidate needs to develop: {', '.join(missing_skills)}
509
+
510
+ Current experience: {analysis_results["years_experience"]} years
511
+ Current seniority level: {analysis_results["seniority_score"]}/10
512
+ Potential next career moves: {', '.join(analysis_results["next_roles"])}
513
+
514
+ Provide the following:
515
+ 1. Specific advice on how to develop the missing skills
516
+ 2. 3-5 project ideas that would showcase these skills and align with the candidate's career trajectory
517
+ 3. Resources for learning (courses, books, websites)
518
+ 4. Suggestions on how to position existing experience for this role
519
+ """
520
+
521
+ # Generate advice using Qwen3-8B
522
+ try:
523
+ inputs = qwen_tokenizer(prompt, return_tensors="pt").to(qwen_model.device)
524
+ with torch.no_grad():
525
+ outputs = qwen_model.generate(
526
+ **inputs,
527
+ max_new_tokens=1024,
528
+ temperature=0.7,
529
+ top_p=0.9,
530
+ do_sample=True
531
+ )
532
+ advice = qwen_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
533
+ return advice
534
+ except Exception as e:
535
+ return f"Failed to generate career advice: {str(e)}"
536
 
537
  # Streamlit UI
538
+ st.title("πŸ“„ Comprehensive Resume Analyzer")
539
 
540
  # Add description
541
  st.markdown("""
542
+ This app helps recruiters and job seekers analyze resumes with advanced features:
543
+
544
+ - **Semantic Job Matching**: Uses AI to match resumes to job descriptions beyond keywords
545
+ - **Skill Proficiency Detection**: Identifies skill levels from context
546
+ - **Career Progression Analysis**: Visualizes job history and seniority
547
+ - **Fraud Detection**: Flags potential inconsistencies for verification
548
+ - **Career Path Prediction**: Suggests logical next roles based on experience
549
+ - **Personalized Development Advice**: Recommends skills, projects, and resources
550
  """)
551
 
552
  # Create two columns
 
562
 
563
  # Show job description
564
  if job_title:
565
+ st.info(f"**Job Description:**\n{job_descriptions[job_title]['description']}\n\n**Required Skills:**\n" +
566
  "\n".join([f"- {skill.title()}" for skill in job_descriptions[job_title]["skills"]]))
567
 
568
  if uploaded_file and job_title:
569
  try:
570
  # Show spinner while processing
571
+ with st.spinner("Analyzing resume with advanced AI..."):
572
  # Extract text from PDF
573
  text = extract_text_from_pdf(uploaded_file)
574
 
575
  # Analyze resume
576
+ analysis_results = analyze_resume(text, job_title, sentence_model)
577
+
578
+ # Calculate missing skills
579
+ missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
580
+ if skill not in analysis_results["found_skills"]]
581
 
582
  # Display results in tabs
583
+ tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
584
+ "πŸ“Š Match Score",
585
+ "🎯 Skills Analysis",
586
+ "πŸ‘¨β€πŸ’Ό Experience",
587
+ "πŸ“ˆ Career Path",
588
+ "🚩 Verification",
589
+ "πŸš€ Career Advice"
590
+ ])
591
 
592
  with tab1:
593
+ # Display match scores
594
+ st.subheader("πŸ“Š Job Match Analysis")
595
+
596
+ # Calculate match scores
597
+ keyword_match = len(analysis_results["found_skills"]) / len(job_descriptions[job_title]["skills"]) * 100
598
+ semantic_match = analysis_results["semantic_score"] * 100
599
+
600
+ # Display scores with gauges
601
+ col1, col2 = st.columns(2)
602
+
603
+ with col1:
604
+ # Keyword match gauge
605
+ fig = go.Figure(go.Indicator(
606
+ mode = "gauge+number",
607
+ value = keyword_match,
608
+ title = {'text': "Keyword Match"},
609
+ gauge = {
610
+ 'axis': {'range': [0, 100]},
611
+ 'bar': {'color': "darkblue"},
612
+ 'steps': [
613
+ {'range': [0, 30], 'color': "lightgray"},
614
+ {'range': [30, 70], 'color': "gray"},
615
+ {'range': [70, 100], 'color': "lightblue"}
616
+ ],
617
+ 'threshold': {
618
+ 'line': {'color': "red", 'width': 4},
619
+ 'thickness': 0.75,
620
+ 'value': 70
621
+ }
622
+ }
623
+ ))
624
+ st.plotly_chart(fig, use_container_width=True)
625
+
626
+ with col2:
627
+ # Semantic match gauge
628
+ fig = go.Figure(go.Indicator(
629
+ mode = "gauge+number",
630
+ value = semantic_match,
631
+ title = {'text': "Semantic Match"},
632
+ gauge = {
633
+ 'axis': {'range': [0, 100]},
634
+ 'bar': {'color': "darkgreen"},
635
+ 'steps': [
636
+ {'range': [0, 30], 'color': "lightgray"},
637
+ {'range': [30, 70], 'color': "gray"},
638
+ {'range': [70, 100], 'color': "lightgreen"}
639
+ ],
640
+ 'threshold': {
641
+ 'line': {'color': "red", 'width': 4},
642
+ 'thickness': 0.75,
643
+ 'value': 70
644
+ }
645
+ }
646
+ ))
647
+ st.plotly_chart(fig, use_container_width=True)
648
+
649
+ # Calculate overall match score (weighted average)
650
+ overall_match = (keyword_match * 0.4) + (semantic_match * 0.6)
651
+
652
+ # Create overall score gauge
653
+ fig = go.Figure(go.Indicator(
654
+ mode = "gauge+number+delta",
655
+ value = overall_match,
656
+ title = {'text': "Overall Match Score"},
657
+ delta = {'reference': 75, 'increasing': {'color': "green"}},
658
+ gauge = {
659
+ 'axis': {'range': [0, 100]},
660
+ 'bar': {'color': "darkblue"},
661
+ 'steps': [
662
+ {'range': [0, 50], 'color': "lightgray"},
663
+ {'range': [50, 75], 'color': "gray"},
664
+ {'range': [75, 100], 'color': "darkblue"}
665
+ ],
666
+ 'threshold': {
667
+ 'line': {'color': "red", 'width': 4},
668
+ 'thickness': 0.75,
669
+ 'value': 75
670
+ }
671
+ }
672
+ ))
673
+
674
+ st.plotly_chart(fig, use_container_width=True)
675
+
676
  # Display resume summary
677
  st.subheader("πŸ“ Resume Summary")
678
+ st.write(analysis_results["summary"])
679
+
680
with tab2:
    # Skills tab: a table of matched skills with their proficiency, a table
    # of missing skills, and a radar chart of required-skill coverage.
    st.subheader("🎯 Skills Analysis")

    # Two side-by-side columns: present skills (left), missing skills (right)
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("🟢 Skills Present")

        # One row per matched skill; a skill with no recorded proficiency
        # is displayed as "Basic".
        skills_data = [
            {
                "Skill": skill.title(),
                "Proficiency": analysis_results["skill_proficiencies"].get(skill, "Basic"),
            }
            for skill in analysis_results["found_skills"]
        ]

        if skills_data:
            skills_df = pd.DataFrame(skills_data)

            def color_proficiency(val):
                """Return the CSS background rule for one Proficiency cell."""
                if val == "Advanced":
                    return 'background-color: #d4f7d4'
                elif val == "Intermediate":
                    return 'background-color: #fff2cc'
                else:
                    return 'background-color: #f2f2f2'

            # Styler.applymap is deprecated since pandas 2.1 in favour of
            # Styler.map; keep the old name only as a fallback so the app
            # still runs on an older pandas.
            styler = skills_df.style
            style_cells = styler.map if hasattr(styler, "map") else styler.applymap
            st.dataframe(
                style_cells(color_proficiency, subset=['Proficiency']),
                use_container_width=True,
            )
        else:
            st.warning("No direct skill matches found.")

    with col2:
        st.subheader("🔴 Skills to Develop")
        if missing_skills:
            missing_df = pd.DataFrame({"Skill": [skill.title() for skill in missing_skills]})
            st.dataframe(missing_df, use_container_width=True)
        else:
            st.success("Great! The candidate has all the required skills!")

    # Radar chart: 1 on an axis when the required skill was found, else 0,
    # drawn over a translucent "all required" outline for comparison.
    st.subheader("Skills Coverage")

    categories = job_descriptions[job_title]["skills"]
    # Build the lookup once instead of scanning the list for every category.
    found_skills = set(analysis_results["found_skills"])
    values = [1 if skill in found_skills else 0 for skill in categories]

    fig = go.Figure()

    fig.add_trace(go.Scatterpolar(
        r=values,
        theta=categories,
        fill='toself',
        name='Present Skills'
    ))

    fig.add_trace(go.Scatterpolar(
        r=[1] * len(categories),
        theta=categories,
        fill='toself',
        name='Required Skills',
        opacity=0.3
    ))

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 1]
            )),
        showlegend=True
    )

    st.plotly_chart(fig, use_container_width=True)
761
 
762
  with tab3:
763
+ # Display experience analysis
764
+ st.subheader("πŸ‘¨β€πŸ’Ό Experience Analysis")
765
+
766
+ # Display seniority metrics
767
+ col1, col2 = st.columns(2)
768
+
769
+ with col1:
770
+ # Seniority score gauge
771
+ fig = go.Figure(go.Indicator(
772
+ mode="gauge+number",
773
+ value=analysis_results["seniority_score"],
774
+ title={'text': "Seniority Score"},
775
+ gauge={
776
+ 'axis': {'range': [0, 10]},
777
+ 'bar': {'color': "darkblue"},
778
+ 'steps': [
779
+ {'range': [0, 3], 'color': "lightgray"},
780
+ {'range': [3, 7], 'color': "gray"},
781
+ {'range': [7, 10], 'color': "lightblue"}
782
+ ],
783
+ 'threshold': {
784
+ 'line': {'color': "red", 'width': 4},
785
+ 'thickness': 0.75,
786
+ 'value': 7
787
+ }
788
+ }
789
+ ))
790
+ st.plotly_chart(fig, use_container_width=True)
791
+
792
+ with col2:
793
+ # Years of experience
794
+ fig = go.Figure(go.Indicator(
795
+ mode="number+delta",
796
+ value=analysis_results["years_experience"],
797
+ number={'suffix': " years"},
798
+ title={"text": "Years of Experience"},
799
+ delta={'reference': 5, 'relative': False}
800
+ ))
801
+ st.plotly_chart(fig, use_container_width=True)
802
+
803
+ # Display career progression timeline
804
+ st.subheader("Career Progression Timeline")
805
+
806
+ if analysis_results["job_entries"]:
807
+ # Create timeline data
808
+ timeline_data = []
809
+
810
+ for job in analysis_results["job_entries"]:
811
+ # Extract years for visualization
812
+ start_year = re.search(r'\d{4}', job["start_date"])
813
+ end_year = re.search(r'\d{4}', job["end_date"]) if job["end_date"] != "Present" else None
814
+
815
+ if start_year:
816
+ start_year = int(start_year.group(0))
817
+ end_year = int(end_year.group(0)) if end_year else datetime.now().year
818
+
819
+ timeline_data.append({
820
+ "Role": job["title"],
821
+ "Company": job["company"],
822
+ "Start": start_year,
823
+ "End": end_year,
824
+ "Duration": end_year - start_year
825
+ })
826
+
827
+ if timeline_data:
828
+ # Create DataFrame for timeline
829
+ timeline_df = pd.DataFrame(timeline_data)
830
+
831
+ # Sort by start date (ascending)
832
+ timeline_df = timeline_df.sort_values(by="Start")
833
+
834
+ # Create Gantt chart
835
+ fig = px.timeline(
836
+ timeline_df,
837
+ x_start="Start",
838
+ x_end="End",
839
+ y="Company",
840
+ color="Role",
841
+ hover_data=["Duration"],
842
+ labels={"Company": "Employer"}
843
+ )
844
+
845
+ fig.update_layout(
846
+ xaxis_title="Year",
847
+ yaxis_title="Employer",
848
+ title="Career Progression"
849
+ )
850
+
851
+ st.plotly_chart(fig, use_container_width=True)
852
+ else:
853
+ st.warning("Couldn't extract timeline data from the resume.")
854
  else:
855
+ st.warning("No work experience entries found in the resume.")
856
+
857
with tab4:
    # Career-path tab: list the suggested next roles, then chart them as a
    # chain starting from the candidate's current role.
    st.subheader("📈 Career Path Analysis")

    # Display next role suggestions
    st.subheader("Suggested Next Roles")

    next_roles = analysis_results["next_roles"]
    for option_number, role in enumerate(next_roles, start=1):
        st.info(f"**Option {option_number}:** {role}")

    # Add simple career progression visualization
    st.subheader("Career Progression Path")

    # The first job entry is taken as the current role
    # NOTE(review): assumes job_entries is ordered most-recent first - confirm.
    job_entries = analysis_results["job_entries"]
    current_role = job_entries[0]["title"] if job_entries else "Current Position"

    # Node 0 is the current role; nodes 1..n are the suggestions, in order.
    career_nodes = [current_role, *next_roles]
    link_count = len(career_nodes) - 1

    if link_count > 0:
        # Sankey diagram linking each node to the one after it
        # (current -> option 1 -> option 2 -> ...), every link with weight 1.
        fig = go.Figure(data=[go.Sankey(
            node=dict(
                pad=15,
                thickness=20,
                line=dict(color="black", width=0.5),
                label=career_nodes,
                color="blue"
            ),
            link=dict(
                source=list(range(link_count)),
                target=list(range(1, link_count + 1)),
                value=[1] * link_count
            )
        )])

        fig.update_layout(title_text="Potential Career Path", font_size=12)
        st.plotly_chart(fig, use_container_width=True)
    else:
        # With no suggested roles there are no links to draw.
        st.info("No next-role suggestions are available to chart.")
901
+
902
with tab5:
    # Verification tab: list any fraud signals found by the analysis, then
    # show a fixed checklist of manual verification steps.
    st.subheader("🚩 Verification Points")

    if analysis_results["fraud_signals"]:
        st.warning("The following points may require verification:")
        for signal in analysis_results["fraud_signals"]:
            st.markdown(f"- {signal}")
    else:
        st.success("No significant inconsistencies detected in the resume.")

    # The checklist is shown regardless of whether any signal was raised.
    st.subheader("Recommended Verification Steps")
    st.markdown("""
    Even when no inconsistencies are detected, consider these verification steps:

    1. **Reference Checks**: Contact previous employers to confirm employment dates and responsibilities
    2. **Skills Assessment**: Use technical interviews or tests to verify claimed skills
    3. **Education Verification**: Confirm degrees and certifications with educational institutions
    4. **Portfolio Review**: Examine work samples or project contributions
    5. **Online Presence**: Check LinkedIn, GitHub, or other professional profiles for consistency
    """)
924
+
925
with tab6:
    # Career-advice tab: the advice is generated only when the user clicks
    # the button, and a spinner is shown while generate_career_advice runs.
    st.subheader("🚀 Career Advice and Development Plan")

    if st.button("Generate Personalized Career Advice"):
        with st.spinner("Generating detailed career advice and development plan..."):
            advice = generate_career_advice(text, job_title, analysis_results)
            st.markdown(advice)
933
 
934
  except Exception as e:
935
  st.error(f"An error occurred while processing the resume: {str(e)}")
936
+ st.exception(e)
937
 
938
  # Add footer
939
  st.markdown("---")
940
+ st.markdown("Made with ❤️ using Streamlit, Hugging Face, and Advanced AI")
requirements.txt CHANGED
@@ -1,5 +1,14 @@
1
- streamlit>=1.31.0
2
- pdfplumber>=0.10.3
3
- transformers>=4.37.2
4
- torch>=2.1.2
5
- spacy>=3.7.2
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.29.0
2
+ pdfplumber==0.10.2
3
+ spacy==3.7.2
4
+ transformers==4.36.2
5
+ sentence-transformers==2.2.2
6
+ torch==2.1.2
7
+ nltk==3.8.1
8
+ pandas==2.1.4
9
+ matplotlib==3.8.2
10
+ plotly==5.18.0
11
+ numpy==1.26.2
12
+ python-docx==1.0.1
13
+ huggingface-hub==0.19.4
14
+ accelerate==0.25.0