root
commited on
Commit
·
eaa3094
1
Parent(s):
ea35a5b
ss
Browse files- app.py +651 -904
- fix_dependencies.py +0 -48
- requirements.txt +7 -1
app.py
CHANGED
@@ -2,80 +2,45 @@ import streamlit as st
|
|
2 |
import pdfplumber
|
3 |
import io
|
4 |
import spacy
|
5 |
-
import
|
6 |
-
import pandas as pd
|
7 |
-
import matplotlib.pyplot as plt
|
8 |
-
from transformers import pipeline
|
9 |
-
# Import SentenceTransformer with try-except
|
10 |
-
try:
|
11 |
-
from sentence_transformers import SentenceTransformer
|
12 |
-
# Try to import util, if it fails, we'll create our own minimal version
|
13 |
-
try:
|
14 |
-
from sentence_transformers import util
|
15 |
-
except ImportError:
|
16 |
-
# Create a minimal util module replacement with the functions we need
|
17 |
-
class util:
|
18 |
-
@staticmethod
|
19 |
-
def pytorch_cos_sim(a, b):
|
20 |
-
"""
|
21 |
-
Compute cosine similarity between two PyTorch tensors
|
22 |
-
"""
|
23 |
-
import torch
|
24 |
-
if not isinstance(a, torch.Tensor):
|
25 |
-
a = torch.tensor(a)
|
26 |
-
if not isinstance(b, torch.Tensor):
|
27 |
-
b = torch.tensor(b)
|
28 |
-
|
29 |
-
if len(a.shape) == 1:
|
30 |
-
a = a.unsqueeze(0)
|
31 |
-
if len(b.shape) == 1:
|
32 |
-
b = b.unsqueeze(0)
|
33 |
-
|
34 |
-
a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
|
35 |
-
b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
|
36 |
-
return torch.mm(a_norm, b_norm.transpose(0, 1))
|
37 |
-
except ImportError:
|
38 |
-
st.error("Failed to import SentenceTransformer. Semantic matching will be disabled.")
|
39 |
-
SentenceTransformer = None
|
40 |
-
class util:
|
41 |
-
@staticmethod
|
42 |
-
def pytorch_cos_sim(*args, **kwargs):
|
43 |
-
return 0
|
44 |
import subprocess
|
45 |
import sys
|
46 |
import torch
|
47 |
-
import
|
48 |
-
|
49 |
-
|
50 |
import plotly.express as px
|
51 |
import plotly.graph_objects as go
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
66 |
|
67 |
-
#
|
68 |
@st.cache_resource
|
69 |
def download_nltk_resources():
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
|
75 |
download_nltk_resources()
|
76 |
|
77 |
st.set_page_config(
|
78 |
-
page_title="
|
79 |
page_icon="📄",
|
80 |
layout="wide"
|
81 |
)
|
@@ -93,31 +58,30 @@ def download_spacy_model():
|
|
93 |
# Load the NLP models
|
94 |
@st.cache_resource
|
95 |
def load_models():
|
96 |
-
|
97 |
-
|
98 |
-
except Exception as e:
|
99 |
-
st.error(f"Failed to load summarization model: {str(e)}")
|
100 |
-
# Fallback to a simpler summarizer that just takes the first few sentences
|
101 |
-
summarizer = lambda text, **kwargs: [{"summary_text": ". ".join(text.split(". ")[:3]) + "."}]
|
102 |
|
|
|
|
|
|
|
|
|
103 |
try:
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
except Exception as e:
|
106 |
-
st.error(f"Failed to load
|
107 |
-
|
108 |
-
|
109 |
-
# Load sentence transformer for semantic matching
|
110 |
-
sentence_model = None
|
111 |
-
if SentenceTransformer is not None:
|
112 |
-
try:
|
113 |
-
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
|
114 |
-
except Exception as e:
|
115 |
-
st.error(f"Failed to load sentence transformer: {str(e)}")
|
116 |
|
117 |
-
return summarizer, nlp, sentence_model
|
118 |
|
119 |
# Initialize models
|
120 |
-
summarizer, nlp, sentence_model = load_models()
|
121 |
|
122 |
# Job descriptions and required skills
|
123 |
job_descriptions = {
|
@@ -125,38 +89,61 @@ job_descriptions = {
|
|
125 |
"skills": ["python", "java", "javascript", "sql", "algorithms", "data structures",
|
126 |
"git", "cloud", "web development", "software development", "coding"],
|
127 |
"description": "Looking for software engineers with strong programming skills and experience in software development.",
|
128 |
-
"
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
},
|
136 |
"Interaction Designer": {
|
137 |
"skills": ["ui", "ux", "user research", "wireframing", "prototyping", "figma",
|
138 |
"sketch", "adobe", "design thinking", "interaction design"],
|
139 |
"description": "Seeking interaction designers with expertise in user experience and interface design.",
|
140 |
-
"
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
},
|
148 |
"Data Scientist": {
|
149 |
"skills": ["python", "r", "statistics", "machine learning", "data analysis",
|
150 |
"sql", "tensorflow", "pytorch", "pandas", "numpy"],
|
151 |
"description": "Looking for data scientists with strong analytical and machine learning skills.",
|
152 |
-
"
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
}
|
161 |
}
|
162 |
|
@@ -167,312 +154,8 @@ def extract_text_from_pdf(pdf_file):
|
|
167 |
text += page.extract_text() or ""
|
168 |
return text
|
169 |
|
170 |
-
def
|
171 |
-
|
172 |
-
# Find common section headers for work experience
|
173 |
-
work_exp_patterns = [
|
174 |
-
r"(?i)WORK EXPERIENCE|PROFESSIONAL EXPERIENCE|EMPLOYMENT HISTORY|EXPERIENCE",
|
175 |
-
r"(?i)EDUCATION|ACADEMIC|QUALIFICATIONS"
|
176 |
-
]
|
177 |
-
|
178 |
-
# Find the start of work experience section
|
179 |
-
work_exp_start = None
|
180 |
-
for pattern in work_exp_patterns[:1]: # Use only the work experience patterns
|
181 |
-
match = re.search(pattern, text)
|
182 |
-
if match:
|
183 |
-
work_exp_start = match.end()
|
184 |
-
break
|
185 |
-
|
186 |
-
if work_exp_start is None:
|
187 |
-
return []
|
188 |
-
|
189 |
-
# Find the end of work experience section (start of education or next major section)
|
190 |
-
work_exp_end = len(text)
|
191 |
-
for pattern in work_exp_patterns[1:]: # Use only the education pattern
|
192 |
-
match = re.search(pattern, text)
|
193 |
-
if match and match.start() > work_exp_start:
|
194 |
-
work_exp_end = match.start()
|
195 |
-
break
|
196 |
-
|
197 |
-
work_exp_text = text[work_exp_start:work_exp_end]
|
198 |
-
|
199 |
-
# Extract job entries
|
200 |
-
# Look for patterns of job titles, company names, and dates
|
201 |
-
job_entries = []
|
202 |
-
|
203 |
-
# Pattern for dates (MM/YYYY or Month YYYY)
|
204 |
-
date_pattern = r"(?i)(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[,\s]+\d{4}|\d{1,2}/\d{4}|\d{4}"
|
205 |
-
|
206 |
-
# Pattern for common job title indicators
|
207 |
-
job_title_pattern = r"(?i)(Senior|Lead|Principal|Junior|Associate)?\s*(Software Engineer|Developer|Designer|Analyst|Manager|Director|Consultant|Specialist|Coordinator|Administrator)"
|
208 |
-
|
209 |
-
# Split into paragraphs which often represent job entries
|
210 |
-
paragraphs = re.split(r'\n\s*\n', work_exp_text)
|
211 |
-
|
212 |
-
for paragraph in paragraphs:
|
213 |
-
# Skip short paragraphs that are likely not job entries
|
214 |
-
if len(paragraph.strip()) < 30:
|
215 |
-
continue
|
216 |
-
|
217 |
-
# Extract dates
|
218 |
-
dates = re.findall(date_pattern, paragraph)
|
219 |
-
start_date = dates[0] if dates else "Unknown"
|
220 |
-
end_date = dates[-1] if len(dates) > 1 else "Present"
|
221 |
-
|
222 |
-
# Extract job title
|
223 |
-
title_match = re.search(job_title_pattern, paragraph)
|
224 |
-
job_title = title_match.group(0) if title_match else "Unknown Position"
|
225 |
-
|
226 |
-
# Extract company name (typically near the job title or at the start of the paragraph)
|
227 |
-
lines = paragraph.split('\n')
|
228 |
-
company = lines[0].strip() if lines else "Unknown Company"
|
229 |
-
if job_title in company:
|
230 |
-
company = company.replace(job_title, "").strip()
|
231 |
-
|
232 |
-
# Clean company name
|
233 |
-
for date in dates:
|
234 |
-
company = company.replace(date, "").strip()
|
235 |
-
company = re.sub(r'[,\.\|\-]', ' ', company).strip()
|
236 |
-
|
237 |
-
job_entries.append({
|
238 |
-
"company": company,
|
239 |
-
"title": job_title,
|
240 |
-
"start_date": start_date,
|
241 |
-
"end_date": end_date,
|
242 |
-
"description": paragraph
|
243 |
-
})
|
244 |
-
|
245 |
-
return job_entries
|
246 |
-
|
247 |
-
def estimate_skill_proficiency(text, skill):
|
248 |
-
"""Estimate proficiency level for a skill"""
|
249 |
-
# Define proficiency indicators
|
250 |
-
basic_indicators = ["familiar with", "basic knowledge", "understanding of", "exposure to"]
|
251 |
-
intermediate_indicators = ["experience with", "proficient in", "worked with", "2-3 years", "2 years", "3 years"]
|
252 |
-
advanced_indicators = ["expert in", "advanced", "extensive experience", "lead", "architected", "designed", "5+ years", "4+ years"]
|
253 |
-
|
254 |
-
# Convert to lowercase for matching
|
255 |
-
text_lower = text.lower()
|
256 |
-
|
257 |
-
# Find skill mentions and surrounding context
|
258 |
-
skill_lower = skill.lower()
|
259 |
-
skill_index = text_lower.find(skill_lower)
|
260 |
-
|
261 |
-
if skill_index == -1:
|
262 |
-
return None
|
263 |
-
|
264 |
-
# Extract context (100 characters before and after the skill mention)
|
265 |
-
start = max(0, skill_index - 100)
|
266 |
-
end = min(len(text_lower), skill_index + len(skill_lower) + 100)
|
267 |
-
context = text_lower[start:end]
|
268 |
-
|
269 |
-
# Check for proficiency indicators
|
270 |
-
for indicator in advanced_indicators:
|
271 |
-
if indicator in context:
|
272 |
-
return "Advanced"
|
273 |
-
|
274 |
-
for indicator in intermediate_indicators:
|
275 |
-
if indicator in context:
|
276 |
-
return "Intermediate"
|
277 |
-
|
278 |
-
for indicator in basic_indicators:
|
279 |
-
if indicator in context:
|
280 |
-
return "Basic"
|
281 |
-
|
282 |
-
# Default to basic if skill is mentioned but no proficiency indicators are found
|
283 |
-
return "Basic"
|
284 |
-
|
285 |
-
def calculate_seniority_score(job_entries):
|
286 |
-
"""Calculate a seniority score based on job titles and years of experience"""
|
287 |
-
# Define seniority levels for common job titles
|
288 |
-
seniority_levels = {
|
289 |
-
"intern": 1,
|
290 |
-
"junior": 2,
|
291 |
-
"associate": 3,
|
292 |
-
"developer": 4,
|
293 |
-
"engineer": 4,
|
294 |
-
"designer": 4,
|
295 |
-
"analyst": 4,
|
296 |
-
"senior": 6,
|
297 |
-
"lead": 7,
|
298 |
-
"manager": 7,
|
299 |
-
"principal": 8,
|
300 |
-
"director": 9,
|
301 |
-
"vp": 10,
|
302 |
-
"cto": 10,
|
303 |
-
"cio": 10,
|
304 |
-
"ceo": 10
|
305 |
-
}
|
306 |
-
|
307 |
-
# Calculate total years of experience
|
308 |
-
total_years = 0
|
309 |
-
|
310 |
-
for job in job_entries:
|
311 |
-
# Parse start and end dates
|
312 |
-
try:
|
313 |
-
start_year = re.search(r'\d{4}', job["start_date"])
|
314 |
-
end_year = re.search(r'\d{4}', job["end_date"]) if job["end_date"] != "Present" else None
|
315 |
-
|
316 |
-
if start_year:
|
317 |
-
start_year = int(start_year.group(0))
|
318 |
-
end_year = int(end_year.group(0)) if end_year else datetime.now().year
|
319 |
-
years = end_year - start_year
|
320 |
-
if 0 <= years <= 30: # Sanity check
|
321 |
-
total_years += years
|
322 |
-
except Exception:
|
323 |
-
# Skip if there's an issue with date parsing
|
324 |
-
pass
|
325 |
-
|
326 |
-
# Calculate title-based seniority
|
327 |
-
highest_seniority = 0
|
328 |
-
|
329 |
-
for job in job_entries:
|
330 |
-
title_lower = job["title"].lower()
|
331 |
-
for level_title, score in seniority_levels.items():
|
332 |
-
if level_title in title_lower and score > highest_seniority:
|
333 |
-
highest_seniority = score
|
334 |
-
|
335 |
-
# Combine years of experience and title-based seniority
|
336 |
-
# Years of experience factor: 0-2 years (1), 3-5 years (2), 6-10 years (3), 11+ years (4)
|
337 |
-
years_factor = 1
|
338 |
-
if total_years >= 3:
|
339 |
-
years_factor = 2
|
340 |
-
if total_years >= 6:
|
341 |
-
years_factor = 3
|
342 |
-
if total_years >= 11:
|
343 |
-
years_factor = 4
|
344 |
-
|
345 |
-
# Final seniority score (1-10 scale)
|
346 |
-
seniority_score = min(10, max(1, (highest_seniority * 0.6) + (years_factor * 1.0)))
|
347 |
-
|
348 |
-
return round(seniority_score, 1), total_years
|
349 |
-
|
350 |
-
def detect_fraud_signals(text, job_entries):
|
351 |
-
"""Detect potential fraud signals in the resume"""
|
352 |
-
fraud_signals = []
|
353 |
-
|
354 |
-
# Check for impossible timelines (overlapping full-time roles)
|
355 |
-
if len(job_entries) >= 2:
|
356 |
-
for i in range(len(job_entries) - 1):
|
357 |
-
for j in range(i+1, len(job_entries)):
|
358 |
-
# Check if both jobs have date information
|
359 |
-
if (job_entries[i]["start_date"] != "Unknown" and
|
360 |
-
job_entries[i]["end_date"] != "Unknown" and
|
361 |
-
job_entries[j]["start_date"] != "Unknown" and
|
362 |
-
job_entries[j]["end_date"] != "Unknown"):
|
363 |
-
|
364 |
-
# Get years for comparison
|
365 |
-
i_start = re.search(r'\d{4}', job_entries[i]["start_date"])
|
366 |
-
i_end = re.search(r'\d{4}', job_entries[i]["end_date"]) if job_entries[i]["end_date"] != "Present" else None
|
367 |
-
j_start = re.search(r'\d{4}', job_entries[j]["start_date"])
|
368 |
-
j_end = re.search(r'\d{4}', job_entries[j]["end_date"]) if job_entries[j]["end_date"] != "Present" else None
|
369 |
-
|
370 |
-
# Convert to integers for comparison
|
371 |
-
if i_start and j_start:
|
372 |
-
i_start = int(i_start.group(0))
|
373 |
-
i_end = int(i_end.group(0)) if i_end else datetime.now().year
|
374 |
-
j_start = int(j_start.group(0))
|
375 |
-
j_end = int(j_end.group(0)) if j_end else datetime.now().year
|
376 |
-
|
377 |
-
# Check for significant overlap (more than 6 months)
|
378 |
-
if ((i_start <= j_start < i_end) or (j_start <= i_start < j_end)) and job_entries[i]["company"] != job_entries[j]["company"]:
|
379 |
-
overlap_years = min(i_end, j_end) - max(i_start, j_start)
|
380 |
-
if overlap_years > 0.5: # More than 6 months overlap
|
381 |
-
fraud_signals.append(f"Potential timeline inconsistency: Overlapping roles at {job_entries[i]['company']} and {job_entries[j]['company']} for {overlap_years:.1f} years")
|
382 |
-
|
383 |
-
# Check for suspicious keywords or phrases
|
384 |
-
suspicious_phrases = [
|
385 |
-
"self-employed",
|
386 |
-
"freelance",
|
387 |
-
"consultant",
|
388 |
-
"entrepreneur",
|
389 |
-
"founder",
|
390 |
-
"ceo of own company"
|
391 |
-
]
|
392 |
-
|
393 |
-
# Look for suspicious gap filling
|
394 |
-
for phrase in suspicious_phrases:
|
395 |
-
if phrase in text.lower():
|
396 |
-
# Not all of these are fraudulent, but they warrant verification
|
397 |
-
fraud_signals.append(f"Verification recommended: Contains '{phrase}' which may need additional verification")
|
398 |
-
|
399 |
-
# Check for unexplained gaps in employment history
|
400 |
-
if len(job_entries) >= 2:
|
401 |
-
for i in range(len(job_entries) - 1):
|
402 |
-
# Sort entries by start date
|
403 |
-
if "Unknown" not in job_entries[i]["end_date"] and "Unknown" not in job_entries[i+1]["start_date"]:
|
404 |
-
end_match = re.search(r'\d{4}', job_entries[i]["end_date"])
|
405 |
-
start_match = re.search(r'\d{4}', job_entries[i+1]["start_date"])
|
406 |
-
|
407 |
-
if end_match and start_match:
|
408 |
-
end_year = int(end_match.group(0))
|
409 |
-
start_year = int(start_match.group(0))
|
410 |
-
|
411 |
-
# If there's more than a 1-year gap
|
412 |
-
if start_year - end_year > 1:
|
413 |
-
fraud_signals.append(f"Employment gap: {end_year} to {start_year} ({start_year - end_year} years)")
|
414 |
-
|
415 |
-
return fraud_signals
|
416 |
-
|
417 |
-
def predict_career_trajectory(job_entries, current_skills):
|
418 |
-
"""Predict logical next roles based on career progression"""
|
419 |
-
# Career path mappings based on common progressions
|
420 |
-
career_paths = {
|
421 |
-
"software engineer": ["Senior Software Engineer", "Lead Developer", "Software Architect", "Engineering Manager", "CTO"],
|
422 |
-
"developer": ["Senior Developer", "Technical Lead", "Software Architect", "Development Manager", "CTO"],
|
423 |
-
"designer": ["Senior Designer", "Lead Designer", "Design Manager", "Creative Director", "VP of Design"],
|
424 |
-
"data scientist": ["Senior Data Scientist", "Lead Data Scientist", "Data Science Manager", "Director of Analytics", "Chief Data Officer"]
|
425 |
-
}
|
426 |
-
|
427 |
-
# Extract current role from latest job entry
|
428 |
-
current_role = job_entries[0]["title"].lower() if job_entries else "unknown"
|
429 |
-
|
430 |
-
# Find the best matching career path
|
431 |
-
best_match = None
|
432 |
-
for role_key in career_paths:
|
433 |
-
if role_key in current_role:
|
434 |
-
best_match = role_key
|
435 |
-
break
|
436 |
-
|
437 |
-
if not best_match:
|
438 |
-
return ["Career path prediction requires more information"]
|
439 |
-
|
440 |
-
# Find current position in the career path
|
441 |
-
current_index = 0
|
442 |
-
for i, role in enumerate(career_paths[best_match]):
|
443 |
-
if any(indicator in current_role for indicator in ["senior", "lead", "manager", "director", "vp", "chief"]):
|
444 |
-
# If current role contains seniority indicators, advance the index
|
445 |
-
if "senior" in current_role and "senior" in role.lower():
|
446 |
-
current_index = i
|
447 |
-
break
|
448 |
-
elif "lead" in current_role and "lead" in role.lower():
|
449 |
-
current_index = i
|
450 |
-
break
|
451 |
-
elif "manager" in current_role and "manager" in role.lower():
|
452 |
-
current_index = i
|
453 |
-
break
|
454 |
-
elif "director" in current_role and "director" in role.lower():
|
455 |
-
current_index = i
|
456 |
-
break
|
457 |
-
|
458 |
-
# Get next potential roles (up to 3)
|
459 |
-
next_roles = []
|
460 |
-
for i in range(current_index + 1, min(current_index + 4, len(career_paths[best_match]))):
|
461 |
-
next_roles.append(career_paths[best_match][i])
|
462 |
-
|
463 |
-
if not next_roles:
|
464 |
-
next_roles = ["You're at a senior level in your career path. Consider lateral moves or industry specialization."]
|
465 |
-
|
466 |
-
return next_roles
|
467 |
-
|
468 |
-
def analyze_resume(text, job_title, sentence_model):
|
469 |
-
# Extract work experience
|
470 |
-
job_entries = extract_work_experience(text)
|
471 |
-
|
472 |
-
# Sort job entries by start date (most recent first)
|
473 |
-
job_entries.sort(key=lambda x: "9999" if x["start_date"] == "Unknown" else x["start_date"], reverse=True)
|
474 |
-
|
475 |
-
# Extract relevant skills with basic keyword matching
|
476 |
doc = nlp(text.lower())
|
477 |
found_skills = []
|
478 |
required_skills = job_descriptions[job_title]["skills"]
|
@@ -481,22 +164,6 @@ def analyze_resume(text, job_title, sentence_model):
|
|
481 |
if skill in text.lower():
|
482 |
found_skills.append(skill)
|
483 |
|
484 |
-
# Determine skill proficiency levels
|
485 |
-
skill_proficiencies = {}
|
486 |
-
for skill in found_skills:
|
487 |
-
proficiency = estimate_skill_proficiency(text, skill)
|
488 |
-
if proficiency:
|
489 |
-
skill_proficiencies[skill] = proficiency
|
490 |
-
|
491 |
-
# Calculate seniority score
|
492 |
-
seniority_score, years_experience = calculate_seniority_score(job_entries)
|
493 |
-
|
494 |
-
# Detect fraud signals
|
495 |
-
fraud_signals = detect_fraud_signals(text, job_entries)
|
496 |
-
|
497 |
-
# Predict career trajectory
|
498 |
-
next_roles = predict_career_trajectory(job_entries, found_skills)
|
499 |
-
|
500 |
# Generate summary
|
501 |
chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
|
502 |
summaries = []
|
@@ -504,190 +171,92 @@ def analyze_resume(text, job_title, sentence_model):
|
|
504 |
summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
|
505 |
summaries.append(summary)
|
506 |
|
507 |
-
#
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
516 |
|
517 |
return {
|
518 |
-
|
519 |
-
"
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
|
|
527 |
}
|
528 |
|
529 |
def generate_career_advice(resume_text, job_title, found_skills, missing_skills):
|
530 |
-
|
531 |
-
|
532 |
-
to avoid dependency issues
|
533 |
-
"""
|
534 |
-
# Template-based advice generation
|
535 |
-
advice = f"""## Career Development Plan for {job_title} Position
|
536 |
-
|
537 |
-
### Skills to Develop
|
538 |
-
|
539 |
-
The following skills would strengthen your resume for this position:
|
540 |
-
|
541 |
-
"""
|
542 |
|
543 |
-
#
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
- **Project idea**: Build a data analysis tool or web application using Python and popular frameworks
|
549 |
-
- **Resources**: Coursera's Python for Everybody, Python.org tutorials, Real Python website
|
550 |
|
551 |
-
|
552 |
-
elif skill == "java":
|
553 |
-
advice += f"""#### Java
|
554 |
-
- **How to develop**: Complete a comprehensive Java course with practical exercises
|
555 |
-
- **Project idea**: Develop a backend service with Spring Boot
|
556 |
-
- **Resources**: Oracle's Java tutorials, Udemy courses on Java, "Effective Java" by Joshua Bloch
|
557 |
|
558 |
-
|
559 |
-
elif skill == "javascript":
|
560 |
-
advice += f"""#### JavaScript
|
561 |
-
- **How to develop**: Practice with modern JavaScript frameworks
|
562 |
-
- **Project idea**: Create an interactive web application with React or Vue.js
|
563 |
-
- **Resources**: MDN Web Docs, freeCodeCamp, "Eloquent JavaScript" by Marijn Haverbeke
|
564 |
|
565 |
-
|
566 |
-
elif skill == "sql":
|
567 |
-
advice += f"""#### SQL
|
568 |
-
- **How to develop**: Practice with database design and complex queries
|
569 |
-
- **Project idea**: Design a database system for a small business with reports and analytics
|
570 |
-
- **Resources**: SQLZoo, Mode Analytics SQL tutorial, W3Schools SQL course
|
571 |
-
|
572 |
-
"""
|
573 |
-
elif "algorithms" in skill or "data structures" in skill:
|
574 |
-
advice += f"""#### Algorithms & Data Structures
|
575 |
-
- **How to develop**: Solve coding problems regularly on platforms like LeetCode
|
576 |
-
- **Project idea**: Implement classic algorithms and optimize them for specific use cases
|
577 |
-
- **Resources**: "Cracking the Coding Interview" book, AlgoExpert, Coursera Algorithms specialization
|
578 |
-
|
579 |
-
"""
|
580 |
-
elif "git" in skill:
|
581 |
-
advice += f"""#### Git & Version Control
|
582 |
-
- **How to develop**: Contribute to open source projects to practice Git workflows
|
583 |
-
- **Project idea**: Set up a personal project with proper branching strategies and CI/CD
|
584 |
-
- **Resources**: Git documentation, GitHub Learning Lab, Atlassian Git tutorials
|
585 |
-
|
586 |
-
"""
|
587 |
-
elif "cloud" in skill:
|
588 |
-
advice += f"""#### Cloud Technologies
|
589 |
-
- **How to develop**: Get hands-on experience with a major cloud provider (AWS, Azure, GCP)
|
590 |
-
- **Project idea**: Deploy an application to the cloud with proper infrastructure as code
|
591 |
-
- **Resources**: Cloud provider documentation, A Cloud Guru courses, free tier accounts
|
592 |
|
593 |
-
|
594 |
-
elif "ui" in skill or "ux" in skill:
|
595 |
-
advice += f"""#### UI/UX Design
|
596 |
-
- **How to develop**: Study design principles and practice creating user interfaces
|
597 |
-
- **Project idea**: Redesign an existing website or app with focus on user experience
|
598 |
-
- **Resources**: Nielsen Norman Group articles, Interaction Design Foundation, Figma tutorials
|
599 |
|
600 |
-
|
601 |
-
elif "machine learning" in skill:
|
602 |
-
advice += f"""#### Machine Learning
|
603 |
-
- **How to develop**: Take courses on ML fundamentals and practice with datasets
|
604 |
-
- **Project idea**: Build a predictive model to solve a real-world problem
|
605 |
-
- **Resources**: Andrew Ng's Coursera courses, Kaggle competitions, "Hands-On Machine Learning" book
|
606 |
-
|
607 |
-
"""
|
608 |
-
elif "data analysis" in skill:
|
609 |
-
advice += f"""#### Data Analysis
|
610 |
-
- **How to develop**: Practice analyzing datasets and creating visualizations
|
611 |
-
- **Project idea**: Perform an exploratory data analysis on a public dataset
|
612 |
-
- **Resources**: DataCamp courses, Kaggle datasets, "Python for Data Analysis" by Wes McKinney
|
613 |
-
|
614 |
-
"""
|
615 |
-
else:
|
616 |
-
advice += f"""#### {skill.title()}
|
617 |
-
- **How to develop**: Research industry best practices and take relevant courses
|
618 |
-
- **Project idea**: Create a portfolio piece that showcases this skill
|
619 |
-
- **Resources**: Online courses, industry blogs, and practice projects
|
620 |
-
|
621 |
-
"""
|
622 |
-
|
623 |
-
# Add project recommendations based on job title
|
624 |
-
advice += f"""
|
625 |
-
### Recommended Projects for {job_title}
|
626 |
-
|
627 |
-
Based on the target position and the skills needed, here are some project ideas:
|
628 |
-
|
629 |
-
"""
|
630 |
-
if job_title == "Software Engineer":
|
631 |
-
advice += """
|
632 |
-
1. **Full-Stack Web Application**: Build a complete web app with frontend, backend, and database
|
633 |
-
2. **API Service**: Create a RESTful or GraphQL API with proper authentication and documentation
|
634 |
-
3. **Mobile Application**: Develop a cross-platform mobile app using React Native or Flutter
|
635 |
-
4. **Automation Tools**: Build scripts or applications that automate repetitive tasks
|
636 |
-
5. **Contribution to Open Source**: Find a project aligned with your skills and contribute meaningfully
|
637 |
-
|
638 |
-
"""
|
639 |
-
elif job_title == "Interaction Designer":
|
640 |
-
advice += """
|
641 |
-
1. **Design System**: Create a comprehensive design system with components and usage guidelines
|
642 |
-
2. **Website Redesign**: Redesign an existing website with focus on improved UX
|
643 |
-
3. **Mobile App Prototype**: Design a fully interactive mobile app prototype
|
644 |
-
4. **User Research Project**: Conduct user research and create a report with insights and recommendations
|
645 |
-
5. **Design Case Study**: Document your design process for solving a specific problem
|
646 |
-
|
647 |
-
"""
|
648 |
-
elif job_title == "Data Scientist":
|
649 |
-
advice += """
|
650 |
-
1. **Predictive Model**: Build a machine learning model that solves a real-world problem
|
651 |
-
2. **Data Visualization Dashboard**: Create an interactive dashboard to visualize complex data
|
652 |
-
3. **Natural Language Processing**: Develop a text analysis or sentiment analysis project
|
653 |
-
4. **Time Series Analysis**: Analyze time-based data and build forecasting models
|
654 |
-
5. **A/B Testing Framework**: Design and implement a framework for testing hypotheses
|
655 |
|
|
|
|
|
|
|
|
|
656 |
"""
|
657 |
-
|
658 |
-
# General advice for all positions
|
659 |
-
advice += """
|
660 |
-
### Learning Resources
|
661 |
-
|
662 |
-
- **Online Platforms**: Coursera, Udemy, Pluralsight, LinkedIn Learning
|
663 |
-
- **Documentation**: Official language and framework documentation
|
664 |
-
- **Communities**: Stack Overflow, GitHub, Reddit programming communities
|
665 |
-
- **Books**: O'Reilly publications specific to your target technologies
|
666 |
-
- **YouTube Channels**: Traversy Media, Tech With Tim, freeCodeCamp
|
667 |
-
|
668 |
-
### Positioning Your Experience
|
669 |
|
670 |
-
|
671 |
-
|
672 |
-
|
673 |
-
|
674 |
-
|
675 |
-
|
676 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
677 |
|
678 |
# Streamlit UI
|
679 |
-
st.title("📄
|
680 |
|
681 |
# Add description
|
682 |
st.markdown("""
|
683 |
-
This app helps recruiters
|
684 |
-
|
685 |
-
-
|
686 |
-
-
|
687 |
-
-
|
688 |
-
- **Fraud Detection**: Flags potential inconsistencies for verification
|
689 |
-
- **Career Path Prediction**: Suggests logical next roles based on experience
|
690 |
-
- **Personalized Development Advice**: Recommends skills, projects, and resources
|
691 |
""")
|
692 |
|
693 |
# Create two columns
|
@@ -703,379 +272,557 @@ with col2:
|
|
703 |
|
704 |
# Show job description
|
705 |
if job_title:
|
706 |
-
st.info(f"**
|
707 |
"\n".join([f"- {skill.title()}" for skill in job_descriptions[job_title]["skills"]]))
|
708 |
|
709 |
if uploaded_file and job_title:
|
710 |
try:
|
711 |
# Show spinner while processing
|
712 |
-
with st.spinner("Analyzing resume
|
713 |
# Extract text from PDF
|
714 |
text = extract_text_from_pdf(uploaded_file)
|
715 |
|
716 |
# Analyze resume
|
717 |
-
|
718 |
|
719 |
# Calculate missing skills
|
720 |
missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
|
721 |
-
if skill not in
|
722 |
|
723 |
# Display results in tabs
|
724 |
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
|
725 |
-
"📊 Match
|
726 |
-
"
|
727 |
-
"
|
728 |
-
"
|
729 |
-
"
|
730 |
"🚀 Career Advice"
|
731 |
])
|
732 |
|
733 |
with tab1:
|
734 |
-
#
|
735 |
-
st.subheader("📊 Job Match Analysis")
|
736 |
-
|
737 |
-
# Calculate match scores
|
738 |
-
keyword_match = len(analysis_results["found_skills"]) / len(job_descriptions[job_title]["skills"]) * 100
|
739 |
-
semantic_match = analysis_results["semantic_score"] * 100
|
740 |
-
|
741 |
-
# Display scores with gauges
|
742 |
col1, col2 = st.columns(2)
|
743 |
|
744 |
with col1:
|
745 |
-
#
|
746 |
-
|
747 |
-
|
748 |
-
|
749 |
-
|
750 |
-
|
751 |
-
'
|
752 |
-
|
753 |
-
|
754 |
-
|
755 |
-
|
756 |
-
|
757 |
-
|
758 |
-
|
759 |
-
'line': {'color': "red", 'width': 4},
|
760 |
-
'thickness': 0.75,
|
761 |
-
'value': 70
|
762 |
-
}
|
763 |
-
}
|
764 |
-
))
|
765 |
-
st.plotly_chart(fig, use_container_width=True)
|
766 |
|
767 |
with col2:
|
768 |
-
#
|
769 |
-
|
770 |
-
|
771 |
-
|
772 |
-
|
773 |
-
|
774 |
-
|
775 |
-
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
|
781 |
-
|
782 |
-
|
783 |
-
|
784 |
-
|
785 |
-
|
786 |
-
|
787 |
-
))
|
788 |
-
st.plotly_chart(fig, use_container_width=True)
|
789 |
-
|
790 |
-
# Calculate overall match score (weighted average)
|
791 |
-
overall_match = (keyword_match * 0.4) + (semantic_match * 0.6)
|
792 |
-
|
793 |
-
# Create overall score gauge
|
794 |
-
fig = go.Figure(go.Indicator(
|
795 |
-
mode = "gauge+number+delta",
|
796 |
-
value = overall_match,
|
797 |
-
title = {'text': "Overall Match Score"},
|
798 |
-
delta = {'reference': 75, 'increasing': {'color': "green"}},
|
799 |
-
gauge = {
|
800 |
-
'axis': {'range': [0, 100]},
|
801 |
-
'bar': {'color': "darkblue"},
|
802 |
-
'steps': [
|
803 |
-
{'range': [0, 50], 'color': "lightgray"},
|
804 |
-
{'range': [50, 75], 'color': "gray"},
|
805 |
-
{'range': [75, 100], 'color': "darkblue"}
|
806 |
-
],
|
807 |
-
'threshold': {
|
808 |
-
'line': {'color': "red", 'width': 4},
|
809 |
-
'thickness': 0.75,
|
810 |
-
'value': 75
|
811 |
-
}
|
812 |
-
}
|
813 |
-
))
|
814 |
-
|
815 |
-
st.plotly_chart(fig, use_container_width=True)
|
816 |
-
|
817 |
# Display resume summary
|
818 |
st.subheader("📝 Resume Summary")
|
819 |
-
st.write(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
820 |
|
821 |
-
with
|
822 |
-
# Display skills
|
823 |
-
st.subheader("
|
824 |
|
825 |
# Create two columns
|
826 |
col1, col2 = st.columns(2)
|
827 |
|
828 |
with col1:
|
829 |
-
#
|
830 |
-
st.subheader("🟢 Skills Present")
|
831 |
-
|
832 |
-
# Create a DataFrame for the skills table
|
833 |
-
skills_data = []
|
834 |
-
for skill in analysis_results["found_skills"]:
|
835 |
-
proficiency = analysis_results["skill_proficiencies"].get(skill, "Basic")
|
836 |
-
skills_data.append({
|
837 |
-
"Skill": skill.title(),
|
838 |
-
"Proficiency": proficiency
|
839 |
-
})
|
840 |
-
|
841 |
-
if skills_data:
|
842 |
-
skills_df = pd.DataFrame(skills_data)
|
843 |
-
|
844 |
-
# Add proficiency color coding
|
845 |
-
def color_proficiency(val):
|
846 |
-
if val == "Advanced":
|
847 |
-
return 'background-color: #d4f7d4'
|
848 |
-
elif val == "Intermediate":
|
849 |
-
return 'background-color: #fff2cc'
|
850 |
-
else:
|
851 |
-
return 'background-color: #f2f2f2'
|
852 |
-
|
853 |
-
st.dataframe(skills_df.style.applymap(color_proficiency, subset=['Proficiency']),
|
854 |
-
use_container_width=True)
|
855 |
-
else:
|
856 |
-
st.warning("No direct skill matches found.")
|
857 |
-
|
858 |
-
with col2:
|
859 |
-
# Display missing skills
|
860 |
-
st.subheader("🔴 Skills to Develop")
|
861 |
if missing_skills:
|
862 |
-
|
863 |
-
|
864 |
else:
|
865 |
st.success("Great! The candidate has all the required skills!")
|
866 |
|
867 |
-
# Create a radar chart for skills coverage
|
868 |
-
st.subheader("Skills Coverage")
|
869 |
-
|
870 |
-
# Prepare data for radar chart
|
871 |
-
categories = job_descriptions[job_title]["skills"]
|
872 |
-
values = [1 if skill in analysis_results["found_skills"] else 0 for skill in categories]
|
873 |
-
|
874 |
-
# Create radar chart
|
875 |
-
fig = go.Figure()
|
876 |
-
|
877 |
-
fig.add_trace(go.Scatterpolar(
|
878 |
-
r=values,
|
879 |
-
theta=categories,
|
880 |
-
fill='toself',
|
881 |
-
name='Present Skills'
|
882 |
-
))
|
883 |
-
|
884 |
-
fig.add_trace(go.Scatterpolar(
|
885 |
-
r=[1] * len(categories),
|
886 |
-
theta=categories,
|
887 |
-
fill='toself',
|
888 |
-
name='Required Skills',
|
889 |
-
opacity=0.3
|
890 |
-
))
|
891 |
-
|
892 |
-
fig.update_layout(
|
893 |
-
polar=dict(
|
894 |
-
radialaxis=dict(
|
895 |
-
visible=True,
|
896 |
-
range=[0, 1]
|
897 |
-
)),
|
898 |
-
showlegend=True
|
899 |
-
)
|
900 |
-
|
901 |
-
st.plotly_chart(fig, use_container_width=True)
|
902 |
-
|
903 |
-
with tab3:
|
904 |
-
# Display experience analysis
|
905 |
-
st.subheader("👨💼 Experience Analysis")
|
906 |
-
|
907 |
-
# Display seniority metrics
|
908 |
-
col1, col2 = st.columns(2)
|
909 |
-
|
910 |
-
with col1:
|
911 |
-
# Seniority score gauge
|
912 |
-
fig = go.Figure(go.Indicator(
|
913 |
-
mode="gauge+number",
|
914 |
-
value=analysis_results["seniority_score"],
|
915 |
-
title={'text': "Seniority Score"},
|
916 |
-
gauge={
|
917 |
-
'axis': {'range': [0, 10]},
|
918 |
-
'bar': {'color': "darkblue"},
|
919 |
-
'steps': [
|
920 |
-
{'range': [0, 3], 'color': "lightgray"},
|
921 |
-
{'range': [3, 7], 'color': "gray"},
|
922 |
-
{'range': [7, 10], 'color': "lightblue"}
|
923 |
-
],
|
924 |
-
'threshold': {
|
925 |
-
'line': {'color': "red", 'width': 4},
|
926 |
-
'thickness': 0.75,
|
927 |
-
'value': 7
|
928 |
-
}
|
929 |
-
}
|
930 |
-
))
|
931 |
-
st.plotly_chart(fig, use_container_width=True)
|
932 |
-
|
933 |
with col2:
|
934 |
-
#
|
935 |
-
|
936 |
-
mode="number+delta",
|
937 |
-
value=analysis_results["years_experience"],
|
938 |
-
number={'suffix': " years"},
|
939 |
-
title={"text": "Years of Experience"},
|
940 |
-
delta={'reference': 5, 'relative': False}
|
941 |
-
))
|
942 |
-
st.plotly_chart(fig, use_container_width=True)
|
943 |
-
|
944 |
-
# Display career progression timeline
|
945 |
-
st.subheader("Career Progression Timeline")
|
946 |
-
|
947 |
-
if analysis_results["job_entries"]:
|
948 |
-
# Create timeline data
|
949 |
-
timeline_data = []
|
950 |
|
951 |
-
|
952 |
-
|
953 |
-
|
954 |
-
end_year = re.search(r'\d{4}', job["end_date"]) if job["end_date"] != "Present" else None
|
955 |
-
|
956 |
-
if start_year:
|
957 |
-
start_year = int(start_year.group(0))
|
958 |
-
end_year = int(end_year.group(0)) if end_year else datetime.now().year
|
959 |
-
|
960 |
-
timeline_data.append({
|
961 |
-
"Role": job["title"],
|
962 |
-
"Company": job["company"],
|
963 |
-
"Start": start_year,
|
964 |
-
"End": end_year,
|
965 |
-
"Duration": end_year - start_year
|
966 |
-
})
|
967 |
|
968 |
-
if
|
969 |
-
|
970 |
-
|
971 |
-
|
972 |
-
# Sort by start date (ascending)
|
973 |
-
timeline_df = timeline_df.sort_values(by="Start")
|
974 |
-
|
975 |
-
# Create Gantt chart
|
976 |
-
fig = px.timeline(
|
977 |
-
timeline_df,
|
978 |
-
x_start="Start",
|
979 |
-
x_end="End",
|
980 |
-
y="Company",
|
981 |
-
color="Role",
|
982 |
-
hover_data=["Duration"],
|
983 |
-
labels={"Company": "Employer"}
|
984 |
-
)
|
985 |
-
|
986 |
-
fig.update_layout(
|
987 |
-
xaxis_title="Year",
|
988 |
-
yaxis_title="Employer",
|
989 |
-
title="Career Progression"
|
990 |
-
)
|
991 |
|
992 |
-
st.
|
993 |
else:
|
994 |
-
st.
|
995 |
-
|
996 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
997 |
|
998 |
with tab4:
|
999 |
-
# Display career path
|
1000 |
-
st.subheader("
|
1001 |
|
1002 |
-
#
|
1003 |
-
st.
|
1004 |
|
1005 |
-
|
1006 |
-
|
1007 |
|
1008 |
-
#
|
1009 |
-
|
1010 |
-
|
1011 |
-
|
1012 |
-
|
1013 |
-
|
1014 |
-
|
1015 |
-
|
1016 |
-
|
1017 |
-
|
1018 |
-
|
1019 |
-
|
1020 |
-
|
1021 |
-
|
1022 |
-
|
1023 |
-
|
1024 |
-
#
|
1025 |
-
|
1026 |
-
|
1027 |
-
|
1028 |
-
|
1029 |
-
|
1030 |
-
|
1031 |
-
|
1032 |
-
),
|
1033 |
-
link=dict(
|
1034 |
-
source=[i for i in range(len(career_nodes)-1)],
|
1035 |
-
target=[i+1 for i in range(len(career_nodes)-1)],
|
1036 |
-
value=[1 for _ in range(len(career_nodes)-1)]
|
1037 |
-
)
|
1038 |
-
)])
|
1039 |
-
|
1040 |
-
fig.update_layout(title_text="Potential Career Path", font_size=12)
|
1041 |
-
st.plotly_chart(fig, use_container_width=True)
|
1042 |
|
1043 |
with tab5:
|
1044 |
-
# Display
|
1045 |
-
st.subheader("
|
|
|
|
|
|
|
1046 |
|
1047 |
-
if
|
1048 |
-
|
1049 |
-
for
|
1050 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1051 |
else:
|
1052 |
-
st.
|
1053 |
|
1054 |
-
#
|
1055 |
-
st.
|
1056 |
-
st.markdown("""
|
1057 |
-
Even when no inconsistencies are detected, consider these verification steps:
|
1058 |
|
1059 |
-
|
1060 |
-
|
1061 |
-
|
1062 |
-
|
1063 |
-
|
1064 |
-
|
1065 |
|
1066 |
with tab6:
|
1067 |
# Display career advice
|
1068 |
-
st.subheader("🚀 Career Advice and
|
1069 |
|
1070 |
-
if st.button("Generate
|
1071 |
-
with st.spinner("Generating
|
1072 |
-
advice = generate_career_advice(text, job_title,
|
1073 |
st.markdown(advice)
|
1074 |
-
|
1075 |
except Exception as e:
|
1076 |
st.error(f"An error occurred while processing the resume: {str(e)}")
|
1077 |
-
st.exception(e)
|
1078 |
|
1079 |
# Add footer
|
1080 |
st.markdown("---")
|
1081 |
-
st.markdown("Made with ❤️ using Streamlit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import pdfplumber
|
3 |
import io
|
4 |
import spacy
|
5 |
+
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
import subprocess
|
7 |
import sys
|
8 |
import torch
|
9 |
+
import re
|
10 |
+
import pandas as pd
|
11 |
+
import numpy as np
|
12 |
import plotly.express as px
|
13 |
import plotly.graph_objects as go
|
14 |
+
from datetime import datetime
|
15 |
+
import dateparser
|
16 |
+
from sentence_transformers import SentenceTransformer
|
17 |
+
import nltk
|
18 |
+
from nltk.tokenize import word_tokenize
|
19 |
+
from nltk.corpus import stopwords
|
20 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
21 |
+
import faiss
|
22 |
+
import requests
|
23 |
+
from bs4 import BeautifulSoup
|
24 |
+
import networkx as nx
|
25 |
+
import Levenshtein
|
26 |
+
import json
|
27 |
+
import matplotlib.pyplot as plt
|
28 |
+
from io import BytesIO
|
29 |
+
import base64
|
30 |
+
from sentence_transformers import util
|
31 |
|
32 |
+
# Download NLTK resources
|
33 |
@st.cache_resource
|
34 |
def download_nltk_resources():
|
35 |
+
nltk.download('punkt')
|
36 |
+
nltk.download('stopwords')
|
37 |
+
nltk.download('wordnet')
|
38 |
+
nltk.download('averaged_perceptron_tagger')
|
39 |
|
40 |
download_nltk_resources()
|
41 |
|
42 |
st.set_page_config(
|
43 |
+
page_title="Resume Screener & Skill Extractor",
|
44 |
page_icon="📄",
|
45 |
layout="wide"
|
46 |
)
|
|
|
58 |
# Load the NLP models
|
59 |
@st.cache_resource
|
60 |
def load_models():
|
61 |
+
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
62 |
+
nlp = download_spacy_model()
|
|
|
|
|
|
|
|
|
63 |
|
64 |
+
# Load sentence transformer model for semantic matching
|
65 |
+
sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
|
66 |
+
|
67 |
+
# Load Qwen3-8B model for career advice
|
68 |
try:
|
69 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
70 |
+
qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")
|
71 |
+
qwen_model = AutoModelForCausalLM.from_pretrained(
|
72 |
+
"Qwen/Qwen3-8B",
|
73 |
+
torch_dtype=torch.float16 if device == "cuda" else torch.float32,
|
74 |
+
device_map="auto"
|
75 |
+
)
|
76 |
except Exception as e:
|
77 |
+
st.error(f"Failed to load Qwen3-8B model: {str(e)}")
|
78 |
+
qwen_tokenizer = None
|
79 |
+
qwen_model = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
+
return summarizer, nlp, qwen_tokenizer, qwen_model, sentence_model
|
82 |
|
83 |
# Initialize models
|
84 |
+
summarizer, nlp, qwen_tokenizer, qwen_model, sentence_model = load_models()
|
85 |
|
86 |
# Job descriptions and required skills
|
87 |
job_descriptions = {
|
|
|
89 |
"skills": ["python", "java", "javascript", "sql", "algorithms", "data structures",
|
90 |
"git", "cloud", "web development", "software development", "coding"],
|
91 |
"description": "Looking for software engineers with strong programming skills and experience in software development.",
|
92 |
+
"must_have": ["python", "git", "algorithms"],
|
93 |
+
"nice_to_have": ["cloud", "java", "javascript"],
|
94 |
+
"seniority_levels": {
|
95 |
+
"Junior": "0-2 years of experience, familiar with basic programming concepts",
|
96 |
+
"Mid-level": "3-5 years of experience, proficient in multiple languages, experience with system design",
|
97 |
+
"Senior": "6+ years of experience, expert in software architecture, mentoring, and leading projects"
|
98 |
+
}
|
99 |
},
|
100 |
"Interaction Designer": {
|
101 |
"skills": ["ui", "ux", "user research", "wireframing", "prototyping", "figma",
|
102 |
"sketch", "adobe", "design thinking", "interaction design"],
|
103 |
"description": "Seeking interaction designers with expertise in user experience and interface design.",
|
104 |
+
"must_have": ["ui", "ux", "prototyping"],
|
105 |
+
"nice_to_have": ["figma", "sketch", "user research"],
|
106 |
+
"seniority_levels": {
|
107 |
+
"Junior": "0-2 years of experience, basic design skills, understanding of UX principles",
|
108 |
+
"Mid-level": "3-5 years of experience, strong portfolio, experience with user research",
|
109 |
+
"Senior": "6+ years of experience, leadership in design systems, driving design strategy"
|
110 |
+
}
|
111 |
},
|
112 |
"Data Scientist": {
|
113 |
"skills": ["python", "r", "statistics", "machine learning", "data analysis",
|
114 |
"sql", "tensorflow", "pytorch", "pandas", "numpy"],
|
115 |
"description": "Looking for data scientists with strong analytical and machine learning skills.",
|
116 |
+
"must_have": ["python", "statistics", "machine learning"],
|
117 |
+
"nice_to_have": ["tensorflow", "pytorch", "r"],
|
118 |
+
"seniority_levels": {
|
119 |
+
"Junior": "0-2 years of experience, basic knowledge of statistics and ML algorithms",
|
120 |
+
"Mid-level": "3-5 years of experience, model development, feature engineering",
|
121 |
+
"Senior": "6+ years of experience, advanced ML techniques, research experience"
|
122 |
+
}
|
123 |
+
},
|
124 |
+
"Product Manager": {
|
125 |
+
"skills": ["product strategy", "roadmap planning", "user stories", "agile", "market research",
|
126 |
+
"stakeholder management", "analytics", "user experience", "a/b testing", "prioritization"],
|
127 |
+
"description": "Seeking product managers who can drive product vision, strategy, and execution.",
|
128 |
+
"must_have": ["product strategy", "roadmap planning", "stakeholder management"],
|
129 |
+
"nice_to_have": ["agile", "analytics", "a/b testing"],
|
130 |
+
"seniority_levels": {
|
131 |
+
"Junior": "0-2 years of experience, assisting with feature definition and user stories",
|
132 |
+
"Mid-level": "3-5 years of experience, owning products/features, market research",
|
133 |
+
"Senior": "6+ years of experience, defining product vision, managing teams, strategic planning"
|
134 |
+
}
|
135 |
+
},
|
136 |
+
"DevOps Engineer": {
|
137 |
+
"skills": ["linux", "aws", "docker", "kubernetes", "ci/cd", "terraform",
|
138 |
+
"ansible", "monitoring", "scripting", "automation", "security"],
|
139 |
+
"description": "Looking for DevOps engineers to build and maintain infrastructure and deployment pipelines.",
|
140 |
+
"must_have": ["linux", "docker", "ci/cd"],
|
141 |
+
"nice_to_have": ["kubernetes", "terraform", "aws"],
|
142 |
+
"seniority_levels": {
|
143 |
+
"Junior": "0-2 years of experience, basic system administration, scripting",
|
144 |
+
"Mid-level": "3-5 years of experience, container orchestration, infrastructure as code",
|
145 |
+
"Senior": "6+ years of experience, architecture design, security, team leadership"
|
146 |
+
}
|
147 |
}
|
148 |
}
|
149 |
|
|
|
154 |
text += page.extract_text() or ""
|
155 |
return text
|
156 |
|
157 |
+
def analyze_resume(text, job_title):
|
158 |
+
# Extract relevant skills
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
doc = nlp(text.lower())
|
160 |
found_skills = []
|
161 |
required_skills = job_descriptions[job_title]["skills"]
|
|
|
164 |
if skill in text.lower():
|
165 |
found_skills.append(skill)
|
166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
# Generate summary
|
168 |
chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
|
169 |
summaries = []
|
|
|
171 |
summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
|
172 |
summaries.append(summary)
|
173 |
|
174 |
+
# Extract experience timeline
|
175 |
+
experiences = extract_experience(text)
|
176 |
+
|
177 |
+
# Calculate semantic match score
|
178 |
+
match_score = semantic_matching(text, job_title)
|
179 |
+
|
180 |
+
# Estimate seniority
|
181 |
+
seniority, years_experience, leadership_count, must_have_percentage = estimate_seniority(experiences, found_skills, job_title)
|
182 |
+
|
183 |
+
# Extract skill levels
|
184 |
+
skill_levels = extract_skill_levels(text, found_skills)
|
185 |
+
|
186 |
+
# Check for timeline inconsistencies
|
187 |
+
inconsistencies = check_timeline_inconsistencies(experiences)
|
188 |
+
|
189 |
+
# Verify companies
|
190 |
+
company_verification = verify_companies(experiences)
|
191 |
+
|
192 |
+
# Predict career trajectory
|
193 |
+
career_prediction = predict_career_trajectory(experiences, seniority, job_title)
|
194 |
|
195 |
return {
|
196 |
+
'found_skills': found_skills,
|
197 |
+
'summary': " ".join(summaries),
|
198 |
+
'experiences': experiences,
|
199 |
+
'match_score': match_score,
|
200 |
+
'seniority': seniority,
|
201 |
+
'years_experience': years_experience,
|
202 |
+
'skill_levels': skill_levels,
|
203 |
+
'inconsistencies': inconsistencies,
|
204 |
+
'company_verification': company_verification,
|
205 |
+
'career_prediction': career_prediction
|
206 |
}
|
207 |
|
208 |
def generate_career_advice(resume_text, job_title, found_skills, missing_skills):
|
209 |
+
if qwen_model is None or qwen_tokenizer is None:
|
210 |
+
return "Career advice model not available. Please check the model installation."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
|
212 |
+
# Create a prompt for the model
|
213 |
+
prompt = f"""
|
214 |
+
You are a professional career advisor. Based on the resume and the target job position,
|
215 |
+
provide personalized advice on skills to develop and suggest projects that would help the candidate
|
216 |
+
become a better fit for the position.
|
|
|
|
|
217 |
|
218 |
+
Resume summary: {resume_text[:1000]}...
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
+
Target position: {job_title}
|
|
|
|
|
|
|
|
|
|
|
221 |
|
222 |
+
Job requirements: {job_descriptions[job_title]['description']}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
|
224 |
+
Skills the candidate has: {', '.join(found_skills)}
|
|
|
|
|
|
|
|
|
|
|
225 |
|
226 |
+
Skills the candidate needs to develop: {', '.join(missing_skills)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
|
228 |
+
Provide the following:
|
229 |
+
1. Specific advice on how to develop the missing skills
|
230 |
+
2. 3-5 project ideas that would showcase these skills
|
231 |
+
3. Resources for learning (courses, books, websites)
|
232 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
|
234 |
+
# Generate advice using Qwen3-8B
|
235 |
+
try:
|
236 |
+
inputs = qwen_tokenizer(prompt, return_tensors="pt").to(qwen_model.device)
|
237 |
+
with torch.no_grad():
|
238 |
+
outputs = qwen_model.generate(
|
239 |
+
**inputs,
|
240 |
+
max_new_tokens=1024,
|
241 |
+
temperature=0.7,
|
242 |
+
top_p=0.9,
|
243 |
+
do_sample=True
|
244 |
+
)
|
245 |
+
advice = qwen_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
|
246 |
+
return advice
|
247 |
+
except Exception as e:
|
248 |
+
return f"Failed to generate career advice: {str(e)}"
|
249 |
|
250 |
# Streamlit UI
|
251 |
+
st.title("📄 Resume Screener & Skill Extractor")
|
252 |
|
253 |
# Add description
|
254 |
st.markdown("""
|
255 |
+
This app helps recruiters analyze resumes by:
|
256 |
+
- Extracting relevant skills for specific job positions
|
257 |
+
- Generating a concise summary of the candidate's background
|
258 |
+
- Identifying skill gaps for the selected role
|
259 |
+
- Providing personalized career advice and project recommendations
|
|
|
|
|
|
|
260 |
""")
|
261 |
|
262 |
# Create two columns
|
|
|
272 |
|
273 |
# Show job description
|
274 |
if job_title:
|
275 |
+
st.info(f"**Required Skills:**\n" +
|
276 |
"\n".join([f"- {skill.title()}" for skill in job_descriptions[job_title]["skills"]]))
|
277 |
|
278 |
if uploaded_file and job_title:
|
279 |
try:
|
280 |
# Show spinner while processing
|
281 |
+
with st.spinner("Analyzing resume..."):
|
282 |
# Extract text from PDF
|
283 |
text = extract_text_from_pdf(uploaded_file)
|
284 |
|
285 |
# Analyze resume
|
286 |
+
resume_data = analyze_resume(text, job_title)
|
287 |
|
288 |
# Calculate missing skills
|
289 |
missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
|
290 |
+
if skill not in resume_data['found_skills']]
|
291 |
|
292 |
# Display results in tabs
|
293 |
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
|
294 |
+
"📊 Skills Match",
|
295 |
+
"📝 Resume Summary",
|
296 |
+
"🎯 Skills Gap",
|
297 |
+
"👨💼 Career Path",
|
298 |
+
"🔍 Authentication",
|
299 |
"🚀 Career Advice"
|
300 |
])
|
301 |
|
302 |
with tab1:
|
303 |
+
# First create columns for skill match percentage and semantic match
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
col1, col2 = st.columns(2)
|
305 |
|
306 |
with col1:
|
307 |
+
# Display matched skills
|
308 |
+
st.subheader("🎯 Matched Skills")
|
309 |
+
if resume_data['found_skills']:
|
310 |
+
for skill in resume_data['found_skills']:
|
311 |
+
# Show skill with proficiency level
|
312 |
+
level = resume_data['skill_levels'].get(skill, 'intermediate')
|
313 |
+
level_emoji = "🟢" if level == 'advanced' else "🟡" if level == 'intermediate' else "🟠"
|
314 |
+
st.success(f"{level_emoji} {skill.title()} ({level.title()})")
|
315 |
+
|
316 |
+
# Calculate match percentage
|
317 |
+
match_percentage = len(resume_data['found_skills']) / len(job_descriptions[job_title]["skills"]) * 100
|
318 |
+
st.metric("Skills Match", f"{match_percentage:.1f}%")
|
319 |
+
else:
|
320 |
+
st.warning("No direct skill matches found.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
321 |
|
322 |
with col2:
|
323 |
+
# Display semantic match score
|
324 |
+
st.subheader("💡 Semantic Match")
|
325 |
+
st.metric("Overall Match Score", f"{resume_data['match_score']:.1f}%")
|
326 |
+
|
327 |
+
# Display must-have skills match
|
328 |
+
must_have_skills = job_descriptions[job_title]["must_have"]
|
329 |
+
must_have_count = sum(1 for skill in must_have_skills if skill in resume_data['found_skills'])
|
330 |
+
must_have_percentage = (must_have_count / len(must_have_skills)) * 100
|
331 |
+
|
332 |
+
st.write("Must-have skills:")
|
333 |
+
st.progress(must_have_percentage / 100)
|
334 |
+
st.write(f"{must_have_count} out of {len(must_have_skills)} ({must_have_percentage:.1f}%)")
|
335 |
+
|
336 |
+
# Professional level assessment
|
337 |
+
st.subheader("🧠 Seniority Assessment")
|
338 |
+
st.info(f"**{resume_data['seniority']}** ({resume_data['years_experience']:.1f} years equivalent experience)")
|
339 |
+
st.write(job_descriptions[job_title]["seniority_levels"][resume_data['seniority']])
|
340 |
+
|
341 |
+
with tab2:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
342 |
# Display resume summary
|
343 |
st.subheader("📝 Resume Summary")
|
344 |
+
st.write(resume_data['summary'])
|
345 |
+
|
346 |
+
# Display experience timeline
|
347 |
+
st.subheader("⏳ Experience Timeline")
|
348 |
+
if resume_data['experiences']:
|
349 |
+
# Convert experiences to dataframe for display
|
350 |
+
exp_data = []
|
351 |
+
for exp in resume_data['experiences']:
|
352 |
+
if 'start_date' in exp and 'end_date' in exp:
|
353 |
+
exp_data.append({
|
354 |
+
'Company': exp['company'],
|
355 |
+
'Role': exp['role'],
|
356 |
+
'Start Date': exp['start_date'].strftime('%b %Y') if exp['start_date'] else 'Unknown',
|
357 |
+
'End Date': exp['end_date'].strftime('%b %Y') if exp['end_date'] != datetime.now() else 'Present',
|
358 |
+
'Duration (months)': exp.get('duration_months', 'Unknown')
|
359 |
+
})
|
360 |
+
else:
|
361 |
+
exp_data.append({
|
362 |
+
'Company': exp['company'],
|
363 |
+
'Role': exp['role'],
|
364 |
+
'Duration': exp.get('duration', 'Unknown')
|
365 |
+
})
|
366 |
+
|
367 |
+
if exp_data:
|
368 |
+
exp_df = pd.DataFrame(exp_data)
|
369 |
+
st.dataframe(exp_df)
|
370 |
+
|
371 |
+
# Create a timeline visualization if dates are available
|
372 |
+
timeline_data = [exp for exp in resume_data['experiences'] if 'start_date' in exp and 'end_date' in exp]
|
373 |
+
if timeline_data:
|
374 |
+
# Sort by start date
|
375 |
+
timeline_data = sorted(timeline_data, key=lambda x: x['start_date'])
|
376 |
+
|
377 |
+
# Create figure
|
378 |
+
fig = go.Figure()
|
379 |
+
|
380 |
+
for i, exp in enumerate(timeline_data):
|
381 |
+
fig.add_trace(go.Bar(
|
382 |
+
x=[(exp['end_date'] - exp['start_date']).days / 30], # Duration in months
|
383 |
+
y=[exp['company']],
|
384 |
+
orientation='h',
|
385 |
+
name=exp['role'],
|
386 |
+
hovertext=f"{exp['role']} at {exp['company']}<br>{exp['start_date'].strftime('%b %Y')} - {exp['end_date'].strftime('%b %Y') if exp['end_date'] != datetime.now() else 'Present'}<br>Duration: {exp.get('duration_months', 0)} months",
|
387 |
+
marker=dict(color=px.colors.qualitative.Plotly[i % len(px.colors.qualitative.Plotly)])
|
388 |
+
))
|
389 |
+
|
390 |
+
fig.update_layout(
|
391 |
+
title="Career Timeline",
|
392 |
+
xaxis_title="Duration (months)",
|
393 |
+
yaxis_title="Company",
|
394 |
+
height=400,
|
395 |
+
margin=dict(l=0, r=0, b=0, t=30)
|
396 |
+
)
|
397 |
+
|
398 |
+
st.plotly_chart(fig, use_container_width=True)
|
399 |
+
else:
|
400 |
+
st.warning("No work experience data could be extracted.")
|
401 |
|
402 |
+
with tab3:
|
403 |
+
# Display missing skills
|
404 |
+
st.subheader("📌 Skills to Develop")
|
405 |
|
406 |
# Create two columns
|
407 |
col1, col2 = st.columns(2)
|
408 |
|
409 |
with col1:
|
410 |
+
# Missing skills
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
411 |
if missing_skills:
|
412 |
+
for skill in missing_skills:
|
413 |
+
st.warning(f"➖ {skill.title()}")
|
414 |
else:
|
415 |
st.success("Great! The candidate has all the required skills!")
|
416 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
417 |
with col2:
|
418 |
+
# Skills gap analysis
|
419 |
+
st.subheader("🔍 Gap Analysis")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
420 |
|
421 |
+
# Show must-have skills that are missing
|
422 |
+
missing_must_have = [skill for skill in job_descriptions[job_title]["must_have"]
|
423 |
+
if skill not in resume_data['found_skills']]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
424 |
|
425 |
+
if missing_must_have:
|
426 |
+
st.error("**Critical Skills Missing:**")
|
427 |
+
for skill in missing_must_have:
|
428 |
+
st.write(f"- {skill.title()}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
429 |
|
430 |
+
st.markdown("These are must-have skills for this position.")
|
431 |
else:
|
432 |
+
st.success("Candidate has all the must-have skills for this position!")
|
433 |
+
|
434 |
+
# Show nice-to-have skills gap
|
435 |
+
missing_nice_to_have = [skill for skill in job_descriptions[job_title]["nice_to_have"]
|
436 |
+
if skill not in resume_data['found_skills']]
|
437 |
+
|
438 |
+
if missing_nice_to_have:
|
439 |
+
st.warning("**Nice-to-Have Skills Missing:**")
|
440 |
+
for skill in missing_nice_to_have:
|
441 |
+
st.write(f"- {skill.title()}")
|
442 |
+
else:
|
443 |
+
st.success("Candidate has all the nice-to-have skills!")
|
444 |
|
445 |
with tab4:
|
446 |
+
# Display career path insights
|
447 |
+
st.subheader("👨💼 Career Trajectory")
|
448 |
|
449 |
+
# Show career prediction
|
450 |
+
st.info(resume_data['career_prediction'])
|
451 |
|
452 |
+
# Show experience trends
|
453 |
+
st.subheader("📈 Experience Analysis")
|
454 |
|
455 |
+
# Check for job hopping
|
456 |
+
if len(resume_data['experiences']) >= 3:
|
457 |
+
# Calculate average job duration
|
458 |
+
durations = [exp.get('duration_months', 0) for exp in resume_data['experiences']
|
459 |
+
if 'duration_months' in exp]
|
460 |
+
|
461 |
+
if durations:
|
462 |
+
avg_duration = sum(durations) / len(durations)
|
463 |
+
|
464 |
+
if avg_duration < 12:
|
465 |
+
st.warning(f"🚩 **Frequent Job Changes**: Average job duration is only {avg_duration:.1f} months")
|
466 |
+
elif avg_duration < 24:
|
467 |
+
st.warning(f"⚠️ **Moderate Job Hopping**: Average job duration is {avg_duration:.1f} months")
|
468 |
+
else:
|
469 |
+
st.success(f"✅ **Stable Employment**: Average job duration is {avg_duration:.1f} months")
|
470 |
+
|
471 |
+
# Show inconsistencies if any
|
472 |
+
if resume_data['inconsistencies']:
|
473 |
+
st.subheader("⚠️ Timeline Inconsistencies")
|
474 |
+
for issue in resume_data['inconsistencies']:
|
475 |
+
if issue['type'] == 'overlap':
|
476 |
+
st.warning(issue['description'])
|
477 |
+
elif issue['type'] == 'gap':
|
478 |
+
st.info(issue['description'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
479 |
|
480 |
with tab5:
|
481 |
+
# Display authentication signals
|
482 |
+
st.subheader("🔍 Resume Authentication")
|
483 |
+
|
484 |
+
# Company verification results
|
485 |
+
st.write("**Company Verification Results:**")
|
486 |
|
487 |
+
if resume_data['company_verification']:
|
488 |
+
# Count suspicious companies
|
489 |
+
suspicious_count = sum(1 for v in resume_data['company_verification']
|
490 |
+
if v['status'] == 'suspicious')
|
491 |
+
|
492 |
+
if suspicious_count == 0:
|
493 |
+
st.success("✅ All companies mentioned in the resume passed basic verification")
|
494 |
+
else:
|
495 |
+
st.warning(f"⚠️ {suspicious_count} companies require further verification")
|
496 |
+
|
497 |
+
# Display verification details
|
498 |
+
verification_data = [{
|
499 |
+
'Company': v['company'],
|
500 |
+
'Status': v['status'].title(),
|
501 |
+
'Notes': v['reason']
|
502 |
+
} for v in resume_data['company_verification']]
|
503 |
+
|
504 |
+
st.dataframe(pd.DataFrame(verification_data))
|
505 |
else:
|
506 |
+
st.info("No company information found for verification.")
|
507 |
|
508 |
+
# Timeline consistency check
|
509 |
+
st.write("**Timeline Consistency Check:**")
|
|
|
|
|
510 |
|
511 |
+
if not resume_data['inconsistencies']:
|
512 |
+
st.success("✅ No timeline inconsistencies detected")
|
513 |
+
else:
|
514 |
+
st.warning(f"⚠️ {len(resume_data['inconsistencies'])} timeline inconsistencies found")
|
515 |
+
for issue in resume_data['inconsistencies']:
|
516 |
+
st.write(f"- {issue['description']}")
|
517 |
|
518 |
with tab6:
|
519 |
# Display career advice
|
520 |
+
st.subheader("🚀 Career Advice and Project Recommendations")
|
521 |
|
522 |
+
if st.button("Generate Career Advice"):
|
523 |
+
with st.spinner("Generating personalized career advice..."):
|
524 |
+
advice = generate_career_advice(text, job_title, resume_data['found_skills'], missing_skills)
|
525 |
st.markdown(advice)
|
526 |
+
|
527 |
except Exception as e:
|
528 |
st.error(f"An error occurred while processing the resume: {str(e)}")
|
|
|
529 |
|
530 |
# Add footer
|
531 |
st.markdown("---")
|
532 |
+
st.markdown("Made with ❤️ using Streamlit and Hugging Face")
|
533 |
+
|
534 |
+
# Semantic matching between resume and job description
def semantic_matching(resume_text, job_title):
    """Score how semantically close a resume is to a job's description.

    Embeds both texts with the module-level ``sentence_model`` and returns
    their cosine similarity scaled to a 0-100 percentage.
    """
    target_description = job_descriptions[job_title]["description"]

    # Embed both documents; move tensors to CPU numpy rows for sklearn.
    resume_vec, job_vec = (
        sentence_model.encode(doc, convert_to_tensor=True).cpu().numpy().reshape(1, -1)
        for doc in (resume_text, target_description)
    )

    similarity = cosine_similarity(resume_vec, job_vec)[0][0]
    return similarity * 100  # scale to a percentage
|
549 |
+
|
550 |
+
# Extract experience timeline from resume
def extract_experience(text):
    """Parse work-experience entries out of raw resume text.

    Looks for lines shaped like ``Company | Role | Jan 2020 - Present``.
    Entries whose dates parse successfully carry ``start_date``,
    ``end_date`` and ``duration_months``; when date parsing fails the raw
    ``duration`` string is kept so the entry is not lost.

    Returns:
        list[dict]: one dict per matched experience entry.
    """
    # Pattern to find work experience entries
    # Look for patterns like "Company Name | Role | Jan 2020 - Present"
    exp_pattern = r"(?i)(.*?(?:inc|llc|ltd|company|corp|corporation|group)?)\s*(?:[|•-]\s*)?(.*?)(?:[|•-]\s*)((?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[\w\s,]*\d{4}\s*(?:-|to|–)\s*(?:(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[\w\s,]*\d{4}|present))"

    experiences = []
    for match in re.finditer(exp_pattern, text, re.IGNORECASE):
        company = match.group(1).strip()
        role = match.group(2).strip()
        duration = match.group(3).strip()

        # Parse dates
        try:
            # Split on whichever range separator the entry used.
            date_range = duration.split('-') if '-' in duration else duration.split('to') if 'to' in duration else duration.split('–')
            start_date = dateparser.parse(date_range[0].strip())

            if 'present' in date_range[1].lower():
                end_date = datetime.now()
            else:
                end_date = dateparser.parse(date_range[1].strip())

            if start_date and end_date:
                # Calculate duration in whole months.
                months = (end_date.year - start_date.year) * 12 + (end_date.month - start_date.month)

                experiences.append({
                    'company': company,
                    'role': role,
                    'start_date': start_date,
                    'end_date': end_date,
                    'duration_months': months
                })
        # Was a bare `except:` — that also swallowed SystemExit and
        # KeyboardInterrupt.  Catch Exception so real parse failures still
        # fall through to the dateless record below.
        except Exception:
            # If date parsing fails, still include the experience without dates
            experiences.append({
                'company': company,
                'role': role,
                'duration': duration
            })

    return experiences
|
592 |
+
|
593 |
+
# Estimate seniority based on experience and skills
def estimate_seniority(experiences, found_skills, job_title):
    """Estimate a candidate's seniority for ``job_title``.

    Combines total years of experience, leadership-flavored role titles,
    and coverage of the job's must-have skills.

    Returns:
        tuple: (seniority label, total_years, leadership_count,
        must_have_percentage).
    """
    # Calculate total experience in years
    total_months = sum(exp.get('duration_months', 0) for exp in experiences if 'duration_months' in exp)
    total_years = total_months / 12

    # Count roles whose title carries a leadership keyword (each role
    # counted at most once).
    leadership_keywords = ['lead', 'senior', 'manager', 'head', 'principal', 'architect', 'director']
    leadership_count = 0
    for exp in experiences:
        role = exp.get('role', '').lower()
        if any(keyword in role for keyword in leadership_keywords):
            leadership_count += 1

    # Calculate skill match percentage for must-have skills.
    # Lower-case the found skills once into a set instead of rebuilding the
    # lowered list for every must-have skill (was O(n*m) list scans).
    must_have_skills = job_descriptions[job_title]["must_have"]
    found_lower = {s.lower() for s in found_skills}
    must_have_count = sum(1 for skill in must_have_skills if skill in found_lower)
    must_have_percentage = (must_have_count / len(must_have_skills)) * 100 if must_have_skills else 0

    # Determine base seniority from years of experience.
    if total_years < 3:
        seniority = "Junior"
    elif total_years < 6:
        seniority = "Mid-level"
    else:
        seniority = "Senior"

    # Promote on a strong leadership signal; demote a "Senior" whose
    # must-have skill coverage is weak.
    if leadership_count >= 2 and seniority != "Senior":
        seniority = "Senior" if total_years >= 4 else seniority
    if must_have_percentage < 50 and seniority == "Senior":
        seniority = "Mid-level"

    return seniority, total_years, leadership_count, must_have_percentage
|
630 |
+
|
631 |
+
# Check for timeline inconsistencies
def check_timeline_inconsistencies(experiences):
    """Flag overlapping roles and employment gaps in a dated work history.

    Only entries carrying both ``start_date`` and ``end_date`` are
    examined.  Returns a list of ``{'type', 'description'}`` dicts with
    all overlap findings first, then all gap findings.
    """
    if not experiences:
        return []

    # Chronologically ordered, fully dated entries only.
    dated = sorted(
        (exp for exp in experiences if 'start_date' in exp and 'end_date' in exp),
        key=lambda exp: exp['start_date'],
    )

    def month_delta(later, earlier):
        # Whole-month difference between two datetimes.
        return (later.year - earlier.year) * 12 + (later.month - earlier.month)

    issues = []

    # Pass 1: overlapping full-time roles (1 month of overlap is tolerated
    # as a normal job transition).
    for earlier, later in zip(dated, dated[1:]):
        if earlier['end_date'] > later['start_date']:
            overlap = month_delta(earlier['end_date'], later['start_date'])
            if overlap > 1:
                issues.append({
                    'type': 'overlap',
                    'description': f"Overlapping roles: {earlier['company']} and {later['company']} " +
                                   f"overlap by {overlap} months"
                })

    # Pass 2: employment gaps longer than 3 months.
    for earlier, later in zip(dated, dated[1:]):
        gap = month_delta(later['start_date'], earlier['end_date'])
        if gap > 3:
            issues.append({
                'type': 'gap',
                'description': f"Employment gap of {gap} months between " +
                               f"{earlier['company']} and {later['company']}"
            })

    return issues
|
674 |
+
|
675 |
+
# Verify company existence (simplified version)
def verify_companies(experiences):
    """Run lightweight plausibility checks on each company name.

    Returns one ``{'company', 'status', 'reason'}`` dict per experience
    entry that has a non-empty company name.  Names that are implausibly
    short or match well-known placeholder patterns are flagged
    'suspicious'; everything else passes as 'verified'.
    """
    # Placeholder-style names commonly seen in fabricated resumes.
    fake_patterns = ['abc company', 'xyz corp', 'my company', 'personal project']

    def _classify(name):
        # Names under 3 characters are unlikely to be real companies.
        if len(name) < 3:
            return 'suspicious', 'Company name too short'
        if any(pattern in name.lower() for pattern in fake_patterns):
            return 'suspicious', 'Matches pattern of fake company names'
        # A real implementation would query an external registry/API here;
        # for this demo everything else counts as verified.
        return 'verified', 'Passed basic verification checks'

    results = []
    for exp in experiences:
        name = exp.get('company', '')
        if not name:
            continue
        status, reason = _classify(name)
        results.append({'company': name, 'status': status, 'reason': reason})

    return results
|
712 |
+
|
713 |
+
# Extract skill levels from text
def extract_skill_levels(text, skills):
    """Infer a proficiency level per skill from resume wording.

    For each skill, sentences mentioning it are scanned first for explicit
    "N years ..." figures, then for wording cues ('expert', 'familiar', ...).
    Defaults to 'intermediate' when no signal is found.

    Returns:
        dict: skill -> 'basic' | 'intermediate' | 'advanced'.
    """
    proficiency_indicators = {
        'basic': ['basic', 'familiar', 'beginner', 'fundamentals', 'exposure'],
        'intermediate': ['intermediate', 'proficient', 'experienced', 'competent', 'skilled'],
        'advanced': ['advanced', 'expert', 'mastery', 'specialist', 'lead', 'senior']
    }

    lowered = text.lower()
    skill_levels = {}

    for skill in skills:
        escaped = re.escape(skill)

        # All (lower-cased) sentences that mention this skill.
        sentences = re.findall(r'[^.!?]*%s[^.!?]*[.!?]' % escaped, lowered)

        level = 'intermediate'  # default when nothing more specific is found

        # Signal 1: explicit "N years ... <skill>" statements.
        years_pattern = re.compile(
            r'(\d+)\s*(?:\+)?\s*years?(?:\s+of)?\s+(?:experience|exp)?\s+(?:with|in|using)?\s+%s' % escaped,
            re.IGNORECASE)
        for sentence in sentences:
            hit = years_pattern.search(sentence)
            if hit is None:
                continue
            years = int(hit.group(1))
            level = 'basic' if years < 2 else 'intermediate' if years < 5 else 'advanced'
            break

        # Signal 2: wording cues — only consulted while the level is still
        # at the default (years take precedence).
        if level == 'intermediate':
            for level_name, indicators in proficiency_indicators.items():
                for indicator in indicators:
                    cue = re.compile(r'%s\s+(?:\w+\s+){0,3}%s' % (indicator, escaped), re.IGNORECASE)
                    if any(cue.search(sentence) for sentence in sentences):
                        level = level_name
                        break
                if level != 'intermediate':
                    break

        skill_levels[skill] = level

    return skill_levels
|
757 |
+
|
758 |
+
# Generate career trajectory prediction
def predict_career_trajectory(experiences, seniority, job_title):
    """Suggest the next logical role for a candidate.

    With fewer than two recorded roles the suggestion is driven purely by
    the supplied ``seniority`` label; otherwise the highest title reached
    on a junior→chief ladder determines the next rung.
    """
    if not experiences:
        return "Unable to predict trajectory due to insufficient experience data."

    # Roles in chronological order, lower-cased for keyword scanning.
    roles = [exp.get('role', '').lower() for exp in experiences if 'role' in exp]

    # Too little history for a progression analysis: fall back to seniority.
    if len(roles) < 2:
        if seniority == "Junior":
            next_role = "Mid-level " + job_title
        elif seniority == "Mid-level":
            next_role = "Senior " + job_title
        else:  # Senior
            leadership_titles = {
                "Software Engineer": "Technical Lead or Engineering Manager",
                "Data Scientist": "Lead Data Scientist or Data Science Manager",
                "Interaction Designer": "Design Lead or UX Director",
                "Product Manager": "Senior Product Manager or Director of Product",
                "DevOps Engineer": "DevOps Lead or Infrastructure Architect"
            }
            next_role = leadership_titles.get(job_title, f"Director of {job_title}")
        return f"Based on current seniority level, the next logical role could be: {next_role}"

    # Highest rung already reached on the progression ladder (-1 if none).
    ladder = ['junior', 'senior', 'lead', 'manager', 'director', 'vp', 'head', 'chief']
    reached = max(
        (i for role in roles for i, rung in enumerate(ladder) if rung in role),
        default=-1,
    )

    if reached < len(ladder) - 1:
        # Map the next rung onto a concrete job title.
        rung = ladder[reached + 1]
        if rung == 'senior':
            next_role = f"Senior {job_title}"
        elif rung == 'lead':
            next_role = f"{job_title} Lead"
        elif rung == 'manager':
            next_role = "Engineering Manager" if job_title == "Software Engineer" else f"{job_title} Manager"
        elif rung == 'director':
            next_role = f"Director of {job_title}s"
        elif rung == 'vp':
            next_role = f"VP of {job_title}s"
        elif rung == 'head':
            next_role = f"Head of {job_title}"
        elif rung == 'chief':
            c_suite = {
                "Software Engineer": "CTO (Chief Technology Officer)",
                "Data Scientist": "Chief Data Officer",
                "Product Manager": "Chief Product Officer",
            }
            next_role = c_suite.get(job_title, f"Chief {job_title} Officer")
        else:
            next_role = f"{rung.title()} {job_title}"
    else:
        # Already at the top of the ladder.
        next_role = "Executive Leadership or Strategic Advisory roles"

    return f"Based on career progression, the next logical role could be: {next_role}"
|
fix_dependencies.py
DELETED
@@ -1,48 +0,0 @@
|
|
1 |
-
import subprocess
import sys


def fix_dependencies():
    """
    Fix dependency issues by installing compatible versions of required packages.

    Installs all pins in a single ``pip install`` invocation so pip can
    resolve the whole set jointly — installing one pinned package at a time
    lets each install clobber its predecessors and can leave mutually
    incompatible versions behind, which is the very problem this script
    exists to fix.  Then fetches the spaCy model and NLTK data the app
    needs.  Raises ``subprocess.CalledProcessError`` if any step fails.
    """
    print("Fixing dependencies for Resume Screener application...")

    # Pinned, mutually compatible package versions.
    packages = [
        "streamlit==1.22.0",
        "pdfplumber==0.9.0",
        "spacy>=3.4.0",
        "transformers==4.28.1",
        "torch==1.13.1",
        "huggingface-hub==0.14.1",
        "sentence-transformers==2.2.2",
        "nltk==3.8.1",
        "plotly==5.14.1",
        "pandas==1.5.3",
        "numpy==1.24.3",
        "matplotlib==3.7.1",
        "pydantic==1.10.8",
        "protobuf<4.0.0",
        "tqdm>=4.27",
        "regex>=2022.1.18",
        "scikit-learn==1.0.2",
        "scipy==1.8.1"
    ]

    # One pip call for the whole set: joint resolution and fewer subprocesses.
    print("Installing pinned packages...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", *packages])

    # Download spaCy model
    print("Downloading spaCy model...")
    subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])

    # Download NLTK data
    print("Downloading NLTK data...")
    subprocess.check_call([sys.executable, "-c", "import nltk; nltk.download('punkt')"])

    print("Dependencies fixed successfully!")


if __name__ == "__main__":
    fix_dependencies()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -15,4 +15,10 @@ protobuf<4.0.0
|
|
15 |
tqdm>=4.27
|
16 |
regex>=2022.1.18
|
17 |
scikit-learn==1.0.2
|
18 |
-
scipy==1.8.1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
tqdm>=4.27
|
16 |
regex>=2022.1.18
|
17 |
scikit-learn==1.0.2
|
18 |
+
scipy==1.8.1
|
19 |
+
dateparser==1.1.8
|
20 |
+
python-Levenshtein==0.21.1
|
21 |
+
networkx==2.8.8
|
22 |
+
faiss-cpu==1.7.4
|
23 |
+
beautifulsoup4==4.12.2
|
24 |
+
requests==2.31.0
|