root committed on
Commit
eaa3094
·
1 Parent(s): ea35a5b
Files changed (3) hide show
  1. app.py +651 -904
  2. fix_dependencies.py +0 -48
  3. requirements.txt +7 -1
app.py CHANGED
@@ -2,80 +2,45 @@ import streamlit as st
2
  import pdfplumber
3
  import io
4
  import spacy
5
- import re
6
- import pandas as pd
7
- import matplotlib.pyplot as plt
8
- from transformers import pipeline
9
# Optional dependency: sentence-transformers powers semantic matching.  If the
# package (or its util module) cannot be imported, install a minimal local
# stand-in so the rest of the app keeps working with matching degraded.
try:
    from sentence_transformers import SentenceTransformer

    try:
        from sentence_transformers import util
    except ImportError:
        # The package is present but its util module is missing: provide the
        # single helper this app actually calls.
        class util:
            """Minimal replacement exposing only pytorch_cos_sim."""

            @staticmethod
            def pytorch_cos_sim(a, b):
                """
                Compute cosine similarity between two PyTorch tensors
                """
                import torch
                # Promote plain sequences/arrays to tensors.
                if not isinstance(a, torch.Tensor):
                    a = torch.tensor(a)
                if not isinstance(b, torch.Tensor):
                    b = torch.tensor(b)

                # Treat 1-D inputs as single-row batches.
                if len(a.shape) == 1:
                    a = a.unsqueeze(0)
                if len(b.shape) == 1:
                    b = b.unsqueeze(0)

                # Row-normalize, then cosine similarity reduces to a matmul.
                a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
                b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
                return torch.mm(a_norm, b_norm.transpose(0, 1))
except ImportError:
    # Package missing entirely: disable semantic matching but keep the app up.
    st.error("Failed to import SentenceTransformer. Semantic matching will be disabled.")
    SentenceTransformer = None

    class util:
        @staticmethod
        def pytorch_cos_sim(*args, **kwargs):
            return 0
44
  import subprocess
45
  import sys
46
  import torch
47
- import nltk
48
- from nltk.tokenize import word_tokenize
49
- from datetime import datetime
50
  import plotly.express as px
51
  import plotly.graph_objects as go
52
- import numpy as np
53
- from collections import defaultdict
54
-
55
- # Fix for huggingface_hub import issue
56
- try:
57
- # For newer versions of huggingface_hub
58
- from huggingface_hub import hf_hub_download
59
- except ImportError:
60
- try:
61
- # For older versions of huggingface_hub
62
- from huggingface_hub import cached_download as hf_hub_download
63
- except ImportError:
64
- st.error("Could not import required functions from huggingface_hub. Please check your installation.")
65
- hf_hub_download = None
 
 
 
66
 
67
- # Initialize NLTK
68
@st.cache_resource
def download_nltk_resources():
    """Ensure the NLTK 'punkt' tokenizer data is installed.

    Cached so the lookup/download happens only once per session; the function
    is called purely for its side effect.
    """
    try:
        # Raises LookupError when the tokenizer data is not yet on disk.
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        nltk.download('punkt')
74
 
75
  download_nltk_resources()
76
 
77
# Basic page chrome: browser-tab title/icon and a full-width layout.
st.set_page_config(
    layout="wide",
    page_icon="📄",
    page_title="Comprehensive Resume Screener & Skill Extractor",
)
@@ -93,31 +58,30 @@ def download_spacy_model():
93
  # Load the NLP models
94
# Load the NLP models
@st.cache_resource
def load_models():
    """Load and cache the NLP models used across the app.

    Returns:
        (summarizer, nlp, sentence_model) — any element may be a degraded
        fallback (or None) when its model fails to load; errors are surfaced
        in the Streamlit UI rather than raised.
    """
    def _attempt(label, loader, fallback=None):
        # Run loader(), report a failure in the UI, and fall back gracefully.
        try:
            return loader()
        except Exception as exc:
            st.error(f"Failed to load {label}: {str(exc)}")
            return fallback

    # Crude fallback summarizer: just the first three sentences of the text.
    def _first_sentences(text, **kwargs):
        return [{"summary_text": ". ".join(text.split(". ")[:3]) + "."}]

    summarizer = _attempt(
        "summarization model",
        lambda: pipeline("summarization", model="facebook/bart-large-cnn"),
        fallback=_first_sentences,
    )

    # spaCy pipeline; None when unavailable so callers can check before use.
    nlp = _attempt("spaCy model", download_spacy_model)

    # Sentence embeddings are optional (import may have failed at module load).
    sentence_model = None
    if SentenceTransformer is not None:
        sentence_model = _attempt(
            "sentence transformer",
            lambda: SentenceTransformer('all-MiniLM-L6-v2'),
        )

    return summarizer, nlp, sentence_model
118
 
119
  # Initialize models
120
- summarizer, nlp, sentence_model = load_models()
121
 
122
  # Job descriptions and required skills
123
  job_descriptions = {
@@ -125,38 +89,61 @@ job_descriptions = {
125
  "skills": ["python", "java", "javascript", "sql", "algorithms", "data structures",
126
  "git", "cloud", "web development", "software development", "coding"],
127
  "description": "Looking for software engineers with strong programming skills and experience in software development.",
128
- "semantic_description": """
129
- We're seeking a talented Software Engineer to design, develop, and maintain high-quality software solutions.
130
- The ideal candidate has strong programming skills in languages like Python, Java, or JavaScript, and experience with
131
- SQL databases. You should be proficient in algorithms, data structures, and version control systems like Git.
132
- Experience with cloud platforms and web development frameworks is a plus. You'll be responsible for the full
133
- software development lifecycle, from requirements gathering to deployment and maintenance.
134
- """
135
  },
136
  "Interaction Designer": {
137
  "skills": ["ui", "ux", "user research", "wireframing", "prototyping", "figma",
138
  "sketch", "adobe", "design thinking", "interaction design"],
139
  "description": "Seeking interaction designers with expertise in user experience and interface design.",
140
- "semantic_description": """
141
- We're looking for a creative Interaction Designer to craft intuitive and engaging user experiences.
142
- You should have expertise in UI/UX design principles and methods, with a portfolio demonstrating your
143
- ability to conduct user research, create wireframes, and develop interactive prototypes. Proficiency
144
- with design tools like Figma, Sketch, and Adobe Creative Suite is required. You'll collaborate with
145
- product managers and developers to iterate on designs based on user feedback and business requirements.
146
- """
147
  },
148
  "Data Scientist": {
149
  "skills": ["python", "r", "statistics", "machine learning", "data analysis",
150
  "sql", "tensorflow", "pytorch", "pandas", "numpy"],
151
  "description": "Looking for data scientists with strong analytical and machine learning skills.",
152
- "semantic_description": """
153
- We're seeking a skilled Data Scientist to extract insights from complex datasets and build predictive models.
154
- The ideal candidate has strong programming skills in Python or R, expertise in statistical analysis, and
155
- experience with machine learning algorithms. You should be proficient in SQL for data extraction and tools
156
- like TensorFlow or PyTorch for deep learning. Experience with data manipulation libraries like Pandas and NumPy
157
- is essential. You'll work on projects from exploratory data analysis to model deployment, collaborating with
158
- stakeholders to solve business problems through data-driven approaches.
159
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  }
161
  }
162
 
@@ -167,312 +154,8 @@ def extract_text_from_pdf(pdf_file):
167
  text += page.extract_text() or ""
168
  return text
169
 
170
def extract_work_experience(text):
    """Extract work experience details including company names, job titles, and dates.

    Locates the work-experience section by its header, cuts it off at the
    education section, then treats each blank-line-separated paragraph as one
    job entry.

    Args:
        text: Full plain-text contents of a resume.

    Returns:
        A list of dicts with keys ``company``, ``title``, ``start_date``,
        ``end_date`` and ``description``; empty when no experience section
        header is found.
    """
    # Index 0 marks the start of the experience section; index 1 marks the
    # section (education) that typically follows and ends it.
    work_exp_patterns = [
        r"(?i)WORK EXPERIENCE|PROFESSIONAL EXPERIENCE|EMPLOYMENT HISTORY|EXPERIENCE",
        r"(?i)EDUCATION|ACADEMIC|QUALIFICATIONS"
    ]

    # Find the start of work experience section
    work_exp_start = None
    for pattern in work_exp_patterns[:1]:  # Use only the work experience patterns
        match = re.search(pattern, text)
        if match:
            work_exp_start = match.end()
            break

    if work_exp_start is None:
        return []

    # Find the end of work experience section (start of education or next major section)
    work_exp_end = len(text)
    for pattern in work_exp_patterns[1:]:  # Use only the education pattern
        match = re.search(pattern, text)
        if match and match.start() > work_exp_start:
            work_exp_end = match.start()
            break

    work_exp_text = text[work_exp_start:work_exp_end]

    job_entries = []

    # Dates: "January 2020", "Jan, 2020", "01/2020", or a bare year.
    # BUG FIX: the original wrapped the month names in a *capturing* group, so
    # re.findall() returned only the month name (or "" for numeric dates)
    # instead of the whole date.  The group is now non-capturing and matches
    # are taken via finditer(...).group(0).
    date_pattern = r"(?i)(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[,\s]+\d{4}|\d{1,2}/\d{4}|\d{4}"

    # Pattern for common job title indicators
    job_title_pattern = r"(?i)(Senior|Lead|Principal|Junior|Associate)?\s*(Software Engineer|Developer|Designer|Analyst|Manager|Director|Consultant|Specialist|Coordinator|Administrator)"

    # Paragraphs (separated by blank lines) usually map to one job entry each.
    paragraphs = re.split(r'\n\s*\n', work_exp_text)

    for paragraph in paragraphs:
        # Skip short paragraphs that are likely not job entries
        if len(paragraph.strip()) < 30:
            continue

        # Extract full date strings (see bug-fix note above).
        dates = [m.group(0) for m in re.finditer(date_pattern, paragraph)]
        start_date = dates[0] if dates else "Unknown"
        end_date = dates[-1] if len(dates) > 1 else "Present"

        # Extract job title
        title_match = re.search(job_title_pattern, paragraph)
        job_title = title_match.group(0) if title_match else "Unknown Position"

        # Company name is typically on the first line, near the job title.
        lines = paragraph.split('\n')
        company = lines[0].strip() if lines else "Unknown Company"
        if job_title in company:
            company = company.replace(job_title, "").strip()

        # Strip dates and punctuation out of the company name.
        for date in dates:
            company = company.replace(date, "").strip()
        company = re.sub(r'[,\.\|\-]', ' ', company).strip()

        job_entries.append({
            "company": company,
            "title": job_title,
            "start_date": start_date,
            "end_date": end_date,
            "description": paragraph
        })

    return job_entries
246
-
247
def estimate_skill_proficiency(text, skill):
    """Estimate a proficiency level for one skill from surrounding wording.

    Looks at a +/-100 character window around the skill's first mention and
    returns "Advanced", "Intermediate", or "Basic" — or None when the skill
    never appears in the text.
    """
    # Ordered from strongest to weakest; the first level whose wording
    # appears in the context window wins.
    level_indicators = (
        ("Advanced", ["expert in", "advanced", "extensive experience", "lead",
                      "architected", "designed", "5+ years", "4+ years"]),
        ("Intermediate", ["experience with", "proficient in", "worked with",
                          "2-3 years", "2 years", "3 years"]),
        ("Basic", ["familiar with", "basic knowledge", "understanding of",
                   "exposure to"]),
    )

    lowered = text.lower()
    needle = skill.lower()
    position = lowered.find(needle)

    if position == -1:
        # Skill is never mentioned at all.
        return None

    # Context window around the first mention.
    window_start = max(0, position - 100)
    window_end = min(len(lowered), position + len(needle) + 100)
    window = lowered[window_start:window_end]

    for level, phrases in level_indicators:
        if any(phrase in window for phrase in phrases):
            return level

    # Mentioned but with no qualifying phrase: assume basic familiarity.
    return "Basic"
284
-
285
def calculate_seniority_score(job_entries):
    """Calculate a seniority score from job titles and years of experience.

    Returns:
        (score, total_years): score is clamped to a 1-10 scale and rounded to
        one decimal; total_years sums the parseable date ranges.
    """
    # Title keywords mapped to a seniority rank.
    seniority_levels = {
        "intern": 1,
        "junior": 2,
        "associate": 3,
        "developer": 4,
        "engineer": 4,
        "designer": 4,
        "analyst": 4,
        "senior": 6,
        "lead": 7,
        "manager": 7,
        "principal": 8,
        "director": 9,
        "vp": 10,
        "cto": 10,
        "cio": 10,
        "ceo": 10
    }

    # Sum years of experience across entries, skipping unparseable dates.
    total_years = 0
    for job in job_entries:
        try:
            begin_match = re.search(r'\d{4}', job["start_date"])
            if not begin_match:
                continue
            begin = int(begin_match.group(0))
            if job["end_date"] == "Present":
                finish = datetime.now().year
            else:
                finish_match = re.search(r'\d{4}', job["end_date"])
                finish = int(finish_match.group(0)) if finish_match else datetime.now().year
            span = finish - begin
            if 0 <= span <= 30:  # sanity check against garbled dates
                total_years += span
        except Exception:
            # Skip entries whose dates cannot be parsed.
            continue

    # Highest title-based rank seen across all entries.
    highest_seniority = 0
    for job in job_entries:
        title_lower = job["title"].lower()
        matching = [score for keyword, score in seniority_levels.items()
                    if keyword in title_lower]
        if matching:
            highest_seniority = max(highest_seniority, max(matching))

    # Bracket total years into a 1-4 factor:
    # 0-2 years -> 1, 3-5 -> 2, 6-10 -> 3, 11+ -> 4.
    if total_years >= 11:
        years_factor = 4
    elif total_years >= 6:
        years_factor = 3
    elif total_years >= 3:
        years_factor = 2
    else:
        years_factor = 1

    # Blend title rank and years factor onto a clamped 1-10 scale.
    seniority_score = min(10, max(1, (highest_seniority * 0.6) + (years_factor * 1.0)))

    return round(seniority_score, 1), total_years
349
-
350
def detect_fraud_signals(text, job_entries):
    """Detect potential fraud signals in the resume.

    Flags three heuristics, in this order: significantly overlapping roles at
    different companies, self-reported roles that are hard to verify, and
    employment gaps longer than a year.  Entries are warnings, not proof.
    """
    fraud_signals = []

    # --- 1. Overlapping full-time roles at different companies -------------
    for i in range(len(job_entries) - 1):
        for j in range(i + 1, len(job_entries)):
            first, second = job_entries[i], job_entries[j]
            # Both jobs need usable date information.
            dated = all(entry[key] != "Unknown"
                        for entry in (first, second)
                        for key in ("start_date", "end_date"))
            if not dated:
                continue

            f_start = re.search(r'\d{4}', first["start_date"])
            f_end = None if first["end_date"] == "Present" else re.search(r'\d{4}', first["end_date"])
            s_start = re.search(r'\d{4}', second["start_date"])
            s_end = None if second["end_date"] == "Present" else re.search(r'\d{4}', second["end_date"])

            if not (f_start and s_start):
                continue

            a0 = int(f_start.group(0))
            a1 = int(f_end.group(0)) if f_end else datetime.now().year
            b0 = int(s_start.group(0))
            b1 = int(s_end.group(0)) if s_end else datetime.now().year

            # Overlap at different companies that lasts more than ~6 months.
            overlapping = (a0 <= b0 < a1) or (b0 <= a0 < b1)
            if overlapping and first["company"] != second["company"]:
                overlap_years = min(a1, b1) - max(a0, b0)
                if overlap_years > 0.5:
                    fraud_signals.append(
                        f"Potential timeline inconsistency: Overlapping roles at "
                        f"{first['company']} and {second['company']} for {overlap_years:.1f} years"
                    )

    # --- 2. Self-reported roles that warrant extra verification ------------
    suspicious_phrases = [
        "self-employed",
        "freelance",
        "consultant",
        "entrepreneur",
        "founder",
        "ceo of own company"
    ]
    lowered = text.lower()
    for phrase in suspicious_phrases:
        if phrase in lowered:
            # Not inherently fraudulent, but worth double-checking.
            fraud_signals.append(
                f"Verification recommended: Contains '{phrase}' which may need additional verification"
            )

    # --- 3. Multi-year gaps between consecutive entries --------------------
    for i in range(len(job_entries) - 1):
        previous, following = job_entries[i], job_entries[i + 1]
        if "Unknown" in previous["end_date"] or "Unknown" in following["start_date"]:
            continue
        end_match = re.search(r'\d{4}', previous["end_date"])
        start_match = re.search(r'\d{4}', following["start_date"])
        if end_match and start_match:
            end_year = int(end_match.group(0))
            start_year = int(start_match.group(0))
            # Flag anything longer than a one-year gap.
            if start_year - end_year > 1:
                fraud_signals.append(
                    f"Employment gap: {end_year} to {start_year} ({start_year - end_year} years)"
                )

    return fraud_signals
416
-
417
def predict_career_trajectory(job_entries, current_skills):
    """Predict logical next roles based on career progression.

    Matches the most recent job title against a table of common career
    ladders and returns up to three following rungs.  ``current_skills`` is
    accepted for interface compatibility but not consulted by this heuristic.
    """
    # Common progression ladders keyed by role keyword; lookup order matters.
    career_paths = {
        "software engineer": ["Senior Software Engineer", "Lead Developer", "Software Architect", "Engineering Manager", "CTO"],
        "developer": ["Senior Developer", "Technical Lead", "Software Architect", "Development Manager", "CTO"],
        "designer": ["Senior Designer", "Lead Designer", "Design Manager", "Creative Director", "VP of Design"],
        "data scientist": ["Senior Data Scientist", "Lead Data Scientist", "Data Science Manager", "Director of Analytics", "Chief Data Officer"]
    }

    # Most recent role (entries are expected most-recent-first).
    current_role = job_entries[0]["title"].lower() if job_entries else "unknown"

    # First ladder whose keyword appears in the current title.
    best_match = next((key for key in career_paths if key in current_role), None)
    if best_match is None:
        return ["Career path prediction requires more information"]

    ladder = career_paths[best_match]

    # Locate the current rung.  Only titles that already carry a seniority
    # keyword can advance the index, and only when the same keyword also
    # appears in a ladder entry (checked in a fixed precedence order).
    current_index = 0
    has_seniority_marker = any(
        marker in current_role
        for marker in ["senior", "lead", "manager", "director", "vp", "chief"]
    )
    if has_seniority_marker:
        for position, rung in enumerate(ladder):
            rung_lower = rung.lower()
            if "senior" in current_role and "senior" in rung_lower:
                current_index = position
                break
            elif "lead" in current_role and "lead" in rung_lower:
                current_index = position
                break
            elif "manager" in current_role and "manager" in rung_lower:
                current_index = position
                break
            elif "director" in current_role and "director" in rung_lower:
                current_index = position
                break

    # The next one-to-three rungs above the current one.
    next_roles = [ladder[i]
                  for i in range(current_index + 1, min(current_index + 4, len(ladder)))]

    if not next_roles:
        next_roles = ["You're at a senior level in your career path. Consider lateral moves or industry specialization."]

    return next_roles
467
-
468
- def analyze_resume(text, job_title, sentence_model):
469
- # Extract work experience
470
- job_entries = extract_work_experience(text)
471
-
472
- # Sort job entries by start date (most recent first)
473
- job_entries.sort(key=lambda x: "9999" if x["start_date"] == "Unknown" else x["start_date"], reverse=True)
474
-
475
- # Extract relevant skills with basic keyword matching
476
  doc = nlp(text.lower())
477
  found_skills = []
478
  required_skills = job_descriptions[job_title]["skills"]
@@ -481,22 +164,6 @@ def analyze_resume(text, job_title, sentence_model):
481
  if skill in text.lower():
482
  found_skills.append(skill)
483
 
484
- # Determine skill proficiency levels
485
- skill_proficiencies = {}
486
- for skill in found_skills:
487
- proficiency = estimate_skill_proficiency(text, skill)
488
- if proficiency:
489
- skill_proficiencies[skill] = proficiency
490
-
491
- # Calculate seniority score
492
- seniority_score, years_experience = calculate_seniority_score(job_entries)
493
-
494
- # Detect fraud signals
495
- fraud_signals = detect_fraud_signals(text, job_entries)
496
-
497
- # Predict career trajectory
498
- next_roles = predict_career_trajectory(job_entries, found_skills)
499
-
500
  # Generate summary
501
  chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
502
  summaries = []
@@ -504,190 +171,92 @@ def analyze_resume(text, job_title, sentence_model):
504
  summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
505
  summaries.append(summary)
506
 
507
- # Semantic matching with job description
508
- semantic_score = 0
509
- if sentence_model is not None and SentenceTransformer is not None:
510
- try:
511
- resume_embedding = sentence_model.encode(text[:5000]) # Limit to first 5000 chars to avoid memory issues
512
- job_embedding = sentence_model.encode(job_descriptions[job_title]["semantic_description"])
513
- semantic_score = float(util.pytorch_cos_sim(resume_embedding, job_embedding)[0][0])
514
- except Exception as e:
515
- st.error(f"Error in semantic matching: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
516
 
517
  return {
518
- "found_skills": found_skills,
519
- "skill_proficiencies": skill_proficiencies,
520
- "summary": " ".join(summaries),
521
- "job_entries": job_entries,
522
- "seniority_score": seniority_score,
523
- "years_experience": years_experience,
524
- "fraud_signals": fraud_signals,
525
- "next_roles": next_roles,
526
- "semantic_score": semantic_score
 
527
  }
528
 
529
def _skill_section(skill, job_title):
    """Return one markdown block of development advice for a single missing skill."""
    if skill == "python":
        return f"""#### Python
- **How to develop**: Take online courses focused on Python for {job_title.lower()} applications
- **Project idea**: Build a data analysis tool or web application using Python and popular frameworks
- **Resources**: Coursera's Python for Everybody, Python.org tutorials, Real Python website

"""
    if skill == "java":
        return """#### Java
- **How to develop**: Complete a comprehensive Java course with practical exercises
- **Project idea**: Develop a backend service with Spring Boot
- **Resources**: Oracle's Java tutorials, Udemy courses on Java, "Effective Java" by Joshua Bloch

"""
    if skill == "javascript":
        return """#### JavaScript
- **How to develop**: Practice with modern JavaScript frameworks
- **Project idea**: Create an interactive web application with React or Vue.js
- **Resources**: MDN Web Docs, freeCodeCamp, "Eloquent JavaScript" by Marijn Haverbeke

"""
    if skill == "sql":
        return """#### SQL
- **How to develop**: Practice with database design and complex queries
- **Project idea**: Design a database system for a small business with reports and analytics
- **Resources**: SQLZoo, Mode Analytics SQL tutorial, W3Schools SQL course

"""
    if "algorithms" in skill or "data structures" in skill:
        return """#### Algorithms & Data Structures
- **How to develop**: Solve coding problems regularly on platforms like LeetCode
- **Project idea**: Implement classic algorithms and optimize them for specific use cases
- **Resources**: "Cracking the Coding Interview" book, AlgoExpert, Coursera Algorithms specialization

"""
    if "git" in skill:
        return """#### Git & Version Control
- **How to develop**: Contribute to open source projects to practice Git workflows
- **Project idea**: Set up a personal project with proper branching strategies and CI/CD
- **Resources**: Git documentation, GitHub Learning Lab, Atlassian Git tutorials

"""
    if "cloud" in skill:
        return """#### Cloud Technologies
- **How to develop**: Get hands-on experience with a major cloud provider (AWS, Azure, GCP)
- **Project idea**: Deploy an application to the cloud with proper infrastructure as code
- **Resources**: Cloud provider documentation, A Cloud Guru courses, free tier accounts

"""
    if "ui" in skill or "ux" in skill:
        return """#### UI/UX Design
- **How to develop**: Study design principles and practice creating user interfaces
- **Project idea**: Redesign an existing website or app with focus on user experience
- **Resources**: Nielsen Norman Group articles, Interaction Design Foundation, Figma tutorials

"""
    if "machine learning" in skill:
        return """#### Machine Learning
- **How to develop**: Take courses on ML fundamentals and practice with datasets
- **Project idea**: Build a predictive model to solve a real-world problem
- **Resources**: Andrew Ng's Coursera courses, Kaggle competitions, "Hands-On Machine Learning" book

"""
    if "data analysis" in skill:
        return """#### Data Analysis
- **How to develop**: Practice analyzing datasets and creating visualizations
- **Project idea**: Perform an exploratory data analysis on a public dataset
- **Resources**: DataCamp courses, Kaggle datasets, "Python for Data Analysis" by Wes McKinney

"""
    # Generic fallback for skills without a dedicated template.
    return f"""#### {skill.title()}
- **How to develop**: Research industry best practices and take relevant courses
- **Project idea**: Create a portfolio piece that showcases this skill
- **Resources**: Online courses, industry blogs, and practice projects

"""


def _project_section(job_title):
    """Return the project-idea list for a known job title ('' for other titles)."""
    if job_title == "Software Engineer":
        return """
1. **Full-Stack Web Application**: Build a complete web app with frontend, backend, and database
2. **API Service**: Create a RESTful or GraphQL API with proper authentication and documentation
3. **Mobile Application**: Develop a cross-platform mobile app using React Native or Flutter
4. **Automation Tools**: Build scripts or applications that automate repetitive tasks
5. **Contribution to Open Source**: Find a project aligned with your skills and contribute meaningfully

"""
    if job_title == "Interaction Designer":
        return """
1. **Design System**: Create a comprehensive design system with components and usage guidelines
2. **Website Redesign**: Redesign an existing website with focus on improved UX
3. **Mobile App Prototype**: Design a fully interactive mobile app prototype
4. **User Research Project**: Conduct user research and create a report with insights and recommendations
5. **Design Case Study**: Document your design process for solving a specific problem

"""
    if job_title == "Data Scientist":
        return """
1. **Predictive Model**: Build a machine learning model that solves a real-world problem
2. **Data Visualization Dashboard**: Create an interactive dashboard to visualize complex data
3. **Natural Language Processing**: Develop a text analysis or sentiment analysis project
4. **Time Series Analysis**: Analyze time-based data and build forecasting models
5. **A/B Testing Framework**: Design and implement a framework for testing hypotheses

"""
    return ""


def generate_career_advice(resume_text, job_title, found_skills, missing_skills):
    """
    Generate career advice using a template-based approach instead of Qwen3-8B
    to avoid dependency issues.

    Args:
        resume_text: Raw resume text (kept for interface stability; the
            templates do not currently consult it).
        job_title: Target position; selects the project recommendations.
        found_skills: Skills already present (kept for interface stability).
        missing_skills: Required skills absent from the resume; one advice
            section is emitted per skill.

    Returns:
        A markdown string with per-skill advice, project ideas for the target
        role, and general learning/positioning guidance.
    """
    parts = [f"""## Career Development Plan for {job_title} Position

### Skills to Develop

The following skills would strengthen your resume for this position:

"""]

    # One templated section per missing skill.
    for skill in missing_skills:
        parts.append(_skill_section(skill, job_title))

    # Role-specific project recommendations.
    parts.append(f"""
### Recommended Projects for {job_title}

Based on the target position and the skills needed, here are some project ideas:

""")
    parts.append(_project_section(job_title))

    # General advice for all positions.
    parts.append("""
### Learning Resources

- **Online Platforms**: Coursera, Udemy, Pluralsight, LinkedIn Learning
- **Documentation**: Official language and framework documentation
- **Communities**: Stack Overflow, GitHub, Reddit programming communities
- **Books**: O'Reilly publications specific to your target technologies
- **YouTube Channels**: Traversy Media, Tech With Tim, freeCodeCamp

### Positioning Your Experience

- Highlight transferable skills from your current experience
- Quantify achievements with metrics where possible
- Frame previous work in terms relevant to the target position
- Create a tailored resume that emphasizes relevant projects and responsibilities
""")

    return "".join(parts)
 
 
 
 
 
 
 
 
677
 
678
  # Streamlit UI
679
- st.title("📄 Comprehensive Resume Analyzer")
680
 
681
  # Add description
682
  st.markdown("""
683
- This app helps recruiters and job seekers analyze resumes with advanced features:
684
-
685
- - **Semantic Job Matching**: Uses AI to match resumes to job descriptions beyond keywords
686
- - **Skill Proficiency Detection**: Identifies skill levels from context
687
- - **Career Progression Analysis**: Visualizes job history and seniority
688
- - **Fraud Detection**: Flags potential inconsistencies for verification
689
- - **Career Path Prediction**: Suggests logical next roles based on experience
690
- - **Personalized Development Advice**: Recommends skills, projects, and resources
691
  """)
692
 
693
  # Create two columns
@@ -703,379 +272,557 @@ with col2:
703
 
704
  # Show job description
705
  if job_title:
706
- st.info(f"**Job Description:**\n{job_descriptions[job_title]['description']}\n\n**Required Skills:**\n" +
707
  "\n".join([f"- {skill.title()}" for skill in job_descriptions[job_title]["skills"]]))
708
 
709
  if uploaded_file and job_title:
710
  try:
711
  # Show spinner while processing
712
- with st.spinner("Analyzing resume with advanced AI..."):
713
  # Extract text from PDF
714
  text = extract_text_from_pdf(uploaded_file)
715
 
716
  # Analyze resume
717
- analysis_results = analyze_resume(text, job_title, sentence_model)
718
 
719
  # Calculate missing skills
720
  missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
721
- if skill not in analysis_results["found_skills"]]
722
 
723
  # Display results in tabs
724
  tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
725
- "📊 Match Score",
726
- "🎯 Skills Analysis",
727
- "👨‍💼 Experience",
728
- "📈 Career Path",
729
- "🚩 Verification",
730
  "🚀 Career Advice"
731
  ])
732
 
733
  with tab1:
734
- # Display match scores
735
- st.subheader("📊 Job Match Analysis")
736
-
737
- # Calculate match scores
738
- keyword_match = len(analysis_results["found_skills"]) / len(job_descriptions[job_title]["skills"]) * 100
739
- semantic_match = analysis_results["semantic_score"] * 100
740
-
741
- # Display scores with gauges
742
  col1, col2 = st.columns(2)
743
 
744
  with col1:
745
- # Keyword match gauge
746
- fig = go.Figure(go.Indicator(
747
- mode = "gauge+number",
748
- value = keyword_match,
749
- title = {'text': "Keyword Match"},
750
- gauge = {
751
- 'axis': {'range': [0, 100]},
752
- 'bar': {'color': "darkblue"},
753
- 'steps': [
754
- {'range': [0, 30], 'color': "lightgray"},
755
- {'range': [30, 70], 'color': "gray"},
756
- {'range': [70, 100], 'color': "lightblue"}
757
- ],
758
- 'threshold': {
759
- 'line': {'color': "red", 'width': 4},
760
- 'thickness': 0.75,
761
- 'value': 70
762
- }
763
- }
764
- ))
765
- st.plotly_chart(fig, use_container_width=True)
766
 
767
  with col2:
768
- # Semantic match gauge
769
- fig = go.Figure(go.Indicator(
770
- mode = "gauge+number",
771
- value = semantic_match,
772
- title = {'text': "Semantic Match"},
773
- gauge = {
774
- 'axis': {'range': [0, 100]},
775
- 'bar': {'color': "darkgreen"},
776
- 'steps': [
777
- {'range': [0, 30], 'color': "lightgray"},
778
- {'range': [30, 70], 'color': "gray"},
779
- {'range': [70, 100], 'color': "lightgreen"}
780
- ],
781
- 'threshold': {
782
- 'line': {'color': "red", 'width': 4},
783
- 'thickness': 0.75,
784
- 'value': 70
785
- }
786
- }
787
- ))
788
- st.plotly_chart(fig, use_container_width=True)
789
-
790
- # Calculate overall match score (weighted average)
791
- overall_match = (keyword_match * 0.4) + (semantic_match * 0.6)
792
-
793
- # Create overall score gauge
794
- fig = go.Figure(go.Indicator(
795
- mode = "gauge+number+delta",
796
- value = overall_match,
797
- title = {'text': "Overall Match Score"},
798
- delta = {'reference': 75, 'increasing': {'color': "green"}},
799
- gauge = {
800
- 'axis': {'range': [0, 100]},
801
- 'bar': {'color': "darkblue"},
802
- 'steps': [
803
- {'range': [0, 50], 'color': "lightgray"},
804
- {'range': [50, 75], 'color': "gray"},
805
- {'range': [75, 100], 'color': "darkblue"}
806
- ],
807
- 'threshold': {
808
- 'line': {'color': "red", 'width': 4},
809
- 'thickness': 0.75,
810
- 'value': 75
811
- }
812
- }
813
- ))
814
-
815
- st.plotly_chart(fig, use_container_width=True)
816
-
817
  # Display resume summary
818
  st.subheader("📝 Resume Summary")
819
- st.write(analysis_results["summary"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
820
 
821
- with tab2:
822
- # Display skills analysis
823
- st.subheader("🎯 Skills Analysis")
824
 
825
  # Create two columns
826
  col1, col2 = st.columns(2)
827
 
828
  with col1:
829
- # Display matched skills with proficiency levels
830
- st.subheader("🟢 Skills Present")
831
-
832
- # Create a DataFrame for the skills table
833
- skills_data = []
834
- for skill in analysis_results["found_skills"]:
835
- proficiency = analysis_results["skill_proficiencies"].get(skill, "Basic")
836
- skills_data.append({
837
- "Skill": skill.title(),
838
- "Proficiency": proficiency
839
- })
840
-
841
- if skills_data:
842
- skills_df = pd.DataFrame(skills_data)
843
-
844
- # Add proficiency color coding
845
- def color_proficiency(val):
846
- if val == "Advanced":
847
- return 'background-color: #d4f7d4'
848
- elif val == "Intermediate":
849
- return 'background-color: #fff2cc'
850
- else:
851
- return 'background-color: #f2f2f2'
852
-
853
- st.dataframe(skills_df.style.applymap(color_proficiency, subset=['Proficiency']),
854
- use_container_width=True)
855
- else:
856
- st.warning("No direct skill matches found.")
857
-
858
- with col2:
859
- # Display missing skills
860
- st.subheader("🔴 Skills to Develop")
861
  if missing_skills:
862
- missing_df = pd.DataFrame({"Skill": [skill.title() for skill in missing_skills]})
863
- st.dataframe(missing_df, use_container_width=True)
864
  else:
865
  st.success("Great! The candidate has all the required skills!")
866
 
867
- # Create a radar chart for skills coverage
868
- st.subheader("Skills Coverage")
869
-
870
- # Prepare data for radar chart
871
- categories = job_descriptions[job_title]["skills"]
872
- values = [1 if skill in analysis_results["found_skills"] else 0 for skill in categories]
873
-
874
- # Create radar chart
875
- fig = go.Figure()
876
-
877
- fig.add_trace(go.Scatterpolar(
878
- r=values,
879
- theta=categories,
880
- fill='toself',
881
- name='Present Skills'
882
- ))
883
-
884
- fig.add_trace(go.Scatterpolar(
885
- r=[1] * len(categories),
886
- theta=categories,
887
- fill='toself',
888
- name='Required Skills',
889
- opacity=0.3
890
- ))
891
-
892
- fig.update_layout(
893
- polar=dict(
894
- radialaxis=dict(
895
- visible=True,
896
- range=[0, 1]
897
- )),
898
- showlegend=True
899
- )
900
-
901
- st.plotly_chart(fig, use_container_width=True)
902
-
903
- with tab3:
904
- # Display experience analysis
905
- st.subheader("👨‍💼 Experience Analysis")
906
-
907
- # Display seniority metrics
908
- col1, col2 = st.columns(2)
909
-
910
- with col1:
911
- # Seniority score gauge
912
- fig = go.Figure(go.Indicator(
913
- mode="gauge+number",
914
- value=analysis_results["seniority_score"],
915
- title={'text': "Seniority Score"},
916
- gauge={
917
- 'axis': {'range': [0, 10]},
918
- 'bar': {'color': "darkblue"},
919
- 'steps': [
920
- {'range': [0, 3], 'color': "lightgray"},
921
- {'range': [3, 7], 'color': "gray"},
922
- {'range': [7, 10], 'color': "lightblue"}
923
- ],
924
- 'threshold': {
925
- 'line': {'color': "red", 'width': 4},
926
- 'thickness': 0.75,
927
- 'value': 7
928
- }
929
- }
930
- ))
931
- st.plotly_chart(fig, use_container_width=True)
932
-
933
  with col2:
934
- # Years of experience
935
- fig = go.Figure(go.Indicator(
936
- mode="number+delta",
937
- value=analysis_results["years_experience"],
938
- number={'suffix': " years"},
939
- title={"text": "Years of Experience"},
940
- delta={'reference': 5, 'relative': False}
941
- ))
942
- st.plotly_chart(fig, use_container_width=True)
943
-
944
- # Display career progression timeline
945
- st.subheader("Career Progression Timeline")
946
-
947
- if analysis_results["job_entries"]:
948
- # Create timeline data
949
- timeline_data = []
950
 
951
- for job in analysis_results["job_entries"]:
952
- # Extract years for visualization
953
- start_year = re.search(r'\d{4}', job["start_date"])
954
- end_year = re.search(r'\d{4}', job["end_date"]) if job["end_date"] != "Present" else None
955
-
956
- if start_year:
957
- start_year = int(start_year.group(0))
958
- end_year = int(end_year.group(0)) if end_year else datetime.now().year
959
-
960
- timeline_data.append({
961
- "Role": job["title"],
962
- "Company": job["company"],
963
- "Start": start_year,
964
- "End": end_year,
965
- "Duration": end_year - start_year
966
- })
967
 
968
- if timeline_data:
969
- # Create DataFrame for timeline
970
- timeline_df = pd.DataFrame(timeline_data)
971
-
972
- # Sort by start date (ascending)
973
- timeline_df = timeline_df.sort_values(by="Start")
974
-
975
- # Create Gantt chart
976
- fig = px.timeline(
977
- timeline_df,
978
- x_start="Start",
979
- x_end="End",
980
- y="Company",
981
- color="Role",
982
- hover_data=["Duration"],
983
- labels={"Company": "Employer"}
984
- )
985
-
986
- fig.update_layout(
987
- xaxis_title="Year",
988
- yaxis_title="Employer",
989
- title="Career Progression"
990
- )
991
 
992
- st.plotly_chart(fig, use_container_width=True)
993
  else:
994
- st.warning("Couldn't extract timeline data from the resume.")
995
- else:
996
- st.warning("No work experience entries found in the resume.")
 
 
 
 
 
 
 
 
 
997
 
998
  with tab4:
999
- # Display career path analysis
1000
- st.subheader("📈 Career Path Analysis")
1001
 
1002
- # Display next role suggestions
1003
- st.subheader("Suggested Next Roles")
1004
 
1005
- for i, role in enumerate(analysis_results["next_roles"]):
1006
- st.info(f"**Option {i+1}:** {role}")
1007
 
1008
- # Add simple career progression visualization
1009
- st.subheader("Career Progression Path")
1010
-
1011
- # Extract current role from latest job entry
1012
- current_role = analysis_results["job_entries"][0]["title"] if analysis_results["job_entries"] else "Current Position"
1013
-
1014
- # Create nodes for career path
1015
- career_nodes = [current_role] + analysis_results["next_roles"]
1016
-
1017
- # Create a simple digraph visualization
1018
- career_df = pd.DataFrame({
1019
- "From": [career_nodes[i] for i in range(len(career_nodes)-1)],
1020
- "To": [career_nodes[i+1] for i in range(len(career_nodes)-1)],
1021
- "Value": [10 for _ in range(len(career_nodes)-1)]
1022
- })
1023
-
1024
- # Create a Sankey diagram
1025
- fig = go.Figure(data=[go.Sankey(
1026
- node=dict(
1027
- pad=15,
1028
- thickness=20,
1029
- line=dict(color="black", width=0.5),
1030
- label=career_nodes,
1031
- color="blue"
1032
- ),
1033
- link=dict(
1034
- source=[i for i in range(len(career_nodes)-1)],
1035
- target=[i+1 for i in range(len(career_nodes)-1)],
1036
- value=[1 for _ in range(len(career_nodes)-1)]
1037
- )
1038
- )])
1039
-
1040
- fig.update_layout(title_text="Potential Career Path", font_size=12)
1041
- st.plotly_chart(fig, use_container_width=True)
1042
 
1043
  with tab5:
1044
- # Display fraud detection analysis
1045
- st.subheader("🚩 Verification Points")
 
 
 
1046
 
1047
- if analysis_results["fraud_signals"]:
1048
- st.warning("The following points may require verification:")
1049
- for signal in analysis_results["fraud_signals"]:
1050
- st.markdown(f"- {signal}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1051
  else:
1052
- st.success("No significant inconsistencies detected in the resume.")
1053
 
1054
- # Add common verification tips
1055
- st.subheader("Recommended Verification Steps")
1056
- st.markdown("""
1057
- Even when no inconsistencies are detected, consider these verification steps:
1058
 
1059
- 1. **Reference Checks**: Contact previous employers to confirm employment dates and responsibilities
1060
- 2. **Skills Assessment**: Use technical interviews or tests to verify claimed skills
1061
- 3. **Education Verification**: Confirm degrees and certifications with educational institutions
1062
- 4. **Portfolio Review**: Examine work samples or project contributions
1063
- 5. **Online Presence**: Check LinkedIn, GitHub, or other professional profiles for consistency
1064
- """)
1065
 
1066
  with tab6:
1067
  # Display career advice
1068
- st.subheader("🚀 Career Advice and Development Plan")
1069
 
1070
- if st.button("Generate Personalized Career Advice"):
1071
- with st.spinner("Generating detailed career advice and development plan..."):
1072
- advice = generate_career_advice(text, job_title, analysis_results["found_skills"], missing_skills)
1073
  st.markdown(advice)
1074
-
1075
  except Exception as e:
1076
  st.error(f"An error occurred while processing the resume: {str(e)}")
1077
- st.exception(e)
1078
 
1079
  # Add footer
1080
  st.markdown("---")
1081
- st.markdown("Made with ❤️ using Streamlit, Hugging Face, and Advanced AI")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import pdfplumber
3
  import io
4
  import spacy
5
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import subprocess
7
  import sys
8
  import torch
9
+ import re
10
+ import pandas as pd
11
+ import numpy as np
12
  import plotly.express as px
13
  import plotly.graph_objects as go
14
+ from datetime import datetime
15
+ import dateparser
16
+ from sentence_transformers import SentenceTransformer
17
+ import nltk
18
+ from nltk.tokenize import word_tokenize
19
+ from nltk.corpus import stopwords
20
+ from sklearn.metrics.pairwise import cosine_similarity
21
+ import faiss
22
+ import requests
23
+ from bs4 import BeautifulSoup
24
+ import networkx as nx
25
+ import Levenshtein
26
+ import json
27
+ import matplotlib.pyplot as plt
28
+ from io import BytesIO
29
+ import base64
30
+ from sentence_transformers import util
31
 
32
+ # Download NLTK resources
33
  @st.cache_resource
34
  def download_nltk_resources():
35
+ nltk.download('punkt')
36
+ nltk.download('stopwords')
37
+ nltk.download('wordnet')
38
+ nltk.download('averaged_perceptron_tagger')
39
 
40
  download_nltk_resources()
41
 
42
  st.set_page_config(
43
+ page_title="Resume Screener & Skill Extractor",
44
  page_icon="📄",
45
  layout="wide"
46
  )
 
58
  # Load the NLP models
59
  @st.cache_resource
60
  def load_models():
61
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
62
+ nlp = download_spacy_model()
 
 
 
 
63
 
64
+ # Load sentence transformer model for semantic matching
65
+ sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
66
+
67
+ # Load Qwen3-8B model for career advice
68
  try:
69
+ device = "cuda" if torch.cuda.is_available() else "cpu"
70
+ qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")
71
+ qwen_model = AutoModelForCausalLM.from_pretrained(
72
+ "Qwen/Qwen3-8B",
73
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32,
74
+ device_map="auto"
75
+ )
76
  except Exception as e:
77
+ st.error(f"Failed to load Qwen3-8B model: {str(e)}")
78
+ qwen_tokenizer = None
79
+ qwen_model = None
 
 
 
 
 
 
 
80
 
81
+ return summarizer, nlp, qwen_tokenizer, qwen_model, sentence_model
82
 
83
  # Initialize models
84
+ summarizer, nlp, qwen_tokenizer, qwen_model, sentence_model = load_models()
85
 
86
  # Job descriptions and required skills
87
  job_descriptions = {
 
89
  "skills": ["python", "java", "javascript", "sql", "algorithms", "data structures",
90
  "git", "cloud", "web development", "software development", "coding"],
91
  "description": "Looking for software engineers with strong programming skills and experience in software development.",
92
+ "must_have": ["python", "git", "algorithms"],
93
+ "nice_to_have": ["cloud", "java", "javascript"],
94
+ "seniority_levels": {
95
+ "Junior": "0-2 years of experience, familiar with basic programming concepts",
96
+ "Mid-level": "3-5 years of experience, proficient in multiple languages, experience with system design",
97
+ "Senior": "6+ years of experience, expert in software architecture, mentoring, and leading projects"
98
+ }
99
  },
100
  "Interaction Designer": {
101
  "skills": ["ui", "ux", "user research", "wireframing", "prototyping", "figma",
102
  "sketch", "adobe", "design thinking", "interaction design"],
103
  "description": "Seeking interaction designers with expertise in user experience and interface design.",
104
+ "must_have": ["ui", "ux", "prototyping"],
105
+ "nice_to_have": ["figma", "sketch", "user research"],
106
+ "seniority_levels": {
107
+ "Junior": "0-2 years of experience, basic design skills, understanding of UX principles",
108
+ "Mid-level": "3-5 years of experience, strong portfolio, experience with user research",
109
+ "Senior": "6+ years of experience, leadership in design systems, driving design strategy"
110
+ }
111
  },
112
  "Data Scientist": {
113
  "skills": ["python", "r", "statistics", "machine learning", "data analysis",
114
  "sql", "tensorflow", "pytorch", "pandas", "numpy"],
115
  "description": "Looking for data scientists with strong analytical and machine learning skills.",
116
+ "must_have": ["python", "statistics", "machine learning"],
117
+ "nice_to_have": ["tensorflow", "pytorch", "r"],
118
+ "seniority_levels": {
119
+ "Junior": "0-2 years of experience, basic knowledge of statistics and ML algorithms",
120
+ "Mid-level": "3-5 years of experience, model development, feature engineering",
121
+ "Senior": "6+ years of experience, advanced ML techniques, research experience"
122
+ }
123
+ },
124
+ "Product Manager": {
125
+ "skills": ["product strategy", "roadmap planning", "user stories", "agile", "market research",
126
+ "stakeholder management", "analytics", "user experience", "a/b testing", "prioritization"],
127
+ "description": "Seeking product managers who can drive product vision, strategy, and execution.",
128
+ "must_have": ["product strategy", "roadmap planning", "stakeholder management"],
129
+ "nice_to_have": ["agile", "analytics", "a/b testing"],
130
+ "seniority_levels": {
131
+ "Junior": "0-2 years of experience, assisting with feature definition and user stories",
132
+ "Mid-level": "3-5 years of experience, owning products/features, market research",
133
+ "Senior": "6+ years of experience, defining product vision, managing teams, strategic planning"
134
+ }
135
+ },
136
+ "DevOps Engineer": {
137
+ "skills": ["linux", "aws", "docker", "kubernetes", "ci/cd", "terraform",
138
+ "ansible", "monitoring", "scripting", "automation", "security"],
139
+ "description": "Looking for DevOps engineers to build and maintain infrastructure and deployment pipelines.",
140
+ "must_have": ["linux", "docker", "ci/cd"],
141
+ "nice_to_have": ["kubernetes", "terraform", "aws"],
142
+ "seniority_levels": {
143
+ "Junior": "0-2 years of experience, basic system administration, scripting",
144
+ "Mid-level": "3-5 years of experience, container orchestration, infrastructure as code",
145
+ "Senior": "6+ years of experience, architecture design, security, team leadership"
146
+ }
147
  }
148
  }
149
 
 
154
  text += page.extract_text() or ""
155
  return text
156
 
157
+ def analyze_resume(text, job_title):
158
+ # Extract relevant skills
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  doc = nlp(text.lower())
160
  found_skills = []
161
  required_skills = job_descriptions[job_title]["skills"]
 
164
  if skill in text.lower():
165
  found_skills.append(skill)
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  # Generate summary
168
  chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
169
  summaries = []
 
171
  summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
172
  summaries.append(summary)
173
 
174
+ # Extract experience timeline
175
+ experiences = extract_experience(text)
176
+
177
+ # Calculate semantic match score
178
+ match_score = semantic_matching(text, job_title)
179
+
180
+ # Estimate seniority
181
+ seniority, years_experience, leadership_count, must_have_percentage = estimate_seniority(experiences, found_skills, job_title)
182
+
183
+ # Extract skill levels
184
+ skill_levels = extract_skill_levels(text, found_skills)
185
+
186
+ # Check for timeline inconsistencies
187
+ inconsistencies = check_timeline_inconsistencies(experiences)
188
+
189
+ # Verify companies
190
+ company_verification = verify_companies(experiences)
191
+
192
+ # Predict career trajectory
193
+ career_prediction = predict_career_trajectory(experiences, seniority, job_title)
194
 
195
  return {
196
+ 'found_skills': found_skills,
197
+ 'summary': " ".join(summaries),
198
+ 'experiences': experiences,
199
+ 'match_score': match_score,
200
+ 'seniority': seniority,
201
+ 'years_experience': years_experience,
202
+ 'skill_levels': skill_levels,
203
+ 'inconsistencies': inconsistencies,
204
+ 'company_verification': company_verification,
205
+ 'career_prediction': career_prediction
206
  }
207
 
208
  def generate_career_advice(resume_text, job_title, found_skills, missing_skills):
209
+ if qwen_model is None or qwen_tokenizer is None:
210
+ return "Career advice model not available. Please check the model installation."
 
 
 
 
 
 
 
 
 
 
211
 
212
+ # Create a prompt for the model
213
+ prompt = f"""
214
+ You are a professional career advisor. Based on the resume and the target job position,
215
+ provide personalized advice on skills to develop and suggest projects that would help the candidate
216
+ become a better fit for the position.
 
 
217
 
218
+ Resume summary: {resume_text[:1000]}...
 
 
 
 
 
219
 
220
+ Target position: {job_title}
 
 
 
 
 
221
 
222
+ Job requirements: {job_descriptions[job_title]['description']}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
+ Skills the candidate has: {', '.join(found_skills)}
 
 
 
 
 
225
 
226
+ Skills the candidate needs to develop: {', '.join(missing_skills)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
 
228
+ Provide the following:
229
+ 1. Specific advice on how to develop the missing skills
230
+ 2. 3-5 project ideas that would showcase these skills
231
+ 3. Resources for learning (courses, books, websites)
232
  """
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
+ # Generate advice using Qwen3-8B
235
+ try:
236
+ inputs = qwen_tokenizer(prompt, return_tensors="pt").to(qwen_model.device)
237
+ with torch.no_grad():
238
+ outputs = qwen_model.generate(
239
+ **inputs,
240
+ max_new_tokens=1024,
241
+ temperature=0.7,
242
+ top_p=0.9,
243
+ do_sample=True
244
+ )
245
+ advice = qwen_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
246
+ return advice
247
+ except Exception as e:
248
+ return f"Failed to generate career advice: {str(e)}"
249
 
250
  # Streamlit UI
251
+ st.title("📄 Resume Screener & Skill Extractor")
252
 
253
  # Add description
254
  st.markdown("""
255
+ This app helps recruiters analyze resumes by:
256
+ - Extracting relevant skills for specific job positions
257
+ - Generating a concise summary of the candidate's background
258
+ - Identifying skill gaps for the selected role
259
+ - Providing personalized career advice and project recommendations
 
 
 
260
  """)
261
 
262
  # Create two columns
 
272
 
273
  # Show job description
274
  if job_title:
275
+ st.info(f"**Required Skills:**\n" +
276
  "\n".join([f"- {skill.title()}" for skill in job_descriptions[job_title]["skills"]]))
277
 
278
  if uploaded_file and job_title:
279
  try:
280
  # Show spinner while processing
281
+ with st.spinner("Analyzing resume..."):
282
  # Extract text from PDF
283
  text = extract_text_from_pdf(uploaded_file)
284
 
285
  # Analyze resume
286
+ resume_data = analyze_resume(text, job_title)
287
 
288
  # Calculate missing skills
289
  missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
290
+ if skill not in resume_data['found_skills']]
291
 
292
  # Display results in tabs
293
  tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
294
+ "📊 Skills Match",
295
+ "📝 Resume Summary",
296
+ "🎯 Skills Gap",
297
+ "👨‍💼 Career Path",
298
+ "🔍 Authentication",
299
  "🚀 Career Advice"
300
  ])
301
 
302
  with tab1:
303
+ # First create columns for skill match percentage and semantic match
 
 
 
 
 
 
 
304
  col1, col2 = st.columns(2)
305
 
306
  with col1:
307
+ # Display matched skills
308
+ st.subheader("🎯 Matched Skills")
309
+ if resume_data['found_skills']:
310
+ for skill in resume_data['found_skills']:
311
+ # Show skill with proficiency level
312
+ level = resume_data['skill_levels'].get(skill, 'intermediate')
313
+ level_emoji = "🟢" if level == 'advanced' else "🟡" if level == 'intermediate' else "🟠"
314
+ st.success(f"{level_emoji} {skill.title()} ({level.title()})")
315
+
316
+ # Calculate match percentage
317
+ match_percentage = len(resume_data['found_skills']) / len(job_descriptions[job_title]["skills"]) * 100
318
+ st.metric("Skills Match", f"{match_percentage:.1f}%")
319
+ else:
320
+ st.warning("No direct skill matches found.")
 
 
 
 
 
 
 
321
 
322
  with col2:
323
+ # Display semantic match score
324
+ st.subheader("💡 Semantic Match")
325
+ st.metric("Overall Match Score", f"{resume_data['match_score']:.1f}%")
326
+
327
+ # Display must-have skills match
328
+ must_have_skills = job_descriptions[job_title]["must_have"]
329
+ must_have_count = sum(1 for skill in must_have_skills if skill in resume_data['found_skills'])
330
+ must_have_percentage = (must_have_count / len(must_have_skills)) * 100
331
+
332
+ st.write("Must-have skills:")
333
+ st.progress(must_have_percentage / 100)
334
+ st.write(f"{must_have_count} out of {len(must_have_skills)} ({must_have_percentage:.1f}%)")
335
+
336
+ # Professional level assessment
337
+ st.subheader("🧠 Seniority Assessment")
338
+ st.info(f"**{resume_data['seniority']}** ({resume_data['years_experience']:.1f} years equivalent experience)")
339
+ st.write(job_descriptions[job_title]["seniority_levels"][resume_data['seniority']])
340
+
341
+ with tab2:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  # Display resume summary
343
  st.subheader("📝 Resume Summary")
344
+ st.write(resume_data['summary'])
345
+
346
+ # Display experience timeline
347
+ st.subheader("⏳ Experience Timeline")
348
+ if resume_data['experiences']:
349
+ # Convert experiences to dataframe for display
350
+ exp_data = []
351
+ for exp in resume_data['experiences']:
352
+ if 'start_date' in exp and 'end_date' in exp:
353
+ exp_data.append({
354
+ 'Company': exp['company'],
355
+ 'Role': exp['role'],
356
+ 'Start Date': exp['start_date'].strftime('%b %Y') if exp['start_date'] else 'Unknown',
357
+ 'End Date': exp['end_date'].strftime('%b %Y') if exp['end_date'] != datetime.now() else 'Present',
358
+ 'Duration (months)': exp.get('duration_months', 'Unknown')
359
+ })
360
+ else:
361
+ exp_data.append({
362
+ 'Company': exp['company'],
363
+ 'Role': exp['role'],
364
+ 'Duration': exp.get('duration', 'Unknown')
365
+ })
366
+
367
+ if exp_data:
368
+ exp_df = pd.DataFrame(exp_data)
369
+ st.dataframe(exp_df)
370
+
371
+ # Create a timeline visualization if dates are available
372
+ timeline_data = [exp for exp in resume_data['experiences'] if 'start_date' in exp and 'end_date' in exp]
373
+ if timeline_data:
374
+ # Sort by start date
375
+ timeline_data = sorted(timeline_data, key=lambda x: x['start_date'])
376
+
377
+ # Create figure
378
+ fig = go.Figure()
379
+
380
+ for i, exp in enumerate(timeline_data):
381
+ fig.add_trace(go.Bar(
382
+ x=[(exp['end_date'] - exp['start_date']).days / 30], # Duration in months
383
+ y=[exp['company']],
384
+ orientation='h',
385
+ name=exp['role'],
386
+ hovertext=f"{exp['role']} at {exp['company']}<br>{exp['start_date'].strftime('%b %Y')} - {exp['end_date'].strftime('%b %Y') if exp['end_date'] != datetime.now() else 'Present'}<br>Duration: {exp.get('duration_months', 0)} months",
387
+ marker=dict(color=px.colors.qualitative.Plotly[i % len(px.colors.qualitative.Plotly)])
388
+ ))
389
+
390
+ fig.update_layout(
391
+ title="Career Timeline",
392
+ xaxis_title="Duration (months)",
393
+ yaxis_title="Company",
394
+ height=400,
395
+ margin=dict(l=0, r=0, b=0, t=30)
396
+ )
397
+
398
+ st.plotly_chart(fig, use_container_width=True)
399
+ else:
400
+ st.warning("No work experience data could be extracted.")
401
 
402
+ with tab3:
403
+ # Display missing skills
404
+ st.subheader("📌 Skills to Develop")
405
 
406
  # Create two columns
407
  col1, col2 = st.columns(2)
408
 
409
  with col1:
410
+ # Missing skills
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
411
  if missing_skills:
412
+ for skill in missing_skills:
413
+ st.warning(f"➖ {skill.title()}")
414
  else:
415
  st.success("Great! The candidate has all the required skills!")
416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  with col2:
418
+ # Skills gap analysis
419
+ st.subheader("🔍 Gap Analysis")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
 
421
+ # Show must-have skills that are missing
422
+ missing_must_have = [skill for skill in job_descriptions[job_title]["must_have"]
423
+ if skill not in resume_data['found_skills']]
 
 
 
 
 
 
 
 
 
 
 
 
 
424
 
425
+ if missing_must_have:
426
+ st.error("**Critical Skills Missing:**")
427
+ for skill in missing_must_have:
428
+ st.write(f"- {skill.title()}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
 
430
+ st.markdown("These are must-have skills for this position.")
431
  else:
432
+ st.success("Candidate has all the must-have skills for this position!")
433
+
434
+ # Show nice-to-have skills gap
435
+ missing_nice_to_have = [skill for skill in job_descriptions[job_title]["nice_to_have"]
436
+ if skill not in resume_data['found_skills']]
437
+
438
+ if missing_nice_to_have:
439
+ st.warning("**Nice-to-Have Skills Missing:**")
440
+ for skill in missing_nice_to_have:
441
+ st.write(f"- {skill.title()}")
442
+ else:
443
+ st.success("Candidate has all the nice-to-have skills!")
444
 
445
  with tab4:
446
+ # Display career path insights
447
+ st.subheader("👨‍💼 Career Trajectory")
448
 
449
+ # Show career prediction
450
+ st.info(resume_data['career_prediction'])
451
 
452
+ # Show experience trends
453
+ st.subheader("📈 Experience Analysis")
454
 
455
+ # Check for job hopping
456
+ if len(resume_data['experiences']) >= 3:
457
+ # Calculate average job duration
458
+ durations = [exp.get('duration_months', 0) for exp in resume_data['experiences']
459
+ if 'duration_months' in exp]
460
+
461
+ if durations:
462
+ avg_duration = sum(durations) / len(durations)
463
+
464
+ if avg_duration < 12:
465
+ st.warning(f"🚩 **Frequent Job Changes**: Average job duration is only {avg_duration:.1f} months")
466
+ elif avg_duration < 24:
467
+ st.warning(f"⚠️ **Moderate Job Hopping**: Average job duration is {avg_duration:.1f} months")
468
+ else:
469
+ st.success(f"✅ **Stable Employment**: Average job duration is {avg_duration:.1f} months")
470
+
471
+ # Show inconsistencies if any
472
+ if resume_data['inconsistencies']:
473
+ st.subheader("⚠️ Timeline Inconsistencies")
474
+ for issue in resume_data['inconsistencies']:
475
+ if issue['type'] == 'overlap':
476
+ st.warning(issue['description'])
477
+ elif issue['type'] == 'gap':
478
+ st.info(issue['description'])
 
 
 
 
 
 
 
 
 
 
479
 
480
  with tab5:
481
+ # Display authentication signals
482
+ st.subheader("🔍 Resume Authentication")
483
+
484
+ # Company verification results
485
+ st.write("**Company Verification Results:**")
486
 
487
+ if resume_data['company_verification']:
488
+ # Count suspicious companies
489
+ suspicious_count = sum(1 for v in resume_data['company_verification']
490
+ if v['status'] == 'suspicious')
491
+
492
+ if suspicious_count == 0:
493
+ st.success("✅ All companies mentioned in the resume passed basic verification")
494
+ else:
495
+ st.warning(f"⚠️ {suspicious_count} companies require further verification")
496
+
497
+ # Display verification details
498
+ verification_data = [{
499
+ 'Company': v['company'],
500
+ 'Status': v['status'].title(),
501
+ 'Notes': v['reason']
502
+ } for v in resume_data['company_verification']]
503
+
504
+ st.dataframe(pd.DataFrame(verification_data))
505
  else:
506
+ st.info("No company information found for verification.")
507
 
508
+ # Timeline consistency check
509
+ st.write("**Timeline Consistency Check:**")
 
 
510
 
511
+ if not resume_data['inconsistencies']:
512
+ st.success("✅ No timeline inconsistencies detected")
513
+ else:
514
+ st.warning(f"⚠️ {len(resume_data['inconsistencies'])} timeline inconsistencies found")
515
+ for issue in resume_data['inconsistencies']:
516
+ st.write(f"- {issue['description']}")
517
 
518
  with tab6:
519
  # Display career advice
520
+ st.subheader("🚀 Career Advice and Project Recommendations")
521
 
522
+ if st.button("Generate Career Advice"):
523
+ with st.spinner("Generating personalized career advice..."):
524
+ advice = generate_career_advice(text, job_title, resume_data['found_skills'], missing_skills)
525
  st.markdown(advice)
526
+
527
  except Exception as e:
528
  st.error(f"An error occurred while processing the resume: {str(e)}")
 
529
 
530
  # Add footer
531
  st.markdown("---")
532
+ st.markdown("Made with ❤️ using Streamlit and Hugging Face")
533
+
534
+ # Semantic matching between resume and job description
535
def semantic_matching(resume_text, job_title):
    """Score how closely a resume matches a job description.

    Embeds both documents with the shared sentence-transformer model and
    returns their cosine similarity scaled to a 0-100 percentage.
    """
    description = job_descriptions[job_title]["description"]

    # Embed both texts with the module-level sentence-transformer model.
    resume_vec = sentence_model.encode(resume_text, convert_to_tensor=True)
    job_vec = sentence_model.encode(description, convert_to_tensor=True)

    # Cosine similarity between the two single-row matrices.
    similarity = cosine_similarity(
        resume_vec.cpu().numpy().reshape(1, -1),
        job_vec.cpu().numpy().reshape(1, -1),
    )[0][0]

    # Express the match as a percentage for display.
    return similarity * 100
549
+
550
+ # Extract experience timeline from resume
551
def extract_experience(text):
    """Extract work-experience entries (company, role, dates) from resume text.

    Scans for lines shaped like "Company | Role | Jan 2020 - Present" and
    returns a list of dicts. When both dates parse successfully each entry
    carries 'start_date', 'end_date' and 'duration_months'; when parsing
    fails the raw 'duration' string is kept so the entry is not lost.
    """
    # Company / role / date-range pattern, e.g. "Acme Inc | Engineer | Jan 2020 - Present"
    exp_pattern = r"(?i)(.*?(?:inc|llc|ltd|company|corp|corporation|group)?)\s*(?:[|•-]\s*)?(.*?)(?:[|•-]\s*)((?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[\w\s,]*\d{4}\s*(?:-|to|–)\s*(?:(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[\w\s,]*\d{4}|present))"

    experiences = []
    for match in re.finditer(exp_pattern, text, re.IGNORECASE):
        company = match.group(1).strip()
        role = match.group(2).strip()
        duration = match.group(3).strip()

        try:
            # Split on whichever range separator is present ('-', 'to', or en dash).
            if '-' in duration:
                date_range = duration.split('-')
            elif 'to' in duration:
                date_range = duration.split('to')
            else:
                date_range = duration.split('–')

            # Guard against a one-element split (previously an unhandled
            # IndexError that was swallowed by a bare except).
            if len(date_range) < 2:
                raise ValueError(f"unrecognised date range: {duration!r}")

            start_date = dateparser.parse(date_range[0].strip())

            if 'present' in date_range[1].lower():
                end_date = datetime.now()
            else:
                end_date = dateparser.parse(date_range[1].strip())

            if start_date and end_date:
                # Whole months between the two dates.
                months = (end_date.year - start_date.year) * 12 + (end_date.month - start_date.month)

                experiences.append({
                    'company': company,
                    'role': role,
                    'start_date': start_date,
                    'end_date': end_date,
                    'duration_months': months
                })
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate. On any parse failure, keep the entry without dates.
            experiences.append({
                'company': company,
                'role': role,
                'duration': duration
            })

    return experiences
592
+
593
+ # Estimate seniority based on experience and skills
594
def estimate_seniority(experiences, found_skills, job_title):
    """Classify a candidate as Junior / Mid-level / Senior.

    Combines total years of experience, leadership keywords in past role
    titles, and coverage of the job's must-have skills. Returns a tuple of
    (seniority, total_years, leadership_count, must_have_percentage).
    """
    # Total tenure across all dated roles, expressed in years.
    total_months = sum(exp.get('duration_months', 0) for exp in experiences if 'duration_months' in exp)
    total_years = total_months / 12

    # Count roles whose title suggests leadership responsibility.
    leadership_keywords = ['lead', 'senior', 'manager', 'head', 'principal', 'architect', 'director']
    leadership_count = sum(
        1 for exp in experiences
        if any(keyword in exp.get('role', '').lower() for keyword in leadership_keywords)
    )

    # Fraction of the role's must-have skills the candidate already shows.
    must_have_skills = job_descriptions[job_title]["must_have"]
    found_lower = [s.lower() for s in found_skills]
    must_have_count = sum(1 for skill in must_have_skills if skill in found_lower)
    must_have_percentage = (must_have_count / len(must_have_skills)) * 100 if must_have_skills else 0

    # Base level from years of experience alone.
    if total_years < 3:
        seniority = "Junior"
    elif total_years < 6:
        seniority = "Mid-level"
    else:
        seniority = "Senior"

    # Promote for repeated leadership titles; demote when core skills are thin.
    if leadership_count >= 2 and seniority != "Senior":
        seniority = "Senior" if total_years >= 4 else seniority
    if must_have_percentage < 50 and seniority == "Senior":
        seniority = "Mid-level"

    return seniority, total_years, leadership_count, must_have_percentage
630
+
631
+ # Check for timeline inconsistencies
632
def check_timeline_inconsistencies(experiences):
    """Flag overlapping roles and employment gaps in a dated work history.

    Only entries carrying both 'start_date' and 'end_date' are considered.
    Returns a list of {'type': 'overlap'|'gap', 'description': str} dicts,
    with all overlaps listed before all gaps (the order callers display).
    """
    if not experiences:
        return []

    def _months_between(earlier, later):
        # Signed whole-month difference between two datetimes.
        return (later.year - earlier.year) * 12 + (later.month - earlier.month)

    dated = sorted(
        (exp for exp in experiences if 'start_date' in exp and 'end_date' in exp),
        key=lambda exp: exp['start_date']
    )

    overlaps = []
    gaps = []

    # Single pass over consecutive pairs (previously two identical loops);
    # two lists keep overlaps ahead of gaps to preserve the output order.
    for current, following in zip(dated, dated[1:]):
        # Overlapping full-time roles.
        if current['end_date'] > following['start_date']:
            overlap_months = _months_between(following['start_date'], current['end_date'])
            if overlap_months > 1:  # allow a 1-month transition overlap
                overlaps.append({
                    'type': 'overlap',
                    'description': f"Overlapping roles: {current['company']} and {following['company']} " +
                                   f"overlap by {overlap_months} months"
                })

        # Gaps in employment longer than 3 months.
        gap_months = _months_between(current['end_date'], following['start_date'])
        if gap_months > 3:
            gaps.append({
                'type': 'gap',
                'description': f"Employment gap of {gap_months} months between " +
                               f"{current['company']} and {following['company']}"
            })

    return overlaps + gaps
674
+
675
+ # Verify company existence (simplified version)
676
def verify_companies(experiences):
    """Run lightweight plausibility checks on company names.

    Returns one dict per named company with 'company', 'status'
    ('verified' or 'suspicious') and a human-readable 'reason'. This is a
    heuristic stand-in for a real company-registry lookup.
    """
    # Names matching these are treated as placeholder/fake companies.
    fake_patterns = ['abc company', 'xyz corp', 'my company', 'personal project']

    results = []
    for exp in experiences:
        name = exp.get('company', '')
        if not name:
            continue

        if len(name) < 3:
            # Very short names are unlikely to be real employers.
            verdict = ('suspicious', 'Company name too short')
        elif any(pattern in name.lower() for pattern in fake_patterns):
            verdict = ('suspicious', 'Matches pattern of fake company names')
        else:
            # A production version would query an external registry here;
            # for this demo everything else passes the basic checks.
            verdict = ('verified', 'Passed basic verification checks')

        status, reason = verdict
        results.append({'company': name, 'status': status, 'reason': reason})

    return results
712
+
713
+ # Extract skill levels from text
714
def extract_skill_levels(text, skills):
    """Estimate a proficiency level ('basic'/'intermediate'/'advanced') per skill.

    Examines sentences mentioning each skill: an explicit "N years" of
    experience takes precedence; otherwise wording such as "expert in" or
    "familiar with" near the skill adjusts the default of 'intermediate'.
    """
    # Wording that signals each proficiency tier (checked in this order).
    proficiency_indicators = {
        'basic': ['basic', 'familiar', 'beginner', 'fundamentals', 'exposure'],
        'intermediate': ['intermediate', 'proficient', 'experienced', 'competent', 'skilled'],
        'advanced': ['advanced', 'expert', 'mastery', 'specialist', 'lead', 'senior']
    }

    lowered = text.lower()
    skill_levels = {}

    for skill in skills:
        escaped = re.escape(skill)
        # All sentences that mention this skill.
        sentences = re.findall(r'[^.!?]*%s[^.!?]*[.!?]' % escaped, lowered)

        level = 'intermediate'  # default when nothing more specific is found

        # "N years ... <skill>" is the strongest signal; first match wins.
        years_pattern = re.compile(
            r'(\d+)\s*(?:\+)?\s*years?(?:\s+of)?\s+(?:experience|exp)?\s+(?:with|in|using)?\s+%s' % escaped,
            re.IGNORECASE)
        for sentence in sentences:
            found = years_pattern.search(sentence)
            if found:
                years = int(found.group(1))
                if years < 2:
                    level = 'basic'
                elif years < 5:
                    level = 'intermediate'
                else:
                    level = 'advanced'
                break

        # Fall back to proficiency wording only if years didn't decide it.
        if level == 'intermediate':
            for level_name, indicators in proficiency_indicators.items():
                for indicator in indicators:
                    pattern = re.compile(r'%s\s+(?:\w+\s+){0,3}%s' % (indicator, escaped), re.IGNORECASE)
                    if any(pattern.search(sentence) for sentence in sentences):
                        level = level_name
                        break
                if level != 'intermediate':
                    break

        skill_levels[skill] = level

    return skill_levels
757
+
758
+ # Generate career trajectory prediction
759
def predict_career_trajectory(experiences, seniority, job_title):
    """Suggest the next logical role for a candidate.

    With fewer than two past roles the suggestion is based on the assessed
    seniority alone; otherwise the highest title reached on the
    junior → ... → chief ladder is located and the next rung is proposed.
    """
    if not experiences:
        return "Unable to predict trajectory due to insufficient experience data."

    # Past role titles, lowercased for keyword matching.
    roles = [exp.get('role', '').lower() for exp in experiences if 'role' in exp]

    # With a thin history, base the suggestion purely on assessed seniority.
    if len(roles) < 2:
        if seniority == "Junior":
            next_role = "Mid-level " + job_title
        elif seniority == "Mid-level":
            next_role = "Senior " + job_title
        else:  # Senior — suggest a leadership transition
            leadership_titles = {
                "Software Engineer": "Technical Lead or Engineering Manager",
                "Data Scientist": "Lead Data Scientist or Data Science Manager",
                "Interaction Designer": "Design Lead or UX Director",
                "Product Manager": "Senior Product Manager or Director of Product",
                "DevOps Engineer": "DevOps Lead or Infrastructure Architect"
            }
            next_role = leadership_titles.get(job_title, f"Director of {job_title}")

        return f"Based on current seniority level, the next logical role could be: {next_role}"

    # Find the highest rung on the title ladder reached so far.
    progression_indicators = ['junior', 'senior', 'lead', 'manager', 'director', 'vp', 'head', 'chief']
    current_level = max(
        (i for role in roles
         for i, indicator in enumerate(progression_indicators) if indicator in role),
        default=-1
    )

    # Already at the top of the ladder.
    if current_level >= len(progression_indicators) - 1:
        next_role = "Executive Leadership or Strategic Advisory roles"
        return f"Based on career progression, the next logical role could be: {next_role}"

    next_level = progression_indicators[current_level + 1]

    # Map the generic rung to a concrete title for this discipline.
    if next_level == 'senior' and 'senior' not in roles[-1]:
        next_role = f"Senior {job_title}"
    elif next_level == 'lead':
        next_role = f"{job_title} Lead"
    elif next_level == 'manager':
        next_role = "Engineering Manager" if job_title == "Software Engineer" else f"{job_title} Manager"
    elif next_level == 'director':
        next_role = f"Director of {job_title}s"
    elif next_level == 'vp':
        next_role = f"VP of {job_title}s"
    elif next_level == 'head':
        next_role = f"Head of {job_title}"
    elif next_level == 'chief':
        chief_titles = {
            "Software Engineer": "CTO (Chief Technology Officer)",
            "Data Scientist": "Chief Data Officer",
            "Product Manager": "Chief Product Officer",
        }
        next_role = chief_titles.get(job_title, f"Chief {job_title} Officer")
    else:
        next_role = f"{next_level.title()} {job_title}"

    return f"Based on career progression, the next logical role could be: {next_role}"
fix_dependencies.py DELETED
@@ -1,48 +0,0 @@
1
- import subprocess
2
- import sys
3
-
4
- def fix_dependencies():
5
- """
6
- Fix dependency issues by installing compatible versions of required packages
7
- """
8
- print("Fixing dependencies for Resume Screener application...")
9
-
10
- # List of compatible package versions
11
- packages = [
12
- "streamlit==1.22.0",
13
- "pdfplumber==0.9.0",
14
- "spacy>=3.4.0",
15
- "transformers==4.28.1",
16
- "torch==1.13.1",
17
- "huggingface-hub==0.14.1",
18
- "sentence-transformers==2.2.2",
19
- "nltk==3.8.1",
20
- "plotly==5.14.1",
21
- "pandas==1.5.3",
22
- "numpy==1.24.3",
23
- "matplotlib==3.7.1",
24
- "pydantic==1.10.8",
25
- "protobuf<4.0.0",
26
- "tqdm>=4.27",
27
- "regex>=2022.1.18",
28
- "scikit-learn==1.0.2",
29
- "scipy==1.8.1"
30
- ]
31
-
32
- # Install each package
33
- for package in packages:
34
- print(f"Installing {package}...")
35
- subprocess.check_call([sys.executable, "-m", "pip", "install", package])
36
-
37
- # Download spaCy model
38
- print("Downloading spaCy model...")
39
- subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
40
-
41
- # Download NLTK data
42
- print("Downloading NLTK data...")
43
- subprocess.check_call([sys.executable, "-c", "import nltk; nltk.download('punkt')"])
44
-
45
- print("Dependencies fixed successfully!")
46
-
47
- if __name__ == "__main__":
48
- fix_dependencies()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -15,4 +15,10 @@ protobuf<4.0.0
15
  tqdm>=4.27
16
  regex>=2022.1.18
17
  scikit-learn==1.0.2
18
- scipy==1.8.1
 
 
 
 
 
 
 
15
  tqdm>=4.27
16
  regex>=2022.1.18
17
  scikit-learn==1.0.2
18
+ scipy==1.8.1
19
+ dateparser==1.1.8
20
+ python-Levenshtein==0.21.1
21
+ networkx==2.8.8
22
+ faiss-cpu==1.7.4
23
+ beautifulsoup4==4.12.2
24
+ requests==2.31.0