import streamlit as st
import pdfplumber
import re
import pandas as pd
import matplotlib.pyplot as plt
import torch
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
import numpy as np


st.set_page_config(
    page_title="Resume Screener & Skill Extractor",
    page_icon="📄",
    layout="wide"
)

st.title("📄 Resume Screener & Skill Extractor")
startup_message = st.empty()
startup_message.info("Loading dependencies and models... This may take a minute on first run.")

# Optional dependencies: the app degrades gracefully when a package is missing.
try:
    import spacy
    spacy_available = True
except ImportError:
    spacy_available = False
    st.warning("spaCy is not available. Some features will be limited.")

try:
    from transformers import pipeline
    transformers_available = True
except ImportError:
    transformers_available = False
    st.warning("Transformers is not available. Summary generation will be limited.")

try:
    import nltk
    from nltk.tokenize import word_tokenize
    nltk_available = True

    # Download the punkt tokenizer data on first run if it is not already present.
    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        nltk.download('punkt')
except ImportError:
    nltk_available = False
    st.warning("NLTK is not available. Some text processing features will be limited.")

try:
    from sentence_transformers import SentenceTransformer
    try:
        from sentence_transformers import util as st_util
        sentence_transformers_available = True
    except ImportError:
        # Fallback: provide a minimal cosine-similarity helper with the same
        # interface as sentence_transformers.util.pytorch_cos_sim.
        class CustomSTUtil:
            @staticmethod
            def pytorch_cos_sim(a, b):
                if not isinstance(a, torch.Tensor):
                    a = torch.tensor(a)
                if not isinstance(b, torch.Tensor):
                    b = torch.tensor(b)

                if len(a.shape) == 1:
                    a = a.unsqueeze(0)
                if len(b.shape) == 1:
                    b = b.unsqueeze(0)

                # Normalize rows, then the matrix product gives pairwise cosine similarities.
                a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
                b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
                return torch.mm(a_norm, b_norm.transpose(0, 1))

        st_util = CustomSTUtil()
        sentence_transformers_available = True
except ImportError:
    sentence_transformers_available = False
    st.warning("Sentence Transformers is not available. Semantic matching will be disabled.")


@st.cache_resource
def load_models():
    """Load and cache the optional NLP models used by the app."""
    models = {}

    # spaCy pipeline (downloads en_core_web_sm on first use if missing).
    if spacy_available:
        try:
            models['nlp'] = spacy.load("en_core_web_sm")
        except OSError:
            try:
                import subprocess
                import sys
                subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
                models['nlp'] = spacy.load("en_core_web_sm")
            except Exception as e:
                st.warning(f"Could not load spaCy model: {e}")
                models['nlp'] = None
    else:
        models['nlp'] = None

    # Summarization pipeline, with a naive first-sentences fallback.
    if transformers_available:
        try:
            models['summarizer'] = pipeline("summarization", model="facebook/bart-large-cnn")
        except Exception as e:
            st.warning(f"Could not load summarizer model: {e}")
            models['summarizer'] = lambda text, **kwargs: [{"summary_text": ". ".join(text.split(". ")[:5]) + "."}]
    else:
        models['summarizer'] = lambda text, **kwargs: [{"summary_text": ". ".join(text.split(". ")[:5]) + "."}]

    # Sentence-embedding model for semantic matching.
    if sentence_transformers_available:
        try:
            models['sentence_model'] = SentenceTransformer('paraphrase-MiniLM-L6-v2')
        except Exception as e:
            st.warning(f"Could not load sentence transformer model: {e}")
            models['sentence_model'] = None
    else:
        models['sentence_model'] = None

    return models


# Job profiles: required skills, a short description used for semantic matching,
# must-have vs. nice-to-have skills, and seniority level definitions.
job_descriptions = {
    "Software Engineer": {
        "skills": ["python", "java", "javascript", "sql", "algorithms", "data structures",
                   "git", "cloud", "web development", "software development", "coding"],
        "description": "Looking for software engineers with strong programming skills and experience in software development.",
        "must_have": ["python", "git", "algorithms"],
        "nice_to_have": ["cloud", "java", "javascript"],
        "seniority_levels": {
            "Junior": "0-2 years of experience, familiar with basic programming concepts",
            "Mid-level": "3-5 years of experience, proficient in multiple languages, experience with system design",
            "Senior": "6+ years of experience, expert in software architecture, mentoring, and leading projects"
        }
    },
    "Interaction Designer": {
        "skills": ["ui", "ux", "user research", "wireframing", "prototyping", "figma",
                   "sketch", "adobe", "design thinking", "interaction design"],
        "description": "Seeking interaction designers with expertise in user experience and interface design.",
        "must_have": ["ui", "ux", "prototyping"],
        "nice_to_have": ["figma", "sketch", "user research"],
        "seniority_levels": {
            "Junior": "0-2 years of experience, basic design skills, understanding of UX principles",
            "Mid-level": "3-5 years of experience, strong portfolio, experience with user research",
            "Senior": "6+ years of experience, leadership in design systems, driving design strategy"
        }
    },
    "Data Scientist": {
        "skills": ["python", "r", "statistics", "machine learning", "data analysis",
                   "sql", "tensorflow", "pytorch", "pandas", "numpy"],
        "description": "Looking for data scientists with strong analytical and machine learning skills.",
        "must_have": ["python", "statistics", "machine learning"],
        "nice_to_have": ["tensorflow", "pytorch", "r"],
        "seniority_levels": {
            "Junior": "0-2 years of experience, basic knowledge of statistics and ML algorithms",
            "Mid-level": "3-5 years of experience, model development, feature engineering",
            "Senior": "6+ years of experience, advanced ML techniques, research experience"
        }
    }
}
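

# Illustrative sketch (not part of the original configuration): a new role can be
# supported by adding another entry with the same keys. The "Product Manager" values
# below are hypothetical placeholders, shown only to document the expected structure.
#
# job_descriptions["Product Manager"] = {
#     "skills": ["roadmapping", "user research", "sql", "agile", "stakeholder management"],
#     "description": "Looking for product managers who can drive product strategy and execution.",
#     "must_have": ["roadmapping", "stakeholder management"],
#     "nice_to_have": ["sql", "agile"],
#     "seniority_levels": {
#         "Junior": "0-2 years of experience",
#         "Mid-level": "3-5 years of experience",
#         "Senior": "6+ years of experience"
#     }
# }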


def extract_text_from_pdf(pdf_file):
    """Extract text from PDF file."""
    text = ""
    try:
        with pdfplumber.open(pdf_file) as pdf:
            for page in pdf.pages:
                text += page.extract_text() or ""
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
    return text


def extract_skills(text, job_title, nlp=None):
    """Extract skills from resume text by checking for the job's required skills."""
    found_skills = []
    required_skills = job_descriptions[job_title]["skills"]

    # Simple case-insensitive substring matching against the resume text.
    for skill in required_skills:
        if skill.lower() in text.lower():
            found_skills.append(skill)

    return found_skills
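

# A minimal sketch only, not used by the app: the substring check above will also
# count very short skills such as "r" or "ui" whenever those letters appear inside
# other words. If stricter matching is wanted, a word-boundary variant like this
# hypothetical helper could be swapped in; it assumes skills are plain words or phrases.
def _extract_skills_word_boundary(text, skills):
    """Return the subset of `skills` that appear as whole words or phrases in `text`."""
    lowered = text.lower()
    return [
        skill for skill in skills
        if re.search(r"\b" + re.escape(skill.lower()) + r"\b", lowered)
    ]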


def extract_experience(text):
    """Extract work experience entries from resume text."""
    experiences = []

    # Two text fields separated by a dash, pipe, bullet, "at", or "from",
    # followed by a four-digit year range that may end in "present"/"current".
    experience_pattern = r"(?i)(\w+[\w\s&,.']+)\s*(?:[-|•]|\bat\b)\s*([A-Za-z][\w\s&,.']+)\s*(?:[-|•]|\bfrom\b)\s*(\d{4}(?:\s*[-–]\s*(?:\d{4}|present|current)))"

    matches = re.finditer(experience_pattern, text)
    for match in matches:
        company = match.group(1).strip()
        role = match.group(2).strip()
        duration = match.group(3).strip()

        # Parse the year range into dates and an approximate duration in months.
        try:
            date_parts = re.split(r'[-–]', duration)
            start_year = int(date_parts[0].strip())

            if len(date_parts) > 1 and 'present' not in date_parts[1].lower() and 'current' not in date_parts[1].lower():
                end_year = int(date_parts[1].strip())
                end_date = datetime(end_year, 12, 31)
            else:
                end_year = datetime.now().year
                end_date = datetime.now()

            start_date = datetime(start_year, 1, 1)
            duration_months = (end_date.year - start_date.year) * 12 + (end_date.month - start_date.month)

            experiences.append({
                'company': company,
                'role': role,
                'start_date': start_date,
                'end_date': end_date,
                'duration_months': duration_months
            })
        except (ValueError, IndexError):
            # Fall back to storing the raw duration string if the dates cannot be parsed.
            experiences.append({
                'company': company,
                'role': role,
                'duration': duration
            })

    return experiences
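

# Illustrative examples only (assumed resume formatting, not taken from any real resume):
# the pattern in extract_experience targets single-line entries such as
#     "Acme Corp - Software Engineer - 2019 - 2021"
#     "Software Engineer at Acme Corp from 2019 - present"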


def analyze_resume(text, job_title, models):
    """Analyze resume text against the selected job description."""
    # Skill extraction.
    found_skills = extract_skills(text, job_title, models.get('nlp'))

    # Summary generation (falls back to the first 500 characters).
    if models.get('summarizer'):
        try:
            summary = models['summarizer'](text[:3000], max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
        except Exception as e:
            st.warning(f"Error generating summary: {e}")
            summary = text[:500] + "..."
    else:
        summary = text[:500] + "..."

    # Work experience extraction.
    experiences = extract_experience(text)

    # Semantic match score between the resume and the job description.
    match_score = 0
    if models.get('sentence_model') and sentence_transformers_available:
        try:
            resume_embedding = models['sentence_model'].encode(text[:5000], convert_to_tensor=True)
            job_embedding = models['sentence_model'].encode(job_descriptions[job_title]["description"], convert_to_tensor=True)

            match_score = float(st_util.pytorch_cos_sim(resume_embedding, job_embedding)[0][0]) * 100
        except Exception as e:
            st.warning(f"Error calculating semantic match: {e}")
    else:
        # Fallback: proportion of required skills found in the resume.
        match_score = (len(found_skills) / len(job_descriptions[job_title]["skills"])) * 100

    # Estimate total years of experience and a corresponding seniority level.
    years_exp = sum(exp.get('duration_months', 0) for exp in experiences if 'duration_months' in exp) / 12

    if years_exp < 3:
        seniority = "Junior"
    elif years_exp < 6:
        seniority = "Mid-level"
    else:
        seniority = "Senior"

    # Rough skill-level inference from the phrasing around each matched skill.
    skill_levels = {}
    for skill in found_skills:
        # Default level when no qualifying phrase is found.
        skill_levels[skill] = "intermediate"

        # Phrases suggesting advanced proficiency.
        advanced_patterns = [
            f"expert in {skill}",
            f"advanced {skill}",
            f"extensive experience with {skill}"
        ]
        if any(pattern in text.lower() for pattern in advanced_patterns):
            skill_levels[skill] = "advanced"

        # Phrases suggesting only basic familiarity.
        basic_patterns = [
            f"familiar with {skill}",
            f"basic knowledge of {skill}",
            f"introduced to {skill}"
        ]
        if any(pattern in text.lower() for pattern in basic_patterns):
            skill_levels[skill] = "basic"

    # Flag overlapping employment periods as potential inconsistencies.
    inconsistencies = []
    if len(experiences) >= 2:
        sorted_exps = sorted(
            [exp for exp in experiences if 'start_date' in exp],
            key=lambda x: x['start_date']
        )

        for i in range(len(sorted_exps) - 1):
            current = sorted_exps[i]
            next_exp = sorted_exps[i + 1]

            if current['end_date'] > next_exp['start_date']:
                inconsistencies.append({
                    'type': 'overlap',
                    'description': f"Overlapping roles at {current['company']} and {next_exp['company']}"
                })

    career_prediction = predict_career_path(seniority, job_title)

    return {
        'found_skills': found_skills,
        'skill_levels': skill_levels,
        'summary': summary,
        'experiences': experiences,
        'match_score': match_score,
        'seniority': seniority,
        'years_experience': years_exp,
        'inconsistencies': inconsistencies,
        'career_prediction': career_prediction
    }


def predict_career_path(seniority, job_title):
    """Generate a simple career prediction."""
    if seniority == "Junior":
        return f"Next potential role: Senior {job_title}"
    elif seniority == "Mid-level":
        roles = {
            "Software Engineer": "Team Lead, Technical Lead, or Engineering Manager",
            "Data Scientist": "Senior Data Scientist or Data Science Lead",
            "Interaction Designer": "Senior Designer or UX Lead"
        }
        return f"Next potential roles: {roles.get(job_title, f'Senior {job_title}')}"
    else:
        roles = {
            "Software Engineer": "Engineering Manager, Software Architect, or CTO",
            "Data Scientist": "Head of Data Science, ML Engineering Manager, or Chief Data Officer",
            "Interaction Designer": "Design Director, Head of UX, or VP of Design"
        }
        return f"Next potential roles: {roles.get(job_title, f'Director of {job_title}')}"


def generate_career_advice(resume_text, job_title, found_skills, missing_skills):
    """Generate career advice based on resume analysis."""
    advice = f"""## Career Development Plan for {job_title}

### Skills to Develop

The following skills would strengthen your profile for this position:

"""

    for skill in missing_skills:
        advice += f"- **{skill.title()}**: "

        if skill == "python":
            advice += "Take online courses like Coursera's Python for Everybody or follow tutorials on Real Python."
        elif skill == "java":
            advice += "Complete the Oracle Java Certification or contribute to open-source Java projects."
        elif skill == "javascript":
            advice += "Build interactive web applications using modern frameworks like React or Vue."
        elif skill == "cloud":
            advice += "Get hands-on experience with AWS, Azure, or GCP through their free tier offerings."
        elif "algorithm" in skill or "data structure" in skill:
            advice += "Practice on platforms like LeetCode or HackerRank and study algorithm design principles."
        elif "ui" in skill or "ux" in skill:
            advice += "Create a portfolio of design work and study interaction design principles."
        elif "machine learning" in skill:
            advice += "Take Andrew Ng's Machine Learning course on Coursera and work on ML projects with real datasets."
        else:
            advice += "Research and practice this skill through online courses, tutorials, and hands-on projects."

        advice += "\n\n"

    advice += f"""
### Project Ideas

Consider these projects to showcase your skills for a {job_title} position:

"""

    if job_title == "Software Engineer":
        advice += """
1. **Full-Stack Web Application**: Build a complete web app with frontend, backend, and database
2. **API Service**: Create a RESTful or GraphQL API with proper authentication and documentation
3. **Open Source Contribution**: Contribute to relevant open-source projects in your area of interest
"""
    elif job_title == "Data Scientist":
        advice += """
1. **Predictive Model**: Build and deploy a machine learning model that solves a real-world problem
2. **Data Dashboard**: Create an interactive visualization dashboard for complex datasets
3. **Natural Language Processing**: Develop a text classification or sentiment analysis project
"""
    elif job_title == "Interaction Designer":
        advice += """
1. **Design System**: Create a comprehensive design system with components and usage guidelines
2. **UX Case Study**: Document your design process for a real or fictional product improvement
3. **Interactive Prototype**: Design a fully functional prototype that demonstrates your interaction design skills
"""

    advice += """
### Learning Resources

- **Online Platforms**: Coursera, Udemy, Pluralsight, LinkedIn Learning
- **Practice Sites**: GitHub, HackerRank, LeetCode, Kaggle
- **Communities**: Stack Overflow, Reddit programming communities, relevant Discord servers
"""

    return advice


# Main application flow: load the (cached) models, then build the UI.
models = load_models()

# Clear the startup message once loading has finished.
startup_message.empty()

st.markdown("""
This app helps recruiters analyze resumes by:
- Extracting relevant skills for specific job positions
- Generating a concise summary of the candidate's background
- Identifying skill gaps for the selected role
- Providing personalized career advice and project recommendations
""")

col1, col2 = st.columns([2, 1])

with col1:
    uploaded_file = st.file_uploader("Upload Resume (PDF)", type=["pdf"])

with col2:
    job_title = st.selectbox("Select Job Position", list(job_descriptions.keys()))

    if job_title:
        st.info("**Required Skills:**\n" +
                "\n".join([f"- {skill.title()}" for skill in job_descriptions[job_title]["skills"]]))

if uploaded_file and job_title:
    try:
        # Run extraction and analysis behind a spinner.
        with st.spinner("Analyzing resume..."):
            text = extract_text_from_pdf(uploaded_file)

            analysis_results = analyze_resume(text, job_title, models)

            missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
                              if skill not in analysis_results['found_skills']]

        # Present the results across four tabs.
        tab1, tab2, tab3, tab4 = st.tabs([
            "📊 Skills Match",
            "📝 Resume Summary",
            "🎯 Skills Gap",
            "🚀 Career Advice"
        ])

        with tab1:
            col1, col2 = st.columns(2)

            with col1:
                st.subheader("🎯 Matched Skills")
                if analysis_results['found_skills']:
                    for skill in analysis_results['found_skills']:
                        # Color-code each skill by its inferred proficiency level.
                        level = analysis_results['skill_levels'].get(skill, 'intermediate')
                        level_emoji = "🟢" if level == 'advanced' else "🟡" if level == 'intermediate' else "🟠"
                        st.success(f"{level_emoji} {skill.title()} ({level.title()})")

                    # Share of required skills found directly in the resume text.
                    match_percentage = len(analysis_results['found_skills']) / len(job_descriptions[job_title]["skills"]) * 100
                    st.metric("Skills Match", f"{match_percentage:.1f}%")
                else:
                    st.warning("No direct skill matches found.")

            with col2:
                st.subheader("💡 Semantic Match")
                st.metric("Overall Match Score", f"{analysis_results['match_score']:.1f}%")

                # Coverage of the must-have skills for this role.
                must_have_skills = job_descriptions[job_title]["must_have"]
                must_have_count = sum(1 for skill in must_have_skills if skill in analysis_results['found_skills'])
                must_have_percentage = (must_have_count / len(must_have_skills)) * 100

                st.write("Must-have skills:")
                st.progress(must_have_percentage / 100)
                st.write(f"{must_have_count} out of {len(must_have_skills)} ({must_have_percentage:.1f}%)")

                st.subheader("🧠 Seniority Assessment")
                st.info(f"**{analysis_results['seniority']}** ({analysis_results['years_experience']:.1f} years equivalent experience)")
                st.write(job_descriptions[job_title]["seniority_levels"][analysis_results['seniority']])

        with tab2:
            st.subheader("📝 Resume Summary")
            st.write(analysis_results['summary'])

            st.subheader("⏳ Experience Timeline")
            if analysis_results['experiences']:
                # Tabular view of the extracted experience entries.
                exp_data = []
                for exp in analysis_results['experiences']:
                    if 'start_date' in exp and 'end_date' in exp:
                        exp_data.append({
                            'Company': exp['company'],
                            'Role': exp['role'],
                            'Start Date': exp['start_date'].strftime('%b %Y') if exp['start_date'] else 'Unknown',
                            # An end date equal to today means the role was parsed as ongoing.
                            'End Date': 'Present' if exp['end_date'].date() == datetime.now().date() else exp['end_date'].strftime('%b %Y'),
                            'Duration (months)': exp.get('duration_months', 'Unknown')
                        })
                    else:
                        exp_data.append({
                            'Company': exp['company'],
                            'Role': exp['role'],
                            'Duration': exp.get('duration', 'Unknown')
                        })

                if exp_data:
                    exp_df = pd.DataFrame(exp_data)
                    st.dataframe(exp_df)

                # Horizontal bar chart of role durations, ordered by start date.
                timeline_data = [exp for exp in analysis_results['experiences'] if 'start_date' in exp and 'end_date' in exp]
                if timeline_data:
                    try:
                        timeline_data = sorted(timeline_data, key=lambda x: x['start_date'])

                        fig = go.Figure()

                        for i, exp in enumerate(timeline_data):
                            fig.add_trace(go.Bar(
                                x=[(exp['end_date'] - exp['start_date']).days / 30],
                                y=[exp['company']],
                                orientation='h',
                                name=exp['role'],
                                hovertext=f"{exp['role']} at {exp['company']}",
                                marker=dict(color=px.colors.qualitative.Plotly[i % len(px.colors.qualitative.Plotly)])
                            ))

                        fig.update_layout(
                            title="Career Timeline",
                            xaxis_title="Duration (months)",
                            yaxis_title="Company",
                            height=400,
                            margin=dict(l=0, r=0, b=0, t=30)
                        )

                        st.plotly_chart(fig, use_container_width=True)
                    except Exception as e:
                        st.warning(f"Could not create timeline visualization: {e}")
            else:
                st.warning("No work experience data could be extracted.")

        with tab3:
            st.subheader("📚 Skills to Develop")

            col1, col2 = st.columns(2)

            with col1:
                if missing_skills:
                    for skill in missing_skills:
                        st.warning(f"❌ {skill.title()}")
                else:
                    st.success("Great! The candidate has all the required skills!")

            with col2:
                st.subheader("📊 Gap Analysis")

                # Must-have skills that were not found in the resume.
                missing_must_have = [skill for skill in job_descriptions[job_title]["must_have"]
                                     if skill not in analysis_results['found_skills']]

                if missing_must_have:
                    st.error("**Critical Skills Missing:**")
                    for skill in missing_must_have:
                        st.write(f"- {skill.title()}")

                    st.markdown("These are must-have skills for this position.")
                else:
                    st.success("Candidate has all the must-have skills for this position!")

                # Nice-to-have skills that were not found in the resume.
                missing_nice_to_have = [skill for skill in job_descriptions[job_title]["nice_to_have"]
                                        if skill not in analysis_results['found_skills']]

                if missing_nice_to_have:
                    st.warning("**Nice-to-Have Skills Missing:**")
                    for skill in missing_nice_to_have:
                        st.write(f"- {skill.title()}")
                else:
                    st.success("Candidate has all the nice-to-have skills!")

            st.subheader("👨‍💼 Career Trajectory")
            st.info(analysis_results['career_prediction'])

        with tab4:
            st.subheader("🚀 Career Advice and Project Recommendations")

            if st.button("Generate Career Advice"):
                with st.spinner("Generating personalized career advice..."):
                    advice = generate_career_advice(text, job_title, analysis_results['found_skills'], missing_skills)
                    st.markdown(advice)

    except Exception as e:
        st.error(f"An error occurred while processing the resume: {str(e)}")
        st.exception(e)


st.markdown("---")
st.markdown("Made with ❤️ using Streamlit and Hugging Face")