import streamlit as st
import pandas as pd
import re
import json
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import time

# Set page title and configuration
st.set_page_config(
    page_title="Resume-Job Fit Analyzer",
    page_icon="📄",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Download NLTK resources if needed
def download_nltk_resources():
    try:
        nltk.data.find('tokenizers/punkt')
        nltk.data.find('corpora/stopwords')
    except LookupError:
        nltk.download('punkt')
        nltk.download('stopwords')
    return stopwords.words('english')

stop_words = download_nltk_resources()

# Load models
@st.cache_resource  # cache so the models are loaded only once per session
def load_models():
    """Load and cache the NLP models"""
    models = {}
    # Use BART for resume parsing
    models['parser'] = pipeline(
        "text2text-generation",
        model="facebook/bart-base",  # This would be the fine-tuned model in production
        device=0 if torch.cuda.is_available() else -1
    )
    # Use Qwen for evaluation
    models['evaluator'] = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
    models['evaluator_tokenizer'] = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
    return models

# Extract skills from text
def extract_skills(text, skill_keywords):
    """Extract skills from text based on a predefined list of skills"""
    found_skills = []
    text_lower = text.lower()
    for skill in skill_keywords:
        # Whole-word match; lookarounds instead of \b so that skills ending in
        # non-word characters (e.g. "C++") can still match
        pattern = r'(?<!\w)' + re.escape(skill.lower()) + r'(?!\w)'
        if re.search(pattern, text_lower):
            found_skills.append(skill)
    return list(set(found_skills))
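
# Illustrative example (not part of the app flow):
#   extract_skills("Built APIs in Python and Go", ["Python", "Go", "Java"])
# would return ["Python", "Go"] (order may vary because of the set() de-duplication).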

# Parse resume
def parse_resume(resume_text, models):
    """Extract structured information from resume text"""
    # In production, this would use the fine-tuned BART model
    # For now, we'll implement a simple rule-based parser
    # Clean the text
    clean_text = re.sub(r'\s+', ' ', resume_text).strip()
    # Extract common skill keywords (this would be a more extensive list in production)
    tech_skills = [
        "Python", "Java", "C++", "JavaScript", "TypeScript", "Go", "Rust", "SQL",
        "React", "Angular", "Vue", "Node.js", "Django", "Flask", "Spring",
        "TensorFlow", "PyTorch", "Scikit-learn", "Machine Learning", "Deep Learning", "NLP",
        "AWS", "Azure", "GCP", "Docker", "Kubernetes", "CI/CD", "Jenkins", "GitHub Actions",
        "REST API", "GraphQL", "Microservices", "Serverless"
    ]
    soft_skills = [
        "Leadership", "Communication", "Teamwork", "Problem-solving", "Critical thinking",
        "Time management", "Adaptability", "Creativity", "Collaboration", "Presentation"
    ]
    # Extract skills
    found_tech_skills = extract_skills(clean_text, tech_skills)
    found_soft_skills = extract_skills(clean_text, soft_skills)
    # Extract experience using regex patterns (simplified)
    experience_pattern = r'(?:Experience|EXPERIENCE|Work Experience|WORK EXPERIENCE).*?(?:Education|EDUCATION|Skills|SKILLS|$)'
    experience_match = re.search(experience_pattern, clean_text, re.DOTALL)
    experience_text = experience_match.group(0) if experience_match else ""
    # Extract education using regex patterns (simplified)
    education_pattern = r'(?:Education|EDUCATION).*?(?:Skills|SKILLS|Experience|EXPERIENCE|$)'
    education_match = re.search(education_pattern, clean_text, re.DOTALL)
    education_text = education_match.group(0) if education_match else ""
    # Estimate years of experience (simplified)
    years_exp = 0
    year_patterns = [
        r'(\d{4})\s*-\s*(?:present|current|now|2023|2024|2025)',
        r'(\d{4})\s*-\s*(\d{4})'
    ]
    for pattern in year_patterns:
        matches = re.findall(pattern, clean_text, re.IGNORECASE)
        for match in matches:
            if isinstance(match, tuple):
                start_year = int(match[0])
                end_year = int(match[1]) if match[1].isdigit() else 2025
                years_exp += (end_year - start_year)
            else:
                start_year = int(match)
                years_exp += (2025 - start_year)
    # Cap reasonable years
    years_exp = min(years_exp, 30)
    # Create structured data
    structured_data = {
        "skills": {
            "technical": found_tech_skills,
            "soft": found_soft_skills
        },
        "experience": {
            "years": years_exp,
            "summary": experience_text[:300] + "..." if len(experience_text) > 300 else experience_text
        },
        "education": education_text[:300] + "..." if len(education_text) > 300 else education_text
    }
    return structured_data
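
# Illustrative shape of the dict returned by parse_resume() (example values only;
# actual contents depend entirely on the resume text):
# {
#     "skills": {"technical": ["Python", "SQL"], "soft": ["Teamwork"]},
#     "experience": {"years": 4, "summary": "Experience ..."},
#     "education": "Education ..."
# }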

# Parse job description
def parse_job_description(job_text):
    """Extract key requirements from job description"""
    # Clean the text
    clean_text = re.sub(r'\s+', ' ', job_text).strip()
    # Extract common skill keywords (same as resume parser)
    tech_skills = [
        "Python", "Java", "C++", "JavaScript", "TypeScript", "Go", "Rust", "SQL",
        "React", "Angular", "Vue", "Node.js", "Django", "Flask", "Spring",
        "TensorFlow", "PyTorch", "Scikit-learn", "Machine Learning", "Deep Learning", "NLP",
        "AWS", "Azure", "GCP", "Docker", "Kubernetes", "CI/CD", "Jenkins", "GitHub Actions",
        "REST API", "GraphQL", "Microservices", "Serverless"
    ]
    soft_skills = [
        "Leadership", "Communication", "Teamwork", "Problem-solving", "Critical thinking",
        "Time management", "Adaptability", "Creativity", "Collaboration", "Presentation"
    ]
    # Extract skills
    required_tech_skills = extract_skills(clean_text, tech_skills)
    required_soft_skills = extract_skills(clean_text, soft_skills)
    # Extract years of experience requirement (simplified)
    exp_patterns = [
        r'(\d+)\+?\s*(?:years|yrs|yr)(?:\s*of)?\s*(?:experience|exp)',
        r'(?:experience|exp)(?:\s*of)?\s*(\d+)\+?\s*(?:years|yrs|yr)'
    ]
    required_years = 0
    for pattern in exp_patterns:
        matches = re.findall(pattern, clean_text, re.IGNORECASE)
        if matches:
            # Take the highest mentioned years
            required_years = max([int(y) for y in matches if y.isdigit()] + [required_years])
    # Extract job title from the first line of the raw text (clean_text has its
    # newlines collapsed, so searching it would swallow the whole description)
    title_pattern = r'^(.*?)(?:\n|$)'
    title_match = re.search(title_pattern, job_text.strip())
    job_title = title_match.group(1).strip() if title_match else "Not specified"
    # Create structured data
    structured_data = {
        "title": job_title,
        "requirements": {
            "technical_skills": required_tech_skills,
            "soft_skills": required_soft_skills,
            "years_experience": required_years
        },
        "full_text": job_text
    }
    return structured_data
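
# Illustrative example (not part of the app flow): for a posting containing
# "5+ years of experience with Python and AWS", the first exp_pattern captures "5",
# so required_years becomes 5, and "Python" / "AWS" land in technical_skills.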

# Calculate match score
def calculate_match_score(resume_data, job_data):
    """Calculate how well the resume matches the job description"""
    scores = {}
    # Calculate skill match percentage
    required_tech_skills = set(job_data["requirements"]["technical_skills"])
    candidate_tech_skills = set(resume_data["skills"]["technical"])
    required_soft_skills = set(job_data["requirements"]["soft_skills"])
    candidate_soft_skills = set(resume_data["skills"]["soft"])
    if required_tech_skills:
        tech_match = len(candidate_tech_skills.intersection(required_tech_skills)) / len(required_tech_skills)
        scores["technical_skills"] = {
            "score": int(tech_match * 100),
            "matched": list(candidate_tech_skills.intersection(required_tech_skills)),
            "missing": list(required_tech_skills - candidate_tech_skills)
        }
    else:
        scores["technical_skills"] = {"score": 0, "matched": [], "missing": []}
    if required_soft_skills:
        soft_match = len(candidate_soft_skills.intersection(required_soft_skills)) / len(required_soft_skills)
        scores["soft_skills"] = {
            "score": int(soft_match * 100),
            "matched": list(candidate_soft_skills.intersection(required_soft_skills)),
            "missing": list(required_soft_skills - candidate_soft_skills)
        }
    else:
        scores["soft_skills"] = {"score": 0, "matched": [], "missing": []}
    # Experience match
    required_years = job_data["requirements"]["years_experience"]
    candidate_years = resume_data["experience"]["years"]
    if required_years > 0:
        if candidate_years >= required_years:
            exp_score = 100
        else:
            exp_score = int((candidate_years / required_years) * 100)
        scores["experience"] = {
            "score": exp_score,
            "candidate_years": candidate_years,
            "required_years": required_years
        }
    else:
        scores["experience"] = {
            "score": 100 if candidate_years > 0 else 50,
            "candidate_years": candidate_years,
            "required_years": "Not specified"
        }
    # Calculate overall score (weighted)
    tech_weight = 0.6
    soft_weight = 0.2
    exp_weight = 0.2
    overall_score = (
        scores["technical_skills"]["score"] * tech_weight +
        scores["soft_skills"]["score"] * soft_weight +
        scores["experience"]["score"] * exp_weight
    )
    scores["overall"] = int(overall_score)
    return scores
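
# Worked example of the weighting above (illustrative numbers only):
#   technical 75%, soft 50%, experience 100%
#   overall = 0.6 * 75 + 0.2 * 50 + 0.2 * 100 = 45 + 10 + 20 = 75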

# Generate expert assessment using Qwen
def generate_assessment(resume_data, job_data, match_scores, models):
    """Generate an expert assessment using Qwen model"""
    # Prepare context
    job_title = job_data["title"]
    matched_skills = match_scores["technical_skills"]["matched"]
    missing_skills = match_scores["technical_skills"]["missing"]
    experience_match = match_scores["experience"]
    overall_score = match_scores["overall"]
    # Determine fit classification
    fit_status = "FIT" if overall_score >= 70 else "NOT FIT"
    # Create prompt for Qwen
    prompt = f"""
<|im_start|>system
You are an expert resume evaluator. Analyze how well a candidate fits a job posting and provide professional feedback.
<|im_end|>
<|im_start|>user
Evaluate this candidate for a {job_title} position.
Overall match score: {overall_score}%
Technical skills match: {match_scores["technical_skills"]["score"]}%
Soft skills match: {match_scores["soft_skills"]["score"]}%
Experience match: {experience_match["score"]}%
Candidate has: {experience_match["candidate_years"]} years of experience
Position requires: {experience_match["required_years"]} years of experience
Matched technical skills: {", ".join(matched_skills) if matched_skills else "None"}
Missing technical skills: {", ".join(missing_skills) if missing_skills else "None"}
Create a professional assessment of this candidate. First state whether they are a FIT or NOT FIT for the position, then explain why with specific strengths and development areas.
<|im_end|>
<|im_start|>assistant
"""
    try:
        # Generate the assessment using Qwen
        tokenizer = models['evaluator_tokenizer']
        qwen_model = models['evaluator']
        inputs = tokenizer(prompt, return_tensors="pt")
        outputs = qwen_model.generate(
            inputs.input_ids,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )
        assessment = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Extract the assistant's response
        if "<|im_start|>assistant" in assessment:
            assessment = assessment.split("<|im_start|>assistant")[-1]
        # Clean up any remaining markers
        assessment = re.sub(r'<\|im_(start|end)\|>', '', assessment)
        assessment = assessment.strip()
        # If no assessment was generated, create a fallback
        if not assessment or len(assessment) < 50:
            assessment = generate_fallback_assessment(resume_data, job_data, match_scores, fit_status)
    except Exception as e:
        st.error(f"Error generating assessment: {str(e)}")
        assessment = generate_fallback_assessment(resume_data, job_data, match_scores, fit_status)
    return assessment, fit_status
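
# Note: the prompt above hand-writes Qwen's ChatML markers. A sketch of an alternative
# (not wired into this app) is to let the tokenizer build them via its chat template;
# `user_prompt` below is a placeholder for the evaluation text assembled in generate_assessment():
#
#     messages = [
#         {"role": "system", "content": "You are an expert resume evaluator."},
#         {"role": "user", "content": user_prompt},
#     ]
#     prompt = models['evaluator_tokenizer'].apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=True
#     )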

# Generate fallback assessment
def generate_fallback_assessment(resume_data, job_data, match_scores, fit_status):
    """Generate a fallback assessment if the model fails"""
    job_title = job_data["title"]
    matched_skills = match_scores["technical_skills"]["matched"]
    missing_skills = match_scores["technical_skills"]["missing"]
    overall_score = match_scores["overall"]
    if fit_status == "FIT":
        assessment = f"""FIT: This candidate demonstrates a strong alignment with the {job_title} position, achieving an overall match score of {overall_score}%. Their proficiency in {', '.join(matched_skills) if matched_skills else 'relevant skills'} positions them well to contribute effectively from the start. The candidate's experience level is suitable for the role's requirements. To maximize their success, they could consider developing expertise in {', '.join(missing_skills) if missing_skills else 'additional specialized areas relevant to this role'}.
"""
    else:
        assessment = f"""NOT FIT: This candidate currently shows limited alignment with the {job_title} position, with an overall match score of {overall_score}%. While they demonstrate some relevant capabilities in {', '.join(matched_skills) if matched_skills else 'a few areas'}, they would need to develop expertise in critical areas such as {', '.join(missing_skills) if missing_skills else 'key technical requirements for this position'}. The candidate may become more competitive for this role by focusing on these skill gaps and gaining more relevant experience.
"""
    return assessment

# Create the main header and interface
st.title("Resume-Job Fit Analyzer")
st.markdown("### Evaluate how well a resume matches a job description")

# Setup columns for input
col1, col2 = st.columns(2)
with col1:
    # Resume input
    st.subheader("Resume")
    resume_text = st.text_area("Paste resume text here", height=300,
                               placeholder="Paste the candidate's resume text here...")
with col2:
    # Job description input
    st.subheader("Job Description")
    job_description = st.text_area("Paste job description here", height=300,
                                   placeholder="Paste the job description here...")

# Analysis button
analyze_button = st.button("Analyze Match", type="primary", use_container_width=True)

# Main analysis logic
if analyze_button:
    if not resume_text or not job_description:
        st.error("Please provide both a resume and a job description.")
    else:
        with st.spinner("Analyzing resume and job match..."):
            # Record start time
            start_time = time.time()
            # Load models (uses caching so only loads once)
            models = load_models()
            # Parse resume and job description
            resume_data = parse_resume(resume_text, models)
            job_data = parse_job_description(job_description)
            # Calculate match score
            match_scores = calculate_match_score(resume_data, job_data)
            # Generate assessment
            assessment, fit_status = generate_assessment(resume_data, job_data, match_scores, models)
            # Calculate execution time
            execution_time = time.time() - start_time

        # Display results
        st.success(f"Analysis complete in {execution_time:.2f} seconds")
        # Display fit status prominently
        st.markdown(f"## Overall Result: {fit_status}")
        # Display match score
        st.subheader("Match Score")
        score_col1, score_col2, score_col3 = st.columns(3)
        with score_col1:
            st.metric("Overall Match", f"{match_scores['overall']}%")
        with score_col2:
            st.metric("Technical Skills", f"{match_scores['technical_skills']['score']}%")
        with score_col3:
            st.metric("Experience Match", f"{match_scores['experience']['score']}%")
        # Show skills breakdown
        st.subheader("Skills Breakdown")
        skill_col1, skill_col2 = st.columns(2)
        with skill_col1:
            st.markdown("##### Matched Skills")
            if match_scores["technical_skills"]["matched"]:
                for skill in match_scores["technical_skills"]["matched"]:
st.markdown(f"β {skill}") | |
            else:
                st.markdown("No matched skills found")
        with skill_col2:
            st.markdown("##### Missing Skills")
            if match_scores["technical_skills"]["missing"]:
                for skill in match_scores["technical_skills"]["missing"]:
st.markdown(f"β {skill}") | |
            else:
                st.markdown("No missing skills detected")
        # Show experience comparison
        st.subheader("Experience")
        exp_col1, exp_col2 = st.columns(2)
        with exp_col1:
            st.markdown(f"**Required**: {job_data['requirements']['years_experience']} years")
        with exp_col2:
            st.markdown(f"**Candidate has**: {resume_data['experience']['years']} years")
        # Display detailed assessment
        st.subheader("Expert Assessment")
        st.markdown(assessment)
        # Show parsed data (expandable)
        with st.expander("View Parsed Data"):
            col1, col2 = st.columns(2)
            with col1:
                st.subheader("Resume Data")
                st.json(resume_data)
            with col2:
                st.subheader("Job Requirements")
                st.json(job_data)