Spaces:
Sleeping
Sleeping
import streamlit as st | |
from sentence_transformers import SentenceTransformer, util | |
import re | |
import nltk | |
from nltk.corpus import stopwords | |
from nltk.tokenize import word_tokenize | |
# Download the NLTK data files used below: the stop-word corpus and the
# Punkt tokenizer data that word_tokenize relies on.
nltk.download("stopwords")
nltk.download("punkt")
# Recent NLTK releases load the tokenizer from the "punkt_tab" package rather
# than the pickled "punkt" one; fetch both so word_tokenize works either way.
nltk.download("punkt_tab")

# Load English stop words once; a set gives O(1) membership tests in preprocess().
stop_words = set(stopwords.words("english"))
@st.cache_resource
def load_model():
    """Return the sentence-embedding model used for semantic similarity.

    Decorated with ``st.cache_resource`` so that one model instance is
    shared across Streamlit script reruns instead of being re-created on
    every widget interaction.
    """
    return SentenceTransformer('sentence-transformers/all-mpnet-base-v2')


model = load_model()
# Skill groups: each canonical skill name maps to the set of alternative
# phrasings that are treated as equivalent to it when comparing texts.
synonyms = {
    "data analysis": {
        "data analytics",
        "data analyst",
    },
    "machine learning": {
        "ml",
        "artificial intelligence",
        "ai",
    },
    "programming": {
        "coding",
        "development",
        "software engineering",
    },
    "statistical analysis": {
        "statistics",
        "statistical modeling",
    },
    "visualization": {
        "data viz",
        "tableau",
        "visualizing data",
    },
}
def preprocess(text):
    """Normalize *text* for comparison.

    The text is lower-cased and tokenized with ``word_tokenize``; tokens
    that are not purely alphanumeric or that appear in ``stop_words`` are
    dropped.  The surviving tokens are returned joined by single spaces.
    """
    tokens = word_tokenize(text.lower())
    return " ".join(
        token
        for token in tokens
        if token.isalnum() and token not in stop_words
    )
def synonym_match(job_desc, resume, synonym_map=None):
    """Score how many skill groups named in the job description the resume covers.

    A skill group counts as "mentioned" in a text when its canonical name or
    any of its variants occurs there as a whole word or phrase.  Plain
    substring tests are deliberately avoided: they would find "ai" inside
    "maintain" or "ml" inside "html".

    Args:
        job_desc: Preprocessed (lower-cased) job description text.
        resume: Preprocessed (lower-cased) resume text.
        synonym_map: Optional mapping of canonical skill name to an iterable
            of variant phrasings.  Defaults to the module-level ``synonyms``.

    Returns:
        A ``(score, matched, missing)`` tuple.  ``score`` is the percentage
        of skill groups mentioned in the job description that are also
        mentioned in the resume (0 when the job description mentions none).
        ``matched`` and ``missing`` are lists of at most five canonical
        names, in the mapping's iteration order.
    """
    if synonym_map is None:
        synonym_map = synonyms

    def mentions(text, terms):
        # Lookarounds rather than \b so terms that start or end with a
        # non-word character still get a sensible boundary.  The optional
        # "s" keeps simple plurals ("data analysts") matching.
        return any(
            re.search(rf"(?<!\w){re.escape(term)}s?(?!\w)", text)
            for term in terms
        )

    matched_keywords = []
    missing_keywords = []
    for key, variants in synonym_map.items():
        terms = (key, *variants)
        if not mentions(job_desc, terms):
            # Skill not requested by the job description: not scored.
            continue
        if mentions(resume, terms):
            matched_keywords.append(key)
        else:
            missing_keywords.append(key)

    total_keywords = len(matched_keywords) + len(missing_keywords)
    score = (len(matched_keywords) / total_keywords) * 100 if total_keywords else 0
    return score, matched_keywords[:5], missing_keywords[:5]
def keyword_match(job_desc, resume):
    """Measure literal word overlap between a job description and a resume.

    Args:
        job_desc: Preprocessed job description text.
        resume: Preprocessed resume text.

    Returns:
        A ``(score, sample)`` tuple.  ``score`` is the percentage of distinct
        job-description words that also occur in the resume (0 when the job
        description has no words).  ``sample`` lists at most five of the
        shared words, in the order they first appear in the job description,
        so the result is the same on every run.
    """
    # dict.fromkeys de-duplicates while keeping first-appearance order,
    # unlike a set whose iteration order is arbitrary.
    job_keywords = list(dict.fromkeys(re.findall(r'\b\w+\b', job_desc)))
    if not job_keywords:
        return 0, []

    resume_keywords = set(re.findall(r'\b\w+\b', resume))
    common_keywords = [word for word in job_keywords if word in resume_keywords]
    score = (len(common_keywords) / len(job_keywords)) * 100
    return score, common_keywords[:5]
# --- Streamlit UI: collect both texts, score them, and report the result ---
st.title("Resume and Job Description Similarity Checker")
job_description = st.text_area("Paste any job description here:", height=200)
resume_text = st.text_area("Paste your resume here:", height=200)

if st.button("Compare"):
    if job_description.strip() and resume_text.strip():
        # Preprocess text
        processed_job_desc = preprocess(job_description)
        processed_resume = preprocess(resume_text)

        # Embedding-based similarity: cosine similarity scaled to a percentage.
        job_description_embedding = model.encode(processed_job_desc)
        resume_embedding = model.encode(processed_resume)
        similarity_score = util.cos_sim(job_description_embedding, resume_embedding).item() * 100

        # Keyword-based similarity and a sample of the shared words.
        keyword_score, matched_keywords = keyword_match(processed_job_desc, processed_resume)

        # Synonym-based similarity plus the matched / missing skill groups.
        synonym_score, synonym_matches, synonym_misses = synonym_match(processed_job_desc, processed_resume)

        # Combine scores (adjusting weights as needed).
        # NOTE: synonym_score is 0 when the job description mentions none of
        # the known skill groups, which caps the overall score at 80 there.
        overall_score = (similarity_score * 0.5) + (keyword_score * 0.3) + (synonym_score * 0.2)

        # Display the overall similarity score
        st.write(f"**Overall Similarity Score:** {overall_score:.2f}%")

        # Display at most five entries per list.  The list is sliced BEFORE
        # joining: slicing the joined string would keep only its first five
        # characters.  dict.fromkeys drops words reported by both matchers.
        shown_matches = list(dict.fromkeys(matched_keywords + synonym_matches))[:5]
        st.write("**Matched Keywords:**", ", ".join(shown_matches))
        st.write("**Missing Skills to Consider Adding:**", ", ".join(synonym_misses[:5]))

        # Feedback tier based on the combined score
        if overall_score > 80:
            st.success("Excellent match! Your resume closely aligns with the job description.")
        elif overall_score > 65:
            st.info("Strong match! Your resume aligns well, but a few minor tweaks could help.")
        elif overall_score > 50:
            st.warning("Moderate match. Your resume has some relevant information, but consider emphasizing key skills.")
        elif overall_score > 35:
            st.error("Low match. Your resume does not align well. Consider revising to highlight key skills.")
        else:
            st.error("Very low match. Your resume is significantly different from the job description. Major revisions may be needed.")
    else:
        st.error("Please paste both the job description and your resume to proceed.")