Spaces:

jacob-c
/

Resume_Screener_and_Skill_Extractor

Paused

App Files Files Community

root committed on May 7

Commit

c1efc08

1 Parent(s): fbc936b

ss

Browse files

Files changed (2) hide show

app.py +828 -42
requirements.txt +14 -5

app.py CHANGED Viewed

@@ -2,12 +2,34 @@ import streamlit as st
 import pdfplumber
 import io
 import spacy
-from transformers import pipeline
 import subprocess
 import sys
 st.set_page_config(
-    page_title="Resume Screener & Skill Extractor",
     page_icon="📄",
     layout="wide"
 )
@@ -27,27 +49,71 @@ def download_spacy_model():
 def load_models():
     summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
     nlp = download_spacy_model()
-    return summarizer, nlp
 # Initialize models
-summarizer, nlp = load_models()
 # Job descriptions and required skills
 job_descriptions = {
     "Software Engineer": {
         "skills": ["python", "java", "javascript", "sql", "algorithms", "data structures",
                   "git", "cloud", "web development", "software development", "coding"],
-        "description": "Looking for software engineers with strong programming skills and experience in software development."
     },
     "Interaction Designer": {
         "skills": ["ui", "ux", "user research", "wireframing", "prototyping", "figma",
                   "sketch", "adobe", "design thinking", "interaction design"],
-        "description": "Seeking interaction designers with expertise in user experience and interface design."
     },
     "Data Scientist": {
         "skills": ["python", "r", "statistics", "machine learning", "data analysis",
                   "sql", "tensorflow", "pytorch", "pandas", "numpy"],
-        "description": "Looking for data scientists with strong analytical and machine learning skills."
     }
 }
@@ -58,8 +124,312 @@ def extract_text_from_pdf(pdf_file):
             text += page.extract_text() or ""
     return text
-def analyze_resume(text, job_title):
-    # Extract relevant skills
     doc = nlp(text.lower())
     found_skills = []
     required_skills = job_descriptions[job_title]["skills"]
@@ -68,6 +438,22 @@ def analyze_resume(text, job_title):
         if skill in text.lower():
             found_skills.append(skill)
     # Generate summary
     chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
     summaries = []
@@ -75,17 +461,92 @@ def analyze_resume(text, job_title):
         summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
         summaries.append(summary)
-    return found_skills, " ".join(summaries)
 # Streamlit UI
-st.title("📄 Resume Screener & Skill Extractor")
 # Add description
 st.markdown("""
-This app helps recruiters analyze resumes by:
-- Extracting relevant skills for specific job positions
-- Generating a concise summary of the candidate's background
-- Identifying skill gaps for the selected role
 """)
 # Create two columns
@@ -101,54 +562,379 @@ with col2:
     # Show job description
     if job_title:
-        st.info(f"**Required Skills:**\n" +
                 "\n".join([f"- {skill.title()}" for skill in job_descriptions[job_title]["skills"]]))
 if uploaded_file and job_title:
     try:
         # Show spinner while processing
-        with st.spinner("Analyzing resume..."):
             # Extract text from PDF
             text = extract_text_from_pdf(uploaded_file)
             # Analyze resume
-            found_skills, summary = analyze_resume(text, job_title)
         # Display results in tabs
-        tab1, tab2, tab3 = st.tabs(["📊 Skills Match", "📝 Resume Summary", "🎯 Skills Gap"])
         with tab1:
-            # Display matched skills
-            st.subheader("🎯 Matched Skills")
-            if found_skills:
-                for skill in found_skills:
-                    st.success(f"✅ {skill.title()}")
-                # Calculate match percentage
-                match_percentage = len(found_skills) / len(job_descriptions[job_title]["skills"]) * 100
-                st.metric("Skills Match", f"{match_percentage:.1f}%")
-            else:
-                st.warning("No direct skill matches found.")
-        with tab2:
             # Display resume summary
             st.subheader("📝 Resume Summary")
-            st.write(summary)
         with tab3:
-            # Display missing skills
-            st.subheader("📌 Skills to Develop")
-            missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
-                            if skill not in found_skills]
-            if missing_skills:
-                for skill in missing_skills:
-                    st.warning(f"➖ {skill.title()}")
             else:
-                st.success("Great! The candidate has all the required skills!")
     except Exception as e:
         st.error(f"An error occurred while processing the resume: {str(e)}")
 # Add footer
 st.markdown("---")
-st.markdown("Made with ❤️ using Streamlit and Hugging Face")

 import pdfplumber
 import io
 import spacy
+import re
+import pandas as pd
+import matplotlib.pyplot as plt
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+from sentence_transformers import SentenceTransformer, util
 import subprocess
 import sys
+import torch
+import nltk
+from nltk.tokenize import word_tokenize
+from datetime import datetime
+import plotly.express as px
+import plotly.graph_objects as go
+import numpy as np
+from collections import defaultdict
+# Initialize NLTK
@st.cache_resource
def download_nltk_resources():
    """Make sure the NLTK ``punkt`` tokenizer data can be found.

    The resource is looked up first; it is downloaded only when NLTK reports
    it as missing by raising ``LookupError``.
    """
    punkt_resource = 'tokenizers/punkt'
    try:
        nltk.data.find(punkt_resource)
    except LookupError:
        # Resource is not installed yet: fetch it.
        nltk.download('punkt')
+download_nltk_resources()
 st.set_page_config(
+    page_title="Comprehensive Resume Screener & Skill Extractor",
     page_icon="📄",
     layout="wide"
 )
 def load_models():
     summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
     nlp = download_spacy_model()
+    # Load sentence transformer for semantic matching
+    try:
+        sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
+    except Exception as e:
+        st.error(f"Failed to load sentence transformer: {str(e)}")
+        sentence_model = None
+    # Load Qwen3-8B model for career advice
+    try:
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")
+        qwen_model = AutoModelForCausalLM.from_pretrained(
+            "Qwen/Qwen3-8B",
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+            device_map="auto"
+        )
+    except Exception as e:
+        st.error(f"Failed to load Qwen3-8B model: {str(e)}")
+        qwen_tokenizer = None
+        qwen_model = None
+    return summarizer, nlp, sentence_model, qwen_tokenizer, qwen_model
 # Initialize models
+summarizer, nlp, sentence_model, qwen_tokenizer, qwen_model = load_models()
 # Job descriptions and required skills
 job_descriptions = {
     "Software Engineer": {
         "skills": ["python", "java", "javascript", "sql", "algorithms", "data structures",
                   "git", "cloud", "web development", "software development", "coding"],
+        "description": "Looking for software engineers with strong programming skills and experience in software development.",
+        "semantic_description": """
+        We're seeking a talented Software Engineer to design, develop, and maintain high-quality software solutions.
+        The ideal candidate has strong programming skills in languages like Python, Java, or JavaScript, and experience with
+        SQL databases. You should be proficient in algorithms, data structures, and version control systems like Git.
+        Experience with cloud platforms and web development frameworks is a plus. You'll be responsible for the full
+        software development lifecycle, from requirements gathering to deployment and maintenance.
+        """
     },
     "Interaction Designer": {
         "skills": ["ui", "ux", "user research", "wireframing", "prototyping", "figma",
                   "sketch", "adobe", "design thinking", "interaction design"],
+        "description": "Seeking interaction designers with expertise in user experience and interface design.",
+        "semantic_description": """
+        We're looking for a creative Interaction Designer to craft intuitive and engaging user experiences.
+        You should have expertise in UI/UX design principles and methods, with a portfolio demonstrating your
+        ability to conduct user research, create wireframes, and develop interactive prototypes. Proficiency
+        with design tools like Figma, Sketch, and Adobe Creative Suite is required. You'll collaborate with
+        product managers and developers to iterate on designs based on user feedback and business requirements.
+        """
     },
     "Data Scientist": {
         "skills": ["python", "r", "statistics", "machine learning", "data analysis",
                   "sql", "tensorflow", "pytorch", "pandas", "numpy"],
+        "description": "Looking for data scientists with strong analytical and machine learning skills.",
+        "semantic_description": """
+        We're seeking a skilled Data Scientist to extract insights from complex datasets and build predictive models.
+        The ideal candidate has strong programming skills in Python or R, expertise in statistical analysis, and
+        experience with machine learning algorithms. You should be proficient in SQL for data extraction and tools
+        like TensorFlow or PyTorch for deep learning. Experience with data manipulation libraries like Pandas and NumPy
+        is essential. You'll work on projects from exploratory data analysis to model deployment, collaborating with
+        stakeholders to solve business problems through data-driven approaches.
+        """
     }
 }
             text += page.extract_text() or ""
     return text
# Heading that opens the work-experience section (case-insensitive).
_WORK_SECTION_RE = re.compile(
    r"WORK EXPERIENCE|PROFESSIONAL EXPERIENCE|EMPLOYMENT HISTORY|EXPERIENCE",
    re.IGNORECASE,
)
# Heading that closes it (start of the education section).
_NEXT_SECTION_RE = re.compile(r"EDUCATION|ACADEMIC|QUALIFICATIONS", re.IGNORECASE)
# "Month YYYY", "MM/YYYY" or a bare "YYYY". The month group is non-capturing
# so that the whole date text, including the year, is what gets extracted.
_DATE_RE = re.compile(
    r"\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?"
    r"|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)"
    r"[.,\s]+\d{4}\b"
    r"|\b\d{1,2}/\d{4}\b"
    r"|\b\d{4}\b",
    re.IGNORECASE,
)
# Optional seniority prefix followed by a common role name.
_JOB_TITLE_RE = re.compile(
    r"(Senior|Lead|Principal|Junior|Associate)?\s*"
    r"(Software Engineer|Developer|Designer|Analyst|Manager|Director|Consultant"
    r"|Specialist|Coordinator|Administrator)",
    re.IGNORECASE,
)


def extract_work_experience(text):
    """Extract work experience details including company names, job titles, and dates.

    The work-experience section runs from the first experience heading to the
    first education heading that follows it (or to the end of the text). The
    section is split into blank-line separated paragraphs, and each paragraph
    of at least 30 characters becomes one job entry.

    Args:
        text: Plain text of the resume.

    Returns:
        A list of dicts with the keys ``company``, ``title``, ``start_date``,
        ``end_date`` and ``description``. ``start_date`` is ``"Unknown"`` when
        no date is found; ``end_date`` is ``"Present"`` when fewer than two
        dates are found. An empty list is returned when no experience heading
        exists.
    """
    heading = _WORK_SECTION_RE.search(text)
    if heading is None:
        return []
    section_start = heading.end()

    # Search only *after* the experience heading; an earlier mention of
    # "education" must not hide the real end of the section.
    next_heading = _NEXT_SECTION_RE.search(text, section_start)
    section_end = next_heading.start() if next_heading else len(text)
    section_text = text[section_start:section_end]

    job_entries = []
    # Paragraphs often correspond to individual job entries.
    for raw_paragraph in re.split(r'\n\s*\n', section_text):
        paragraph = raw_paragraph.strip()
        # Skip short paragraphs that are likely not job entries.
        if len(paragraph) < 30:
            continue

        # finditer + group(0) yields the full date text (findall would only
        # return the contents of a capturing group).
        dates = [match.group(0) for match in _DATE_RE.finditer(paragraph)]
        start_date = dates[0] if dates else "Unknown"
        end_date = dates[-1] if len(dates) > 1 else "Present"

        title_match = _JOB_TITLE_RE.search(paragraph)
        # The pattern's optional prefix + \s* can swallow leading whitespace.
        job_title = title_match.group(0).strip() if title_match else "Unknown Position"

        # The company is taken from the first line of the (stripped) paragraph
        # with the job title, dates and separator punctuation removed.
        company = paragraph.split('\n', 1)[0].replace(job_title, "")
        for date in dates:
            company = company.replace(date, "")
        company = " ".join(re.sub(r'[,.|\-]', ' ', company).split())

        job_entries.append({
            "company": company or "Unknown Company",
            "title": job_title,
            "start_date": start_date,
            "end_date": end_date,
            "description": raw_paragraph,
        })
    return job_entries
def estimate_skill_proficiency(text, skill):
    """Estimate proficiency level for a skill.

    Every whole-word mention of ``skill`` is inspected (case-insensitively)
    together with up to 100 characters of context on each side. The highest
    level indicated by any mention wins.

    Args:
        text: Resume text to search.
        skill: Skill name to look for.

    Returns:
        ``"Advanced"``, ``"Intermediate"`` or ``"Basic"``; ``None`` when the
        skill is not mentioned as a whole word (or ``skill`` is empty).
        A mention without any indicator nearby counts as ``"Basic"``.
    """
    intermediate_indicators = ("experience with", "proficient in", "worked with",
                               "2-3 years", "2 years", "3 years")
    advanced_indicators = ("expert in", "advanced", "extensive experience", "lead",
                           "architected", "designed", "5+ years", "4+ years")

    text_lower = text.lower()
    skill_lower = skill.lower()
    if not skill_lower:
        return None

    # Whole-word match: a short skill such as "r" must not match the letter
    # inside unrelated words. Lookarounds are used instead of \b so skills
    # that start or end with punctuation still work.
    mention_re = re.compile(r"(?<!\w)" + re.escape(skill_lower) + r"(?!\w)")

    mentioned = False
    saw_intermediate = False
    for mention in mention_re.finditer(text_lower):
        mentioned = True
        context = text_lower[max(0, mention.start() - 100):mention.end() + 100]
        if any(indicator in context for indicator in advanced_indicators):
            return "Advanced"  # nothing can outrank this
        if any(indicator in context for indicator in intermediate_indicators):
            saw_intermediate = True

    if not mentioned:
        return None
    return "Intermediate" if saw_intermediate else "Basic"
def calculate_seniority_score(job_entries):
    """Calculate a seniority score based on job titles and years of experience.

    Args:
        job_entries: Iterable of dicts with ``title``, ``start_date`` and
            ``end_date`` entries. An ``end_date`` of ``"Present"`` (or one
            without a four-digit year) is treated as the current year.

    Returns:
        ``(seniority_score, total_years)`` where ``seniority_score`` is on a
        1-10 scale rounded to one decimal and ``total_years`` is the sum of
        the per-job year spans (spans outside 0-30 years are ignored).
    """
    # Seniority levels for common job-title keywords.
    seniority_levels = {
        "intern": 1,
        "junior": 2,
        "associate": 3,
        "developer": 4,
        "engineer": 4,
        "designer": 4,
        "analyst": 4,
        "senior": 6,
        "lead": 7,
        "manager": 7,
        "principal": 8,
        "director": 9,
        "vp": 10,
        "cto": 10,
        "cio": 10,
        "ceo": 10,
    }
    current_year = datetime.now().year

    total_years = 0
    highest_seniority = 0
    for job in job_entries:
        # --- years of experience ---
        start_match = re.search(r'\d{4}', str(job.get("start_date") or ""))
        if start_match:
            end_text = str(job.get("end_date") or "Present")
            end_match = None if end_text == "Present" else re.search(r'\d{4}', end_text)
            end_year = int(end_match.group(0)) if end_match else current_year
            years = end_year - int(start_match.group(0))
            if 0 <= years <= 30:  # Sanity check
                total_years += years

        # --- title-based seniority ---
        # Compare whole words: a substring test would find "cto" inside
        # "director" and "intern" inside "internal".
        title_words = set(re.findall(r"[a-z]+", str(job.get("title") or "").lower()))
        for keyword, score in seniority_levels.items():
            if keyword in title_words and score > highest_seniority:
                highest_seniority = score

    # Years of experience factor: 0-2 years (1), 3-5 years (2), 6-10 years (3), 11+ years (4)
    if total_years >= 11:
        years_factor = 4
    elif total_years >= 6:
        years_factor = 3
    elif total_years >= 3:
        years_factor = 2
    else:
        years_factor = 1

    # Final seniority score (1-10 scale)
    seniority_score = min(10, max(1, (highest_seniority * 0.6) + (years_factor * 1.0)))
    return round(seniority_score, 1), total_years
def _job_year_span(job, current_year):
    """Return ``(start_year, end_year)`` for a job entry, or ``None`` if undated."""
    if "Unknown" in job["start_date"] or "Unknown" in job["end_date"]:
        return None
    start_match = re.search(r'\d{4}', job["start_date"])
    if start_match is None:
        return None
    end_match = None
    if job["end_date"] != "Present":
        end_match = re.search(r'\d{4}', job["end_date"])
    # An open-ended or unparseable end date is treated as "still employed".
    end_year = int(end_match.group(0)) if end_match else current_year
    return int(start_match.group(0)), end_year


def detect_fraud_signals(text, job_entries):
    """Detect potential fraud signals in the resume.

    Three checks are run, and their messages are appended in this order:

    1. Overlapping roles at different companies (year granularity).
    2. Phrases that warrant verification (self-employment, freelancing, ...).
    3. Employment gaps of more than one year.

    Args:
        text: Full resume text.
        job_entries: List of dicts with ``company``, ``start_date`` and
            ``end_date`` entries, in any order.

    Returns:
        A list of human-readable signal messages (possibly empty).
    """
    fraud_signals = []
    current_year = datetime.now().year

    # Keep only entries with a usable start year, in their original order.
    dated_jobs = []
    for job in job_entries:
        span = _job_year_span(job, current_year)
        if span is not None:
            dated_jobs.append((job, span))

    # Check for impossible timelines (overlapping full-time roles).
    for index, (job_a, (a_start, a_end)) in enumerate(dated_jobs):
        for job_b, (b_start, b_end) in dated_jobs[index + 1:]:
            if job_a["company"] == job_b["company"]:
                continue
            overlap_years = min(a_end, b_end) - max(a_start, b_start)
            # Dates are whole years, so a positive overlap is at least one year.
            if overlap_years >= 1:
                fraud_signals.append(f"Potential timeline inconsistency: Overlapping roles at {job_a['company']} and {job_b['company']} for {overlap_years:.1f} years")

    # Check for phrases that are not fraudulent by themselves but warrant verification.
    suspicious_phrases = [
        "self-employed",
        "freelance",
        "consultant",
        "entrepreneur",
        "founder",
        "ceo of own company",
    ]
    text_lower = text.lower()
    for phrase in suspicious_phrases:
        if phrase in text_lower:
            fraud_signals.append(f"Verification recommended: Contains '{phrase}' which may need additional verification")

    # Check for unexplained gaps in employment history. Entries are sorted
    # chronologically here so the result does not depend on the caller's
    # ordering, and the latest end year seen so far is tracked so that a
    # short job nested inside a longer one does not create a false gap.
    latest_end = None
    for _job, (start_year, end_year) in sorted(dated_jobs, key=lambda item: item[1]):
        if latest_end is not None and start_year - latest_end > 1:
            fraud_signals.append(f"Employment gap: {latest_end} to {start_year} ({start_year - latest_end} years)")
        latest_end = end_year if latest_end is None else max(latest_end, end_year)

    return fraud_signals
def predict_career_trajectory(job_entries, current_skills):
    """Predict logical next roles based on career progression.

    Args:
        job_entries: Job entries with the most recent one first; only the
            ``title`` of the first entry is used.
        current_skills: Accepted for interface compatibility; not used.

    Returns:
        Up to three role names that follow the candidate's current rung on
        the matching career ladder, or a single-element list with an
        explanatory message when no ladder matches or the top is reached.
    """
    # Career path mappings based on common progressions.
    career_paths = {
        "software engineer": ["Senior Software Engineer", "Lead Developer", "Software Architect", "Engineering Manager", "CTO"],
        "developer": ["Senior Developer", "Technical Lead", "Software Architect", "Development Manager", "CTO"],
        "designer": ["Senior Designer", "Lead Designer", "Design Manager", "Creative Director", "VP of Design"],
        "data scientist": ["Senior Data Scientist", "Lead Data Scientist", "Data Science Manager", "Director of Analytics", "Chief Data Officer"],
    }
    seniority_markers = ("senior", "lead", "manager", "director")

    # Current role comes from the latest job entry.
    current_role = job_entries[0]["title"].lower() if job_entries else "unknown"

    # First ladder whose key appears in the current role.
    ladder = next(
        (roles for role_key, roles in career_paths.items() if role_key in current_role),
        None,
    )
    if ladder is None:
        return ["Career path prediction requires more information"]

    # Locate the candidate on the ladder. -1 means "below the first rung":
    # every ladder starts at the senior level, so a title without a seniority
    # marker must be offered that first rung next. When several markers match,
    # the most advanced rung wins.
    current_index = -1
    for index, role in enumerate(ladder):
        role_lower = role.lower()
        if any(marker in current_role and marker in role_lower for marker in seniority_markers):
            current_index = index

    # Next potential roles (up to 3).
    next_roles = ladder[current_index + 1:current_index + 4]
    if not next_roles:
        next_roles = ["You're at a senior level in your career path. Consider lateral moves or industry specialization."]
    return next_roles
+def analyze_resume(text, job_title, sentence_model):
+    # Extract work experience
+    job_entries = extract_work_experience(text)
+    # Sort job entries by start date (most recent first)
+    job_entries.sort(key=lambda x: "9999" if x["start_date"] == "Unknown" else x["start_date"], reverse=True)
+    # Extract relevant skills with basic keyword matching
     doc = nlp(text.lower())
     found_skills = []
     required_skills = job_descriptions[job_title]["skills"]
         if skill in text.lower():
             found_skills.append(skill)
+    # Determine skill proficiency levels
+    skill_proficiencies = {}
+    for skill in found_skills:
+        proficiency = estimate_skill_proficiency(text, skill)
+        if proficiency:
+            skill_proficiencies[skill] = proficiency
+    # Calculate seniority score
+    seniority_score, years_experience = calculate_seniority_score(job_entries)
+    # Detect fraud signals
+    fraud_signals = detect_fraud_signals(text, job_entries)
+    # Predict career trajectory
+    next_roles = predict_career_trajectory(job_entries, found_skills)
     # Generate summary
     chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
     summaries = []
         summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
         summaries.append(summary)
+    # Semantic matching with job description
+    semantic_score = 0
+    if sentence_model:
+        try:
+            resume_embedding = sentence_model.encode(text[:5000])  # Limit to first 5000 chars to avoid memory issues
+            job_embedding = sentence_model.encode(job_descriptions[job_title]["semantic_description"])
+            semantic_score = float(util.pytorch_cos_sim(resume_embedding, job_embedding)[0][0])
+        except Exception as e:
+            st.error(f"Error in semantic matching: {str(e)}")
+    return {
+        "found_skills": found_skills,
+        "skill_proficiencies": skill_proficiencies,
+        "summary": " ".join(summaries),
+        "job_entries": job_entries,
+        "seniority_score": seniority_score,
+        "years_experience": years_experience,
+        "fraud_signals": fraud_signals,
+        "next_roles": next_roles,
+        "semantic_score": semantic_score
+    }
def generate_career_advice(resume_text, job_title, analysis_results):
    """Ask the Qwen model for personalized career advice.

    Args:
        resume_text: Accepted but not used by this function.
        job_title: Key into ``job_descriptions`` for the target position.
        analysis_results: Dict holding at least ``found_skills``,
            ``skill_proficiencies``, ``summary``, ``years_experience``,
            ``seniority_score`` and ``next_roles``.

    Returns:
        The generated advice text, or an explanatory message when the model is
        unavailable or generation raises.
    """
    if qwen_tokenizer is None or qwen_model is None:
        return "Career advice model not available. Please check the model installation."

    # Split the required skills into those present and those still missing.
    role = job_descriptions[job_title]
    present_skills = analysis_results["found_skills"]
    proficiency_by_skill = analysis_results['skill_proficiencies']
    missing_skills = [name for name in role["skills"] if name not in present_skills]
    present_text = ', '.join(
        f"{name} ({proficiency_by_skill.get(name, 'Basic')})" for name in present_skills
    )
    missing_text = ', '.join(missing_skills)
    next_moves_text = ', '.join(analysis_results["next_roles"])

    # Prompt for the model.
    prompt = f"""
You are a professional career advisor. Based on the resume and the target job position,
provide personalized advice on skills to develop and suggest projects that would help the candidate
become a better fit for the position.
Resume summary: {analysis_results["summary"]}
Target position: {job_title}
Job requirements: {role["description"]}
Skills the candidate has: {present_text}
Skills the candidate needs to develop: {missing_text}
Current experience: {analysis_results["years_experience"]} years
Current seniority level: {analysis_results["seniority_score"]}/10
Potential next career moves: {next_moves_text}
Provide the following:
1. Specific advice on how to develop the missing skills
2. 3-5 project ideas that would showcase these skills and align with the candidate's career trajectory
3. Resources for learning (courses, books, websites)
4. Suggestions on how to position existing experience for this role
"""

    # Generate advice using Qwen3-8B.
    try:
        encoded = qwen_tokenizer(prompt, return_tensors="pt").to(qwen_model.device)
        prompt_length = encoded.input_ids.shape[1]
        with torch.no_grad():
            generated = qwen_model.generate(
                **encoded,
                max_new_tokens=1024,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
            )
        # Decode only the tokens produced after the prompt.
        completion_ids = generated[0][prompt_length:]
        return qwen_tokenizer.decode(completion_ids, skip_special_tokens=True)
    except Exception as e:
        return f"Failed to generate career advice: {str(e)}"
 # Streamlit UI
+st.title("📄 Comprehensive Resume Analyzer")
 # Add description
 st.markdown("""
+This app helps recruiters and job seekers analyze resumes with advanced features:
+- **Semantic Job Matching**: Uses AI to match resumes to job descriptions beyond keywords
+- **Skill Proficiency Detection**: Identifies skill levels from context
+- **Career Progression Analysis**: Visualizes job history and seniority
+- **Fraud Detection**: Flags potential inconsistencies for verification
+- **Career Path Prediction**: Suggests logical next roles based on experience
+- **Personalized Development Advice**: Recommends skills, projects, and resources
 """)
 # Create two columns
     # Show job description
     if job_title:
+        st.info(f"**Job Description:**\n{job_descriptions[job_title]['description']}\n\n**Required Skills:**\n" +
                 "\n".join([f"- {skill.title()}" for skill in job_descriptions[job_title]["skills"]]))
 if uploaded_file and job_title:
     try:
         # Show spinner while processing
+        with st.spinner("Analyzing resume with advanced AI..."):
             # Extract text from PDF
             text = extract_text_from_pdf(uploaded_file)
             # Analyze resume
+            analysis_results = analyze_resume(text, job_title, sentence_model)
+            # Calculate missing skills
+            missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
+                            if skill not in analysis_results["found_skills"]]
         # Display results in tabs
+        tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
+            "📊 Match Score",
+            "🎯 Skills Analysis",
+            "👨‍💼 Experience",
+            "📈 Career Path",
+            "🚩 Verification",
+            "🚀 Career Advice"
+        ])
         with tab1:
+            # Display match scores
+            st.subheader("📊 Job Match Analysis")
+            # Calculate match scores
+            keyword_match = len(analysis_results["found_skills"]) / len(job_descriptions[job_title]["skills"]) * 100
+            semantic_match = analysis_results["semantic_score"] * 100
+            # Display scores with gauges
+            col1, col2 = st.columns(2)
+            with col1:
+                # Keyword match gauge
+                fig = go.Figure(go.Indicator(
+                    mode = "gauge+number",
+                    value = keyword_match,
+                    title = {'text': "Keyword Match"},
+                    gauge = {
+                        'axis': {'range': [0, 100]},
+                        'bar': {'color': "darkblue"},
+                        'steps': [
+                            {'range': [0, 30], 'color': "lightgray"},
+                            {'range': [30, 70], 'color': "gray"},
+                            {'range': [70, 100], 'color': "lightblue"}
+                        ],
+                        'threshold': {
+                            'line': {'color': "red", 'width': 4},
+                            'thickness': 0.75,
+                            'value': 70
+                        }
+                    }
+                ))
+                st.plotly_chart(fig, use_container_width=True)
+            with col2:
+                # Semantic match gauge
+                fig = go.Figure(go.Indicator(
+                    mode = "gauge+number",
+                    value = semantic_match,
+                    title = {'text': "Semantic Match"},
+                    gauge = {
+                        'axis': {'range': [0, 100]},
+                        'bar': {'color': "darkgreen"},
+                        'steps': [
+                            {'range': [0, 30], 'color': "lightgray"},
+                            {'range': [30, 70], 'color': "gray"},
+                            {'range': [70, 100], 'color': "lightgreen"}
+                        ],
+                        'threshold': {
+                            'line': {'color': "red", 'width': 4},
+                            'thickness': 0.75,
+                            'value': 70
+                        }
+                    }
+                ))
+                st.plotly_chart(fig, use_container_width=True)
+            # Calculate overall match score (weighted average)
+            overall_match = (keyword_match * 0.4) + (semantic_match * 0.6)
+            # Create overall score gauge
+            fig = go.Figure(go.Indicator(
+                mode = "gauge+number+delta",
+                value = overall_match,
+                title = {'text': "Overall Match Score"},
+                delta = {'reference': 75, 'increasing': {'color': "green"}},
+                gauge = {
+                    'axis': {'range': [0, 100]},
+                    'bar': {'color': "darkblue"},
+                    'steps': [
+                        {'range': [0, 50], 'color': "lightgray"},
+                        {'range': [50, 75], 'color': "gray"},
+                        {'range': [75, 100], 'color': "darkblue"}
+                    ],
+                    'threshold': {
+                        'line': {'color': "red", 'width': 4},
+                        'thickness': 0.75,
+                        'value': 75
+                    }
+                }
+            ))
+            st.plotly_chart(fig, use_container_width=True)
             # Display resume summary
             st.subheader("📝 Resume Summary")
+            st.write(analysis_results["summary"])
+        with tab2:
+            # Display skills analysis
+            st.subheader("🎯 Skills Analysis")
+            # Create two columns
+            col1, col2 = st.columns(2)
+            with col1:
+                # Display matched skills with proficiency levels
+                st.subheader("🟢 Skills Present")
+                # Create a DataFrame for the skills table
+                skills_data = []
+                for skill in analysis_results["found_skills"]:
+                    proficiency = analysis_results["skill_proficiencies"].get(skill, "Basic")
+                    skills_data.append({
+                        "Skill": skill.title(),
+                        "Proficiency": proficiency
+                    })
+                if skills_data:
+                    skills_df = pd.DataFrame(skills_data)
+                    # Add proficiency color coding
+                    def color_proficiency(val):
+                        if val == "Advanced":
+                            return 'background-color: #d4f7d4'
+                        elif val == "Intermediate":
+                            return 'background-color: #fff2cc'
+                        else:
+                            return 'background-color: #f2f2f2'
+                    st.dataframe(skills_df.style.applymap(color_proficiency, subset=['Proficiency']),
+                                 use_container_width=True)
+                else:
+                    st.warning("No direct skill matches found.")
+            with col2:
+                # Display missing skills
+                st.subheader("🔴 Skills to Develop")
+                if missing_skills:
+                    missing_df = pd.DataFrame({"Skill": [skill.title() for skill in missing_skills]})
+                    st.dataframe(missing_df, use_container_width=True)
+                else:
+                    st.success("Great! The candidate has all the required skills!")
+            # Create a radar chart for skills coverage
+            st.subheader("Skills Coverage")
+            # Prepare data for radar chart
+            categories = job_descriptions[job_title]["skills"]
+            values = [1 if skill in analysis_results["found_skills"] else 0 for skill in categories]
+            # Create radar chart
+            fig = go.Figure()
+            fig.add_trace(go.Scatterpolar(
+                r=values,
+                theta=categories,
+                fill='toself',
+                name='Present Skills'
+            ))
+            fig.add_trace(go.Scatterpolar(
+                r=[1] * len(categories),
+                theta=categories,
+                fill='toself',
+                name='Required Skills',
+                opacity=0.3
+            ))
+            fig.update_layout(
+                polar=dict(
+                    radialaxis=dict(
+                        visible=True,
+                        range=[0, 1]
+                    )),
+                showlegend=True
+            )
+            st.plotly_chart(fig, use_container_width=True)
         with tab3:
+            # Display experience analysis
+            st.subheader("👨‍💼 Experience Analysis")
+            # Display seniority metrics
+            col1, col2 = st.columns(2)
+            with col1:
+                # Seniority score gauge
+                fig = go.Figure(go.Indicator(
+                    mode="gauge+number",
+                    value=analysis_results["seniority_score"],
+                    title={'text': "Seniority Score"},
+                    gauge={
+                        'axis': {'range': [0, 10]},
+                        'bar': {'color': "darkblue"},
+                        'steps': [
+                            {'range': [0, 3], 'color': "lightgray"},
+                            {'range': [3, 7], 'color': "gray"},
+                            {'range': [7, 10], 'color': "lightblue"}
+                        ],
+                        'threshold': {
+                            'line': {'color': "red", 'width': 4},
+                            'thickness': 0.75,
+                            'value': 7
+                        }
+                    }
+                ))
+                st.plotly_chart(fig, use_container_width=True)
+            with col2:
+                # Years of experience
+                fig = go.Figure(go.Indicator(
+                    mode="number+delta",
+                    value=analysis_results["years_experience"],
+                    number={'suffix': " years"},
+                    title={"text": "Years of Experience"},
+                    delta={'reference': 5, 'relative': False}
+                ))
+                st.plotly_chart(fig, use_container_width=True)
+            # Display career progression timeline
+            st.subheader("Career Progression Timeline")
+            if analysis_results["job_entries"]:
+                # Create timeline data
+                timeline_data = []
+                for job in analysis_results["job_entries"]:
+                    # Extract years for visualization
+                    start_year = re.search(r'\d{4}', job["start_date"])
+                    end_year = re.search(r'\d{4}', job["end_date"]) if job["end_date"] != "Present" else None
+                    if start_year:
+                        start_year = int(start_year.group(0))
+                        end_year = int(end_year.group(0)) if end_year else datetime.now().year
+                        timeline_data.append({
+                            "Role": job["title"],
+                            "Company": job["company"],
+                            "Start": start_year,
+                            "End": end_year,
+                            "Duration": end_year - start_year
+                        })
+                if timeline_data:
+                    # Create DataFrame for timeline
+                    timeline_df = pd.DataFrame(timeline_data)
+                    # Sort by start date (ascending)
+                    timeline_df = timeline_df.sort_values(by="Start")
+                    # Create Gantt chart
+                    fig = px.timeline(
+                        timeline_df,
+                        x_start="Start",
+                        x_end="End",
+                        y="Company",
+                        color="Role",
+                        hover_data=["Duration"],
+                        labels={"Company": "Employer"}
+                    )
+                    fig.update_layout(
+                        xaxis_title="Year",
+                        yaxis_title="Employer",
+                        title="Career Progression"
+                    )
+                    st.plotly_chart(fig, use_container_width=True)
+                else:
+                    st.warning("Couldn't extract timeline data from the resume.")
             else:
+                st.warning("No work experience entries found in the resume.")
+        with tab4:
+            # Display career path analysis
+            st.subheader("📈 Career Path Analysis")
+            # Display next role suggestions
+            st.subheader("Suggested Next Roles")
+            for i, role in enumerate(analysis_results["next_roles"]):
+                st.info(f"**Option {i+1}:** {role}")
+            # Add simple career progression visualization
+            st.subheader("Career Progression Path")
+            # Extract current role from latest job entry
+            current_role = analysis_results["job_entries"][0]["title"] if analysis_results["job_entries"] else "Current Position"
+            # Create nodes for career path
+            career_nodes = [current_role] + analysis_results["next_roles"]
+            # Create a simple digraph visualization
+            career_df = pd.DataFrame({
+                "From": [career_nodes[i] for i in range(len(career_nodes)-1)],
+                "To": [career_nodes[i+1] for i in range(len(career_nodes)-1)],
+                "Value": [10 for _ in range(len(career_nodes)-1)]
+            })
+            # Create a Sankey diagram
+            fig = go.Figure(data=[go.Sankey(
+                node=dict(
+                    pad=15,
+                    thickness=20,
+                    line=dict(color="black", width=0.5),
+                    label=career_nodes,
+                    color="blue"
+                ),
+                link=dict(
+                    source=[i for i in range(len(career_nodes)-1)],
+                    target=[i+1 for i in range(len(career_nodes)-1)],
+                    value=[1 for _ in range(len(career_nodes)-1)]
+                )
+            )])
+            fig.update_layout(title_text="Potential Career Path", font_size=12)
+            st.plotly_chart(fig, use_container_width=True)
+        with tab5:
+            # Display fraud detection analysis
+            st.subheader("🚩 Verification Points")
+            if analysis_results["fraud_signals"]:
+                st.warning("The following points may require verification:")
+                for signal in analysis_results["fraud_signals"]:
+                    st.markdown(f"- {signal}")
+            else:
+                st.success("No significant inconsistencies detected in the resume.")
+            # Add common verification tips
+            st.subheader("Recommended Verification Steps")
+            st.markdown("""
+            Even when no inconsistencies are detected, consider these verification steps:
+            1. **Reference Checks**: Contact previous employers to confirm employment dates and responsibilities
+            2. **Skills Assessment**: Use technical interviews or tests to verify claimed skills
+            3. **Education Verification**: Confirm degrees and certifications with educational institutions
+            4. **Portfolio Review**: Examine work samples or project contributions
+            5. **Online Presence**: Check LinkedIn, GitHub, or other professional profiles for consistency
+            """)
+        with tab6:
+            # Display career advice
+            st.subheader("🚀 Career Advice and Development Plan")
+            if st.button("Generate Personalized Career Advice"):
+                with st.spinner("Generating detailed career advice and development plan..."):
+                    advice = generate_career_advice(text, job_title, analysis_results)
+                    st.markdown(advice)
     except Exception as e:
         st.error(f"An error occurred while processing the resume: {str(e)}")
+        st.exception(e)
 # Footer: a horizontal rule followed by a one-line attribution
 st.markdown("---")
+st.markdown("Made with ❤️ using Streamlit, Hugging Face, and Advanced AI")

requirements.txt CHANGED Viewed

@@ -1,5 +1,14 @@
-streamlit>=1.31.0
-pdfplumber>=0.10.3
-transformers>=4.37.2
-torch>=2.1.2
-spacy>=3.7.2

+streamlit==1.29.0
+pdfplumber==0.10.2
+spacy==3.7.2
+transformers==4.36.2
+sentence-transformers==2.2.2
+torch==2.1.2
+nltk==3.8.1
+pandas==2.1.4
+matplotlib==3.8.2
+plotly==5.18.0
+numpy==1.26.2
+python-docx==1.0.1
+huggingface-hub==0.19.4
+accelerate==0.25.0