root
commited on
Commit
·
eaa3094
1
Parent(s):
ea35a5b
ss
Browse files- app.py +651 -904
- fix_dependencies.py +0 -48
- requirements.txt +7 -1
app.py
CHANGED
@@ -2,80 +2,45 @@ import streamlit as st
|
|
2 |
import pdfplumber
|
3 |
import io
|
4 |
import spacy
|
5 |
-
import
|
6 |
-
import pandas as pd
|
7 |
-
import matplotlib.pyplot as plt
|
8 |
-
from transformers import pipeline
|
9 |
-
# Import SentenceTransformer with try-except
|
10 |
-
try:
|
11 |
-
from sentence_transformers import SentenceTransformer
|
12 |
-
# Try to import util, if it fails, we'll create our own minimal version
|
13 |
-
try:
|
14 |
-
from sentence_transformers import util
|
15 |
-
except ImportError:
|
16 |
-
# Create a minimal util module replacement with the functions we need
|
17 |
-
class util:
|
18 |
-
@staticmethod
|
19 |
-
def pytorch_cos_sim(a, b):
|
20 |
-
"""
|
21 |
-
Compute cosine similarity between two PyTorch tensors
|
22 |
-
"""
|
23 |
-
import torch
|
24 |
-
if not isinstance(a, torch.Tensor):
|
25 |
-
a = torch.tensor(a)
|
26 |
-
if not isinstance(b, torch.Tensor):
|
27 |
-
b = torch.tensor(b)
|
28 |
-
|
29 |
-
if len(a.shape) == 1:
|
30 |
-
a = a.unsqueeze(0)
|
31 |
-
if len(b.shape) == 1:
|
32 |
-
b = b.unsqueeze(0)
|
33 |
-
|
34 |
-
a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
|
35 |
-
b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
|
36 |
-
return torch.mm(a_norm, b_norm.transpose(0, 1))
|
37 |
-
except ImportError:
|
38 |
-
st.error("Failed to import SentenceTransformer. Semantic matching will be disabled.")
|
39 |
-
SentenceTransformer = None
|
40 |
-
class util:
|
41 |
-
@staticmethod
|
42 |
-
def pytorch_cos_sim(*args, **kwargs):
|
43 |
-
return 0
|
44 |
import subprocess
|
45 |
import sys
|
46 |
import torch
|
47 |
-
import
|
48 |
-
|
49 |
-
|
50 |
import plotly.express as px
|
51 |
import plotly.graph_objects as go
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
66 |
|
67 |
-
#
|
68 |
@st.cache_resource
|
69 |
def download_nltk_resources():
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
|
75 |
download_nltk_resources()
|
76 |
|
77 |
st.set_page_config(
|
78 |
-
page_title="
|
79 |
page_icon="📄",
|
80 |
layout="wide"
|
81 |
)
|
@@ -93,31 +58,30 @@ def download_spacy_model():
|
|
93 |
# Load the NLP models
|
94 |
@st.cache_resource
|
95 |
def load_models():
|
96 |
-
|
97 |
-
|
98 |
-
except Exception as e:
|
99 |
-
st.error(f"Failed to load summarization model: {str(e)}")
|
100 |
-
# Fallback to a simpler summarizer that just takes the first few sentences
|
101 |
-
summarizer = lambda text, **kwargs: [{"summary_text": ". ".join(text.split(". ")[:3]) + "."}]
|
102 |
|
|
|
|
|
|
|
|
|
103 |
try:
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
except Exception as e:
|
106 |
-
st.error(f"Failed to load
|
107 |
-
|
108 |
-
|
109 |
-
# Load sentence transformer for semantic matching
|
110 |
-
sentence_model = None
|
111 |
-
if SentenceTransformer is not None:
|
112 |
-
try:
|
113 |
-
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
|
114 |
-
except Exception as e:
|
115 |
-
st.error(f"Failed to load sentence transformer: {str(e)}")
|
116 |
|
117 |
-
return summarizer, nlp, sentence_model
|
118 |
|
119 |
# Initialize models
|
120 |
-
summarizer, nlp, sentence_model = load_models()
|
121 |
|
122 |
# Job descriptions and required skills
|
123 |
job_descriptions = {
|
@@ -125,38 +89,61 @@ job_descriptions = {
|
|
125 |
"skills": ["python", "java", "javascript", "sql", "algorithms", "data structures",
|
126 |
"git", "cloud", "web development", "software development", "coding"],
|
127 |
"description": "Looking for software engineers with strong programming skills and experience in software development.",
|
128 |
-
"
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
},
|
136 |
"Interaction Designer": {
|
137 |
"skills": ["ui", "ux", "user research", "wireframing", "prototyping", "figma",
|
138 |
"sketch", "adobe", "design thinking", "interaction design"],
|
139 |
"description": "Seeking interaction designers with expertise in user experience and interface design.",
|
140 |
-
"
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
},
|
148 |
"Data Scientist": {
|
149 |
"skills": ["python", "r", "statistics", "machine learning", "data analysis",
|
150 |
"sql", "tensorflow", "pytorch", "pandas", "numpy"],
|
151 |
"description": "Looking for data scientists with strong analytical and machine learning skills.",
|
152 |
-
"
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
}
|
161 |
}
|
162 |
|
@@ -167,312 +154,8 @@ def extract_text_from_pdf(pdf_file):
|
|
167 |
text += page.extract_text() or ""
|
168 |
return text
|
169 |
|
170 |
-
def
|
171 |
-
|
172 |
-
# Find common section headers for work experience
|
173 |
-
work_exp_patterns = [
|
174 |
-
r"(?i)WORK EXPERIENCE|PROFESSIONAL EXPERIENCE|EMPLOYMENT HISTORY|EXPERIENCE",
|
175 |
-
r"(?i)EDUCATION|ACADEMIC|QUALIFICATIONS"
|
176 |
-
]
|
177 |
-
|
178 |
-
# Find the start of work experience section
|
179 |
-
work_exp_start = None
|
180 |
-
for pattern in work_exp_patterns[:1]: # Use only the work experience patterns
|
181 |
-
match = re.search(pattern, text)
|
182 |
-
if match:
|
183 |
-
work_exp_start = match.end()
|
184 |
-
break
|
185 |
-
|
186 |
-
if work_exp_start is None:
|
187 |
-
return []
|
188 |
-
|
189 |
-
# Find the end of work experience section (start of education or next major section)
|
190 |
-
work_exp_end = len(text)
|
191 |
-
for pattern in work_exp_patterns[1:]: # Use only the education pattern
|
192 |
-
match = re.search(pattern, text)
|
193 |
-
if match and match.start() > work_exp_start:
|
194 |
-
work_exp_end = match.start()
|
195 |
-
break
|
196 |
-
|
197 |
-
work_exp_text = text[work_exp_start:work_exp_end]
|
198 |
-
|
199 |
-
# Extract job entries
|
200 |
-
# Look for patterns of job titles, company names, and dates
|
201 |
-
job_entries = []
|
202 |
-
|
203 |
-
# Pattern for dates (MM/YYYY or Month YYYY)
|
204 |
-
date_pattern = r"(?i)(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[,\s]+\d{4}|\d{1,2}/\d{4}|\d{4}"
|
205 |
-
|
206 |
-
# Pattern for common job title indicators
|
207 |
-
job_title_pattern = r"(?i)(Senior|Lead|Principal|Junior|Associate)?\s*(Software Engineer|Developer|Designer|Analyst|Manager|Director|Consultant|Specialist|Coordinator|Administrator)"
|
208 |
-
|
209 |
-
# Split into paragraphs which often represent job entries
|
210 |
-
paragraphs = re.split(r'\n\s*\n', work_exp_text)
|
211 |
-
|
212 |
-
for paragraph in paragraphs:
|
213 |
-
# Skip short paragraphs that are likely not job entries
|
214 |
-
if len(paragraph.strip()) < 30:
|
215 |
-
continue
|
216 |
-
|
217 |
-
# Extract dates
|
218 |
-
dates = re.findall(date_pattern, paragraph)
|
219 |
-
start_date = dates[0] if dates else "Unknown"
|
220 |
-
end_date = dates[-1] if len(dates) > 1 else "Present"
|
221 |
-
|
222 |
-
# Extract job title
|
223 |
-
title_match = re.search(job_title_pattern, paragraph)
|
224 |
-
job_title = title_match.group(0) if title_match else "Unknown Position"
|
225 |
-
|
226 |
-
# Extract company name (typically near the job title or at the start of the paragraph)
|
227 |
-
lines = paragraph.split('\n')
|
228 |
-
company = lines[0].strip() if lines else "Unknown Company"
|
229 |
-
if job_title in company:
|
230 |
-
company = company.replace(job_title, "").strip()
|
231 |
-
|
232 |
-
# Clean company name
|
233 |
-
for date in dates:
|
234 |
-
company = company.replace(date, "").strip()
|
235 |
-
company = re.sub(r'[,\.\|\-]', ' ', company).strip()
|
236 |
-
|
237 |
-
job_entries.append({
|
238 |
-
"company": company,
|
239 |
-
"title": job_title,
|
240 |
-
"start_date": start_date,
|
241 |
-
"end_date": end_date,
|
242 |
-
"description": paragraph
|
243 |
-
})
|
244 |
-
|
245 |
-
return job_entries
|
246 |
-
|
247 |
-
def estimate_skill_proficiency(text, skill):
|
248 |
-
"""Estimate proficiency level for a skill"""
|
249 |
-
# Define proficiency indicators
|
250 |
-
basic_indicators = ["familiar with", "basic knowledge", "understanding of", "exposure to"]
|
251 |
-
intermediate_indicators = ["experience with", "proficient in", "worked with", "2-3 years", "2 years", "3 years"]
|
252 |
-
advanced_indicators = ["expert in", "advanced", "extensive experience", "lead", "architected", "designed", "5+ years", "4+ years"]
|
253 |
-
|
254 |
-
# Convert to lowercase for matching
|
255 |
-
text_lower = text.lower()
|
256 |
-
|
257 |
-
# Find skill mentions and surrounding context
|
258 |
-
skill_lower = skill.lower()
|
259 |
-
skill_index = text_lower.find(skill_lower)
|
260 |
-
|
261 |
-
if skill_index == -1:
|
262 |
-
return None
|
263 |
-
|
264 |
-
# Extract context (100 characters before and after the skill mention)
|
265 |
-
start = max(0, skill_index - 100)
|
266 |
-
end = min(len(text_lower), skill_index + len(skill_lower) + 100)
|
267 |
-
context = text_lower[start:end]
|
268 |
-
|
269 |
-
# Check for proficiency indicators
|
270 |
-
for indicator in advanced_indicators:
|
271 |
-
if indicator in context:
|
272 |
-
return "Advanced"
|
273 |
-
|
274 |
-
for indicator in intermediate_indicators:
|
275 |
-
if indicator in context:
|
276 |
-
return "Intermediate"
|
277 |
-
|
278 |
-
for indicator in basic_indicators:
|
279 |
-
if indicator in context:
|
280 |
-
return "Basic"
|
281 |
-
|
282 |
-
# Default to basic if skill is mentioned but no proficiency indicators are found
|
283 |
-
return "Basic"
|
284 |
-
|
285 |
-
def calculate_seniority_score(job_entries):
|
286 |
-
"""Calculate a seniority score based on job titles and years of experience"""
|
287 |
-
# Define seniority levels for common job titles
|
288 |
-
seniority_levels = {
|
289 |
-
"intern": 1,
|
290 |
-
"junior": 2,
|
291 |
-
"associate": 3,
|
292 |
-
"developer": 4,
|
293 |
-
"engineer": 4,
|
294 |
-
"designer": 4,
|
295 |
-
"analyst": 4,
|
296 |
-
"senior": 6,
|
297 |
-
"lead": 7,
|
298 |
-
"manager": 7,
|
299 |
-
"principal": 8,
|
300 |
-
"director": 9,
|
301 |
-
"vp": 10,
|
302 |
-
"cto": 10,
|
303 |
-
"cio": 10,
|
304 |
-
"ceo": 10
|
305 |
-
}
|
306 |
-
|
307 |
-
# Calculate total years of experience
|
308 |
-
total_years = 0
|
309 |
-
|
310 |
-
for job in job_entries:
|
311 |
-
# Parse start and end dates
|
312 |
-
try:
|
313 |
-
start_year = re.search(r'\d{4}', job["start_date"])
|
314 |
-
end_year = re.search(r'\d{4}', job["end_date"]) if job["end_date"] != "Present" else None
|
315 |
-
|
316 |
-
if start_year:
|
317 |
-
start_year = int(start_year.group(0))
|
318 |
-
end_year = int(end_year.group(0)) if end_year else datetime.now().year
|
319 |
-
years = end_year - start_year
|
320 |
-
if 0 <= years <= 30: # Sanity check
|
321 |
-
total_years += years
|
322 |
-
except Exception:
|
323 |
-
# Skip if there's an issue with date parsing
|
324 |
-
pass
|
325 |
-
|
326 |
-
# Calculate title-based seniority
|
327 |
-
highest_seniority = 0
|
328 |
-
|
329 |
-
for job in job_entries:
|
330 |
-
title_lower = job["title"].lower()
|
331 |
-
for level_title, score in seniority_levels.items():
|
332 |
-
if level_title in title_lower and score > highest_seniority:
|
333 |
-
highest_seniority = score
|
334 |
-
|
335 |
-
# Combine years of experience and title-based seniority
|
336 |
-
# Years of experience factor: 0-2 years (1), 3-5 years (2), 6-10 years (3), 11+ years (4)
|
337 |
-
years_factor = 1
|
338 |
-
if total_years >= 3:
|
339 |
-
years_factor = 2
|
340 |
-
if total_years >= 6:
|
341 |
-
years_factor = 3
|
342 |
-
if total_years >= 11:
|
343 |
-
years_factor = 4
|
344 |
-
|
345 |
-
# Final seniority score (1-10 scale)
|
346 |
-
seniority_score = min(10, max(1, (highest_seniority * 0.6) + (years_factor * 1.0)))
|
347 |
-
|
348 |
-
return round(seniority_score, 1), total_years
|
349 |
-
|
350 |
-
def detect_fraud_signals(text, job_entries):
|
351 |
-
"""Detect potential fraud signals in the resume"""
|
352 |
-
fraud_signals = []
|
353 |
-
|
354 |
-
# Check for impossible timelines (overlapping full-time roles)
|
355 |
-
if len(job_entries) >= 2:
|
356 |
-
for i in range(len(job_entries) - 1):
|
357 |
-
for j in range(i+1, len(job_entries)):
|
358 |
-
# Check if both jobs have date information
|
359 |
-
if (job_entries[i]["start_date"] != "Unknown" and
|
360 |
-
job_entries[i]["end_date"] != "Unknown" and
|
361 |
-
job_entries[j]["start_date"] != "Unknown" and
|
362 |
-
job_entries[j]["end_date"] != "Unknown"):
|
363 |
-
|
364 |
-
# Get years for comparison
|
365 |
-
i_start = re.search(r'\d{4}', job_entries[i]["start_date"])
|
366 |
-
i_end = re.search(r'\d{4}', job_entries[i]["end_date"]) if job_entries[i]["end_date"] != "Present" else None
|
367 |
-
j_start = re.search(r'\d{4}', job_entries[j]["start_date"])
|
368 |
-
j_end = re.search(r'\d{4}', job_entries[j]["end_date"]) if job_entries[j]["end_date"] != "Present" else None
|
369 |
-
|
370 |
-
# Convert to integers for comparison
|
371 |
-
if i_start and j_start:
|
372 |
-
i_start = int(i_start.group(0))
|
373 |
-
i_end = int(i_end.group(0)) if i_end else datetime.now().year
|
374 |
-
j_start = int(j_start.group(0))
|
375 |
-
j_end = int(j_end.group(0)) if j_end else datetime.now().year
|
376 |
-
|
377 |
-
# Check for significant overlap (more than 6 months)
|
378 |
-
if ((i_start <= j_start < i_end) or (j_start <= i_start < j_end)) and job_entries[i]["company"] != job_entries[j]["company"]:
|
379 |
-
overlap_years = min(i_end, j_end) - max(i_start, j_start)
|
380 |
-
if overlap_years > 0.5: # More than 6 months overlap
|
381 |
-
fraud_signals.append(f"Potential timeline inconsistency: Overlapping roles at {job_entries[i]['company']} and {job_entries[j]['company']} for {overlap_years:.1f} years")
|
382 |
-
|
383 |
-
# Check for suspicious keywords or phrases
|
384 |
-
suspicious_phrases = [
|
385 |
-
"self-employed",
|
386 |
-
"freelance",
|
387 |
-
"consultant",
|
388 |
-
"entrepreneur",
|
389 |
-
"founder",
|
390 |
-
"ceo of own company"
|
391 |
-
]
|
392 |
-
|
393 |
-
# Look for suspicious gap filling
|
394 |
-
for phrase in suspicious_phrases:
|
395 |
-
if phrase in text.lower():
|
396 |
-
# Not all of these are fraudulent, but they warrant verification
|
397 |
-
fraud_signals.append(f"Verification recommended: Contains '{phrase}' which may need additional verification")
|
398 |
-
|
399 |
-
# Check for unexplained gaps in employment history
|
400 |
-
if len(job_entries) >= 2:
|
401 |
-
for i in range(len(job_entries) - 1):
|
402 |
-
# Sort entries by start date
|
403 |
-
if "Unknown" not in job_entries[i]["end_date"] and "Unknown" not in job_entries[i+1]["start_date"]:
|
404 |
-
end_match = re.search(r'\d{4}', job_entries[i]["end_date"])
|
405 |
-
start_match = re.search(r'\d{4}', job_entries[i+1]["start_date"])
|
406 |
-
|
407 |
-
if end_match and start_match:
|
408 |
-
end_year = int(end_match.group(0))
|
409 |
-
start_year = int(start_match.group(0))
|
410 |
-
|
411 |
-
# If there's more than a 1-year gap
|
412 |
-
if start_year - end_year > 1:
|
413 |
-
fraud_signals.append(f"Employment gap: {end_year} to {start_year} ({start_year - end_year} years)")
|
414 |
-
|
415 |
-
return fraud_signals
|
416 |
-
|
417 |
-
def predict_career_trajectory(job_entries, current_skills):
|
418 |
-
"""Predict logical next roles based on career progression"""
|
419 |
-
# Career path mappings based on common progressions
|
420 |
-
career_paths = {
|
421 |
-
"software engineer": ["Senior Software Engineer", "Lead Developer", "Software Architect", "Engineering Manager", "CTO"],
|
422 |
-
"developer": ["Senior Developer", "Technical Lead", "Software Architect", "Development Manager", "CTO"],
|
423 |
-
"designer": ["Senior Designer", "Lead Designer", "Design Manager", "Creative Director", "VP of Design"],
|
424 |
-
"data scientist": ["Senior Data Scientist", "Lead Data Scientist", "Data Science Manager", "Director of Analytics", "Chief Data Officer"]
|
425 |
-
}
|
426 |
-
|
427 |
-
# Extract current role from latest job entry
|
428 |
-
current_role = job_entries[0]["title"].lower() if job_entries else "unknown"
|
429 |
-
|
430 |
-
# Find the best matching career path
|
431 |
-
best_match = None
|
432 |
-
for role_key in career_paths:
|
433 |
-
if role_key in current_role:
|
434 |
-
best_match = role_key
|
435 |
-
break
|
436 |
-
|
437 |
-
if not best_match:
|
438 |
-
return ["Career path prediction requires more information"]
|
439 |
-
|
440 |
-
# Find current position in the career path
|
441 |
-
current_index = 0
|
442 |
-
for i, role in enumerate(career_paths[best_match]):
|
443 |
-
if any(indicator in current_role for indicator in ["senior", "lead", "manager", "director", "vp", "chief"]):
|
444 |
-
# If current role contains seniority indicators, advance the index
|
445 |
-
if "senior" in current_role and "senior" in role.lower():
|
446 |
-
current_index = i
|
447 |
-
break
|
448 |
-
elif "lead" in current_role and "lead" in role.lower():
|
449 |
-
current_index = i
|
450 |
-
break
|
451 |
-
elif "manager" in current_role and "manager" in role.lower():
|
452 |
-
current_index = i
|
453 |
-
break
|
454 |
-
elif "director" in current_role and "director" in role.lower():
|
455 |
-
current_index = i
|
456 |
-
break
|
457 |
-
|
458 |
-
# Get next potential roles (up to 3)
|
459 |
-
next_roles = []
|
460 |
-
for i in range(current_index + 1, min(current_index + 4, len(career_paths[best_match]))):
|
461 |
-
next_roles.append(career_paths[best_match][i])
|
462 |
-
|
463 |
-
if not next_roles:
|
464 |
-
next_roles = ["You're at a senior level in your career path. Consider lateral moves or industry specialization."]
|
465 |
-
|
466 |
-
return next_roles
|
467 |
-
|
468 |
-
def analyze_resume(text, job_title, sentence_model):
|
469 |
-
# Extract work experience
|
470 |
-
job_entries = extract_work_experience(text)
|
471 |
-
|
472 |
-
# Sort job entries by start date (most recent first)
|
473 |
-
job_entries.sort(key=lambda x: "9999" if x["start_date"] == "Unknown" else x["start_date"], reverse=True)
|
474 |
-
|
475 |
-
# Extract relevant skills with basic keyword matching
|
476 |
doc = nlp(text.lower())
|
477 |
found_skills = []
|
478 |
required_skills = job_descriptions[job_title]["skills"]
|
@@ -481,22 +164,6 @@ def analyze_resume(text, job_title, sentence_model):
|
|
481 |
if skill in text.lower():
|
482 |
found_skills.append(skill)
|
483 |
|
484 |
-
# Determine skill proficiency levels
|
485 |
-
skill_proficiencies = {}
|
486 |
-
for skill in found_skills:
|
487 |
-
proficiency = estimate_skill_proficiency(text, skill)
|
488 |
-
if proficiency:
|
489 |
-
skill_proficiencies[skill] = proficiency
|
490 |
-
|
491 |
-
# Calculate seniority score
|
492 |
-
seniority_score, years_experience = calculate_seniority_score(job_entries)
|
493 |
-
|
494 |
-
# Detect fraud signals
|
495 |
-
fraud_signals = detect_fraud_signals(text, job_entries)
|
496 |
-
|
497 |
-
# Predict career trajectory
|
498 |
-
next_roles = predict_career_trajectory(job_entries, found_skills)
|
499 |
-
|
500 |
# Generate summary
|
501 |
chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
|
502 |
summaries = []
|
@@ -504,190 +171,92 @@ def analyze_resume(text, job_title, sentence_model):
|
|
504 |
summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
|
505 |
summaries.append(summary)
|
506 |
|
507 |
-
#
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
516 |
|
517 |
return {
|
518 |
-
|
519 |
-
"
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
|
|
527 |
}
|
528 |
|
529 |
def generate_career_advice(resume_text, job_title, found_skills, missing_skills):
|
530 |
-
|
531 |
-
|
532 |
-
to avoid dependency issues
|
533 |
-
"""
|
534 |
-
# Template-based advice generation
|
535 |
-
advice = f"""## Career Development Plan for {job_title} Position
|
536 |
-
|
537 |
-
### Skills to Develop
|
538 |
-
|
539 |
-
The following skills would strengthen your resume for this position:
|
540 |
-
|
541 |
-
"""
|
542 |
|
543 |
-
#
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
- **Project idea**: Build a data analysis tool or web application using Python and popular frameworks
|
549 |
-
- **Resources**: Coursera's Python for Everybody, Python.org tutorials, Real Python website
|
550 |
|
551 |
-
|
552 |
-
elif skill == "java":
|
553 |
-
advice += f"""#### Java
|
554 |
-
- **How to develop**: Complete a comprehensive Java course with practical exercises
|
555 |
-
- **Project idea**: Develop a backend service with Spring Boot
|
556 |
-
- **Resources**: Oracle's Java tutorials, Udemy courses on Java, "Effective Java" by Joshua Bloch
|
557 |
|
558 |
-
|
559 |
-
elif skill == "javascript":
|
560 |
-
advice += f"""#### JavaScript
|
561 |
-
- **How to develop**: Practice with modern JavaScript frameworks
|
562 |
-
- **Project idea**: Create an interactive web application with React or Vue.js
|
563 |
-
- **Resources**: MDN Web Docs, freeCodeCamp, "Eloquent JavaScript" by Marijn Haverbeke
|
564 |
|
565 |
-
|
566 |
-
elif skill == "sql":
|
567 |
-
advice += f"""#### SQL
|
568 |
-
- **How to develop**: Practice with database design and complex queries
|
569 |
-
- **Project idea**: Design a database system for a small business with reports and analytics
|
570 |
-
- **Resources**: SQLZoo, Mode Analytics SQL tutorial, W3Schools SQL course
|
571 |
-
|
572 |
-
"""
|
573 |
-
elif "algorithms" in skill or "data structures" in skill:
|
574 |
-
advice += f"""#### Algorithms & Data Structures
|
575 |
-
- **How to develop**: Solve coding problems regularly on platforms like LeetCode
|
576 |
-
- **Project idea**: Implement classic algorithms and optimize them for specific use cases
|
577 |
-
- **Resources**: "Cracking the Coding Interview" book, AlgoExpert, Coursera Algorithms specialization
|
578 |
-
|
579 |
-
"""
|
580 |
-
elif "git" in skill:
|
581 |
-
advice += f"""#### Git & Version Control
|
582 |
-
- **How to develop**: Contribute to open source projects to practice Git workflows
|
583 |
-
- **Project idea**: Set up a personal project with proper branching strategies and CI/CD
|
584 |
-
- **Resources**: Git documentation, GitHub Learning Lab, Atlassian Git tutorials
|
585 |
-
|
586 |
-
"""
|
587 |
-
elif "cloud" in skill:
|
588 |
-
advice += f"""#### Cloud Technologies
|
589 |
-
- **How to develop**: Get hands-on experience with a major cloud provider (AWS, Azure, GCP)
|
590 |
-
- **Project idea**: Deploy an application to the cloud with proper infrastructure as code
|
591 |
-
- **Resources**: Cloud provider documentation, A Cloud Guru courses, free tier accounts
|
592 |
|
593 |
-
|
594 |
-
elif "ui" in skill or "ux" in skill:
|
595 |
-
advice += f"""#### UI/UX Design
|
596 |
-
- **How to develop**: Study design principles and practice creating user interfaces
|
597 |
-
- **Project idea**: Redesign an existing website or app with focus on user experience
|
598 |
-
- **Resources**: Nielsen Norman Group articles, Interaction Design Foundation, Figma tutorials
|
599 |
|
600 |
-
|
601 |
-
elif "machine learning" in skill:
|
602 |
-
advice += f"""#### Machine Learning
|
603 |
-
- **How to develop**: Take courses on ML fundamentals and practice with datasets
|
604 |
-
- **Project idea**: Build a predictive model to solve a real-world problem
|
605 |
-
- **Resources**: Andrew Ng's Coursera courses, Kaggle competitions, "Hands-On Machine Learning" book
|
606 |
-
|
607 |
-
"""
|
608 |
-
elif "data analysis" in skill:
|
609 |
-
advice += f"""#### Data Analysis
|
610 |
-
- **How to develop**: Practice analyzing datasets and creating visualizations
|
611 |
-
- **Project idea**: Perform an exploratory data analysis on a public dataset
|
612 |
-
- **Resources**: DataCamp courses, Kaggle datasets, "Python for Data Analysis" by Wes McKinney
|
613 |
-
|
614 |
-
"""
|
615 |
-
else:
|
616 |
-
advice += f"""#### {skill.title()}
|
617 |
-
- **How to develop**: Research industry best practices and take relevant courses
|
618 |
-
- **Project idea**: Create a portfolio piece that showcases this skill
|
619 |
-
- **Resources**: Online courses, industry blogs, and practice projects
|
620 |
-
|
621 |
-
"""
|
622 |
-
|
623 |
-
# Add project recommendations based on job title
|
624 |
-
advice += f"""
|
625 |
-
### Recommended Projects for {job_title}
|
626 |
-
|
627 |
-
Based on the target position and the skills needed, here are some project ideas:
|
628 |
-
|
629 |
-
"""
|
630 |
-
if job_title == "Software Engineer":
|
631 |
-
advice += """
|
632 |
-
1. **Full-Stack Web Application**: Build a complete web app with frontend, backend, and database
|
633 |
-
2. **API Service**: Create a RESTful or GraphQL API with proper authentication and documentation
|
634 |
-
3. **Mobile Application**: Develop a cross-platform mobile app using React Native or Flutter
|
635 |
-
4. **Automation Tools**: Build scripts or applications that automate repetitive tasks
|
636 |
-
5. **Contribution to Open Source**: Find a project aligned with your skills and contribute meaningfully
|
637 |
-
|
638 |
-
"""
|
639 |
-
elif job_title == "Interaction Designer":
|
640 |
-
advice += """
|
641 |
-
1. **Design System**: Create a comprehensive design system with components and usage guidelines
|
642 |
-
2. **Website Redesign**: Redesign an existing website with focus on improved UX
|
643 |
-
3. **Mobile App Prototype**: Design a fully interactive mobile app prototype
|
644 |
-
4. **User Research Project**: Conduct user research and create a report with insights and recommendations
|
645 |
-
5. **Design Case Study**: Document your design process for solving a specific problem
|
646 |
-
|
647 |
-
"""
|
648 |
-
elif job_title == "Data Scientist":
|
649 |
-
advice += """
|
650 |
-
1. **Predictive Model**: Build a machine learning model that solves a real-world problem
|
651 |
-
2. **Data Visualization Dashboard**: Create an interactive dashboard to visualize complex data
|
652 |
-
3. **Natural Language Processing**: Develop a text analysis or sentiment analysis project
|
653 |
-
4. **Time Series Analysis**: Analyze time-based data and build forecasting models
|
654 |
-
5. **A/B Testing Framework**: Design and implement a framework for testing hypotheses
|
655 |
|
|
|
|
|
|
|
|
|
656 |
"""
|
657 |
-
|
658 |
-
# General advice for all positions
|
659 |
-
advice += """
|
660 |
-
### Learning Resources
|
661 |
-
|
662 |
-
- **Online Platforms**: Coursera, Udemy, Pluralsight, LinkedIn Learning
|
663 |
-
- **Documentation**: Official language and framework documentation
|
664 |
-
- **Communities**: Stack Overflow, GitHub, Reddit programming communities
|
665 |
-
- **Books**: O'Reilly publications specific to your target technologies
|
666 |
-
- **YouTube Channels**: Traversy Media, Tech With Tim, freeCodeCamp
|
667 |
-
|
668 |
-
### Positioning Your Experience
|
669 |
|
670 |
-
|
671 |
-
|
672 |
-
|
673 |
-
|
674 |
-
|
675 |
-
|
676 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
677 |
|
678 |
# Streamlit UI
|
679 |
-
st.title("📄
|
680 |
|
681 |
# Add description
|
682 |
st.markdown("""
|
683 |
-
This app helps recruiters
|
684 |
-
|
685 |
-
-
|
686 |
-
-
|
687 |
-
-
|
688 |
-
- **Fraud Detection**: Flags potential inconsistencies for verification
|
689 |
-
- **Career Path Prediction**: Suggests logical next roles based on experience
|
690 |
-
- **Personalized Development Advice**: Recommends skills, projects, and resources
|
691 |
""")
|
692 |
|
693 |
# Create two columns
|
@@ -703,379 +272,557 @@ with col2:
|
|
703 |
|
704 |
# Show job description
|
705 |
if job_title:
|
706 |
-
st.info(f"**
|
707 |
"\n".join([f"- {skill.title()}" for skill in job_descriptions[job_title]["skills"]]))
|
708 |
|
709 |
if uploaded_file and job_title:
|
710 |
try:
|
711 |
# Show spinner while processing
|
712 |
-
with st.spinner("Analyzing resume
|
713 |
# Extract text from PDF
|
714 |
text = extract_text_from_pdf(uploaded_file)
|
715 |
|
716 |
# Analyze resume
|
717 |
-
|
718 |
|
719 |
# Calculate missing skills
|
720 |
missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
|
721 |
-
if skill not in
|
722 |
|
723 |
# Display results in tabs
|
724 |
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
|
725 |
-
"📊 Match
|
726 |
-
"
|
727 |
-
"
|
728 |
-
"
|
729 |
-
"
|
730 |
"🚀 Career Advice"
|
731 |
])
|
732 |
|
733 |
with tab1:
|
734 |
-
#
|
735 |
-
st.subheader("📊 Job Match Analysis")
|
736 |
-
|
737 |
-
# Calculate match scores
|
738 |
-
keyword_match = len(analysis_results["found_skills"]) / len(job_descriptions[job_title]["skills"]) * 100
|
739 |
-
semantic_match = analysis_results["semantic_score"] * 100
|
740 |
-
|
741 |
-
# Display scores with gauges
|
742 |
col1, col2 = st.columns(2)
|
743 |
|
744 |
with col1:
|
745 |
-
#
|
746 |
-
|
747 |
-
|
748 |
-
|
749 |
-
|
750 |
-
|
751 |
-
'
|
752 |
-
|
753 |
-
|
754 |
-
|
755 |
-
|
756 |
-
|
757 |
-
|
758 |
-
|
759 |
-
'line': {'color': "red", 'width': 4},
|
760 |
-
'thickness': 0.75,
|
761 |
-
'value': 70
|
762 |
-
}
|
763 |
-
}
|
764 |
-
))
|
765 |
-
st.plotly_chart(fig, use_container_width=True)
|
766 |
|
767 |
with col2:
|
768 |
-
#
|
769 |
-
|
770 |
-
|
771 |
-
|
772 |
-
|
773 |
-
|
774 |
-
|
775 |
-
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
|
781 |
-
|
782 |
-
|
783 |
-
|
784 |
-
|
785 |
-
|
786 |
-
|
787 |
-
))
|
788 |
-
st.plotly_chart(fig, use_container_width=True)
|
789 |
-
|
790 |
-
# Calculate overall match score (weighted average)
|
791 |
-
overall_match = (keyword_match * 0.4) + (semantic_match * 0.6)
|
792 |
-
|
793 |
-
# Create overall score gauge
|
794 |
-
fig = go.Figure(go.Indicator(
|
795 |
-
mode = "gauge+number+delta",
|
796 |
-
value = overall_match,
|
797 |
-
title = {'text': "Overall Match Score"},
|
798 |
-
delta = {'reference': 75, 'increasing': {'color': "green"}},
|
799 |
-
gauge = {
|
800 |
-
'axis': {'range': [0, 100]},
|
801 |
-
'bar': {'color': "darkblue"},
|
802 |
-
'steps': [
|
803 |
-
{'range': [0, 50], 'color': "lightgray"},
|
804 |
-
{'range': [50, 75], 'color': "gray"},
|
805 |
-
{'range': [75, 100], 'color': "darkblue"}
|
806 |
-
],
|
807 |
-
'threshold': {
|
808 |
-
'line': {'color': "red", 'width': 4},
|
809 |
-
'thickness': 0.75,
|
810 |
-
'value': 75
|
811 |
-
}
|
812 |
-
}
|
813 |
-
))
|
814 |
-
|
815 |
-
st.plotly_chart(fig, use_container_width=True)
|
816 |
-
|
817 |
# Display resume summary
|
818 |
st.subheader("📝 Resume Summary")
|
819 |
-
st.write(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
820 |
|
821 |
-
with
|
822 |
-
# Display skills
|
823 |
-
st.subheader("
|
824 |
|
825 |
# Create two columns
|
826 |
col1, col2 = st.columns(2)
|
827 |
|
828 |
with col1:
|
829 |
-
#
|
830 |
-
st.subheader("🟢 Skills Present")
|
831 |
-
|
832 |
-
# Create a DataFrame for the skills table
|
833 |
-
skills_data = []
|
834 |
-
for skill in analysis_results["found_skills"]:
|
835 |
-
proficiency = analysis_results["skill_proficiencies"].get(skill, "Basic")
|
836 |
-
skills_data.append({
|
837 |
-
"Skill": skill.title(),
|
838 |
-
"Proficiency": proficiency
|
839 |
-
})
|
840 |
-
|
841 |
-
if skills_data:
|
842 |
-
skills_df = pd.DataFrame(skills_data)
|
843 |
-
|
844 |
-
# Add proficiency color coding
|
845 |
-
def color_proficiency(val):
|
846 |
-
if val == "Advanced":
|
847 |
-
return 'background-color: #d4f7d4'
|
848 |
-
elif val == "Intermediate":
|
849 |
-
return 'background-color: #fff2cc'
|
850 |
-
else:
|
851 |
-
return 'background-color: #f2f2f2'
|
852 |
-
|
853 |
-
st.dataframe(skills_df.style.applymap(color_proficiency, subset=['Proficiency']),
|
854 |
-
use_container_width=True)
|
855 |
-
else:
|
856 |
-
st.warning("No direct skill matches found.")
|
857 |
-
|
858 |
-
with col2:
|
859 |
-
# Display missing skills
|
860 |
-
st.subheader("🔴 Skills to Develop")
|
861 |
if missing_skills:
|
862 |
-
|
863 |
-
|
864 |
else:
|
865 |
st.success("Great! The candidate has all the required skills!")
|
866 |
|
867 |
-
# Create a radar chart for skills coverage
|
868 |
-
st.subheader("Skills Coverage")
|
869 |
-
|
870 |
-
# Prepare data for radar chart
|
871 |
-
categories = job_descriptions[job_title]["skills"]
|
872 |
-
values = [1 if skill in analysis_results["found_skills"] else 0 for skill in categories]
|
873 |
-
|
874 |
-
# Create radar chart
|
875 |
-
fig = go.Figure()
|
876 |
-
|
877 |
-
fig.add_trace(go.Scatterpolar(
|
878 |
-
r=values,
|
879 |
-
theta=categories,
|
880 |
-
fill='toself',
|
881 |
-
name='Present Skills'
|
882 |
-
))
|
883 |
-
|
884 |
-
fig.add_trace(go.Scatterpolar(
|
885 |
-
r=[1] * len(categories),
|
886 |
-
theta=categories,
|
887 |
-
fill='toself',
|
888 |
-
name='Required Skills',
|
889 |
-
opacity=0.3
|
890 |
-
))
|
891 |
-
|
892 |
-
fig.update_layout(
|
893 |
-
polar=dict(
|
894 |
-
radialaxis=dict(
|
895 |
-
visible=True,
|
896 |
-
range=[0, 1]
|
897 |
-
)),
|
898 |
-
showlegend=True
|
899 |
-
)
|
900 |
-
|
901 |
-
st.plotly_chart(fig, use_container_width=True)
|
902 |
-
|
903 |
-
with tab3:
|
904 |
-
# Display experience analysis
|
905 |
-
st.subheader("👨💼 Experience Analysis")
|
906 |
-
|
907 |
-
# Display seniority metrics
|
908 |
-
col1, col2 = st.columns(2)
|
909 |
-
|
910 |
-
with col1:
|
911 |
-
# Seniority score gauge
|
912 |
-
fig = go.Figure(go.Indicator(
|
913 |
-
mode="gauge+number",
|
914 |
-
value=analysis_results["seniority_score"],
|
915 |
-
title={'text': "Seniority Score"},
|
916 |
-
gauge={
|
917 |
-
'axis': {'range': [0, 10]},
|
918 |
-
'bar': {'color': "darkblue"},
|
919 |
-
'steps': [
|
920 |
-
{'range': [0, 3], 'color': "lightgray"},
|
921 |
-
{'range': [3, 7], 'color': "gray"},
|
922 |
-
{'range': [7, 10], 'color': "lightblue"}
|
923 |
-
],
|
924 |
-
'threshold': {
|
925 |
-
'line': {'color': "red", 'width': 4},
|
926 |
-
'thickness': 0.75,
|
927 |
-
'value': 7
|
928 |
-
}
|
929 |
-
}
|
930 |
-
))
|
931 |
-
st.plotly_chart(fig, use_container_width=True)
|
932 |
-
|
933 |
with col2:
|
934 |
-
#
|
935 |
-
|
936 |
-
mode="number+delta",
|
937 |
-
value=analysis_results["years_experience"],
|
938 |
-
number={'suffix': " years"},
|
939 |
-
title={"text": "Years of Experience"},
|
940 |
-
delta={'reference': 5, 'relative': False}
|
941 |
-
))
|
942 |
-
st.plotly_chart(fig, use_container_width=True)
|
943 |
-
|
944 |
-
# Display career progression timeline
|
945 |
-
st.subheader("Career Progression Timeline")
|
946 |
-
|
947 |
-
if analysis_results["job_entries"]:
|
948 |
-
# Create timeline data
|
949 |
-
timeline_data = []
|
950 |
|
951 |
-
|
952 |
-
|
953 |
-
|
954 |
-
end_year = re.search(r'\d{4}', job["end_date"]) if job["end_date"] != "Present" else None
|
955 |
-
|
956 |
-
if start_year:
|
957 |
-
start_year = int(start_year.group(0))
|
958 |
-
end_year = int(end_year.group(0)) if end_year else datetime.now().year
|
959 |
-
|
960 |
-
timeline_data.append({
|
961 |
-
"Role": job["title"],
|
962 |
-
"Company": job["company"],
|
963 |
-
"Start": start_year,
|
964 |
-
"End": end_year,
|
965 |
-
"Duration": end_year - start_year
|
966 |
-
})
|
967 |
|
968 |
-
if
|
969 |
-
|
970 |
-
|
971 |
-
|
972 |
-
# Sort by start date (ascending)
|
973 |
-
timeline_df = timeline_df.sort_values(by="Start")
|
974 |
-
|
975 |
-
# Create Gantt chart
|
976 |
-
fig = px.timeline(
|
977 |
-
timeline_df,
|
978 |
-
x_start="Start",
|
979 |
-
x_end="End",
|
980 |
-
y="Company",
|
981 |
-
color="Role",
|
982 |
-
hover_data=["Duration"],
|
983 |
-
labels={"Company": "Employer"}
|
984 |
-
)
|
985 |
-
|
986 |
-
fig.update_layout(
|
987 |
-
xaxis_title="Year",
|
988 |
-
yaxis_title="Employer",
|
989 |
-
title="Career Progression"
|
990 |
-
)
|
991 |
|
992 |
-
st.
|
993 |
else:
|
994 |
-
st.
|
995 |
-
|
996 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
997 |
|
998 |
with tab4:
|
999 |
-
# Display career path
|
1000 |
-
st.subheader("
|
1001 |
|
1002 |
-
#
|
1003 |
-
st.
|
1004 |
|
1005 |
-
|
1006 |
-
|
1007 |
|
1008 |
-
#
|
1009 |
-
|
1010 |
-
|
1011 |
-
|
1012 |
-
|
1013 |
-
|
1014 |
-
|
1015 |
-
|
1016 |
-
|
1017 |
-
|
1018 |
-
|
1019 |
-
|
1020 |
-
|
1021 |
-
|
1022 |
-
|
1023 |
-
|
1024 |
-
#
|
1025 |
-
|
1026 |
-
|
1027 |
-
|
1028 |
-
|
1029 |
-
|
1030 |
-
|
1031 |
-
|
1032 |
-
),
|
1033 |
-
link=dict(
|
1034 |
-
source=[i for i in range(len(career_nodes)-1)],
|
1035 |
-
target=[i+1 for i in range(len(career_nodes)-1)],
|
1036 |
-
value=[1 for _ in range(len(career_nodes)-1)]
|
1037 |
-
)
|
1038 |
-
)])
|
1039 |
-
|
1040 |
-
fig.update_layout(title_text="Potential Career Path", font_size=12)
|
1041 |
-
st.plotly_chart(fig, use_container_width=True)
|
1042 |
|
1043 |
with tab5:
|
1044 |
-
# Display
|
1045 |
-
st.subheader("
|
|
|
|
|
|
|
1046 |
|
1047 |
-
if
|
1048 |
-
|
1049 |
-
for
|
1050 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1051 |
else:
|
1052 |
-
st.
|
1053 |
|
1054 |
-
#
|
1055 |
-
st.
|
1056 |
-
st.markdown("""
|
1057 |
-
Even when no inconsistencies are detected, consider these verification steps:
|
1058 |
|
1059 |
-
|
1060 |
-
|
1061 |
-
|
1062 |
-
|
1063 |
-
|
1064 |
-
|
1065 |
|
1066 |
with tab6:
|
1067 |
# Display career advice
|
1068 |
-
st.subheader("🚀 Career Advice and
|
1069 |
|
1070 |
-
if st.button("Generate
|
1071 |
-
with st.spinner("Generating
|
1072 |
-
advice = generate_career_advice(text, job_title,
|
1073 |
st.markdown(advice)
|
1074 |
-
|
1075 |
except Exception as e:
|
1076 |
st.error(f"An error occurred while processing the resume: {str(e)}")
|
1077 |
-
st.exception(e)
|
1078 |
|
1079 |
# Add footer
|
1080 |
st.markdown("---")
|
1081 |
-
st.markdown("Made with ❤️ using Streamlit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import pdfplumber
|
3 |
import io
|
4 |
import spacy
|
5 |
+
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
import subprocess
|
7 |
import sys
|
8 |
import torch
|
9 |
+
import re
|
10 |
+
import pandas as pd
|
11 |
+
import numpy as np
|
12 |
import plotly.express as px
|
13 |
import plotly.graph_objects as go
|
14 |
+
from datetime import datetime
|
15 |
+
import dateparser
|
16 |
+
from sentence_transformers import SentenceTransformer
|
17 |
+
import nltk
|
18 |
+
from nltk.tokenize import word_tokenize
|
19 |
+
from nltk.corpus import stopwords
|
20 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
21 |
+
import faiss
|
22 |
+
import requests
|
23 |
+
from bs4 import BeautifulSoup
|
24 |
+
import networkx as nx
|
25 |
+
import Levenshtein
|
26 |
+
import json
|
27 |
+
import matplotlib.pyplot as plt
|
28 |
+
from io import BytesIO
|
29 |
+
import base64
|
30 |
+
from sentence_transformers import util
|
31 |
|
32 |
+
# Download NLTK resources
|
33 |
@st.cache_resource
|
34 |
def download_nltk_resources():
|
35 |
+
nltk.download('punkt')
|
36 |
+
nltk.download('stopwords')
|
37 |
+
nltk.download('wordnet')
|
38 |
+
nltk.download('averaged_perceptron_tagger')
|
39 |
|
40 |
download_nltk_resources()
|
41 |
|
42 |
st.set_page_config(
|
43 |
+
page_title="Resume Screener & Skill Extractor",
|
44 |
page_icon="📄",
|
45 |
layout="wide"
|
46 |
)
|
|
|
58 |
# Load the NLP models
|
59 |
@st.cache_resource
|
60 |
def load_models():
|
61 |
+
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
62 |
+
nlp = download_spacy_model()
|
|
|
|
|
|
|
|
|
63 |
|
64 |
+
# Load sentence transformer model for semantic matching
|
65 |
+
sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
|
66 |
+
|
67 |
+
# Load Qwen3-8B model for career advice
|
68 |
try:
|
69 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
70 |
+
qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")
|
71 |
+
qwen_model = AutoModelForCausalLM.from_pretrained(
|
72 |
+
"Qwen/Qwen3-8B",
|
73 |
+
torch_dtype=torch.float16 if device == "cuda" else torch.float32,
|
74 |
+
device_map="auto"
|
75 |
+
)
|
76 |
except Exception as e:
|
77 |
+
st.error(f"Failed to load Qwen3-8B model: {str(e)}")
|
78 |
+
qwen_tokenizer = None
|
79 |
+
qwen_model = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
+
return summarizer, nlp, qwen_tokenizer, qwen_model, sentence_model
|
82 |
|
83 |
# Initialize models
|
84 |
+
summarizer, nlp, qwen_tokenizer, qwen_model, sentence_model = load_models()
|
85 |
|
86 |
# Job descriptions and required skills
|
87 |
job_descriptions = {
|
|
|
89 |
"skills": ["python", "java", "javascript", "sql", "algorithms", "data structures",
|
90 |
"git", "cloud", "web development", "software development", "coding"],
|
91 |
"description": "Looking for software engineers with strong programming skills and experience in software development.",
|
92 |
+
"must_have": ["python", "git", "algorithms"],
|
93 |
+
"nice_to_have": ["cloud", "java", "javascript"],
|
94 |
+
"seniority_levels": {
|
95 |
+
"Junior": "0-2 years of experience, familiar with basic programming concepts",
|
96 |
+
"Mid-level": "3-5 years of experience, proficient in multiple languages, experience with system design",
|
97 |
+
"Senior": "6+ years of experience, expert in software architecture, mentoring, and leading projects"
|
98 |
+
}
|
99 |
},
|
100 |
"Interaction Designer": {
|
101 |
"skills": ["ui", "ux", "user research", "wireframing", "prototyping", "figma",
|
102 |
"sketch", "adobe", "design thinking", "interaction design"],
|
103 |
"description": "Seeking interaction designers with expertise in user experience and interface design.",
|
104 |
+
"must_have": ["ui", "ux", "prototyping"],
|
105 |
+
"nice_to_have": ["figma", "sketch", "user research"],
|
106 |
+
"seniority_levels": {
|
107 |
+
"Junior": "0-2 years of experience, basic design skills, understanding of UX principles",
|
108 |
+
"Mid-level": "3-5 years of experience, strong portfolio, experience with user research",
|
109 |
+
"Senior": "6+ years of experience, leadership in design systems, driving design strategy"
|
110 |
+
}
|
111 |
},
|
112 |
"Data Scientist": {
|
113 |
"skills": ["python", "r", "statistics", "machine learning", "data analysis",
|
114 |
"sql", "tensorflow", "pytorch", "pandas", "numpy"],
|
115 |
"description": "Looking for data scientists with strong analytical and machine learning skills.",
|
116 |
+
"must_have": ["python", "statistics", "machine learning"],
|
117 |
+
"nice_to_have": ["tensorflow", "pytorch", "r"],
|
118 |
+
"seniority_levels": {
|
119 |
+
"Junior": "0-2 years of experience, basic knowledge of statistics and ML algorithms",
|
120 |
+
"Mid-level": "3-5 years of experience, model development, feature engineering",
|
121 |
+
"Senior": "6+ years of experience, advanced ML techniques, research experience"
|
122 |
+
}
|
123 |
+
},
|
124 |
+
"Product Manager": {
|
125 |
+
"skills": ["product strategy", "roadmap planning", "user stories", "agile", "market research",
|
126 |
+
"stakeholder management", "analytics", "user experience", "a/b testing", "prioritization"],
|
127 |
+
"description": "Seeking product managers who can drive product vision, strategy, and execution.",
|
128 |
+
"must_have": ["product strategy", "roadmap planning", "stakeholder management"],
|
129 |
+
"nice_to_have": ["agile", "analytics", "a/b testing"],
|
130 |
+
"seniority_levels": {
|
131 |
+
"Junior": "0-2 years of experience, assisting with feature definition and user stories",
|
132 |
+
"Mid-level": "3-5 years of experience, owning products/features, market research",
|
133 |
+
"Senior": "6+ years of experience, defining product vision, managing teams, strategic planning"
|
134 |
+
}
|
135 |
+
},
|
136 |
+
"DevOps Engineer": {
|
137 |
+
"skills": ["linux", "aws", "docker", "kubernetes", "ci/cd", "terraform",
|
138 |
+
"ansible", "monitoring", "scripting", "automation", "security"],
|
139 |
+
"description": "Looking for DevOps engineers to build and maintain infrastructure and deployment pipelines.",
|
140 |
+
"must_have": ["linux", "docker", "ci/cd"],
|
141 |
+
"nice_to_have": ["kubernetes", "terraform", "aws"],
|
142 |
+
"seniority_levels": {
|
143 |
+
"Junior": "0-2 years of experience, basic system administration, scripting",
|
144 |
+
"Mid-level": "3-5 years of experience, container orchestration, infrastructure as code",
|
145 |
+
"Senior": "6+ years of experience, architecture design, security, team leadership"
|
146 |
+
}
|
147 |
}
|
148 |
}
|
149 |
|
|
|
154 |
text += page.extract_text() or ""
|
155 |
return text
|
156 |
|
157 |
+
def analyze_resume(text, job_title):
|
158 |
+
# Extract relevant skills
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
doc = nlp(text.lower())
|
160 |
found_skills = []
|
161 |
required_skills = job_descriptions[job_title]["skills"]
|
|
|
164 |
if skill in text.lower():
|
165 |
found_skills.append(skill)
|
166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
# Generate summary
|
168 |
chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
|
169 |
summaries = []
|
|
|
171 |
summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
|
172 |
summaries.append(summary)
|
173 |
|
174 |
+
# Extract experience timeline
|
175 |
+
experiences = extract_experience(text)
|
176 |
+
|
177 |
+
# Calculate semantic match score
|
178 |
+
match_score = semantic_matching(text, job_title)
|
179 |
+
|
180 |
+
# Estimate seniority
|
181 |
+
seniority, years_experience, leadership_count, must_have_percentage = estimate_seniority(experiences, found_skills, job_title)
|
182 |
+
|
183 |
+
# Extract skill levels
|
184 |
+
skill_levels = extract_skill_levels(text, found_skills)
|
185 |
+
|
186 |
+
# Check for timeline inconsistencies
|
187 |
+
inconsistencies = check_timeline_inconsistencies(experiences)
|
188 |
+
|
189 |
+
# Verify companies
|
190 |
+
company_verification = verify_companies(experiences)
|
191 |
+
|
192 |
+
# Predict career trajectory
|
193 |
+
career_prediction = predict_career_trajectory(experiences, seniority, job_title)
|
194 |
|
195 |
return {
|
196 |
+
'found_skills': found_skills,
|
197 |
+
'summary': " ".join(summaries),
|
198 |
+
'experiences': experiences,
|
199 |
+
'match_score': match_score,
|
200 |
+
'seniority': seniority,
|
201 |
+
'years_experience': years_experience,
|
202 |
+
'skill_levels': skill_levels,
|
203 |
+
'inconsistencies': inconsistencies,
|
204 |
+
'company_verification': company_verification,
|
205 |
+
'career_prediction': career_prediction
|
206 |
}
|
207 |
|
208 |
def generate_career_advice(resume_text, job_title, found_skills, missing_skills):
|
209 |
+
if qwen_model is None or qwen_tokenizer is None:
|
210 |
+
return "Career advice model not available. Please check the model installation."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
|
212 |
+
# Create a prompt for the model
|
213 |
+
prompt = f"""
|
214 |
+
You are a professional career advisor. Based on the resume and the target job position,
|
215 |
+
provide personalized advice on skills to develop and suggest projects that would help the candidate
|
216 |
+
become a better fit for the position.
|
|
|
|
|
217 |
|
218 |
+
Resume summary: {resume_text[:1000]}...
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
+
Target position: {job_title}
|
|
|
|
|
|
|
|
|
|
|
221 |
|
222 |
+
Job requirements: {job_descriptions[job_title]['description']}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
|
224 |
+
Skills the candidate has: {', '.join(found_skills)}
|
|
|
|
|
|
|
|
|
|
|
225 |
|
226 |
+
Skills the candidate needs to develop: {', '.join(missing_skills)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
|
228 |
+
Provide the following:
|
229 |
+
1. Specific advice on how to develop the missing skills
|
230 |
+
2. 3-5 project ideas that would showcase these skills
|
231 |
+
3. Resources for learning (courses, books, websites)
|
232 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
|
234 |
+
# Generate advice using Qwen3-8B
|
235 |
+
try:
|
236 |
+
inputs = qwen_tokenizer(prompt, return_tensors="pt").to(qwen_model.device)
|
237 |
+
with torch.no_grad():
|
238 |
+
outputs = qwen_model.generate(
|
239 |
+
**inputs,
|
240 |
+
max_new_tokens=1024,
|
241 |
+
temperature=0.7,
|
242 |
+
top_p=0.9,
|
243 |
+
do_sample=True
|
244 |
+
)
|
245 |
+
advice = qwen_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
|
246 |
+
return advice
|
247 |
+
except Exception as e:
|
248 |
+
return f"Failed to generate career advice: {str(e)}"
|
249 |
|
250 |
# Streamlit UI
|
251 |
+
st.title("📄 Resume Screener & Skill Extractor")
|
252 |
|
253 |
# Add description
|
254 |
st.markdown("""
|
255 |
+
This app helps recruiters analyze resumes by:
|
256 |
+
- Extracting relevant skills for specific job positions
|
257 |
+
- Generating a concise summary of the candidate's background
|
258 |
+
- Identifying skill gaps for the selected role
|
259 |
+
- Providing personalized career advice and project recommendations
|
|
|
|
|
|
|
260 |
""")
|
261 |
|
262 |
# Create two columns
|
|
|
272 |
|
273 |
# Show job description
|
274 |
if job_title:
|
275 |
+
st.info(f"**Required Skills:**\n" +
|
276 |
"\n".join([f"- {skill.title()}" for skill in job_descriptions[job_title]["skills"]]))
|
277 |
|
278 |
if uploaded_file and job_title:
|
279 |
try:
|
280 |
# Show spinner while processing
|
281 |
+
with st.spinner("Analyzing resume..."):
|
282 |
# Extract text from PDF
|
283 |
text = extract_text_from_pdf(uploaded_file)
|
284 |
|
285 |
# Analyze resume
|
286 |
+
resume_data = analyze_resume(text, job_title)
|
287 |
|
288 |
# Calculate missing skills
|
289 |
missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
|
290 |
+
if skill not in resume_data['found_skills']]
|
291 |
|
292 |
# Display results in tabs
|
293 |
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
|
294 |
+
"📊 Skills Match",
|
295 |
+
"📝 Resume Summary",
|
296 |
+
"🎯 Skills Gap",
|
297 |
+
"👨💼 Career Path",
|
298 |
+
"🔍 Authentication",
|
299 |
"🚀 Career Advice"
|
300 |
])
|
301 |
|
302 |
with tab1:
|
303 |
+
# First create columns for skill match percentage and semantic match
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
col1, col2 = st.columns(2)
|
305 |
|
306 |
with col1:
|
307 |
+
# Display matched skills
|
308 |
+
st.subheader("🎯 Matched Skills")
|
309 |
+
if resume_data['found_skills']:
|
310 |
+
for skill in resume_data['found_skills']:
|
311 |
+
# Show skill with proficiency level
|
312 |
+
level = resume_data['skill_levels'].get(skill, 'intermediate')
|
313 |
+
level_emoji = "🟢" if level == 'advanced' else "🟡" if level == 'intermediate' else "🟠"
|
314 |
+
st.success(f"{level_emoji} {skill.title()} ({level.title()})")
|
315 |
+
|
316 |
+
# Calculate match percentage
|
317 |
+
match_percentage = len(resume_data['found_skills']) / len(job_descriptions[job_title]["skills"]) * 100
|
318 |
+
st.metric("Skills Match", f"{match_percentage:.1f}%")
|
319 |
+
else:
|
320 |
+
st.warning("No direct skill matches found.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
321 |
|
322 |
with col2:
|
323 |
+
# Display semantic match score
|
324 |
+
st.subheader("💡 Semantic Match")
|
325 |
+
st.metric("Overall Match Score", f"{resume_data['match_score']:.1f}%")
|
326 |
+
|
327 |
+
# Display must-have skills match
|
328 |
+
must_have_skills = job_descriptions[job_title]["must_have"]
|
329 |
+
must_have_count = sum(1 for skill in must_have_skills if skill in resume_data['found_skills'])
|
330 |
+
must_have_percentage = (must_have_count / len(must_have_skills)) * 100
|
331 |
+
|
332 |
+
st.write("Must-have skills:")
|
333 |
+
st.progress(must_have_percentage / 100)
|
334 |
+
st.write(f"{must_have_count} out of {len(must_have_skills)} ({must_have_percentage:.1f}%)")
|
335 |
+
|
336 |
+
# Professional level assessment
|
337 |
+
st.subheader("🧠 Seniority Assessment")
|
338 |
+
st.info(f"**{resume_data['seniority']}** ({resume_data['years_experience']:.1f} years equivalent experience)")
|
339 |
+
st.write(job_descriptions[job_title]["seniority_levels"][resume_data['seniority']])
|
340 |
+
|
341 |
+
with tab2:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
342 |
# Display resume summary
|
343 |
st.subheader("📝 Resume Summary")
|
344 |
+
st.write(resume_data['summary'])
|
345 |
+
|
346 |
+
# Display experience timeline
|
347 |
+
st.subheader("⏳ Experience Timeline")
|
348 |
+
if resume_data['experiences']:
|
349 |
+
# Convert experiences to dataframe for display
|
350 |
+
exp_data = []
|
351 |
+
for exp in resume_data['experiences']:
|
352 |
+
if 'start_date' in exp and 'end_date' in exp:
|
353 |
+
exp_data.append({
|
354 |
+
'Company': exp['company'],
|
355 |
+
'Role': exp['role'],
|
356 |
+
'Start Date': exp['start_date'].strftime('%b %Y') if exp['start_date'] else 'Unknown',
|
357 |
+
'End Date': exp['end_date'].strftime('%b %Y') if exp['end_date'] != datetime.now() else 'Present',
|
358 |
+
'Duration (months)': exp.get('duration_months', 'Unknown')
|
359 |
+
})
|
360 |
+
else:
|
361 |
+
exp_data.append({
|
362 |
+
'Company': exp['company'],
|
363 |
+
'Role': exp['role'],
|
364 |
+
'Duration': exp.get('duration', 'Unknown')
|
365 |
+
})
|
366 |
+
|
367 |
+
if exp_data:
|
368 |
+
exp_df = pd.DataFrame(exp_data)
|
369 |
+
st.dataframe(exp_df)
|
370 |
+
|
371 |
+
# Create a timeline visualization if dates are available
|
372 |
+
timeline_data = [exp for exp in resume_data['experiences'] if 'start_date' in exp and 'end_date' in exp]
|
373 |
+
if timeline_data:
|
374 |
+
# Sort by start date
|
375 |
+
timeline_data = sorted(timeline_data, key=lambda x: x['start_date'])
|
376 |
+
|
377 |
+
# Create figure
|
378 |
+
fig = go.Figure()
|
379 |
+
|
380 |
+
for i, exp in enumerate(timeline_data):
|
381 |
+
fig.add_trace(go.Bar(
|
382 |
+
x=[(exp['end_date'] - exp['start_date']).days / 30], # Duration in months
|
383 |
+
y=[exp['company']],
|
384 |
+
orientation='h',
|
385 |
+
name=exp['role'],
|
386 |
+
hovertext=f"{exp['role']} at {exp['company']}<br>{exp['start_date'].strftime('%b %Y')} - {exp['end_date'].strftime('%b %Y') if exp['end_date'] != datetime.now() else 'Present'}<br>Duration: {exp.get('duration_months', 0)} months",
|
387 |
+
marker=dict(color=px.colors.qualitative.Plotly[i % len(px.colors.qualitative.Plotly)])
|
388 |
+
))
|
389 |
+
|
390 |
+
fig.update_layout(
|
391 |
+
title="Career Timeline",
|
392 |
+
xaxis_title="Duration (months)",
|
393 |
+
yaxis_title="Company",
|
394 |
+
height=400,
|
395 |
+
margin=dict(l=0, r=0, b=0, t=30)
|
396 |
+
)
|
397 |
+
|
398 |
+
st.plotly_chart(fig, use_container_width=True)
|
399 |
+
else:
|
400 |
+
st.warning("No work experience data could be extracted.")
|
401 |
|
402 |
+
with tab3:
|
403 |
+
# Display missing skills
|
404 |
+
st.subheader("📌 Skills to Develop")
|
405 |
|
406 |
# Create two columns
|
407 |
col1, col2 = st.columns(2)
|
408 |
|
409 |
with col1:
|
410 |
+
# Missing skills
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
411 |
if missing_skills:
|
412 |
+
for skill in missing_skills:
|
413 |
+
st.warning(f"➖ {skill.title()}")
|
414 |
else:
|
415 |
st.success("Great! The candidate has all the required skills!")
|
416 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
417 |
with col2:
|
418 |
+
# Skills gap analysis
|
419 |
+
st.subheader("🔍 Gap Analysis")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
420 |
|
421 |
+
# Show must-have skills that are missing
|
422 |
+
missing_must_have = [skill for skill in job_descriptions[job_title]["must_have"]
|
423 |
+
if skill not in resume_data['found_skills']]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
424 |
|
425 |
+
if missing_must_have:
|
426 |
+
st.error("**Critical Skills Missing:**")
|
427 |
+
for skill in missing_must_have:
|
428 |
+
st.write(f"- {skill.title()}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
429 |
|
430 |
+
st.markdown("These are must-have skills for this position.")
|
431 |
else:
|
432 |
+
st.success("Candidate has all the must-have skills for this position!")
|
433 |
+
|
434 |
+
# Show nice-to-have skills gap
|
435 |
+
missing_nice_to_have = [skill for skill in job_descriptions[job_title]["nice_to_have"]
|
436 |
+
if skill not in resume_data['found_skills']]
|
437 |
+
|
438 |
+
if missing_nice_to_have:
|
439 |
+
st.warning("**Nice-to-Have Skills Missing:**")
|
440 |
+
for skill in missing_nice_to_have:
|
441 |
+
st.write(f"- {skill.title()}")
|
442 |
+
else:
|
443 |
+
st.success("Candidate has all the nice-to-have skills!")
|
444 |
|
445 |
with tab4:
|
446 |
+
# Display career path insights
|
447 |
+
st.subheader("👨💼 Career Trajectory")
|
448 |
|
449 |
+
# Show career prediction
|
450 |
+
st.info(resume_data['career_prediction'])
|
451 |
|
452 |
+
# Show experience trends
|
453 |
+
st.subheader("📈 Experience Analysis")
|
454 |
|
455 |
+
# Check for job hopping
|
456 |
+
if len(resume_data['experiences']) >= 3:
|
457 |
+
# Calculate average job duration
|
458 |
+
durations = [exp.get('duration_months', 0) for exp in resume_data['experiences']
|
459 |
+
if 'duration_months' in exp]
|
460 |
+
|
461 |
+
if durations:
|
462 |
+
avg_duration = sum(durations) / len(durations)
|
463 |
+
|
464 |
+
if avg_duration < 12:
|
465 |
+
st.warning(f"🚩 **Frequent Job Changes**: Average job duration is only {avg_duration:.1f} months")
|
466 |
+
elif avg_duration < 24:
|
467 |
+
st.warning(f"⚠️ **Moderate Job Hopping**: Average job duration is {avg_duration:.1f} months")
|
468 |
+
else:
|
469 |
+
st.success(f"✅ **Stable Employment**: Average job duration is {avg_duration:.1f} months")
|
470 |
+
|
471 |
+
# Show inconsistencies if any
|
472 |
+
if resume_data['inconsistencies']:
|
473 |
+
st.subheader("⚠️ Timeline Inconsistencies")
|
474 |
+
for issue in resume_data['inconsistencies']:
|
475 |
+
if issue['type'] == 'overlap':
|
476 |
+
st.warning(issue['description'])
|
477 |
+
elif issue['type'] == 'gap':
|
478 |
+
st.info(issue['description'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
479 |
|
480 |
with tab5:
|
481 |
+
# Display authentication signals
|
482 |
+
st.subheader("🔍 Resume Authentication")
|
483 |
+
|
484 |
+
# Company verification results
|
485 |
+
st.write("**Company Verification Results:**")
|
486 |
|
487 |
+
if resume_data['company_verification']:
|
488 |
+
# Count suspicious companies
|
489 |
+
suspicious_count = sum(1 for v in resume_data['company_verification']
|
490 |
+
if v['status'] == 'suspicious')
|
491 |
+
|
492 |
+
if suspicious_count == 0:
|
493 |
+
st.success("✅ All companies mentioned in the resume passed basic verification")
|
494 |
+
else:
|
495 |
+
st.warning(f"⚠️ {suspicious_count} companies require further verification")
|
496 |
+
|
497 |
+
# Display verification details
|
498 |
+
verification_data = [{
|
499 |
+
'Company': v['company'],
|
500 |
+
'Status': v['status'].title(),
|
501 |
+
'Notes': v['reason']
|
502 |
+
} for v in resume_data['company_verification']]
|
503 |
+
|
504 |
+
st.dataframe(pd.DataFrame(verification_data))
|
505 |
else:
|
506 |
+
st.info("No company information found for verification.")
|
507 |
|
508 |
+
# Timeline consistency check
|
509 |
+
st.write("**Timeline Consistency Check:**")
|
|
|
|
|
510 |
|
511 |
+
if not resume_data['inconsistencies']:
|
512 |
+
st.success("✅ No timeline inconsistencies detected")
|
513 |
+
else:
|
514 |
+
st.warning(f"⚠️ {len(resume_data['inconsistencies'])} timeline inconsistencies found")
|
515 |
+
for issue in resume_data['inconsistencies']:
|
516 |
+
st.write(f"- {issue['description']}")
|
517 |
|
518 |
with tab6:
|
519 |
# Display career advice
|
520 |
+
st.subheader("🚀 Career Advice and Project Recommendations")
|
521 |
|
522 |
+
if st.button("Generate Career Advice"):
|
523 |
+
with st.spinner("Generating personalized career advice..."):
|
524 |
+
advice = generate_career_advice(text, job_title, resume_data['found_skills'], missing_skills)
|
525 |
st.markdown(advice)
|
526 |
+
|
527 |
except Exception as e:
|
528 |
st.error(f"An error occurred while processing the resume: {str(e)}")
|
|
|
529 |
|
530 |
# Add footer
|
531 |
st.markdown("---")
|
532 |
+
st.markdown("Made with ❤️ using Streamlit and Hugging Face")
|
533 |
+
|
534 |
+
# Semantic matching between resume and job description
def semantic_matching(resume_text, job_title):
    """Score how semantically close a resume is to a job's description.

    Embeds both texts with the module-level ``sentence_model`` and returns
    their cosine similarity scaled to a 0-100 percentage.
    """
    target_description = job_descriptions[job_title]["description"]

    # Embed both documents; move tensors to CPU numpy rows for sklearn.
    resume_vec, job_vec = (
        sentence_model.encode(doc, convert_to_tensor=True).cpu().numpy().reshape(1, -1)
        for doc in (resume_text, target_description)
    )

    similarity = cosine_similarity(resume_vec, job_vec)[0][0]
    return similarity * 100  # scale to a percentage
|
549 |
+
|
550 |
+
# Extract experience timeline from resume
def extract_experience(text):
    """Parse work-experience entries out of raw resume text.

    Looks for lines shaped like ``Company | Role | Jan 2020 - Present``.
    Entries whose dates parse successfully carry ``start_date``,
    ``end_date`` and ``duration_months``; when date parsing fails the raw
    ``duration`` string is kept so the entry is not lost.

    Returns:
        list[dict]: one dict per matched experience entry.
    """
    # Pattern to find work experience entries
    # Look for patterns like "Company Name | Role | Jan 2020 - Present"
    exp_pattern = r"(?i)(.*?(?:inc|llc|ltd|company|corp|corporation|group)?)\s*(?:[|•-]\s*)?(.*?)(?:[|•-]\s*)((?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[\w\s,]*\d{4}\s*(?:-|to|–)\s*(?:(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[\w\s,]*\d{4}|present))"

    experiences = []
    for match in re.finditer(exp_pattern, text, re.IGNORECASE):
        company = match.group(1).strip()
        role = match.group(2).strip()
        duration = match.group(3).strip()

        # Parse dates
        try:
            # Split on whichever range separator the entry used.
            date_range = duration.split('-') if '-' in duration else duration.split('to') if 'to' in duration else duration.split('–')
            start_date = dateparser.parse(date_range[0].strip())

            if 'present' in date_range[1].lower():
                end_date = datetime.now()
            else:
                end_date = dateparser.parse(date_range[1].strip())

            if start_date and end_date:
                # Calculate duration in whole months.
                months = (end_date.year - start_date.year) * 12 + (end_date.month - start_date.month)

                experiences.append({
                    'company': company,
                    'role': role,
                    'start_date': start_date,
                    'end_date': end_date,
                    'duration_months': months
                })
        # Was a bare `except:` — that also swallowed SystemExit and
        # KeyboardInterrupt.  Catch Exception so real parse failures still
        # fall through to the dateless record below.
        except Exception:
            # If date parsing fails, still include the experience without dates
            experiences.append({
                'company': company,
                'role': role,
                'duration': duration
            })

    return experiences
|
592 |
+
|
593 |
+
# Estimate seniority based on experience and skills
def estimate_seniority(experiences, found_skills, job_title):
    """Estimate a candidate's seniority for ``job_title``.

    Combines total years of experience, leadership-flavored role titles,
    and coverage of the job's must-have skills.

    Returns:
        tuple: (seniority label, total_years, leadership_count,
        must_have_percentage).
    """
    # Calculate total experience in years
    total_months = sum(exp.get('duration_months', 0) for exp in experiences if 'duration_months' in exp)
    total_years = total_months / 12

    # Count roles whose title carries a leadership keyword (each role
    # counted at most once).
    leadership_keywords = ['lead', 'senior', 'manager', 'head', 'principal', 'architect', 'director']
    leadership_count = 0
    for exp in experiences:
        role = exp.get('role', '').lower()
        if any(keyword in role for keyword in leadership_keywords):
            leadership_count += 1

    # Calculate skill match percentage for must-have skills.
    # Lower-case the found skills once into a set instead of rebuilding the
    # lowered list for every must-have skill (was O(n*m) list scans).
    must_have_skills = job_descriptions[job_title]["must_have"]
    found_lower = {s.lower() for s in found_skills}
    must_have_count = sum(1 for skill in must_have_skills if skill in found_lower)
    must_have_percentage = (must_have_count / len(must_have_skills)) * 100 if must_have_skills else 0

    # Determine base seniority from years of experience.
    if total_years < 3:
        seniority = "Junior"
    elif total_years < 6:
        seniority = "Mid-level"
    else:
        seniority = "Senior"

    # Promote on a strong leadership signal; demote a "Senior" whose
    # must-have skill coverage is weak.
    if leadership_count >= 2 and seniority != "Senior":
        seniority = "Senior" if total_years >= 4 else seniority
    if must_have_percentage < 50 and seniority == "Senior":
        seniority = "Mid-level"

    return seniority, total_years, leadership_count, must_have_percentage
|
630 |
+
|
631 |
+
# Check for timeline inconsistencies
def check_timeline_inconsistencies(experiences):
    """Flag overlapping roles and employment gaps in a dated work history.

    Only entries carrying both ``start_date`` and ``end_date`` are
    examined.  Returns a list of ``{'type', 'description'}`` dicts with
    all overlap findings first, then all gap findings.
    """
    if not experiences:
        return []

    # Chronologically ordered, fully dated entries only.
    dated = sorted(
        (exp for exp in experiences if 'start_date' in exp and 'end_date' in exp),
        key=lambda exp: exp['start_date'],
    )

    def month_delta(later, earlier):
        # Whole-month difference between two datetimes.
        return (later.year - earlier.year) * 12 + (later.month - earlier.month)

    issues = []

    # Pass 1: overlapping full-time roles (1 month of overlap is tolerated
    # as a normal job transition).
    for earlier, later in zip(dated, dated[1:]):
        if earlier['end_date'] > later['start_date']:
            overlap = month_delta(earlier['end_date'], later['start_date'])
            if overlap > 1:
                issues.append({
                    'type': 'overlap',
                    'description': f"Overlapping roles: {earlier['company']} and {later['company']} " +
                                   f"overlap by {overlap} months"
                })

    # Pass 2: employment gaps longer than 3 months.
    for earlier, later in zip(dated, dated[1:]):
        gap = month_delta(later['start_date'], earlier['end_date'])
        if gap > 3:
            issues.append({
                'type': 'gap',
                'description': f"Employment gap of {gap} months between " +
                               f"{earlier['company']} and {later['company']}"
            })

    return issues
|
674 |
+
|
675 |
+
# Verify company existence (simplified version)
def verify_companies(experiences):
    """Run lightweight plausibility checks on each company name.

    Returns one ``{'company', 'status', 'reason'}`` dict per experience
    entry that has a non-empty company name.  Names that are implausibly
    short or match well-known placeholder patterns are flagged
    'suspicious'; everything else passes as 'verified'.
    """
    # Placeholder-style names commonly seen in fabricated resumes.
    fake_patterns = ['abc company', 'xyz corp', 'my company', 'personal project']

    def _classify(name):
        # Names under 3 characters are unlikely to be real companies.
        if len(name) < 3:
            return 'suspicious', 'Company name too short'
        if any(pattern in name.lower() for pattern in fake_patterns):
            return 'suspicious', 'Matches pattern of fake company names'
        # A real implementation would query an external registry/API here;
        # for this demo everything else counts as verified.
        return 'verified', 'Passed basic verification checks'

    results = []
    for exp in experiences:
        name = exp.get('company', '')
        if not name:
            continue
        status, reason = _classify(name)
        results.append({'company': name, 'status': status, 'reason': reason})

    return results
|
712 |
+
|
713 |
+
# Extract skill levels from text
def extract_skill_levels(text, skills):
    """Infer a proficiency level per skill from resume wording.

    For each skill, sentences mentioning it are scanned first for explicit
    "N years ..." figures, then for wording cues ('expert', 'familiar', ...).
    Defaults to 'intermediate' when no signal is found.

    Returns:
        dict: skill -> 'basic' | 'intermediate' | 'advanced'.
    """
    proficiency_indicators = {
        'basic': ['basic', 'familiar', 'beginner', 'fundamentals', 'exposure'],
        'intermediate': ['intermediate', 'proficient', 'experienced', 'competent', 'skilled'],
        'advanced': ['advanced', 'expert', 'mastery', 'specialist', 'lead', 'senior']
    }

    lowered = text.lower()
    skill_levels = {}

    for skill in skills:
        escaped = re.escape(skill)

        # All (lower-cased) sentences that mention this skill.
        sentences = re.findall(r'[^.!?]*%s[^.!?]*[.!?]' % escaped, lowered)

        level = 'intermediate'  # default when nothing more specific is found

        # Signal 1: explicit "N years ... <skill>" statements.
        years_pattern = re.compile(
            r'(\d+)\s*(?:\+)?\s*years?(?:\s+of)?\s+(?:experience|exp)?\s+(?:with|in|using)?\s+%s' % escaped,
            re.IGNORECASE)
        for sentence in sentences:
            hit = years_pattern.search(sentence)
            if hit is None:
                continue
            years = int(hit.group(1))
            level = 'basic' if years < 2 else 'intermediate' if years < 5 else 'advanced'
            break

        # Signal 2: wording cues — only consulted while the level is still
        # at the default (years take precedence).
        if level == 'intermediate':
            for level_name, indicators in proficiency_indicators.items():
                for indicator in indicators:
                    cue = re.compile(r'%s\s+(?:\w+\s+){0,3}%s' % (indicator, escaped), re.IGNORECASE)
                    if any(cue.search(sentence) for sentence in sentences):
                        level = level_name
                        break
                if level != 'intermediate':
                    break

        skill_levels[skill] = level

    return skill_levels
|
757 |
+
|
758 |
+
# Generate career trajectory prediction
def predict_career_trajectory(experiences, seniority, job_title):
    """Suggest the next logical role for a candidate.

    With fewer than two recorded roles the suggestion is driven purely by
    the supplied ``seniority`` label; otherwise the highest title reached
    on a junior→chief ladder determines the next rung.
    """
    if not experiences:
        return "Unable to predict trajectory due to insufficient experience data."

    # Roles in chronological order, lower-cased for keyword scanning.
    roles = [exp.get('role', '').lower() for exp in experiences if 'role' in exp]

    # Too little history for a progression analysis: fall back to seniority.
    if len(roles) < 2:
        if seniority == "Junior":
            next_role = "Mid-level " + job_title
        elif seniority == "Mid-level":
            next_role = "Senior " + job_title
        else:  # Senior
            leadership_titles = {
                "Software Engineer": "Technical Lead or Engineering Manager",
                "Data Scientist": "Lead Data Scientist or Data Science Manager",
                "Interaction Designer": "Design Lead or UX Director",
                "Product Manager": "Senior Product Manager or Director of Product",
                "DevOps Engineer": "DevOps Lead or Infrastructure Architect"
            }
            next_role = leadership_titles.get(job_title, f"Director of {job_title}")
        return f"Based on current seniority level, the next logical role could be: {next_role}"

    # Highest rung already reached on the progression ladder (-1 if none).
    ladder = ['junior', 'senior', 'lead', 'manager', 'director', 'vp', 'head', 'chief']
    reached = max(
        (i for role in roles for i, rung in enumerate(ladder) if rung in role),
        default=-1,
    )

    if reached < len(ladder) - 1:
        # Map the next rung onto a concrete job title.
        rung = ladder[reached + 1]
        if rung == 'senior':
            next_role = f"Senior {job_title}"
        elif rung == 'lead':
            next_role = f"{job_title} Lead"
        elif rung == 'manager':
            next_role = "Engineering Manager" if job_title == "Software Engineer" else f"{job_title} Manager"
        elif rung == 'director':
            next_role = f"Director of {job_title}s"
        elif rung == 'vp':
            next_role = f"VP of {job_title}s"
        elif rung == 'head':
            next_role = f"Head of {job_title}"
        elif rung == 'chief':
            c_suite = {
                "Software Engineer": "CTO (Chief Technology Officer)",
                "Data Scientist": "Chief Data Officer",
                "Product Manager": "Chief Product Officer",
            }
            next_role = c_suite.get(job_title, f"Chief {job_title} Officer")
        else:
            next_role = f"{rung.title()} {job_title}"
    else:
        # Already at the top of the ladder.
        next_role = "Executive Leadership or Strategic Advisory roles"

    return f"Based on career progression, the next logical role could be: {next_role}"
|
fix_dependencies.py
DELETED
@@ -1,48 +0,0 @@
|
|
1 |
-
import subprocess
import sys


def fix_dependencies():
    """
    Fix dependency issues by installing compatible versions of required packages.

    Installs all pins in a single ``pip install`` invocation so pip can
    resolve the whole set jointly — installing one pinned package at a time
    lets each install clobber its predecessors and can leave mutually
    incompatible versions behind, which is the very problem this script
    exists to fix.  Then fetches the spaCy model and NLTK data the app
    needs.  Raises ``subprocess.CalledProcessError`` if any step fails.
    """
    print("Fixing dependencies for Resume Screener application...")

    # Pinned, mutually compatible package versions.
    packages = [
        "streamlit==1.22.0",
        "pdfplumber==0.9.0",
        "spacy>=3.4.0",
        "transformers==4.28.1",
        "torch==1.13.1",
        "huggingface-hub==0.14.1",
        "sentence-transformers==2.2.2",
        "nltk==3.8.1",
        "plotly==5.14.1",
        "pandas==1.5.3",
        "numpy==1.24.3",
        "matplotlib==3.7.1",
        "pydantic==1.10.8",
        "protobuf<4.0.0",
        "tqdm>=4.27",
        "regex>=2022.1.18",
        "scikit-learn==1.0.2",
        "scipy==1.8.1"
    ]

    # One pip call for the whole set: joint resolution and fewer subprocesses.
    print("Installing pinned packages...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", *packages])

    # Download spaCy model
    print("Downloading spaCy model...")
    subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])

    # Download NLTK data
    print("Downloading NLTK data...")
    subprocess.check_call([sys.executable, "-c", "import nltk; nltk.download('punkt')"])

    print("Dependencies fixed successfully!")


if __name__ == "__main__":
    fix_dependencies()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -15,4 +15,10 @@ protobuf<4.0.0
|
|
15 |
tqdm>=4.27
|
16 |
regex>=2022.1.18
|
17 |
scikit-learn==1.0.2
|
18 |
-
scipy==1.8.1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
tqdm>=4.27
|
16 |
regex>=2022.1.18
|
17 |
scikit-learn==1.0.2
|
18 |
+
scipy==1.8.1
|
19 |
+
dateparser==1.1.8
|
20 |
+
python-Levenshtein==0.21.1
|
21 |
+
networkx==2.8.8
|
22 |
+
faiss-cpu==1.7.4
|
23 |
+
beautifulsoup4==4.12.2
|
24 |
+
requests==2.31.0
|