Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Files Community

CR7CAD committed on Mar 28

Commit

19a0df1

verified ·

1 Parent(s): ae778e2

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -131

app.py CHANGED Viewed

@@ -167,10 +167,10 @@ def basic_summarize(text, max_length=100):
     summary = " ".join(summary_sentences)
     return summary
-# Custom classification function for comprehensive job fit assessment
 def evaluate_job_fit(resume_summary, job_requirements, models):
     """
-    Use model to evaluate job fit with comprehensive analysis across multiple dimensions
     """
     start_time = time.time()
@@ -180,121 +180,41 @@ def evaluate_job_fit(resume_summary, job_requirements, models):
     job_title = job_requirements["title"]
     job_summary = job_requirements["summary"]
-    # Create a comprehensive analysis prompt for the model to evaluate
-    analysis_prompt = f"""
-    RESUME SUMMARY:
-    {resume_summary}
-    JOB DESCRIPTION:
-    Title: {job_title}
-    Required experience: {years_required} years
-    Required skills: {', '.join(required_skills)}
-    Description: {job_summary}
-    TASK: Analyze how well the candidate matches this job based on:
-    1. Technical skills match
-    2. Experience level match
-    3. Role/position alignment
-    4. Industry familiarity
-    5. Potential for success in this position
-    Assign a score from 0-2 where:
-    0 = NOT FIT (major gaps in requirements)
-    1 = POTENTIAL FIT (meets some key requirements)
-    2 = GOOD FIT (meets most or all key requirements)
-    """
-    # Truncate prompt if needed to fit model's input limits
-    max_prompt_length = 1024  # Set a reasonable limit
-    if len(analysis_prompt) > max_prompt_length:
-        analysis_prompt = analysis_prompt[:max_prompt_length]
-    # Use sentiment analysis model for evaluation
-    fit_score = 0  # Default score
-    # Run multiple sub-analyses to build confidence in our result
-    sub_analyses = []
-    # Function to run model evaluation
-    def run_model_evaluation(prompt_text):
-        if has_pipeline and 'evaluator' in models:
-            result = models['evaluator'](prompt_text)
-            # Convert sentiment to score
-            if result[0]['label'] == 'POSITIVE' and result[0]['score'] > 0.8:
-                return 2  # Strong positive = good fit
-            elif result[0]['label'] == 'NEUTRAL':
-                return 1  # neutral fit = potential fit
-            else:
-                return 0  # Negative = not fit
-        else:
-            # Manual implementation if pipeline not available
-            tokenizer = models['evaluator_tokenizer']
-            model = models['evaluator_model']
-            # Truncate to avoid exceeding model's max length
-            max_length = tokenizer.model_max_length if hasattr(tokenizer, 'model_max_length') else 512
-            truncated_text = " ".join(prompt_text.split()[:max_length])
-            inputs = tokenizer(truncated_text, return_tensors="pt", truncation=True, max_length=max_length)
-            with torch.no_grad():
-                outputs = model(**inputs)
-            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
-            positive_prob = probabilities[0][1].item()  # Positive class probability
-            # Convert probability to score
-            if positive_prob > 0.8:
-                return 2
-            elif positive_prob > 0.6:
-                return 1
-            else:
-                return 0
-    # Run skills analysis
-    skills_prompt = f"""
-    RESUME SKILLS: {resume_summary}
-    JOB REQUIRED SKILLS: {', '.join(required_skills)}
-    Does the candidate have most of the required technical skills for this position?
-    """
-    skills_score = run_model_evaluation(skills_prompt)
-    sub_analyses.append(skills_score)
-    # Run experience analysis
-    experience_prompt = f"""
-    RESUME EXPERIENCE: {resume_summary}
-    JOB REQUIRED EXPERIENCE: {years_required} years in {job_title}
-    Does the candidate have sufficient years of relevant experience for this position?
-    """
-    experience_score = run_model_evaluation(experience_prompt)
-    sub_analyses.append(experience_score)
-    # Run role alignment analysis
-    role_prompt = f"""
-    CANDIDATE PROFILE: {resume_summary}
-    JOB ROLE: {job_title}, {job_summary}
-    Is the candidate's background well-aligned with this job role and responsibilities?
-    """
-    role_score = run_model_evaluation(role_prompt)
-    sub_analyses.append(role_score)
-    # Calculate overall score (weighted average)
-    # Skills: 40%, Experience: 30%, Role alignment: 30%
-    weights = [0.4, 0.3, 0.3]
-    weighted_score = sum(score * weight for score, weight in zip(sub_analyses, weights))
-    # Convert to integer score (0-2)
-    if weighted_score >= 1.5:
-        fit_score = 2
-    elif weighted_score >= 0.8:
-        fit_score = 1
-    else:
-        fit_score = 0
-    # Extract key information from resume for assessment
-    # Parse name, age, industry from resume summary
     name_match = re.search(r'Name:\s*(.*?)(?=\n|\Z)', resume_summary)
     name = name_match.group(1).strip() if name_match else "The candidate"
@@ -304,19 +224,27 @@ def evaluate_job_fit(resume_summary, job_requirements, models):
     industry_match = re.search(r'Expected Industry:\s*(.*?)(?=\n|\Z)', resume_summary)
     industry = industry_match.group(1).strip() if industry_match else "unspecified industry"
-    # Count matching skills but don't show the percentage in output
-    resume_lower = resume_summary.lower()
-    matching_skills = [skill for skill in required_skills if skill.lower() in resume_lower]
-    missing_skills = [skill for skill in required_skills if skill.lower() not in resume_lower]
-    # Generate assessment text based on score with more holistic evaluation
     if fit_score == 2:
-        fit_assessment = f"{fit_score}: {name} demonstrates strong alignment with the {job_title} position. Their background in {industry} and professional experience appear well-suited for this role's requirements. The technical expertise matches what the position demands."
     elif fit_score == 1:
-        fit_assessment = f"{fit_score}: {name} shows potential for the {job_title} role with some relevant experience, though there are gaps in certain technical areas. Their {industry} background provides partial alignment with the position requirements. Additional training might be needed in {', '.join(missing_skills[:2])} if pursuing this opportunity."
     else:
-        # For score 0, be constructive but honest
-        fit_assessment = f"{fit_score}: {name}'s current background shows limited alignment with this {job_title} position. Their experience level and technical background differ significantly from the role requirements. A position better matching their {industry} expertise might be more suitable."
     execution_time = time.time() - start_time
@@ -511,15 +439,15 @@ def extract_skills(text):
     """Extract key skills from the resume"""
     # Common skill categories - reduced keyword list for speed
     skill_categories = {
-        "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go"],
-        "Data Science": ["Machine Learning", "Data Analysis", "Statistics", "TensorFlow", "PyTorch", "AI", "Algorithms"],
-        "Database": ["SQL", "MySQL", "MongoDB", "Database", "NoSQL", "PostgreSQL"],
-        "Web Development": ["React", "Angular", "Node.js", "Frontend", "Backend", "Full-Stack"],
-        "Software Development": ["Agile", "Scrum", "Git", "DevOps", "Docker", "System Design"],
-        "Cloud": ["AWS", "Azure", "Google Cloud", "Cloud Computing"],
         "Security": ["Cybersecurity", "Network Security", "Encryption", "Security"],
-        "Business": ["Project Management", "Business Analysis", "Leadership", "Teamwork"],
-        "Design": ["UX/UI", "User Experience", "Design Thinking", "Adobe"]
     }
     # Process everything at once
@@ -589,13 +517,19 @@ def extract_job_requirements(job_description, models):
     """
     Extract key requirements from a job description
     """
-    # Common technical skills to look for
     tech_skills = [
         "Python", "Java", "C++", "JavaScript", "TypeScript", "Go", "Rust", "SQL", "Ruby", "PHP", "Swift", "Kotlin",
         "React", "Angular", "Vue", "Node.js", "HTML", "CSS", "Django", "Flask", "Spring", "REST API", "GraphQL",
         "Machine Learning", "TensorFlow", "PyTorch", "Data Science", "AI", "Big Data", "Deep Learning", "NLP",
         "AWS", "Azure", "GCP", "Docker", "Kubernetes", "CI/CD", "Jenkins", "GitHub Actions", "Terraform",
-        "MySQL", "PostgreSQL", "MongoDB", "Redis", "Elasticsearch", "DynamoDB", "Cassandra"
     ]
     # Clean the text for processing
@@ -636,6 +570,19 @@ def extract_job_requirements(job_description, models):
     # Extract required skills
     required_skills = [skill for skill in tech_skills if re.search(r'\b' + re.escape(skill.lower()) + r'\b', clean_job_text)]
     # Create a simple summary of the job using the summarize_text function
     job_summary = summarize_text(job_description, models, max_length=100)
@@ -729,8 +676,10 @@ def main():
                 2: "GOOD FIT"
             }
-            # Show the score prominently
-            st.markdown(f"## {fit_labels[fit_score]}")
             # Display assessment
             st.markdown(assessment)

     summary = " ".join(summary_sentences)
     return summary
+# Modified job fit evaluation function that uses a direct scoring approach
 def evaluate_job_fit(resume_summary, job_requirements, models):
     """
+    Use a more direct method to evaluate job fit, rather than relying solely on sentiment analysis
     """
     start_time = time.time()
     job_title = job_requirements["title"]
     job_summary = job_requirements["summary"]
+    # Extract skills from resume
+    skills_mentioned = extract_skills(resume_summary)
+    # Calculate skill match percentage
+    matching_skills = [skill for skill in required_skills if skill in skills_mentioned]
+    skill_match_percentage = len(matching_skills) / len(required_skills) if required_skills else 0
+    # Extract experience level from resume
+    experience_pattern = r'(\d+)\+?\s*years?\s*(?:of)?\s*experience'
+    experience_match = re.search(experience_pattern, resume_summary, re.IGNORECASE)
+    years_experience = 0
+    if experience_match:
+        try:
+            years_experience = int(experience_match.group(1))
+        except:
+            years_experience = 0
+    # Experience match
+    exp_match_ratio = min(1.0, years_experience / max(1, years_required)) if years_required > 0 else 0.5
+    # Check job title match
+    job_title_lower = job_title.lower()
+    title_match = 0
+    # Look for job title words in resume
+    title_words = [word for word in job_title_lower.split() if len(word) > 3]
+    title_matches = sum(1 for word in title_words if word in resume_summary.lower())
+    title_match = title_matches / len(title_words) if title_words else 0
+    # Calculate scores for each dimension
+    skill_score = min(2, skill_match_percentage * 3)  # 0-2 scale
+    exp_score = min(2, exp_match_ratio * 2)  # 0-2 scale
+    title_score = min(2, title_match * 2)  # 0-2 scale
+    # Extract name, age, industry from resume summary
     name_match = re.search(r'Name:\s*(.*?)(?=\n|\Z)', resume_summary)
     name = name_match.group(1).strip() if name_match else "The candidate"
     industry_match = re.search(r'Expected Industry:\s*(.*?)(?=\n|\Z)', resume_summary)
     industry = industry_match.group(1).strip() if industry_match else "unspecified industry"
+    # Calculate weighted final score
+    # Skills: 50%, Experience: 30%, Title match: 20%
+    weighted_score = (skill_score * 0.5) + (exp_score * 0.3) + (title_score * 0.2)
+    # Convert to integer score (0-2)
+    if weighted_score >= 1.5:
+        fit_score = 2  # Good fit
+    elif weighted_score >= 0.8:
+        fit_score = 1  # Potential fit
+    else:
+        fit_score = 0  # Not a fit
+    # Generate assessment text based on score
+    missing_skills = [skill for skill in required_skills if skill not in skills_mentioned]
     if fit_score == 2:
+        fit_assessment = f"{fit_score}: GOOD FIT - {name} demonstrates strong alignment with the {job_title} position. Their background in {industry} and professional experience appear well-suited for this role's requirements. The technical expertise matches what the position demands."
     elif fit_score == 1:
+        fit_assessment = f"{fit_score}: POTENTIAL FIT - {name} shows potential for the {job_title} role with some relevant experience, though there are gaps in certain technical areas. Their {industry} background provides partial alignment with the position requirements. Additional training might be needed in {', '.join(missing_skills[:2])} if pursuing this opportunity."
     else:
+        fit_assessment = f"{fit_score}: NOT FIT - {name}'s current background shows limited alignment with this {job_title} position. Their experience level and technical background differ significantly from the role requirements. A position better matching their {industry} expertise might be more suitable."
     execution_time = time.time() - start_time
     """Extract key skills from the resume"""
     # Common skill categories - reduced keyword list for speed
     skill_categories = {
+        "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go", "React", "Angular", "Vue", "Node.js"],
+        "Data Science": ["Machine Learning", "Data Analysis", "Statistics", "TensorFlow", "PyTorch", "AI", "Algorithms", "NLP", "Deep Learning"],
+        "Database": ["SQL", "MySQL", "MongoDB", "Database", "NoSQL", "PostgreSQL", "Oracle", "Redis"],
+        "Web Development": ["React", "Angular", "Node.js", "Frontend", "Backend", "Full-Stack", "REST API", "GraphQL"],
+        "Software Development": ["Agile", "Scrum", "Git", "DevOps", "Docker", "System Design", "CI/CD", "Jenkins"],
+        "Cloud": ["AWS", "Azure", "Google Cloud", "Cloud Computing", "Lambda", "S3", "EC2"],
         "Security": ["Cybersecurity", "Network Security", "Encryption", "Security"],
+        "Business": ["Project Management", "Business Analysis", "Leadership", "Teamwork", "Agile", "Scrum"],
+        "Design": ["UX/UI", "User Experience", "Design Thinking", "Adobe", "Figma"]
     }
     # Process everything at once
     """
     Extract key requirements from a job description
     """
+    # Common technical skills to look for - expanded list for better matching
     tech_skills = [
         "Python", "Java", "C++", "JavaScript", "TypeScript", "Go", "Rust", "SQL", "Ruby", "PHP", "Swift", "Kotlin",
         "React", "Angular", "Vue", "Node.js", "HTML", "CSS", "Django", "Flask", "Spring", "REST API", "GraphQL",
         "Machine Learning", "TensorFlow", "PyTorch", "Data Science", "AI", "Big Data", "Deep Learning", "NLP",
         "AWS", "Azure", "GCP", "Docker", "Kubernetes", "CI/CD", "Jenkins", "GitHub Actions", "Terraform",
+        "MySQL", "PostgreSQL", "MongoDB", "Redis", "Elasticsearch", "DynamoDB", "Cassandra", "Oracle",
+        "Project Management", "Agile", "Scrum", "UX/UI", "Design", "Leadership", "Team Management",
+        "Communication Skills", "Problem Solving", "Critical Thinking", "Blockchain", "Information Security",
+        "Networking", "Linux", "Windows Server", "Excel", "PowerPoint", "Word", "Tableau", "Power BI", "R",
+        "SPSS", "SAS", "Spark", "Hadoop", "JIRA", "Confluence", "Git", "SVN", "Testing", "QA", "DevOps",
+        "Full Stack", "Mobile Development", "Android", "iOS", "React Native", "Flutter", "SEO", "Marketing",
+        "Sales", "Customer Service", "Business Analysis", "Data Analysis", "Accounting", "Finance"
     ]
     # Clean the text for processing
     # Extract required skills
     required_skills = [skill for skill in tech_skills if re.search(r'\b' + re.escape(skill.lower()) + r'\b', clean_job_text)]
+    # If no skills found, use some default important ones to avoid empty lists
+    if not required_skills:
+        # Extract some common words that might be skills
+        words = re.findall(r'\b\w{4,}\b', clean_job_text)
+        word_counts = {}
+        for word in words:
+            if word not in ["with", "that", "this", "have", "from", "they", "will", "what", "your", "their", "about"]:
+                word_counts[word] = word_counts.get(word, 0) + 1
+        # Get the top 5 most common words as potential skills
+        sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)
+        required_skills = [word.capitalize() for word, _ in sorted_words[:5]]
     # Create a simple summary of the job using the summarize_text function
     job_summary = summarize_text(job_description, models, max_length=100)
                 2: "GOOD FIT"
             }
+            # Show the score prominently with appropriate coloring
+            score_label = fit_labels[fit_score]
+            score_colors = {0: "red", 1: "orange", 2: "green"}
+            st.markdown(f"<h2 style='color: {score_colors[fit_score]};'>{score_label}</h2>", unsafe_allow_html=True)
             # Display assessment
             st.markdown(assessment)