Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Files Community

CR7CAD committed on Mar 22

Commit

99e5c00

verified ·

1 Parent(s): 8e57a3e

Update app.py

Browse files

Files changed (1) hide show

app.py +140 -98

app.py CHANGED Viewed

@@ -171,9 +171,6 @@ def basic_summarize(text, max_length=100):
 def evaluate_job_fit(resume_summary, job_requirements, models):
     """
     Use the sentiment model to evaluate job fit with multiple analyses
-    This function deliberately takes time to do a more thorough analysis, creating
-    multiple perspectives for the sentiment model to evaluate.
     """
     start_time = time.time()
@@ -332,19 +329,19 @@ def evaluate_job_fit(resume_summary, job_requirements, models):
     # Now determine the final score (0, 1, or 2)
     if combined_score >= 0.7 and skills_match_percentage >= 70 and experience_match == "sufficient":
-        final_score = 2  # Strong fit
     elif combined_score >= 0.4 or (skills_match_percentage >= 50 and experience_match == "sufficient"):
         final_score = 1  # Potential fit
     else:
         final_score = 0  # Not fit
-    # Generate assessment text based on the score
     if final_score == 2:
-        assessment = f"{final_score}: The candidate is a strong match for this {job_title} position. They have the required {experience_years} years of experience and demonstrate proficiency in key skills including {', '.join(skills_in_resume[:5])}. Their background aligns well with the job requirements."
     elif final_score == 1:
-        assessment = f"{final_score}: The candidate shows potential for this {job_title} position, but has some skill gaps. They match on {skills_match_percentage}% of required skills including {', '.join(skills_in_resume[:3]) if skills_in_resume else 'minimal required skills'}, and their experience is {experience_match}."
     else:
-        assessment = f"{final_score}: The candidate does not appear to be a good match for this {job_title} position. Their profile shows limited alignment with key requirements, matching only {skills_match_percentage}% of required skills, and their experience level is {experience_match}."
     execution_time = time.time() - start_time
@@ -401,7 +398,117 @@ def extract_text_from_file(file_obj):
 # Functions for Information Extraction
 #####################################
-# Cache the extraction functions to avoid reprocessing
 @lru_cache(maxsize=32)
 def extract_name(text_start):
     """Extract candidate name from the beginning of resume text"""
@@ -424,8 +531,9 @@ def extract_name(text_start):
     return "Unknown (please extract from resume)"
-def extract_skills_and_work(text):
-    """Extract both skills and work experience at once to save processing time"""
     # Common skill categories - reduced keyword list for speed
     skill_categories = {
         "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go"],
@@ -439,110 +547,44 @@ def extract_skills_and_work(text):
         "Design": ["UX/UI", "User Experience", "Design Thinking", "Adobe"]
     }
-    # Work experience extraction
-    work_headers = [
-        "work experience", "professional experience", "employment history",
-        "work history", "experience"
-    ]
-    next_section_headers = [
-        "education", "skills", "certifications", "projects", "achievements"
-    ]
     # Process everything at once
-    lines = text.split('\n')
     text_lower = text.lower()
     # Skills extraction
-    found_skills = []
     for category, skills in skill_categories.items():
-        category_skills = []
         for skill in skills:
             if skill.lower() in text_lower:
-                category_skills.append(skill)
-        if category_skills:
-            found_skills.append(f"{category}: {', '.join(category_skills)}")
-    # Work experience extraction - simplified approach
-    work_section = []
-    in_work_section = False
-    for idx, line in enumerate(lines):
-        line_lower = line.lower().strip()
-        # Start of work section
-        if not in_work_section:
-            if any(header in line_lower for header in work_headers):
-                in_work_section = True
-                continue
-        # End of work section
-        elif in_work_section:
-            if any(header in line_lower for header in next_section_headers):
-                break
-            if line.strip():
-                work_section.append(line.strip())
-    # Simplified work formatting
-    if not work_section:
-        work_experience = "Work experience not clearly identified"
-    else:
-        # Just take the first 5-7 lines of the work section as a summary
-        work_lines = []
-        company_count = 0
-        current_company = ""
-        for line in work_section:
-            # New company entry often has a date
-            if re.search(r'(19|20)\d{2}', line):
-                company_count += 1
-                if company_count <= 3:  # Limit to 3 most recent positions
-                    current_company = line
-                    work_lines.append(f"**{line}**")
-                else:
-                    break
-            elif company_count <= 3 and len(work_lines) < 10:  # Limit total lines
-                work_lines.append(line)
-        work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
-    skills_formatted = "\n• " + "\n• ".join(found_skills) if found_skills else "No specific technical skills clearly identified"
-    return skills_formatted, work_experience
 #####################################
 # Function: Summarize Resume Text
 #####################################
 def summarize_resume_text(resume_text, models):
     """
-    Generates a structured summary of the resume text
     """
     start_time = time.time()
-    # Use our summarize_text function which handles both pipeline and non-pipeline cases
-    base_summary = summarize_text(resume_text, models, max_length=100)
-    # Extract name from the beginning of the resume
     name = extract_name(resume_text[:500])
-    # Extract skills and work experience
-    skills, work_experience = extract_skills_and_work(resume_text)
-    # Extract education level - simplified approach
-    education_level = "Not specified"
-    education_terms = ["bachelor", "master", "phd", "doctorate", "mba", "degree"]
-    for term in education_terms:
-        if term in resume_text.lower():
-            education_level = "Higher education degree mentioned"
-            break
-    # Format the structured summary
     formatted_summary = f"Name: {name}\n\n"
-    formatted_summary += f"Summary: {base_summary}\n\n"
-    formatted_summary += f"Previous Work Experience: {work_experience}\n\n"
-    formatted_summary += f"Skills: {skills}\n\n"
-    formatted_summary += f"Education: {education_level}"
     execution_time = time.time() - start_time
@@ -688,9 +730,9 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
         # Display fit score with label
         fit_labels = {
-            0: "NOT FIT ❌",
-            1: "POTENTIAL FIT ⚠️",
-            2: "STRONG FIT ✅"
         }
         # Show the score prominently
@@ -706,7 +748,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
         if fit_score == 2:
             st.markdown("""
-            - Apply for this position as you appear to be a strong match
             - Prepare for interviews by focusing on your relevant experience
             - Highlight your matching skills in your cover letter
             """)

 def evaluate_job_fit(resume_summary, job_requirements, models):
     """
     Use the sentiment model to evaluate job fit with multiple analyses
     """
     start_time = time.time()
     # Now determine the final score (0, 1, or 2)
     if combined_score >= 0.7 and skills_match_percentage >= 70 and experience_match == "sufficient":
+        final_score = 2  # Good fit
     elif combined_score >= 0.4 or (skills_match_percentage >= 50 and experience_match == "sufficient"):
         final_score = 1  # Potential fit
     else:
         final_score = 0  # Not fit
+    # Generate concise assessment text based on the score
     if final_score == 2:
+        assessment = f"{final_score}: Skills match {skills_match_percentage}%, Experience match {experience_years}/{years_required} yrs. Strong technical alignment with {len(skills_in_resume)}/{len(required_skills)} required skills."
     elif final_score == 1:
+        assessment = f"{final_score}: Skills match {skills_match_percentage}%, Experience {experience_match}. Meets some requirements but has gaps in {len(required_skills) - len(skills_in_resume)} skill areas."
     else:
+        assessment = f"{final_score}: Skills match only {skills_match_percentage}%, Experience {experience_match}. Significant gaps in critical requirements for this position."
     execution_time = time.time() - start_time
 # Functions for Information Extraction
 #####################################
+# Extract age from resume
def extract_age(text, current_year=None):
    """Extract the candidate's age from resume text.

    An explicitly stated age ("Age: 30", "30 years old") is preferred. If
    none is found, a four-digit birth year following "DOB:" or
    "Date of birth:" is converted to an approximate age.

    Args:
        text: Raw resume text. Empty or ``None`` yields "Not specified".
        current_year: Year used to turn a birth year into an age. Defaults
            to the current calendar year.

    Returns:
        The age as a string of digits, or "Not specified" when no age or
        plausible birth year is found.
    """
    # Local import: the file's top-level import block is not visible from
    # this block, so the new dependency is brought into scope here.
    import datetime

    if not text:
        return "Not specified"
    if current_year is None:
        current_year = datetime.date.today().year

    text_lower = text.lower()

    # Explicit age. The word boundary keeps "language: 3" or "page 12" from
    # matching, and the digit guards stop "125" being read as "12" or "25".
    stated_age_patterns = (
        r'\bage:?\s*(\d{1,2})(?!\d)',
        r'(?<!\d)(\d{1,2})\s*years\s*old\b',
    )
    for pattern in stated_age_patterns:
        match = re.search(pattern, text_lower)
        if match:
            return match.group(1)

    # Birth year. The lazy ".*?" picks the first plausible year on the line
    # rather than the last four digits (which may belong to a phone number).
    birth_year_patterns = (
        r'\bdob:.*?\b((?:19|20)\d{2})\b',
        r'\bdate of birth:.*?\b((?:19|20)\d{2})\b',
    )
    for pattern in birth_year_patterns:
        match = re.search(pattern, text_lower)
        if match:
            age = current_year - int(match.group(1))
            # A birth year in the future is not a usable answer.
            if age >= 0:
                return str(age)

    return "Not specified"
+# Extract industry preference
def extract_industry(text):
    """Guess the candidate's target industry from keyword frequency.

    Each industry has a list of keywords; the industry whose keywords occur
    most often in the resume wins. Ties go to the industry listed first.

    Args:
        text: Raw resume text. Empty or ``None`` yields
            "Not clearly specified".

    Returns:
        The industry name with the highest keyword count, or
        "Not clearly specified" when no keyword occurs at all.
    """
    # Local import: this block did not previously depend on ``re``.
    import re

    # Common industry keywords
    industry_keywords = {
        "Technology": ["software", "programming", "developer", "IT", "tech", "computer", "digital"],
        "Finance": ["banking", "financial", "accounting", "finance", "analyst"],
        "Healthcare": ["medical", "health", "hospital", "clinical", "nurse", "doctor", "patient"],
        "Education": ["teaching", "teacher", "professor", "education", "university", "school", "academic"],
        "Marketing": ["marketing", "advertising", "digital marketing", "social media", "brand"],
        "Engineering": ["engineer", "engineering", "mechanical", "civil", "electrical"],
        "Data Science": ["data science", "machine learning", "AI", "analytics", "big data"],
        "Management": ["manager", "management", "leadership", "executive", "director"],
        "Consulting": ["consultant", "consulting", "advisor"],
        "Sales": ["sales", "business development", "account manager", "client relations"]
    }

    if not text:
        return "Not clearly specified"

    text_lower = text.lower()

    def count_keyword(keyword):
        # Acronyms ("IT", "AI") are matched as case-sensitive whole words.
        # Lowercased substring counting would hit "with", "email",
        # "training", etc. and badly skew the result.
        if keyword.isupper():
            return len(re.findall(r'\b' + re.escape(keyword) + r'\b', text))
        # Ordinary keywords keep substring counting so inflected forms
        # ("engineers", "technical") still contribute.
        return text_lower.count(keyword.lower())

    industry_counts = {
        industry: sum(count_keyword(keyword) for keyword in keywords)
        for industry, keywords in industry_keywords.items()
    }

    # max() returns the first maximal entry, so ties resolve in dict order.
    best_industry, best_count = max(industry_counts.items(), key=lambda item: item[1])
    if best_count > 0:
        return best_industry
    return "Not clearly specified"
+# Extract job position preference
def extract_job_position(text):
    """Extract the job position the candidate is aiming for.

    Strategy, in order:
      1. Find an objective/summary/"seeking ..." passage and look for a known
         job title in it, returning the title with one or two preceding
         words of context when available.
      2. If such a passage exists but names no known title, return the
         passage itself (truncated to 10 words).
      3. Otherwise fall back to a current/most recent job title pattern.

    Args:
        text: Raw resume text. Empty or ``None`` yields
            "Not explicitly stated".

    Returns:
        A title-cased position string, or "Not explicitly stated".
    """
    if not text:
        return "Not explicitly stated"

    # Look for objective or summary section
    objective_patterns = [
        r'objective:?\s*(.*?)(?=\n\n|\n\w+:|\Z)',
        r'career\s*objective:?\s*(.*?)(?=\n\n|\n\w+:|\Z)',
        r'professional\s*summary:?\s*(.*?)(?=\n\n|\n\w+:|\Z)',
        r'summary:?\s*(.*?)(?=\n\n|\n\w+:|\Z)',
        r'seeking\s*(?:a|an)?\s*(?:position|role|opportunity)\s*(?:as|in)?\s*(?:a|an)?\s*([^.]*)'
    ]
    job_titles = ("developer", "engineer", "analyst", "manager", "director", "specialist",
                  "coordinator", "consultant", "designer", "architect", "administrator")

    text_lower = text.lower()

    for pattern in objective_patterns:
        match = re.search(pattern, text_lower, re.IGNORECASE | re.DOTALL)
        if not match:
            continue
        objective_text = match.group(1).strip()

        # Look for job titles in the objective. Titles must be whole words:
        # a plain substring test treats "engineering" as "engineer" and
        # yields results like "In Engineer".
        for title in job_titles:
            if not re.search(r'\b' + title + r'\b', objective_text):
                continue
            # Try to get the full title with context. The optional leading
            # article is consumed outside the group so it is not returned.
            title_pattern = (
                r'(?:a|an)?\s*(\w+\s+' + title + r'\b|\w+\s+\w+\s+' + title + r'\b)'
            )
            title_match = re.search(title_pattern, objective_text)
            if title_match:
                return title_match.group(1).strip().title()
            return title.title()

        # No specific title found but we have objective text: return a
        # cleaned-up, possibly truncated, version of it.
        if len(objective_text) > 10:
            words = objective_text.split()
            if len(words) > 10:
                return " ".join(words[:10]).title() + "..."
            return objective_text.title()

    # Check current/most recent job title. Title words are joined by
    # spaces/tabs only, so a word from the previous line (e.g. the
    # candidate's surname) is never glued onto the title.
    job_patterns = [
        r'experience:.*?(\w+[ \t]+\w+(?:[ \t]+\w+)?)(?=\s*at|\s*\(|\s*-|\s*,|\s*\d{4}|\n)',
        r'(\w+[ \t]+\w+(?:[ \t]+\w+)?)\s*\(\s*current\s*\)',
        r'(\w+[ \t]+\w+(?:[ \t]+\w+)?)\s*\(\s*present\s*\)'
    ]
    for pattern in job_patterns:
        match = re.search(pattern, text_lower, re.IGNORECASE)
        if match:
            return match.group(1).strip().title()

    return "Not explicitly stated"
+# Extract name
 @lru_cache(maxsize=32)
 def extract_name(text_start):
     """Extract candidate name from the beginning of resume text"""
     return "Unknown (please extract from resume)"
+# Extract skills
def extract_skills(text):
    """Extract key skills mentioned in the resume.

    Each known skill is searched for case-insensitively as a whole token, so
    "Java" is not reported for "JavaScript" and "Go" is not reported for
    "Google". Skills ending in punctuation ("C++", "C#") still match when
    followed by a version or suffix (e.g. "C++17").

    Args:
        text: Raw resume text. Empty or ``None`` yields an empty list.

    Returns:
        A list of matching skill names, in category/listing order.
    """
    # Local import: this block did not previously depend on ``re``.
    import re

    # Common skill categories - reduced keyword list for speed
    # NOTE(review): only the categories visible in this view are listed;
    # confirm against the full file before applying.
    skill_categories = {
        "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go"],
        "Design": ["UX/UI", "User Experience", "Design Thinking", "Adobe"]
    }

    if not text:
        return []

    text_lower = text.lower()

    def mentions(skill):
        # Only guard the edges that are alphanumeric; "\b"-style guards are
        # meaningless next to "+" or "#".
        prefix = r'(?<!\w)' if skill[0].isalnum() else ''
        suffix = r'(?!\w)' if skill[-1].isalnum() else ''
        pattern = prefix + re.escape(skill.lower()) + suffix
        return re.search(pattern, text_lower) is not None

    return [
        skill
        for skills in skill_categories.values()
        for skill in skills
        if mentions(skill)
    ]
 #####################################
 # Function: Summarize Resume Text
 #####################################
 def summarize_resume_text(resume_text, models):
     """
+    Generates a structured summary of the resume text with the critical information
     """
     start_time = time.time()
+    # Extract critical information
     name = extract_name(resume_text[:500])
+    age = extract_age(resume_text)
+    industry = extract_industry(resume_text)
+    job_position = extract_job_position(resume_text)
+    skills = extract_skills(resume_text)
+    # Use our summarize_text function for a general summary
+    general_summary = summarize_text(resume_text, models, max_length=100)
+    # Format the structured summary with different paragraphs for each critical piece
     formatted_summary = f"Name: {name}\n\n"
+    formatted_summary += f"Age: {age}\n\n"
+    formatted_summary += f"Expected Industry: {industry}\n\n"
+    formatted_summary += f"Expected Job Position: {job_position}\n\n"
+    formatted_summary += f"Skills: {', '.join(skills)}\n\n"
+    formatted_summary += f"Summary: {general_summary}"
     execution_time = time.time() - start_time
         # Display fit score with label
         fit_labels = {
+            0: "NOT FIT",
+            1: "POTENTIAL FIT",
+            2: "GOOD FIT"
         }
         # Show the score prominently
         if fit_score == 2:
             st.markdown("""
+            - Apply for this position as you appear to be a good match
             - Prepare for interviews by focusing on your relevant experience
             - Highlight your matching skills in your cover letter
             """)