Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Files Community

CR7CAD committed on Mar 20

Commit

4110522

verified ·

1 Parent(s): a739933

Update app.py

Browse files

Files changed (1) hide show

app.py +175 -138

app.py CHANGED Viewed

@@ -315,185 +315,222 @@ def summarize_resume_text(resume_text):
 #####################################
 def analyze_google_fit(resume_summary):
     """
-    Analyze how well the candidate fits Google's requirements with detailed category breakdowns.
     """
-    start_time = time.time()
-    # Define Google's key skill categories with more detailed keywords
-    google_keywords = {
-        "technical_skills": ["python", "java", "c++", "javascript", "go", "sql", "algorithms", "data structures",
-                           "coding", "software development", "git", "programming", "backend", "frontend", "full-stack"],
-        "advanced_tech": ["machine learning", "ai", "artificial intelligence", "cloud", "data science", "big data",
-                         "tensorflow", "deep learning", "distributed systems", "kubernetes", "microservices"],
-        "problem_solving": ["problem solving", "analytical", "critical thinking", "troubleshooting", "debugging",
-                           "optimization", "scalability", "system design", "complexity", "efficiency"],
-        "innovation": ["innovation", "creative", "creativity", "design thinking", "research", "novel solutions",
-                      "patents", "publications", "unique approaches", "cutting-edge"],
-        "soft_skills": ["team", "leadership", "collaboration", "communication", "agile", "project management",
-                       "mentoring", "cross-functional", "presentation", "stakeholder management"]
-    }
-    # Category weights with descriptive labels
-    category_weights = {
-        "technical_skills": {"weight": 0.35, "label": "Technical Programming Skills"},
-        "advanced_tech": {"weight": 0.25, "label": "Advanced Technology Knowledge"},
-        "problem_solving": {"weight": 0.20, "label": "Problem Solving Abilities"},
-        "innovation": {"weight": 0.10, "label": "Innovation Mindset"},
-        "soft_skills": {"weight": 0.10, "label": "Collaboration & Leadership"}
-    }
-    resume_lower = resume_summary.lower()
-    # Calculate category scores and store detailed information
-    category_scores = {}
-    category_details = {}
-    found_skills = {}
-    for category, keywords in google_keywords.items():
-        # Find the specific matching keywords for feedback
-        category_matches = [keyword for keyword in keywords if keyword in resume_lower]
-        found_skills[category] = category_matches
-        # Count matches but cap at a reasonable level
-        matches = len(category_matches)
-        total_keywords = len(keywords)
-        # Calculate raw percentage for this category
-        raw_percentage = int((matches / total_keywords) * 100)
-        # Apply logarithmic scaling for more realistic scores
-        if matches == 0:
-            adjusted_score = 0.0
-        else:
-            # Logarithmic scaling to prevent perfect scores
-            adjusted_score = min(0.95, (math.log(matches + 1) / math.log(min(total_keywords, 8) + 1)))
-        # Store both raw and adjusted scores for feedback
-        category_scores[category] = adjusted_score
-        category_details[category] = {
-            "raw_percentage": raw_percentage,
-            "adjusted_score": int(adjusted_score * 100),
-            "matching_keywords": category_matches,
-            "total_keywords": total_keywords,
-            "matches": matches
-        }
-    # Calculate weighted score
-    weighted_score = sum(score * category_weights[category]["weight"] for category, score in category_scores.items())
-    # Apply final curve to keep scores in a realistic range
-    match_percentage = min(92, max(35, int(weighted_score * 100)))
-    # Find top strengths and areas for improvement
-    strengths = [(category_weights[cat]["label"], details["adjusted_score"])
-                for cat, details in category_details.items()
-                if details["adjusted_score"] >= 60]
-    weaknesses = [(category_weights[cat]["label"], details["adjusted_score"])
-                 for cat, details in category_details.items()
-                 if details["adjusted_score"] < 50]
-    # Sort strengths and weaknesses by score
-    strengths.sort(key=lambda x: x[1], reverse=True)
-    weaknesses.sort(key=lambda x: x[1])
-    # Create a more detailed prompt for assessment
-    strength_text = ", ".join([f"{s[0]}" for s in strengths[:3]]) if strengths else "limited applicable skills"
-    weakness_text = ", ".join([f"{w[0]}" for w in weaknesses[:3]]) if weaknesses else "no obvious weaknesses"
-    # Extract key resume elements
-    skills_match = re.search(r'Skills:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
-    skills_text = skills_match.group(0) if skills_match else ""
-    work_match = re.search(r'Previous Work Experience:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
-    work_text = work_match.group(0) if work_match else ""
-    # List specific matching skills for more detailed assessment
-    specific_skills = []
-    for category, matches in found_skills.items():
-        if matches:
-            specific_skills.extend(matches[:3])  # Take up to 3 skills from each category
-    specific_skills_text = ", ".join(specific_skills[:8]) if specific_skills else "limited identifiable skills"
     prompt = f"""
-Write a detailed assessment of a job candidate for Google.
-Resume highlights: Skills in {specific_skills_text}. {work_text[:200]}
-Strengths: {strength_text}
-Areas for improvement: {weakness_text}
-Match percentage: {match_percentage}%
-Write a detailed 3-5 sentence assessment beginning with "This candidate". Be specific about skills, experiences,
-strengths, weaknesses, and how they align with Google. Mention specific technical skills where relevant.
 """
     try:
-        # Generate the assessment
         assessment_results = models['evaluator'](
             prompt,
-            max_length=350,  # Longer assessment
             do_sample=True,
-            temperature=0.7,  # Higher temperature for more detailed output
             num_return_sequences=3
         )
         # Find the best response
-        assessment = None
         for result in assessment_results:
             text = result['generated_text'].strip()
-            # Remove prompt artifacts
-            text = re.sub(r'Write a detailed assessment.*?Match percentage:.*?%', '', text, flags=re.DOTALL)
-            text = re.sub(r'Write a detailed 3-5 sentence assessment.*?', '', text, flags=re.DOTALL)
-            # Check if it looks valid
-            if "this candidate" in text.lower() and len(text) > 100:
-                assessment = text
                 break
-        # If no good response was found, fall back to manual assessment
-        if not assessment:
-            assessment = generate_detailed_manual_assessment(resume_summary, strengths, weaknesses, specific_skills, match_percentage)
     except Exception as e:
-        # Fallback to detailed manual assessment
-        assessment = generate_detailed_manual_assessment(resume_summary, strengths, weaknesses, specific_skills, match_percentage)
-        print(f"Error in assessment generation: {e}")
     # Final cleanup
     assessment = assessment.strip()
-    if not assessment.startswith("This candidate"):
-        assessment = f"This candidate {assessment}"
-    execution_time = time.time() - start_time
     return assessment, match_percentage, category_details, execution_time
-def generate_detailed_manual_assessment(resume_summary, strengths, weaknesses, specific_skills, match_percentage):
     """
-    Generate a detailed manual assessment when the model fails.
     """
-    # Start with strengths
-    if strengths:
-        assessment = f"This candidate demonstrates proficiency in {', '.join([s[0] for s in strengths[:2]])}. "
-        if specific_skills:
-            assessment += f"Their experience with {', '.join(specific_skills[:4])} aligns with Google's technical requirements. "
     else:
-        assessment = "This candidate has limited alignment with Google's key requirements based on the resume provided. "
-        if specific_skills:
-            assessment += f"While they have some experience with {', '.join(specific_skills[:3])}, these skills alone may not be sufficient. "
-    # Add weaknesses
-    if weaknesses:
-        assessment += f"To improve their candidacy for Google, they should focus on developing stronger {' and '.join([w[0].lower() for w in weaknesses[:2]])}. "
-    # Add conclusion with match percentage
     if match_percentage >= 70:
-        assessment += f"Overall, they show good potential for certain roles at Google with a {match_percentage}% match to requirements."
     elif match_percentage >= 50:
-        assessment += f"With targeted skill development, they may become a stronger candidate for Google, currently showing a {match_percentage}% match."
     else:
-        assessment += f"Significant skill development would be needed before they could be considered a strong Google candidate, with a current match of {match_percentage}%."
     return assessment

 #####################################
 def analyze_google_fit(resume_summary):
     """
+    Analyze how well the candidate fits Google's requirements.
+    Only modifying the T5 prompt to get better expert assessments.
     """
+    # [Keep all the existing code for score calculation unchanged]
+    # Get more specific information for a better prompt
+    # Get top skills across all categories (up to 5 total)
+    all_matching_skills = []
+    for category, matches in found_skills.items():
+        if matches:
+            all_matching_skills.extend(matches)
+    top_skills = list(set(all_matching_skills))[:5]  # Remove duplicates and take top 5
+    skills_text = ", ".join(top_skills) if top_skills else "limited relevant skills"
+    # Get strongest and weakest categories for more specific feedback
+    categories_sorted = sorted(category_details.items(), key=lambda x: x[1]["adjusted_score"], reverse=True)
+    top_category = category_weights[categories_sorted[0][0]]["label"]
+    weak_category = category_weights[categories_sorted[-1][0]]["label"]
+    # Extract work experience highlights
+    experience_match = re.search(r'Previous Work Experience:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
+    experience_text = experience_match.group(0) if experience_match else ""
+    # Extract just 1-2 key experiences
+    experiences = re.findall(r'([A-Z][^.]*?company|[A-Z][^.]*?engineer|[A-Z][^.]*?developer|[A-Z][^.]*?Google|[A-Z][^.]*?Microsoft|[A-Z][^.]*?Amazon)', experience_text)
+    experience_highlights = ", ".join(experiences[:2]) if experiences else "work experience"
+    # Create a more specific prompt for T5 that focuses on detailed assessment
     prompt = f"""
+Generate Google candidate assessment.
+Skills detected: {skills_text}.
+Experience: {experience_highlights}.
+Strongest area: {top_category} ({categories_sorted[0][1]["adjusted_score"]}%).
+Weakest area: {weak_category} ({categories_sorted[-1][1]["adjusted_score"]}%).
+Overall match: {match_percentage}%.
+Write a detailed 3-4 sentence assessment for this Google applicant.
+Start with "This candidate" and include:
+1. Specific strengths relating to Google's needs
+2. Technical skills evaluation
+3. Areas for improvement
+4. Overall Google fit assessment
+This candidate
 """
     try:
+        # Generate the assessment using T5
         assessment_results = models['evaluator'](
             prompt,
+            max_length=300,
             do_sample=True,
+            temperature=0.7,
             num_return_sequences=3
         )
         # Find the best response
+        best_assessment = None
         for result in assessment_results:
             text = result['generated_text'].strip()
+            # Clean up and check if valid
+            text = re.sub(r'Generate Google candidate assessment.*?Overall match:.*?%\.', '', text, flags=re.DOTALL)
+            text = re.sub(r'Write a detailed.*?This candidate', 'This candidate', text, flags=re.DOTALL)
+            # Check if it's a good response
+            if text.lower().startswith("this candidate") and len(text) > 100 and "1." not in text and "2." not in text:
+                best_assessment = text
                 break
+        # Use the best response or the first one if none were ideal
+        if best_assessment:
+            assessment = best_assessment
+        else:
+            # Use first response but clean it up
+            text = assessment_results[0]['generated_text']
+            text = re.sub(r'Generate Google candidate assessment.*?Overall match:.*?%\.', '', text, flags=re.DOTALL)
+            text = re.sub(r'Write a detailed.*?This candidate', 'This candidate', text, flags=re.DOTALL)
+            # Remove numbering if present
+            text = re.sub(r'\d\.\s', '', text)
+            if not text.lower().startswith("this candidate"):
+                text = "This candidate " + text
+            assessment = text
     except Exception as e:
+        # Fall back to manual assessment
+        print(f"Error in T5 assessment generation: {e}")
+        assessment = f"""This candidate demonstrates some skills relevant to Google, particularly in {top_category}. Their experience with {skills_text} could be valuable, though they would benefit from strengthening their {weak_category}. Based on the resume analysis, they appear to be a {match_percentage}% match for Google's requirements."""
     # Final cleanup
+    # Remove any remaining artifacts or formatting
+    assessment = re.sub(r'\n+', ' ', assessment)
+    assessment = re.sub(r'\s+', ' ', assessment)
     assessment = assessment.strip()
+    # Make sure percentages are consistent
+    assessment = re.sub(r'\b\d{1,2}%\b', f"{match_percentage}%", assessment)
+    # [Keep the return statement and rest of function the same]
     return assessment, match_percentage, category_details, execution_time
+def generate_expert_assessment(resume_summary, match_percentage, category_details, found_skills):
     """
+    Generate a comprehensive expert assessment based on the resume analysis.
+    This is a specialized function to create high-quality, specific assessments.
     """
+    # Sort categories by score to identify top strengths and weaknesses
+    categories = list(category_details.keys())
+    categories.sort(key=lambda cat: category_details[cat]["adjusted_score"], reverse=True)
+    # Identify top strengths (top 2 categories)
+    top_strengths = categories[:2]
+    # Identify main weaknesses (bottom 2 categories, but only if score is below 50%)
+    weaknesses = [cat for cat in categories if category_details[cat]["adjusted_score"] < 50]
+    # Extract relevant skills for top strengths (up to 3 skills per strength)
+    strength_skills = []
+    for category in top_strengths:
+        matches = found_skills[category][:3] if found_skills[category] else []
+        strength_skills.extend(matches)
+    # Extract experience snippets from resume
+    experience_match = re.search(r'Previous Work Experience:(.*?)(?=\n\n|$)', resume_summary, re.DOTALL)
+    experience_text = experience_match.group(1) if experience_match else ""
+    # Find relevant company names or roles that might be impressive
+    company_pattern = r'\b(Google|Microsoft|Amazon|Apple|Facebook|Meta|Twitter|LinkedIn|Uber|Airbnb|Netflix|Oracle|IBM|Intel|Adobe|Salesforce)\b'
+    companies = re.findall(company_pattern, experience_text, re.IGNORECASE)
+    # Determine the expertise level based on score
+    if match_percentage >= 75:
+        expertise_level = "strong"
+    elif match_percentage >= 60:
+        expertise_level = "solid"
+    elif match_percentage >= 45:
+        expertise_level = "moderate"
     else:
+        expertise_level = "limited"
+    # Start building assessment
+    assessment = f"This candidate demonstrates {expertise_level} potential for Google, with particular strengths in "
+    # Add strengths with specific skills
+    if top_strengths:
+        strength_labels = []
+        for strength in top_strengths:
+            label = {"technical_skills": "technical programming",
+                    "advanced_tech": "advanced technology",
+                    "problem_solving": "problem-solving",
+                    "innovation": "innovation",
+                    "soft_skills": "collaboration and leadership"}[strength]
+            strength_labels.append(label)
+        assessment += f"{' and '.join(strength_labels)}. "
+        # Add specific skills if available
+        if strength_skills:
+            assessment += f"Their experience with {', '.join(strength_skills[:4])} "
+            # Add relevance to Google
+            if any(skill in ['machine learning', 'ai', 'python', 'java', 'c++', 'cloud'] for skill in strength_skills):
+                assessment += "directly aligns with Google's technical requirements. "
+            else:
+                assessment += "is relevant to Google's technology stack. "
+    else:
+        assessment += "few areas that align closely with Google's requirements. "
+    # Add context from work experience if relevant companies found
+    if companies:
+        unique_companies = list(set([c.lower() for c in companies]))
+        if len(unique_companies) > 1:
+            assessment += f"Their experience at companies like {', '.join(unique_companies[:2])} provides valuable industry context. "
+        else:
+            assessment += f"Their experience at {unique_companies[0]} provides relevant industry context. "
+    # Add weaknesses and improvement suggestions
+    if weaknesses:
+        assessment += "However, to improve their candidacy, they should strengthen their "
+        weakness_labels = []
+        for weakness in weaknesses[:2]:  # Only mention top 2 weaknesses
+            label = {"technical_skills": "technical programming skills",
+                    "advanced_tech": "knowledge of advanced technologies",
+                    "problem_solving": "problem-solving capabilities",
+                    "innovation": "innovation mindset",
+                    "soft_skills": "teamwork and collaboration abilities"}[weakness]
+            weakness_labels.append(label)
+        assessment += f"{' and '.join(weakness_labels)}, "
+        # Add specific improvement suggestion
+        if "technical_skills" in weaknesses:
+            assessment += "particularly by building projects with modern languages like Python, Java, or Go. "
+        elif "advanced_tech" in weaknesses:
+            assessment += "ideally by gaining exposure to machine learning, cloud systems, or distributed computing. "
+        elif "problem_solving" in weaknesses:
+            assessment += "by practicing algorithmic problems and system design challenges. "
+        elif "innovation" in weaknesses:
+            assessment += "through projects that demonstrate creative thinking and novel solutions. "
+        elif "soft_skills" in weaknesses:
+            assessment += "by highlighting collaborative projects and leadership experiences. "
+    # Add final evaluation with match percentage
     if match_percentage >= 70:
+        assessment += f"Overall, this candidate shows good alignment with Google's culture of innovation and technical excellence, with a {match_percentage}% match to the company's requirements."
     elif match_percentage >= 50:
+        assessment += f"With these improvements, the candidate could become more competitive for Google positions, currently showing a {match_percentage}% match to the company's requirements."
     else:
+        assessment += f"Significant development in these areas would be needed before they could be considered a strong Google candidate, with a current match of {match_percentage}% to requirements."
     return assessment