Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Community

CR7CAD committed on Mar 19

Commit

92e31bf

verified ·

1 Parent(s): 0a0fafe

Update app.py

Browse files

Files changed (1) hide show

app.py +176 -123

app.py CHANGED Viewed

@@ -315,173 +315,187 @@ def summarize_resume_text(resume_text):
 #####################################
 def analyze_google_fit(resume_summary):
     """
-    Analyze how well the candidate fits Google's requirements.
-    This uses the model to generate a natural language assessment with a realistic match score.
     """
     start_time = time.time()
-    # First, calculate a realistic score based on keyword matching and balanced criteria
     google_keywords = {
-        "technical_skills": ["python", "java", "c++", "javascript", "go", "sql", "algorithms", "data structures", "coding"],
-        "advanced_tech": ["machine learning", "ai", "artificial intelligence", "cloud", "data science", "big data", "tensorflow", "deep learning"],
-        "problem_solving": ["problem solving", "analytical", "critical thinking", "troubleshooting", "debugging", "optimization"],
-        "innovation": ["innovation", "creative", "creativity", "design thinking", "research", "novel"],
-        "soft_skills": ["team", "leadership", "collaboration", "communication", "agile", "project management"]
     }
-    # Calculate realistic score with category weights
     category_weights = {
-        "technical_skills": 0.35,
-        "advanced_tech": 0.25,
-        "problem_solving": 0.20,
-        "innovation": 0.10,
-        "soft_skills": 0.10
     }
     resume_lower = resume_summary.lower()
     category_scores = {}
     for category, keywords in google_keywords.items():
         # Count matches but cap at a reasonable level
-        matches = sum(1 for keyword in keywords if keyword in resume_lower)
-        max_matches = min(len(keywords), 5)  # Cap maximum possible matches
-        # Calculate category score with diminishing returns
-        # First few matches matter more than later ones
         if matches == 0:
-            category_scores[category] = 0.0
         else:
-            # Logarithmic scaling to prevent perfect scores and create more realistic distribution
-            category_scores[category] = min(0.9, (math.log(matches + 1) / math.log(max_matches + 1)) * 0.9)
-    # Calculate weighted score (max should be around 80-85% for an exceptional candidate)
-    weighted_score = sum(score * category_weights[category] for category, score in category_scores.items())
     # Apply final curve to keep scores in a realistic range
-    # Even exceptional candidates should rarely exceed 90%
     match_percentage = min(92, max(35, int(weighted_score * 100)))
-    # Now create a focused prompt for generating the assessment
-    strengths = [category.replace("_", " ") for category, score in category_scores.items() if score > 0.5]
-    weaknesses = [category.replace("_", " ") for category, score in category_scores.items() if score < 0.4]
-    # Extract key parts from resume for better context
     skills_match = re.search(r'Skills:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
     skills_text = skills_match.group(0) if skills_match else ""
     work_match = re.search(r'Previous Work Experience:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
     work_text = work_match.group(0) if work_match else ""
     prompt = f"""
-Resume shows: {skills_text} {work_text}
-Google needs: {GOOGLE_DESCRIPTION[:100]}
-Analyze fit (strengths: {', '.join(strengths)}, areas for improvement: {', '.join(weaknesses)})
-This candidate """
     try:
         # Generate the assessment
         assessment_results = models['evaluator'](
             prompt,
-            max_length=250,
             do_sample=True,
-            temperature=0.4,
-            num_return_sequences=2
         )
-        # Find a good response
         assessment = None
         for result in assessment_results:
             text = result['generated_text'].strip()
-            # Clean up obvious artifacts
-            text = text.replace("This candidate This candidate", "This candidate")
-            text = re.sub(r'(Resume shows:|Google needs:|Analyze fit|strengths:|areas for improvement:)', '', text)
             # Check if it looks valid
-            if text.startswith("This candidate") and len(text) > 40:
                 assessment = text
                 break
         # If no good response was found, fall back to manual assessment
         if not assessment:
-            assessment, _ = generate_manual_assessment(resume_summary, match_percentage)
     except Exception as e:
-        # Fallback assessment with the calculated match percentage
-        assessment, _ = generate_manual_assessment(resume_summary, match_percentage)
         print(f"Error in assessment generation: {e}")
-    # Final cleanup to remove any remaining prompt artifacts
-    assessment = re.sub(r'score: \d+%', '', assessment)  # Remove any existing score
-    # Add the calculated score if not already present
-    if "%" not in assessment:
-        assessment += f" Overall, they have approximately a {match_percentage}% match with Google's requirements."
     execution_time = time.time() - start_time
-    return assessment, match_percentage, execution_time
-def generate_manual_assessment(resume_summary, match_percentage):
     """
-    Generate a manual assessment based on keywords in the resume
-    as a fallback when the model fails. Uses the pre-calculated match percentage.
     """
-    # Define key Google skill categories
-    key_skills = {
-        "technical": ["python", "java", "javascript", "c++", "go", "programming", "coding", "software development"],
-        "advanced_tech": ["machine learning", "ai", "artificial intelligence", "cloud", "data science", "big data"],
-        "problem_solving": ["problem solving", "algorithms", "analytical", "critical thinking", "troubleshooting"],
-        "innovation": ["innovation", "creative", "creativity", "design thinking"],
-        "teamwork": ["team", "leadership", "collaboration", "communication", "agile"]
-    }
-    summary_lower = resume_summary.lower()
-    # Count matches in each category
-    strengths = []
-    weaknesses = []
-    for category, keywords in key_skills.items():
-        matches = sum(1 for keyword in keywords if keyword in summary_lower)
-        if matches >= 2:
-            if category == "technical":
-                strengths.append("strong technical skills")
-            elif category == "advanced_tech":
-                strengths.append("experience with advanced technologies")
-            elif category == "problem_solving":
-                strengths.append("problem-solving abilities")
-            elif category == "innovation":
-                strengths.append("innovative thinking")
-            elif category == "teamwork":
-                strengths.append("teamwork and collaboration skills")
-        elif matches == 0:
-            if category == "technical":
-                weaknesses.append("technical programming skills")
-            elif category == "advanced_tech":
-                weaknesses.append("knowledge of advanced technologies")
-            elif category == "problem_solving":
-                weaknesses.append("demonstrated problem-solving capabilities")
-            elif category == "innovation":
-                weaknesses.append("innovation mindset")
-            elif category == "teamwork":
-                weaknesses.append("team collaboration experience")
-    # Construct assessment
-    assessment = f"This candidate demonstrates {', '.join(strengths[:2])} " if strengths else "This candidate "
-    if len(strengths) > 2:
-        assessment += f"as well as {strengths[2]}. "
     else:
-        assessment += ". "
     if weaknesses:
-        assessment += f"However, they could benefit from developing stronger {' and '.join(weaknesses[:2])}. "
-    assessment += f"Based on the resume analysis, they appear to be a {match_percentage}% match for Google's requirements."
-    return assessment, match_percentage
 #####################################
 # Main Streamlit Interface
@@ -528,30 +542,69 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
         st.info(f"Summary generated in {summarization_time:.2f} seconds")
         # Step 3: Generate Google fit assessment
-        status_text.text("Step 3/3: Evaluating Google fit...")
-        assessment, match_percentage, assessment_time = analyze_google_fit(summary)
-        progress_bar.progress(100)
-        # Clear status messages
-        status_text.empty()
-        # Display Google fit results
-        st.subheader("Google Fit Assessment")
-        # Display match percentage with appropriate color and emoji - with more realistic thresholds
-        if match_percentage >= 85:
-            st.success(f"**Overall Google Match Score:** {match_percentage}% 🌟")
-        elif match_percentage >= 70:
-            st.success(f"**Overall Google Match Score:** {match_percentage}% ✅")
-        elif match_percentage >= 50:
-            st.warning(f"**Overall Google Match Score:** {match_percentage}% ⚠️")
-        else:
-            st.error(f"**Overall Google Match Score:** {match_percentage}% 🔍")
         # Display assessment
         st.markdown("### Expert Assessment")
         st.markdown(assessment)
         st.info(f"Assessment completed in {assessment_time:.2f} seconds")
         # Add potential next steps based on the match percentage

 #####################################
 def analyze_google_fit(resume_summary):
     """
+    Analyze how well the candidate fits Google's requirements with detailed category breakdowns.
+
+    Keywords from five skill categories are looked up in the lower-cased summary as whole
+    words, each category gets a log-scaled score, and the weighted total is clamped to the
+    35-92 range. The evaluator model is then asked for a short narrative assessment; if it
+    raises or returns nothing usable, a rule-based assessment is generated instead.
+
+    Args:
+        resume_summary: Text summary of the resume.
+
+    Returns:
+        Tuple of (assessment text, overall match percentage, per-category details dict,
+        execution time in seconds).
     """
     start_time = time.time()
+    # Define Google's key skill categories with more detailed keywords
     google_keywords = {
+        "technical_skills": ["python", "java", "c++", "javascript", "go", "sql", "algorithms", "data structures",
+                           "coding", "software development", "git", "programming", "backend", "frontend", "full-stack"],
+        "advanced_tech": ["machine learning", "ai", "artificial intelligence", "cloud", "data science", "big data",
+                         "tensorflow", "deep learning", "distributed systems", "kubernetes", "microservices"],
+        "problem_solving": ["problem solving", "analytical", "critical thinking", "troubleshooting", "debugging",
+                           "optimization", "scalability", "system design", "complexity", "efficiency"],
+        "innovation": ["innovation", "creative", "creativity", "design thinking", "research", "novel solutions",
+                      "patents", "publications", "unique approaches", "cutting-edge"],
+        "soft_skills": ["team", "leadership", "collaboration", "communication", "agile", "project management",
+                       "mentoring", "cross-functional", "presentation", "stakeholder management"]
     }
+    # Category weights with descriptive labels
     category_weights = {
+        "technical_skills": {"weight": 0.35, "label": "Technical Programming Skills"},
+        "advanced_tech": {"weight": 0.25, "label": "Advanced Technology Knowledge"},
+        "problem_solving": {"weight": 0.20, "label": "Problem Solving Abilities"},
+        "innovation": {"weight": 0.10, "label": "Innovation Mindset"},
+        "soft_skills": {"weight": 0.10, "label": "Collaboration & Leadership"}
     }
     resume_lower = resume_summary.lower()
+    # Calculate category scores and store detailed information
     category_scores = {}
+    category_details = {}
+    found_skills = {}
     for category, keywords in google_keywords.items():
+        # Find the specific matching keywords for feedback. Match whole words (allowing a
+        # plural "s") instead of raw substrings; otherwise "go" hits "google"/"algorithms",
+        # "ai" hits "email" and "java" hits "javascript", inflating every score.
+        category_matches = [keyword for keyword in keywords
+                            if re.search(rf'(?<!\w){re.escape(keyword)}s?(?!\w)', resume_lower)]
+        found_skills[category] = category_matches
         # Count matches but cap at a reasonable level
+        matches = len(category_matches)
+        total_keywords = len(keywords)
+        # Calculate raw percentage for this category
+        raw_percentage = int((matches / total_keywords) * 100)
+        # Apply logarithmic scaling for more realistic scores
         if matches == 0:
+            adjusted_score = 0.0
         else:
+            # Logarithmic scaling to prevent perfect scores
+            adjusted_score = min(0.95, (math.log(matches + 1) / math.log(min(total_keywords, 8) + 1)))
+        # Store both raw and adjusted scores for feedback
+        category_scores[category] = adjusted_score
+        category_details[category] = {
+            "raw_percentage": raw_percentage,
+            "adjusted_score": int(adjusted_score * 100),
+            "matching_keywords": category_matches,
+            "total_keywords": total_keywords,
+            "matches": matches
+        }
+    # Calculate weighted score
+    weighted_score = sum(score * category_weights[category]["weight"] for category, score in category_scores.items())
     # Apply final curve to keep scores in a realistic range
     match_percentage = min(92, max(35, int(weighted_score * 100)))
+    # Find top strengths and areas for improvement
+    strengths = [(category_weights[cat]["label"], details["adjusted_score"])
+                for cat, details in category_details.items()
+                if details["adjusted_score"] >= 60]
+    weaknesses = [(category_weights[cat]["label"], details["adjusted_score"])
+                 for cat, details in category_details.items()
+                 if details["adjusted_score"] < 50]
+    # Sort strengths and weaknesses by score
+    strengths.sort(key=lambda x: x[1], reverse=True)
+    weaknesses.sort(key=lambda x: x[1])
+    # Create a more detailed prompt for assessment
+    strength_text = ", ".join([f"{s[0]}" for s in strengths[:3]]) if strengths else "limited applicable skills"
+    weakness_text = ", ".join([f"{w[0]}" for w in weaknesses[:3]]) if weaknesses else "no obvious weaknesses"
+    # Extract key resume elements
     skills_match = re.search(r'Skills:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
     skills_text = skills_match.group(0) if skills_match else ""
     work_match = re.search(r'Previous Work Experience:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
     work_text = work_match.group(0) if work_match else ""
+    # List specific matching skills for more detailed assessment
+    specific_skills = []
+    for category, matches in found_skills.items():
+        if matches:
+            specific_skills.extend(matches[:3])  # Take up to 3 skills from each category
+    specific_skills_text = ", ".join(specific_skills[:8]) if specific_skills else "limited identifiable skills"
     prompt = f"""
+Write a detailed assessment of a job candidate for Google.
+Resume highlights: Skills in {specific_skills_text}. {work_text[:200]}
+Strengths: {strength_text}
+Areas for improvement: {weakness_text}
+Match percentage: {match_percentage}%
+Write a detailed 3-5 sentence assessment beginning with "This candidate". Be specific about skills, experiences,
+strengths, weaknesses, and how they align with Google. Mention specific technical skills where relevant.
+"""
     try:
         # Generate the assessment
         assessment_results = models['evaluator'](
             prompt,
+            max_length=350,  # Longer assessment
             do_sample=True,
+            temperature=0.7,  # Higher temperature for more detailed output
+            num_return_sequences=3
         )
+        # Find the best response
         assessment = None
         for result in assessment_results:
             text = result['generated_text'].strip()
+            # Remove prompt artifacts
+            text = re.sub(r'Write a detailed assessment.*?Match percentage:.*?%', '', text, flags=re.DOTALL)
+            # Strip the echoed instruction through its last sentence; a pattern ending in a
+            # lazy ".*?" matches the empty string and would leave the instruction body behind.
+            text = re.sub(r'Write a detailed 3-5 sentence assessment.*?where relevant\.', '', text, flags=re.DOTALL)
             # Check if it looks valid
+            if "this candidate" in text.lower() and len(text) > 100:
                 assessment = text
                 break
         # If no good response was found, fall back to manual assessment
         if not assessment:
+            assessment = generate_detailed_manual_assessment(resume_summary, strengths, weaknesses, specific_skills, match_percentage)
     except Exception as e:
+        # Fallback to detailed manual assessment
+        assessment = generate_detailed_manual_assessment(resume_summary, strengths, weaknesses, specific_skills, match_percentage)
         print(f"Error in assessment generation: {e}")
+    # Final cleanup: make the text open with "This candidate" exactly once. The validity
+    # check above accepts the phrase anywhere in the text, so blindly prefixing it could
+    # yield "This candidate ... this candidate ...". Anything before the phrase is dropped.
+    assessment = assessment.strip()
+    lead = re.search(r'this candidate', assessment, flags=re.IGNORECASE)
+    if lead:
+        assessment = "This candidate" + assessment[lead.end():]
+    else:
+        assessment = f"This candidate {assessment}"
     execution_time = time.time() - start_time
+    return assessment, match_percentage, category_details, execution_time
+def generate_detailed_manual_assessment(resume_summary, strengths, weaknesses, specific_skills, match_percentage):
     """
+    Generate a detailed manual assessment when the model fails.
+
+    Builds the text from fixed sentence templates: an opening based on strengths, an
+    optional sentence on weaknesses, and a closing sentence chosen by match percentage.
+
+    Args:
+        resume_summary: Not used by this function.
+        strengths: Sequence of items whose first element is a category label; the first
+            two labels are named in the opening sentence.
+        weaknesses: Sequence of items whose first element is a category label; the first
+            two labels are lower-cased and named as areas to develop.
+        specific_skills: Sequence of skill strings; up to four (or three when there are
+            no strengths) are listed.
+        match_percentage: Overall match score; selects the closing sentence
+            (>= 70, >= 50, otherwise) and is quoted in it.
+
+    Returns:
+        The assembled assessment string, always starting with "This candidate".
     """
+    # Start with strengths
+    if strengths:
+        assessment = f"This candidate demonstrates proficiency in {', '.join([s[0] for s in strengths[:2]])}. "
+        if specific_skills:
+            assessment += f"Their experience with {', '.join(specific_skills[:4])} aligns with Google's technical requirements. "
     else:
+        # No strengths: open with a limited-alignment sentence instead
+        assessment = "This candidate has limited alignment with Google's key requirements based on the resume provided. "
+        if specific_skills:
+            assessment += f"While they have some experience with {', '.join(specific_skills[:3])}, these skills alone may not be sufficient. "
+    # Add weaknesses
     if weaknesses:
+        assessment += f"To improve their candidacy for Google, they should focus on developing stronger {' and '.join([w[0].lower() for w in weaknesses[:2]])}. "
+    # Add conclusion with match percentage
+    if match_percentage >= 70:
+        assessment += f"Overall, they show good potential for certain roles at Google with a {match_percentage}% match to requirements."
+    elif match_percentage >= 50:
+        assessment += f"With targeted skill development, they may become a stronger candidate for Google, currently showing a {match_percentage}% match."
+    else:
+        assessment += f"Significant skill development would be needed before they could be considered a strong Google candidate, with a current match of {match_percentage}%."
+    return assessment
 #####################################
 # Main Streamlit Interface
         st.info(f"Summary generated in {summarization_time:.2f} seconds")
         # Step 3: Generate Google fit assessment
+        status_text.text("Step 3/3: Evaluating Google fit...")
+        assessment, match_percentage, category_details, assessment_time = analyze_google_fit(summary)
+        progress_bar.progress(100)
+        # Clear status messages
+        status_text.empty()
+        # Display Google fit results
+        st.subheader("Google Fit Assessment")
+        # Display match percentage with appropriate color and emoji - with more realistic thresholds
+        if match_percentage >= 85:
+            st.success(f"**Overall Google Match Score:** {match_percentage}% 🌟")
+        elif match_percentage >= 70:
+            st.success(f"**Overall Google Match Score:** {match_percentage}% ✅")
+        elif match_percentage >= 50:
+            st.warning(f"**Overall Google Match Score:** {match_percentage}% ⚠️")
+        else:
+            st.error(f"**Overall Google Match Score:** {match_percentage}% 🔍")
+        # NEW ADDITION: Add detailed score breakdown
+        st.markdown("### Score Breakdown")
+        # Display label for each scoring category (built once, outside the loop)
+        category_labels = {"technical_skills": "Technical Programming Skills",
+                           "advanced_tech": "Advanced Technology Knowledge",
+                           "problem_solving": "Problem Solving Abilities",
+                           "innovation": "Innovation Mindset",
+                           "soft_skills": "Collaboration & Leadership"}
+        # One table row per category: label, adjusted score, up to three matched keywords
+        breakdown_data = []
+        for category, details in category_details.items():
+            score = details["adjusted_score"]
+            breakdown_data.append({
+                "Category": category_labels[category],
+                "Score": f"{score}%",
+                "Matching Skills": ", ".join(details["matching_keywords"][:3]) if details["matching_keywords"] else "None detected"
+            })
+        # Convert to DataFrame and display
+        import pandas as pd
+        import textwrap
+        breakdown_df = pd.DataFrame(breakdown_data)
+        st.table(breakdown_df)
+        # Show a note about how scores are calculated
+        with st.expander("How are these scores calculated?"):
+            # Dedent before rendering: Markdown treats lines indented four or more spaces
+            # as a code block, which would show the bullets as raw text.
+            st.markdown(textwrap.dedent("""
+            - **Technical Programming Skills** (35% of total): Evaluates coding languages, software development tools, and core programming concepts
+            - **Advanced Technology Knowledge** (25% of total): Assesses experience with cutting-edge technologies like AI, ML, cloud systems
+            - **Problem Solving Abilities** (20% of total): Measures analytical thinking, algorithm design, and optimization skills
+            - **Innovation Mindset** (10% of total): Looks for creativity, research orientation, and novel approaches
+            - **Collaboration & Leadership** (10% of total): Evaluates team skills, communication, and project management
+
+            Scores are calculated based on keyword matches in your resume, with diminishing returns applied (first few skills matter more than later ones).
+            """))
         # Display assessment
         st.markdown("### Expert Assessment")
         st.markdown(assessment)
         st.info(f"Assessment completed in {assessment_time:.2f} seconds")
         # Add potential next steps based on the match percentage