Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Files Community

CR7CAD committed on Mar 19

Commit

ee0c7bb

verified ·

1 Parent(s): 2989c23

Update app.py

Browse files

Files changed (1) hide show

app.py +156 -53

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import docx2txt
 import tempfile
 import time
 import re
 import concurrent.futures
 from functools import lru_cache
 from transformers import pipeline
@@ -315,71 +316,173 @@ def summarize_resume_text(resume_text):
 def analyze_google_fit(resume_summary):
     """
     Analyze how well the candidate fits Google's requirements.
-    This uses the T5 model to generate a natural language assessment.
     """
     start_time = time.time()
-    # Carefully craft a prompt that won't be repeated in the output
     prompt = f"""
-Google hiring analysis:
-Resume summary: {resume_summary}
-Google requirements: {GOOGLE_DESCRIPTION}
-Evaluate this candidate for Google. Consider technical skills, experience, and culture fit.
-Write a paragraph starting with "This candidate" describing their fit for Google.
-Include strengths, weaknesses, and a match percentage (0-100%).
-"""
     try:
         # Generate the assessment
-        response = models['evaluator'](
             prompt,
-            max_length=300,
             do_sample=True,
-            temperature=0.7
         )
-        assessment = response[0]['generated_text']
-        # Clean up assessment to prevent instruction leakage
-        assessment = assessment.replace("Google hiring analysis:", "")
-        assessment = assessment.replace("Resume summary:", "")
-        assessment = assessment.replace("Google requirements:", "")
-        assessment = assessment.replace("Evaluate this candidate for Google.", "")
-        assessment = assessment.replace("Write a paragraph", "")
-        assessment = assessment.replace("starting with", "")
-        assessment = assessment.replace("Consider technical skills, experience, and culture fit.", "")
-        assessment = assessment.replace("Include strengths, weaknesses, and a match percentage", "")
-        # Make sure it starts properly
-        if not assessment.strip().startswith("This candidate"):
-            assessment = "This candidate " + assessment.strip()
-        # Extract match percentage if present
-        match_percentage = None
-        percentage_pattern = r'(\d{1,3})%'
-        match = re.search(percentage_pattern, assessment)
-        if match:
-            match_percentage = int(match.group(1))
-            # Ensure it's in valid range
-            match_percentage = min(100, max(0, match_percentage))
-        # If no percentage was found in the text, default to 50%
-        if match_percentage is None:
-            match_percentage = 50
-            # Add a percentage to the end of assessment
-            assessment += f" Overall match: {match_percentage}%."
     except Exception as e:
-        # Fallback if model fails
-        assessment = "This candidate's resume has been analyzed. Based on skills and experience, they may have some relevant qualities for Google, but a detailed assessment couldn't be generated. Please review the resume summary manually."
-        match_percentage = 50
     execution_time = time.time() - start_time
     return assessment, match_percentage, execution_time
 #####################################
 # Main Streamlit Interface
 #####################################
@@ -434,8 +537,8 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
         # Display Google fit results
         st.subheader("Google Fit Assessment")
-        # Display match percentage with appropriate color and emoji
         if match_percentage >= 85:
             st.success(f"**Overall Google Match Score:** {match_percentage}% 🌟")
         elif match_percentage >= 70:
@@ -444,10 +547,10 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
             st.warning(f"**Overall Google Match Score:** {match_percentage}% ⚠️")
         else:
             st.error(f"**Overall Google Match Score:** {match_percentage}% 🔍")
-        # Display assessment
-        st.markdown("### Expert Assessment")
-        st.markdown(assessment)
         st.info(f"Assessment completed in {assessment_time:.2f} seconds")

 import tempfile
 import time
 import re
+import math
 import concurrent.futures
 from functools import lru_cache
 from transformers import pipeline
 def analyze_google_fit(resume_summary):
     """
     Analyze how well the candidate fits Google's requirements.
+    This uses the model to generate a natural language assessment with a realistic match score.
     """
     start_time = time.time()
+    # First, calculate a realistic score based on keyword matching and balanced criteria
+    google_keywords = {
+        "technical_skills": ["python", "java", "c++", "javascript", "go", "sql", "algorithms", "data structures", "coding"],
+        "advanced_tech": ["machine learning", "ai", "artificial intelligence", "cloud", "data science", "big data", "tensorflow", "deep learning"],
+        "problem_solving": ["problem solving", "analytical", "critical thinking", "troubleshooting", "debugging", "optimization"],
+        "innovation": ["innovation", "creative", "creativity", "design thinking", "research", "novel"],
+        "soft_skills": ["team", "leadership", "collaboration", "communication", "agile", "project management"]
+    }
+    # Calculate realistic score with category weights
+    category_weights = {
+        "technical_skills": 0.35,
+        "advanced_tech": 0.25,
+        "problem_solving": 0.20,
+        "innovation": 0.10,
+        "soft_skills": 0.10
+    }
+    resume_lower = resume_summary.lower()
+    category_scores = {}
+    for category, keywords in google_keywords.items():
+        # Count matches but cap at a reasonable level
+        matches = sum(1 for keyword in keywords if keyword in resume_lower)
+        max_matches = min(len(keywords), 5)  # Cap maximum possible matches
+        # Calculate category score with diminishing returns
+        # First few matches matter more than later ones
+        if matches == 0:
+            category_scores[category] = 0.0
+        else:
+            # Logarithmic scaling to prevent perfect scores and create more realistic distribution
+            category_scores[category] = min(0.9, (math.log(matches + 1) / math.log(max_matches + 1)) * 0.9)
+    # Calculate weighted score (max should be around 80-85% for an exceptional candidate)
+    weighted_score = sum(score * category_weights[category] for category, score in category_scores.items())
+    # Apply final curve to keep scores in a realistic range
+    # Even exceptional candidates should rarely exceed 90%
+    match_percentage = min(92, max(35, int(weighted_score * 100)))
+    # Now create a focused prompt for generating the assessment
+    strengths = [category.replace("_", " ") for category, score in category_scores.items() if score > 0.5]
+    weaknesses = [category.replace("_", " ") for category, score in category_scores.items() if score < 0.4]
+    # Extract key parts from resume for better context
+    skills_match = re.search(r'Skills:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
+    skills_text = skills_match.group(0) if skills_match else ""
+    work_match = re.search(r'Previous Work Experience:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
+    work_text = work_match.group(0) if work_match else ""
     prompt = f"""
+Resume shows: {skills_text} {work_text}
+Google needs: {GOOGLE_DESCRIPTION[:100]}
+Analyze fit (strengths: {', '.join(strengths)}, areas for improvement: {', '.join(weaknesses)})
+This candidate """
     try:
         # Generate the assessment
+        assessment_results = models['evaluator'](
             prompt,
+            max_length=250,
             do_sample=True,
+            temperature=0.4,
+            num_return_sequences=2
         )
+        # Find a good response
+        assessment = None
+        for result in assessment_results:
+            text = result['generated_text'].strip()
+            # Clean up obvious artifacts
+            text = text.replace("This candidate This candidate", "This candidate")
+            text = re.sub(r'(Resume shows:|Google needs:|Analyze fit|strengths:|areas for improvement:)', '', text)
+            # Check if it looks valid
+            if text.startswith("This candidate") and len(text) > 40:
+                assessment = text
+                break
+        # If no good response was found, fall back to manual assessment
+        if not assessment:
+            assessment, _ = generate_manual_assessment(resume_summary, match_percentage)
     except Exception as e:
+        # Fallback assessment with the calculated match percentage
+        assessment, _ = generate_manual_assessment(resume_summary, match_percentage)
+        print(f"Error in assessment generation: {e}")
+    # Final cleanup to remove any remaining prompt artifacts
+    assessment = re.sub(r'score: \d+%', '', assessment)  # Remove any existing score
+    # Add the calculated score if not already present
+    if "%" not in assessment:
+        assessment += f" Overall, they have approximately a {match_percentage}% match with Google's requirements."
     execution_time = time.time() - start_time
     return assessment, match_percentage, execution_time
def _mentions_term(text, term):
    """Return True when *term* appears in *text* as a whole word or phrase.

    *text* is expected to be lower-cased already. An optional trailing "s" is
    accepted so simple plurals ("teams") still count. Look-arounds are used
    instead of ``\\b`` because terms such as "c++" end in non-word characters.
    """
    pattern = rf"(?<![a-z0-9]){re.escape(term)}s?(?![a-z0-9])"
    return re.search(pattern, text) is not None


def generate_manual_assessment(resume_summary, match_percentage):
    """
    Generate a manual assessment based on keywords in the resume
    as a fallback when the model fails. Uses the pre-calculated match percentage.

    A category with two or more keyword hits is reported as a strength, and a
    category with no hits as an area to develop. At most three strengths and
    two weaknesses are mentioned.

    Args:
        resume_summary: Resume summary text. ``None`` or an empty string is
            treated as a resume with no matching keywords.
        match_percentage: Score to quote in the closing sentence; it is
            returned unchanged.

    Returns:
        A tuple ``(assessment, match_percentage)``.
    """
    # Define key Google skill categories
    key_skills = {
        "technical": ["python", "java", "javascript", "c++", "go", "programming", "coding", "software development"],
        "advanced_tech": ["machine learning", "ai", "artificial intelligence", "cloud", "data science", "big data"],
        "problem_solving": ["problem solving", "algorithms", "analytical", "critical thinking", "troubleshooting"],
        "innovation": ["innovation", "creative", "creativity", "design thinking"],
        "teamwork": ["team", "leadership", "collaboration", "communication", "agile"],
    }

    # category -> (wording when it is a strength, wording when it is a weakness)
    category_phrases = {
        "technical": ("strong technical skills", "technical programming skills"),
        "advanced_tech": ("experience with advanced technologies", "knowledge of advanced technologies"),
        "problem_solving": ("problem-solving abilities", "demonstrated problem-solving capabilities"),
        "innovation": ("innovative thinking", "innovation mindset"),
        "teamwork": ("teamwork and collaboration skills", "team collaboration experience"),
    }

    # Guard against a missing summary so .lower() cannot fail.
    summary_lower = (resume_summary or "").lower()

    # Count matches in each category
    strengths = []
    weaknesses = []
    for category, keywords in key_skills.items():
        # Whole-word matching: plain substring tests would count "ai" inside
        # "painting" or "go" inside "good".
        matches = sum(1 for keyword in keywords if _mentions_term(summary_lower, keyword))
        strength_phrase, weakness_phrase = category_phrases[category]
        if matches >= 2:
            strengths.append(strength_phrase)
        elif matches == 0:
            weaknesses.append(weakness_phrase)
        # Exactly one match is treated as neutral.

    # Construct the opening sentence so it is well-formed for any number of
    # strengths (no dangling "This candidate ." or "skills .").
    if not strengths:
        assessment = "This candidate shows limited evidence of the key skills Google looks for. "
    elif len(strengths) == 1:
        assessment = f"This candidate demonstrates {strengths[0]}. "
    elif len(strengths) == 2:
        assessment = f"This candidate demonstrates {strengths[0]} and {strengths[1]}. "
    else:
        assessment = f"This candidate demonstrates {', '.join(strengths[:2])} as well as {strengths[2]}. "

    if weaknesses:
        # "However" only reads correctly when it contrasts with stated strengths.
        lead_in = "However, they" if strengths else "They"
        assessment += f"{lead_in} could benefit from developing stronger {' and '.join(weaknesses[:2])}. "

    assessment += f"Based on the resume analysis, they appear to be a {match_percentage}% match for Google's requirements."
    return assessment, match_percentage
 #####################################
 # Main Streamlit Interface
 #####################################
         # Display Google fit results
         st.subheader("Google Fit Assessment")
+        # Display match percentage with appropriate color and emoji - with more realistic thresholds
         if match_percentage >= 85:
             st.success(f"**Overall Google Match Score:** {match_percentage}% 🌟")
         elif match_percentage >= 70:
             st.warning(f"**Overall Google Match Score:** {match_percentage}% ⚠️")
         else:
             st.error(f"**Overall Google Match Score:** {match_percentage}% 🔍")
+# Display assessment
+st.markdown("### Expert Assessment")
+st.markdown(assessment)
         st.info(f"Assessment completed in {assessment_time:.2f} seconds")