Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Files Community

CR7CAD committed on Mar 18

Commit

3e9d890

verified ·

1 Parent(s): ca31f44

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -67

app.py CHANGED Viewed

@@ -38,13 +38,11 @@ def load_models():
         # Load smaller summarization model for speed
         models['summarizer'] = pipeline("summarization", model="facebook/bart-large-cnn", max_length=130)
-        # Load TinyLlama model for evaluation
         models['evaluator'] = pipeline(
-            "text-generation",
-            model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-            max_new_tokens=200,
-            do_sample=True,
-            temperature=0.7
         )
         return models
@@ -305,85 +303,94 @@ def summarize_resume_text(resume_text):
     return formatted_summary, execution_time
 #####################################
# Function: Evaluate Google Fit with TinyLlama
#####################################
@st.cache_data(show_spinner=False)
def evaluate_google_fit(candidate_summary, _evaluator=None):
    """
    Ask the TinyLlama chat pipeline how well a candidate matches Google.

    Args:
        candidate_summary: Text summary of the candidate's resume.
        _evaluator: Optional text-generation callable; when falsy, the shared
            ``models['evaluator']`` pipeline is used instead.

    Returns:
        A ``(normalized_score, assistant_response, execution_time)`` tuple:
        the fit score scaled to 0-1, the model's reply text, and the elapsed
        wall-clock seconds.
    """
    started_at = time.time()
    llm = _evaluator or models['evaluator']

    # Chat-formatted prompt that ends with the assistant tag.
    prompt = f"""<|im_start|>system
You are an expert technical recruiter at Google. Your task is to evaluate how well a candidate's profile matches with Google's hiring requirements. Be focused and specific in your evaluation.
<|im_end|>
<|im_start|>user
I need to evaluate if this candidate is a good fit for Google. Please:
1. Score the candidate's fit for Google from 0-100
2. Write a brief evaluation (2-3 sentences) explaining why they would or wouldn't be a good fit
3. Mention 1-2 specific strengths relevant to Google
4. Mention 1 specific area where they might need improvement to better fit Google's requirements
Candidate Profile:
{candidate_summary}
Google's Requirements:
{GOOGLE_DESCRIPTION}
<|im_end|>
<|im_start|>assistant
"""

    generated = llm(prompt)[0]['generated_text']

    # Keep only what follows the assistant tag, then cut at the first end tag.
    assistant_tag = "<|im_start|>assistant"
    reply_offset = generated.find(assistant_tag) + len(assistant_tag)
    assistant_response = generated[reply_offset:].strip()
    assistant_response = assistant_response.split("<|im_end|>")[0].strip()

    reply_lower = assistant_response.lower()
    score_pattern = r'(\d{1,3})/100|score:?\s*(\d{1,3})|rating:?\s*(\d{1,3})|suitability:?\s*(\d{1,3})'
    found = re.search(score_pattern, reply_lower)

    if found:
        # Exactly one alternative matched; pick its captured digits.
        captured = next((grp for grp in found.groups() if grp is not None), None)
        if captured is None:
            normalized_score = 0.5
        else:
            normalized_score = min(100, max(0, int(captured))) / 100
    else:
        # No explicit number: fall back to a crude sentiment word tally.
        upbeat = ['excellent', 'perfect', 'outstanding', 'ideal', 'great', 'strong']
        downbeat = ['poor', 'inadequate', 'insufficient', 'lacks', 'mismatch', 'weak']
        ups = sum(reply_lower.count(term) for term in upbeat)
        downs = sum(reply_lower.count(term) for term in downbeat)

        if ups > downs * 2:
            normalized_score = 0.85
        elif ups > downs:
            normalized_score = 0.7
        elif downs > ups * 2:
            normalized_score = 0.3
        elif downs > ups:
            normalized_score = 0.4
        else:
            normalized_score = 0.5

    execution_time = time.time() - started_at
    return normalized_score, assistant_response, execution_time
 #####################################
 # Main Streamlit Interface - with Progress Reporting
@@ -453,7 +460,7 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
         else:
             st.error(f"**Google Match Score:** {score_percent}% 🔍")
-        # Display the full evaluation
         st.markdown("### Feedback from Google AI Recruiter")
         st.markdown(evaluation)
@@ -469,9 +476,9 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
             """)
         elif fit_score >= 0.60:
             st.markdown("""
-            - Focus on strengthening the improvement areas mentioned in the evaluation
             - Work on projects that demonstrate your skills in Google's key technology areas
-            - Consider taking additional courses in areas where Google has shown interest
             """)
         else:
             st.markdown("""

         # Load smaller summarization model for speed
         models['summarizer'] = pipeline("summarization", model="facebook/bart-large-cnn", max_length=130)
+        # Load T5-small model for evaluation
         models['evaluator'] = pipeline(
+            "text2text-generation",
+            model="google-t5/t5-small",
+            max_length=200
         )
         return models
     return formatted_summary, execution_time
 #####################################
+# Function: Calculate Google Match Score
+#####################################
def calculate_google_match_score(candidate_summary):
    """
    Calculate a keyword-based match score for a candidate summary.

    Each whole-token occurrence of a high-value keyword counts twice and each
    occurrence of a medium-value keyword counts once. The weighted total is
    divided by the maximum weight of one hit per keyword, scaled by 2.5, and
    clamped to the 0-1 range.

    Keywords are matched as whole tokens rather than raw substrings, so "go"
    is not counted inside "google", "java" inside "javascript", "sql" inside
    "nosql", or "git" inside "digital".

    Args:
        candidate_summary: Text summary of the candidate; ``None`` or an empty
            string yields a score of 0.0.

    Returns:
        A float between 0.0 and 1.0.
    """
    # Function-scope import so the block does not rely on file-level imports.
    import re

    # Key skills and keywords that Google values
    google_keywords = {
        "high_value": [
            "python", "java", "c++", "go", "javascript",
            "algorithms", "data structures", "system design",
            "artificial intelligence", "machine learning", "problem solving",
            "cloud computing", "cybersecurity", "ux/ui"
        ],
        "medium_value": [
            "react", "angular", "node.js", "sql", "nosql", "git",
            "agile", "scrum", "docker", "kubernetes", "aws", "azure",
            "analytics", "automation", "leadership", "teamwork"
        ]
    }
    summary_lower = (candidate_summary or "").lower()

    def count_hits(keywords):
        # Lookarounds instead of \b: \b does not work next to the
        # non-word characters in keywords such as "c++".
        return sum(
            len(re.findall(rf"(?<!\w){re.escape(keyword)}(?!\w)", summary_lower))
            for keyword in keywords
        )

    high_value_count = count_hits(google_keywords["high_value"])
    medium_value_count = count_hits(google_keywords["medium_value"])

    # Weighted score relative to one hit for every keyword
    max_weight = len(google_keywords["high_value"]) * 2 + len(google_keywords["medium_value"])
    score = (high_value_count * 2 + medium_value_count) / max_weight

    # Scale and clamp to the 0-1 range
    normalized_score = min(1.0, max(0.0, score * 2.5))
    return normalized_score
#####################################
# Function: Evaluate Google Fit with T5
#####################################
@st.cache_data(show_spinner=False)
def evaluate_google_fit(candidate_summary, _evaluator=None):
    """
    Use the T5-small pipeline to describe how well the candidate matches
    Google's requirements, phrased in the third person.

    Args:
        candidate_summary: Text summary of the candidate's resume.
        _evaluator: Optional text2text callable; when falsy, the shared
            ``models['evaluator']`` pipeline is used instead.

    Returns:
        A ``(match_score, evaluation_result, execution_time)`` tuple: the
        keyword-based score from ``calculate_google_match_score`` (0-1), the
        generated evaluation text, and the elapsed wall-clock seconds.
    """
    # Function-scope import so the block does not rely on file-level imports.
    import re

    start_time = time.time()
    evaluator = _evaluator or models['evaluator']

    # Calculate a match score
    match_score = calculate_google_match_score(candidate_summary)
    score_percent = int(match_score * 100)

    # Create a template for the T5 prompt
    prompt = f"""
Evaluate in third-person tone if this candidate is a good fit for Google based on the resume summary and Google's requirements.
Mention specific skills that match or don't match. Keep it concise.
Resume Summary: {candidate_summary[:500]}
Google Requirements: {GOOGLE_DESCRIPTION[:500]}
Score: {score_percent}/100
"""

    # Generate the evaluation; strip so leading whitespace cannot defeat the
    # startswith check below.
    evaluation_result = evaluator(prompt)[0]['generated_text'].strip()

    # T5-small may not follow the tone instruction, so make sure the text opens
    # with a third-person phrase. Text that already does is left untouched,
    # which avoids output like "The candidate The candidate ...".
    third_person_openers = (
        "this candidate", "the candidate", "candidate",
        "this applicant", "the applicant",
    )
    if not evaluation_result.lower().startswith(third_person_openers):
        # Whole-word match: substring checks such as "i " or "our " also fire
        # on "AI ", "your " and "four ".
        uses_first_or_second_person = re.search(
            r"\b(?:i|my|mine|we|our|you|your|yours)\b",
            evaluation_result,
            re.IGNORECASE,
        )
        opener = "The candidate" if uses_first_or_second_person else "This candidate"
        evaluation_result = f"{opener} {evaluation_result}"

    execution_time = time.time() - start_time
    return match_score, evaluation_result, execution_time
 #####################################
 # Main Streamlit Interface - with Progress Reporting
         else:
             st.error(f"**Google Match Score:** {score_percent}% 🔍")
+        # Display the evaluation in third-person tone
         st.markdown("### Feedback from Google AI Recruiter")
         st.markdown(evaluation)
             """)
         elif fit_score >= 0.60:
             st.markdown("""
+            - Focus on strengthening the areas mentioned in the evaluation
             - Work on projects that demonstrate your skills in Google's key technology areas
+            - Consider taking additional courses in algorithms, system design, or other Google focus areas
             """)
         else:
             st.markdown("""