Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Files Community

CR7CAD committed on Mar 18

Commit

8e90008

verified ·

1 Parent(s): de6503f

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -89

app.py CHANGED Viewed

@@ -41,7 +41,7 @@ def load_models():
         # Load T5-small model for evaluation
         models['evaluator'] = pipeline(
             "text2text-generation",
-            model="microsoft/DialoGPT-small",
             max_length=200
         )
@@ -303,94 +303,121 @@ def summarize_resume_text(resume_text):
     return formatted_summary, execution_time
 #####################################
-# Function: Calculate Google Match Score
 #####################################
 def calculate_google_match_score(candidate_summary):
     """
-    Calculate a match score based on skills and experience in the candidate summary
     compared with what Google requires.
     """
-    # Key skills and keywords that Google values
-    google_keywords = {
-        "high_value": [
-            "python", "java", "c++", "go", "javascript",
-            "algorithms", "data structures", "system design",
-            "artificial intelligence", "machine learning", "problem solving",
-            "cloud computing", "cybersecurity", "ux/ui"
-        ],
-        "medium_value": [
-            "react", "angular", "node.js", "sql", "nosql", "git",
-            "agile", "scrum", "docker", "kubernetes", "aws", "azure",
-            "analytics", "automation", "leadership", "teamwork"
-        ]
     }
     summary_lower = candidate_summary.lower()
-    # Count occurrences of high and medium value keywords
-    high_value_count = sum(summary_lower.count(keyword) for keyword in google_keywords["high_value"])
-    medium_value_count = sum(summary_lower.count(keyword) for keyword in google_keywords["medium_value"])
-    # Calculate a weighted score
-    score = (high_value_count * 2 + medium_value_count) / (len(google_keywords["high_value"]) * 2 + len(google_keywords["medium_value"]))
-    # Normalize to 0-1 range
-    normalized_score = min(1.0, max(0.0, score * 2.5))
-    return normalized_score
 #####################################
-# Function: Evaluate Google Fit with T5
 #####################################
 @st.cache_data(show_spinner=False)
-def evaluate_google_fit(candidate_summary, _evaluator=None):
     """
-    Use T5-small model to evaluate how well the candidate matches with Google's requirements.
-    Uses third-person tone in the evaluation.
     """
     start_time = time.time()
     evaluator = _evaluator or models['evaluator']
-    # Calculate a match score
-    match_score = calculate_google_match_score(candidate_summary)
-    score_percent = int(match_score * 100)
-    # Create a template for the T5 prompt
-    # T5 works well with task prefixes
     prompt = f"""
-Evaluate in third-person tone if this candidate is a good fit for Google based on the resume summary and Google's requirements.
-Mention specific skills that match or don't match. Keep it concise.
-Resume Summary: {candidate_summary[:500]}
-Google Requirements: {GOOGLE_DESCRIPTION[:500]}
-Score: {score_percent}/100
 """
-    # Generate the evaluation
-    evaluation_result = evaluator(prompt)[0]['generated_text']
-    # Ensure the output uses third-person tone
-    # T5-small may not always follow instructions perfectly, so we'll check and adjust
-    first_person_pronouns = ["i ", "i'm", "i am", "my ", "mine ", "we ", "our "]
-    second_person_pronouns = ["you ", "your ", "yours "]
-    evaluation_lower = evaluation_result.lower()
-    # If the model used first or second person, prepend a third-person context
-    if any(pronoun in evaluation_lower for pronoun in first_person_pronouns + second_person_pronouns):
-        evaluation_result = f"The candidate {evaluation_result}"
-    # Ensure evaluation starts with third-person phrasing if it doesn't already
-    if not any(evaluation_result.lower().startswith(phrase) for phrase in
-              ["this candidate", "the candidate", "candidate", "this applicant", "the applicant"]):
-        evaluation_result = f"This candidate {evaluation_result}"
     execution_time = time.time() - start_time
-    return match_score, evaluation_result, execution_time
 #####################################
 # Main Streamlit Interface - with Progress Reporting
@@ -398,10 +425,10 @@ Score: {score_percent}/100
 st.title("Google Resume Match Analyzer")
 st.markdown(
     """
-Upload your resume file in **.docx**, **.doc**, or **.txt** format to see how well the candidate matchs with Google's hiring requirements. The app performs the following tasks:
-1. Extracts text from candidate's resume.
 2. Uses AI to generate a structured candidate summary.
-3. Evaluates whether candidates fit for Google or not, providing a match score and specific feedback.
 """
 )
@@ -413,7 +440,7 @@ with st.expander("Google's Requirements", expanded=False):
 uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
 # Process button with optimized flow
-if uploaded_file is not None and st.button("Analyze Google Fit"):
     # Create a placeholder for the progress bar
     progress_bar = st.progress(0)
     status_text = st.empty()
@@ -429,17 +456,18 @@ if uploaded_file is not None and st.button("Analyze Google Fit"):
         # Step 2: Generate summary
         status_text.text("Step 2/3: Analyzing resume and generating summary...")
         summary, summarization_time = summarize_resume_text(resume_text)
-        progress_bar.progress(75)
         # Display summary
-        st.subheader("Candidate Resume Summary")
         st.markdown(summary)
         st.info(f"Summary generated in {summarization_time:.2f} seconds")
-        # Step 3: Evaluate Google fit
-        status_text.text("Step 3/3: Evaluating your fit for Google...")
-        fit_score, evaluation, evaluation_time = evaluate_google_fit(
-            summary, _evaluator=models['evaluator']
         )
         progress_bar.progress(100)
@@ -449,40 +477,50 @@ if uploaded_file is not None and st.button("Analyze Google Fit"):
         # Display Google fit results
         st.subheader("Google Fit Assessment")
-        # Display score with appropriate color and emoji
-        score_percent = int(fit_score * 100)
-        if fit_score >= 0.85:
-            st.success(f"**Google Match Score:** {score_percent}% 🌟")
-        elif fit_score >= 0.70:
-            st.success(f"**Google Match Score:** {score_percent}% ✅")
-        elif fit_score >= 0.50:
-            st.warning(f"**Google Match Score:** {score_percent}% ⚠️")
         else:
-            st.error(f"**Google Match Score:** {score_percent}% 🔍")
-        # Display the evaluation in third-person tone
-        st.markdown("### Feedback from Google AI Recruiter")
-        st.markdown(evaluation)
-        st.info(f"Evaluation completed in {evaluation_time:.2f} seconds")
         # Add potential next steps based on the score
         st.subheader("Recommended Next Steps")
-        if fit_score >= 0.80:
             st.markdown("""
             - Consider applying for positions at Google that match your experience
             - Prepare for technical interviews by practicing algorithms and system design
             - Review Google's interview process and STAR method for behavioral questions
             """)
-        elif fit_score >= 0.60:
-            st.markdown("""
-            - Focus on strengthening the areas mentioned in the evaluation
             - Work on projects that demonstrate your skills in Google's key technology areas
             - Consider taking additional courses in algorithms, system design, or other Google focus areas
             """)
         else:
-            st.markdown("""
-            - Build experience in areas matching Google's requirements
             - Develop projects showcasing problem-solving abilities and technical skills
             - Consider gaining more experience before applying, or target specific Google roles that better match your profile
             """)

         # Load T5-small model for evaluation
         models['evaluator'] = pipeline(
             "text2text-generation",
+            model="google-t5/t5-small",
             max_length=200
         )
     return formatted_summary, execution_time
 #####################################
+# Function: Calculate Google Match Score - Detailed Breakdown
 #####################################
 def calculate_google_match_score(candidate_summary):
     """
     Score a candidate summary against weighted keyword categories.

     Each category has a keyword list and a weight; the weights sum to 1.0.
     A category's score is the fraction of its keywords found in the summary,
     multiplied by 1.5 and capped at 1.0, so matching two thirds of a
     category's keywords already earns full marks. The overall score is the
     weighted sum of the category scores.

     Keywords are matched case-insensitively and must begin at the start of a
     word, so "team" still credits "teamwork" but "ai" does not fire inside
     "maintain". Keywords of three characters or fewer ("go", "ai", "ml",
     "sql", "c++") must also end at a word boundary, because as bare
     substrings they occur inside unrelated words such as "google" or "html".

     Args:
         candidate_summary: Free-text summary of the resume. ``None`` or an
             empty string is treated as a summary with no keyword matches.

     Returns:
         A tuple ``(overall_score, category_scores, score_breakdown)``:
         - overall_score: weighted score between 0 and 1.
         - category_scores: dict mapping each category name to a 0-1 score.
         - score_breakdown: Markdown text listing every category's weight
           and score as whole percentages.
     """
     # Local import: the file's import section is outside the visible code.
     import re

     # Categories with their keywords and relative weight.
     # NOTE: "algorithms" is listed under both Technical Skills and
     # Problem Solving, so one mention credits both categories.
     google_categories = {
         "Technical Skills": {
             "keywords": ["python", "java", "c++", "go", "javascript", "sql", "nosql",
                          "algorithms", "data structures", "system design"],
             "weight": 0.35,
         },
         "Advanced Technologies": {
             "keywords": ["artificial intelligence", "machine learning", "cloud computing",
                          "ai", "ml", "cloud", "data science", "big data",
                          "tensorflow", "pytorch", "deep learning"],
             "weight": 0.25,
         },
         "Problem Solving": {
             "keywords": ["problem solving", "algorithms", "analytical", "critical thinking",
                          "debugging", "troubleshooting", "optimization"],
             "weight": 0.20,
         },
         "Innovation & Creativity": {
             "keywords": ["innovation", "creative", "creativity", "novel", "cutting-edge",
                          "research", "design thinking", "innovative"],
             "weight": 0.10,
         },
         "Teamwork & Leadership": {
             "keywords": ["team", "leadership", "collaborate", "collaboration", "communication",
                          "mentoring", "lead", "coordinate", "agile", "scrum"],
             "weight": 0.10,
         },
     }

     summary_lower = (candidate_summary or "").lower()
     short_keyword_length = 3

     def keyword_present(keyword):
         # Anchor the keyword to the start of a word; short keywords must be
         # whole words because they are common fragments of other words.
         pattern = r"(?<!\w)" + re.escape(keyword)
         if len(keyword) <= short_keyword_length:
             pattern += r"(?!\w)"
         return re.search(pattern, summary_lower) is not None

     # Score each category on a 0-1 scale from its unique keyword hits.
     category_scores = {}
     for category, details in google_categories.items():
         keywords = details["keywords"]
         max_possible = len(keywords)
         if max_possible == 0:
             category_scores[category] = 0
             continue
         matches = sum(1 for keyword in keywords if keyword_present(keyword))
         raw_score = matches / max_possible
         # The 1.5 multiplier is the "curve": full marks at 2/3 coverage.
         category_scores[category] = min(1.0, raw_score * 1.5)

     # Weighted overall score, clamped to the 0-1 range.
     overall_score = sum(
         score * google_categories[category]["weight"]
         for category, score in category_scores.items()
     )
     overall_score = min(1.0, max(0.0, overall_score))

     # Human-readable Markdown breakdown, one bullet per category.
     breakdown_lines = ["**Score Breakdown by Category:**\n\n"]
     for category, score in category_scores.items():
         percentage = int(score * 100)
         weight = int(google_categories[category]["weight"] * 100)
         breakdown_lines.append(f"• **{category}** ({weight}% of total): {percentage}%\n")
     score_breakdown = "".join(breakdown_lines)

     return overall_score, category_scores, score_breakdown
 #####################################
+# Function: Generate Aspect-Based Feedback with T5
 #####################################
 @st.cache_data(show_spinner=False)
 def generate_aspect_feedback(candidate_summary, category_scores, _evaluator=None):
     """
     Generate short third-person feedback on the strongest and weakest categories.

     Categories are ranked by score. The top two are given to the model as
     strengths; the lowest two of the remaining categories are given as
     improvement areas, so a category is never listed as both even when fewer
     than four categories are supplied. A prompt line is left out entirely
     when its list would be empty.

     Args:
         candidate_summary: The resume summary. It is not inserted into the
             prompt; it is an argument of the cached call only.
         category_scores: Dict mapping category name to a numeric score, as
             produced by ``calculate_google_match_score``.
         _evaluator: Callable text2text pipeline. When falsy, the pipeline
             stored in the module-level ``models['evaluator']`` is used.
             NOTE(review): the leading underscore is presumably there so
             Streamlit's cache does not try to hash the pipeline - confirm
             against the ``st.cache_data`` documentation.

     Returns:
         A tuple ``(feedback, execution_time)``: the generated text, prefixed
         with "This candidate " unless it already opens with "the candidate"
         or "this candidate", and the wall-clock seconds spent in this call.
     """
     start_time = time.time()
     evaluator = _evaluator or models['evaluator']

     # Rank categories from highest to lowest score.
     ranked = sorted(category_scores.items(), key=lambda item: item[1], reverse=True)
     strengths = [name for name, _ in ranked[:2]]
     # Take weaknesses only from what is left after the strengths are removed.
     improvement_areas = [name for name, _ in ranked[2:][-2:]]

     # Build the prompt; with four or more categories this matches the
     # original three-line prompt exactly (leading and trailing newline kept).
     prompt_lines = ["Generate specific third-person feedback on the candidate's fit for Google."]
     if strengths:
         prompt_lines.append(f"Focus on these strengths: {', '.join(strengths)}.")
     if improvement_areas:
         prompt_lines.append(f"And these improvement areas: {', '.join(improvement_areas)}.")
     prompt = "\n" + "\n".join(prompt_lines) + "\n"

     # Generate focused feedback.
     feedback = evaluator(prompt)[0]['generated_text']

     # The small model does not reliably follow the tone instruction, so force
     # a third-person opening when the text does not already have one.
     if not feedback.lower().startswith(("the candidate", "this candidate")):
         feedback = f"This candidate {feedback}"

     execution_time = time.time() - start_time
     return feedback, execution_time
 #####################################
 # Main Streamlit Interface - with Progress Reporting
 st.title("Google Resume Match Analyzer")
 st.markdown(
     """
+Upload your resume file in **.docx**, **.doc**, or **.txt** format to see how well you match with Google's hiring requirements. The app performs the following tasks:
+1. Extracts text from your resume.
 2. Uses AI to generate a structured candidate summary.
+3. Evaluates your fit for Google across key hiring criteria with a detailed score breakdown.
 """
 )
 uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
 # Process button with optimized flow
+if uploaded_file is not None and st.button("Analyze My Google Fit"):
     # Create a placeholder for the progress bar
     progress_bar = st.progress(0)
     status_text = st.empty()
         # Step 2: Generate summary
         status_text.text("Step 2/3: Analyzing resume and generating summary...")
         summary, summarization_time = summarize_resume_text(resume_text)
+        progress_bar.progress(50)
         # Display summary
+        st.subheader("Your Resume Summary")
         st.markdown(summary)
         st.info(f"Summary generated in {summarization_time:.2f} seconds")
+        # Step 3: Calculate scores and generate feedback
+        status_text.text("Step 3/3: Calculating Google fit scores...")
+        overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
+        feedback, feedback_time = generate_aspect_feedback(
+            summary, category_scores, _evaluator=models['evaluator']
         )
         progress_bar.progress(100)
         # Display Google fit results
         st.subheader("Google Fit Assessment")
+        # Display overall score with appropriate color and emoji
+        score_percent = int(overall_score * 100)
+        if overall_score >= 0.85:
+            st.success(f"**Overall Google Match Score:** {score_percent}% 🌟")
+        elif overall_score >= 0.70:
+            st.success(f"**Overall Google Match Score:** {score_percent}% ✅")
+        elif overall_score >= 0.50:
+            st.warning(f"**Overall Google Match Score:** {score_percent}% ⚠️")
         else:
+            st.error(f"**Overall Google Match Score:** {score_percent}% 🔍")
+        # Display score breakdown
+        st.markdown("### Score Calculation")
+        st.markdown(score_breakdown)
+        # Display focused feedback
+        st.markdown("### Expert Assessment")
+        st.markdown(feedback)
+        st.info(f"Assessment completed in {feedback_time:.2f} seconds")
         # Add potential next steps based on the score
         st.subheader("Recommended Next Steps")
+        # Find the weakest categories
+        weakest_categories = sorted(category_scores.items(), key=lambda x: x[1])[:2]
+        if overall_score >= 0.80:
             st.markdown("""
             - Consider applying for positions at Google that match your experience
             - Prepare for technical interviews by practicing algorithms and system design
             - Review Google's interview process and STAR method for behavioral questions
             """)
+        elif overall_score >= 0.60:
+            improvement_areas = ", ".join([cat for cat, _ in weakest_categories])
+            st.markdown(f"""
+            - Focus on strengthening these areas: {improvement_areas}
             - Work on projects that demonstrate your skills in Google's key technology areas
             - Consider taking additional courses in algorithms, system design, or other Google focus areas
             """)
         else:
+            improvement_areas = ", ".join([cat for cat, _ in weakest_categories])
+            st.markdown(f"""
+            - Build experience in these critical areas: {improvement_areas}
             - Develop projects showcasing problem-solving abilities and technical skills
             - Consider gaining more experience before applying, or target specific Google roles that better match your profile
             """)