Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Files Community

CR7CAD committed on Mar 18

Commit

cda9adf

verified ·

1 Parent(s): d3c5eab

Update app.py

Browse files

Files changed (1) hide show

app.py +126 -15

app.py CHANGED Viewed

@@ -42,7 +42,16 @@ def load_models():
             max_length=100,
             truncation=True
         )
-        # We don't need T5 model anymore since we're using template-based feedback
         return models
 # Preload models immediately when app starts
@@ -152,7 +161,7 @@ def extract_industry(text, base_summary):
         "information systems": ["information systems", "ERP", "systems management"]
     }
-    # Use the base summary (already lowercased) to speed up matching
     combined_text = base_summary.lower()
     counts = {}
@@ -210,6 +219,7 @@ def extract_skills_and_work(text):
         for skill in skills:
             if skill.lower() in text_lower:
                 category_skills.append(skill)
         if category_skills:
             found_skills.append(f"{category}: {', '.join(category_skills)}")
@@ -219,6 +229,7 @@ def extract_skills_and_work(text):
     for idx, line in enumerate(lines):
         line_lower = line.lower().strip()
         # Start of work section
         if not in_work_section:
             if any(header in line_lower for header in work_headers):
@@ -228,6 +239,7 @@ def extract_skills_and_work(text):
         elif in_work_section:
             if any(header in line_lower for header in next_section_headers):
                 break
             if line.strip():
                 work_section.append(line.strip())
@@ -235,16 +247,21 @@ def extract_skills_and_work(text):
     if not work_section:
         work_experience = "Work experience not clearly identified"
     else:
         work_lines = []
         company_count = 0
         for line in work_section:
             if re.search(r'(19|20)\d{2}', line):
                 company_count += 1
                 if company_count <= 3:  # Limit to 3 most recent positions
                     work_lines.append(f"**{line}**")
                 else:
                     break
-            elif company_count <= 3 and len(work_lines) < 10:
                 work_lines.append(line)
         work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
@@ -262,16 +279,17 @@ def summarize_resume_text(resume_text):
     """
     start_time = time.time()
-    # First, generate a quick summary using the preloaded model
     max_input_length = 1024  # Model limit
-    # Only summarize the first 1024 characters for speed
-    text_to_summarize = resume_text[:max_input_length]
-    base_summary = models['summarizer'](text_to_summarize, truncation=True)[0]['summary_text']
     # Extract information in parallel where possible
-    # Limit the number of workers to reduce overhead
-    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
-        name_future = executor.submit(extract_name, resume_text[:500])  # Only use the start of text
         age_future = executor.submit(extract_age, resume_text)
         industry_future = executor.submit(extract_industry, resume_text, base_summary)
         skills_work_future = executor.submit(extract_skills_and_work, resume_text)
@@ -290,6 +308,7 @@ def summarize_resume_text(resume_text):
     formatted_summary += f"Skills: {skills}"
     execution_time = time.time() - start_time
     return formatted_summary, execution_time
 #####################################
@@ -299,6 +318,7 @@ def calculate_google_match_score(candidate_summary):
     """
     Calculate a detailed match score breakdown based on skills and experience in the candidate summary
     compared with what Google requires.
     Returns:
     - overall_score: A normalized score between 0 and 1
     - category_scores: A dictionary with scores for each category
@@ -340,19 +360,26 @@ def calculate_google_match_score(candidate_summary):
     category_scores = {}
     for category, details in google_categories.items():
         keywords = details["keywords"]
-        max_possible = len(keywords)
         matches = sum(1 for keyword in keywords if keyword in summary_lower)
         if max_possible > 0:
             raw_score = matches / max_possible
             category_scores[category] = min(1.0, raw_score * 1.5)
         else:
             category_scores[category] = 0
     overall_score = sum(
         score * google_categories[category]["weight"]
         for category, score in category_scores.items()
     )
     overall_score = min(1.0, max(0.0, overall_score))
     # Create score breakdown explanation
@@ -370,15 +397,16 @@ def calculate_google_match_score(candidate_summary):
 #####################################
 def generate_template_feedback(category_scores):
     """
-    Generate comprehensive template-based feedback without using ML model for speed and reliability.
     """
     start_time = time.time()
-    import random
     sorted_categories = sorted(category_scores.items(), key=lambda x: x[1], reverse=True)
     top_categories = sorted_categories[:2]
-    bottom_categories = sorted(category_scores.items(), key=lambda x: x[1])[:2]
     top_feedback_templates = {
         "Technical Skills": [
             "demonstrates strong technical skills with proficiency in programming languages and technical tools that Google values.",
@@ -407,6 +435,7 @@ def generate_template_feedback(category_scores):
         ]
     }
     bottom_feedback_templates = {
         "Technical Skills": [
             "should strengthen their technical skills, particularly in programming languages commonly used at Google such as Python, Java, or C++.",
@@ -435,21 +464,32 @@ def generate_template_feedback(category_scores):
         ]
     }
     top_category = top_categories[0][0]
     top_feedback = random.choice(top_feedback_templates.get(top_category, ["shows notable skills"]))
     bottom_category = bottom_categories[0][0]
     bottom_feedback = random.choice(bottom_feedback_templates.get(bottom_category, ["could improve their skills"]))
     feedback = f"This candidate {top_feedback} "
     if top_categories[1][1] >= 0.6:
         second_top = top_categories[1][0]
         second_top_feedback = random.choice(top_feedback_templates.get(second_top, ["has good abilities"]))
         feedback += f"The candidate also {second_top_feedback} "
     feedback += f"However, the candidate {bottom_feedback} "
     overall_score = sum(score * weight for (category, score), weight in
                        zip(category_scores.items(), [0.35, 0.25, 0.20, 0.10, 0.10]))
@@ -461,6 +501,54 @@ def generate_template_feedback(category_scores):
         feedback += "The candidate would need significant development to meet Google's standards."
     execution_time = time.time() - start_time
     return feedback, execution_time
 #####################################
@@ -483,8 +571,13 @@ with st.expander("Google's Requirements", expanded=False):
 # File uploader
 uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
 # Process button with optimized flow
 if uploaded_file is not None and st.button("Analyze My Google Fit"):
     progress_bar = st.progress(0)
     status_text = st.empty()
@@ -501,6 +594,7 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
         summary, summarization_time = summarize_resume_text(resume_text)
         progress_bar.progress(50)
         st.subheader("Your Resume Summary")
         st.markdown(summary)
         st.info(f"Summary generated in {summarization_time:.2f} seconds")
@@ -508,12 +602,24 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
         # Step 3: Calculate scores and generate feedback
         status_text.text("Step 3/3: Calculating Google fit scores...")
         overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
-        feedback, feedback_time = generate_template_feedback(category_scores)
         progress_bar.progress(100)
         status_text.empty()
         st.subheader("Google Fit Assessment")
         score_percent = int(overall_score * 100)
         if overall_score >= 0.85:
             st.success(f"**Overall Google Match Score:** {score_percent}% 🌟")
@@ -524,15 +630,20 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
         else:
             st.error(f"**Overall Google Match Score:** {score_percent}% 🔍")
         st.markdown("### Score Calculation")
         st.markdown(score_breakdown)
         st.markdown("### Expert Assessment")
         st.markdown(feedback)
         st.info(f"Assessment completed in {feedback_time:.2f} seconds")
         st.subheader("Recommended Next Steps")
         weakest_categories = sorted(category_scores.items(), key=lambda x: x[1])[:2]
         if overall_score >= 0.80:

             max_length=100,
             truncation=True
         )
+        # Load the facebook/opt-1.3b text-generation model used as the evaluator
+        models['evaluator'] = pipeline(
+            "text-generation",
+            model="facebook/opt-1.3b",
+            max_length=200,
+            num_beams=2,
+            early_stopping=True
+        )
         return models
 # Preload models immediately when app starts
         "information systems": ["information systems", "ERP", "systems management"]
     }
+    # Count occurrences of industry keywords - using the summary to speed up
     combined_text = base_summary.lower()
     counts = {}
         for skill in skills:
             if skill.lower() in text_lower:
                 category_skills.append(skill)
         if category_skills:
             found_skills.append(f"{category}: {', '.join(category_skills)}")
     for idx, line in enumerate(lines):
         line_lower = line.lower().strip()
         # Start of work section
         if not in_work_section:
             if any(header in line_lower for header in work_headers):
         elif in_work_section:
             if any(header in line_lower for header in next_section_headers):
                 break
             if line.strip():
                 work_section.append(line.strip())
     if not work_section:
         work_experience = "Work experience not clearly identified"
     else:
+        # Summarize the work section: keep up to 3 dated entries plus nearby detail lines, capped at 7 lines
         work_lines = []
         company_count = 0
+        current_company = ""
         for line in work_section:
+            # New company entry often has a date
             if re.search(r'(19|20)\d{2}', line):
                 company_count += 1
                 if company_count <= 3:  # Limit to 3 most recent positions
+                    current_company = line
                     work_lines.append(f"**{line}**")
                 else:
                     break
+            elif company_count <= 3 and len(work_lines) < 10:  # Limit total lines
                 work_lines.append(line)
         work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
     """
     start_time = time.time()
+    # First, generate a quick summary using pre-loaded model
     max_input_length = 1024  # Model limit
+    # Only summarize the first portion of text for speed
+    text_to_summarize = resume_text[:min(len(resume_text), max_input_length)]
+    base_summary = models['summarizer'](text_to_summarize)[0]['summary_text']
     # Extract information in parallel where possible
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        # These can run in parallel
+        name_future = executor.submit(extract_name, resume_text[:500])  # Only use start of text
         age_future = executor.submit(extract_age, resume_text)
         industry_future = executor.submit(extract_industry, resume_text, base_summary)
         skills_work_future = executor.submit(extract_skills_and_work, resume_text)
     formatted_summary += f"Skills: {skills}"
     execution_time = time.time() - start_time
     return formatted_summary, execution_time
 #####################################
     """
     Calculate a detailed match score breakdown based on skills and experience in the candidate summary
     compared with what Google requires.
     Returns:
     - overall_score: A normalized score between 0 and 1
     - category_scores: A dictionary with scores for each category
     category_scores = {}
     for category, details in google_categories.items():
         keywords = details["keywords"]
+        max_possible = len(keywords)  # Maximum possible matches
+        # Count matches (unique keywords found)
         matches = sum(1 for keyword in keywords if keyword in summary_lower)
+        # Calculate category score (0-1 range)
         if max_possible > 0:
             raw_score = matches / max_possible
+            # Apply a curve to reward having more matches
             category_scores[category] = min(1.0, raw_score * 1.5)
         else:
             category_scores[category] = 0
+    # Calculate weighted overall score
     overall_score = sum(
         score * google_categories[category]["weight"]
         for category, score in category_scores.items()
     )
+    # Ensure overall score is in 0-1 range
     overall_score = min(1.0, max(0.0, overall_score))
     # Create score breakdown explanation
 #####################################
 def generate_template_feedback(category_scores):
     """
+    Generate comprehensive template-based feedback without using ML model for speed.
     """
     start_time = time.time()
+    # Sort categories by score
     sorted_categories = sorted(category_scores.items(), key=lambda x: x[1], reverse=True)
     top_categories = sorted_categories[:2]
+    bottom_categories = sorted_categories[-2:]
+    # More detailed template-based feedback for top category
     top_feedback_templates = {
         "Technical Skills": [
             "demonstrates strong technical skills with proficiency in programming languages and technical tools that Google values.",
         ]
     }
+    # More detailed template-based feedback for bottom categories
     bottom_feedback_templates = {
         "Technical Skills": [
             "should strengthen their technical skills, particularly in programming languages commonly used at Google such as Python, Java, or C++.",
         ]
     }
+    # Generate feedback with more detailed templates
+    import random
+    # Get top strength feedback
     top_category = top_categories[0][0]
+    top_score = top_categories[0][1]
     top_feedback = random.choice(top_feedback_templates.get(top_category, ["shows notable skills"]))
+    # Get improvement area feedback
     bottom_category = bottom_categories[0][0]
+    bottom_score = bottom_categories[0][1]
     bottom_feedback = random.choice(bottom_feedback_templates.get(bottom_category, ["could improve their skills"]))
+    # Construct full feedback
     feedback = f"This candidate {top_feedback} "
+    # Add second strength if it's good
     if top_categories[1][1] >= 0.6:
         second_top = top_categories[1][0]
         second_top_feedback = random.choice(top_feedback_templates.get(second_top, ["has good abilities"]))
         feedback += f"The candidate also {second_top_feedback} "
+    # Add improvement feedback
     feedback += f"However, the candidate {bottom_feedback} "
+    # Add conclusion based on overall score
     overall_score = sum(score * weight for (category, score), weight in
                        zip(category_scores.items(), [0.35, 0.25, 0.20, 0.10, 0.10]))
         feedback += "The candidate would need significant development to meet Google's standards."
     execution_time = time.time() - start_time
+    return feedback, execution_time
+#####################################
+# Function: Generate Aspect-Based Feedback with the evaluator model - Enhanced with Fallback
+#####################################
+@st.cache_data(show_spinner=False)
+def generate_aspect_feedback(candidate_summary, category_scores, _evaluator=None):
+    """
+    Use the text-generation evaluator model to generate feedback, with a robust fallback to template-based feedback.
+    """
+    start_time = time.time()
+    evaluator = _evaluator or models['evaluator']
+    # Sort categories by score
+    sorted_categories = sorted(category_scores.items(), key=lambda x: x[1], reverse=True)
+    top_categories = sorted_categories[:2]
+    bottom_categories = sorted_categories[-2:]
+    # Create a more explicit prompt for the evaluator model
+    prompt = f"""
+Generate a complete paragraph evaluating a job candidate for Google.
+The candidate is strong in: {', '.join([cat for cat, _ in top_categories])}.
+The candidate needs improvement in: {', '.join([cat for cat, _ in bottom_categories])}.
+Start with 'This candidate' and write at least 3 sentences about their fit for Google.
+"""
+    # Generate focused feedback with error handling
+    try:
+        feedback_result = evaluator(prompt, max_length=200, do_sample=False)
+        feedback = feedback_result[0]['generated_text']
+        # Validate the response - ensure it's not empty or too short
+        if len(feedback.strip()) < 20 or feedback.strip() == "This candidate" or feedback.strip() == "This candidate.":
+            # Fall back to template-based feedback if the model output is too short
+            return generate_template_feedback(category_scores)
+        # Ensure third-person tone
+        if not any(feedback.lower().startswith(start) for start in ["the candidate", "this candidate"]):
+            feedback = f"This candidate {feedback}"
+    except Exception as e:
+        # Fall back to template if there's an error
+        print(f"Error generating T5 feedback: {e}")
+        return generate_template_feedback(category_scores)
+    execution_time = time.time() - start_time
     return feedback, execution_time
 #####################################
 # File uploader
 uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
+# Add a checkbox for template-based feedback (faster)
+use_template_feedback = st.checkbox("Use faster template-based feedback (no ML)", value=False,
+                                   help="Generate feedback using pre-defined templates instead of T5 model")
 # Process button with optimized flow
 if uploaded_file is not None and st.button("Analyze My Google Fit"):
+    # Create a placeholder for the progress bar
     progress_bar = st.progress(0)
     status_text = st.empty()
         summary, summarization_time = summarize_resume_text(resume_text)
         progress_bar.progress(50)
+        # Display summary
         st.subheader("Your Resume Summary")
         st.markdown(summary)
         st.info(f"Summary generated in {summarization_time:.2f} seconds")
         # Step 3: Calculate scores and generate feedback
         status_text.text("Step 3/3: Calculating Google fit scores...")
         overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
+        # Choose feedback generation method based on checkbox
+        if use_template_feedback:
+            feedback, feedback_time = generate_template_feedback(category_scores)
+        else:
+            feedback, feedback_time = generate_aspect_feedback(
+                summary, category_scores, _evaluator=models['evaluator']
+            )
         progress_bar.progress(100)
+        # Clear status messages
         status_text.empty()
+        # Display Google fit results
         st.subheader("Google Fit Assessment")
+        # Display overall score with appropriate color and emoji
         score_percent = int(overall_score * 100)
         if overall_score >= 0.85:
             st.success(f"**Overall Google Match Score:** {score_percent}% 🌟")
         else:
             st.error(f"**Overall Google Match Score:** {score_percent}% 🔍")
+        # Display score breakdown
         st.markdown("### Score Calculation")
         st.markdown(score_breakdown)
+        # Display focused feedback
         st.markdown("### Expert Assessment")
         st.markdown(feedback)
         st.info(f"Assessment completed in {feedback_time:.2f} seconds")
+        # Add potential next steps based on the score
         st.subheader("Recommended Next Steps")
+        # Find the weakest categories
         weakest_categories = sorted(category_scores.items(), key=lambda x: x[1])[:2]
         if overall_score >= 0.80: