Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Files Community

CR7CAD committed on Mar 18

Commit

ce7c5e8

verified ·

1 Parent(s): 8e90008

Update app.py

Browse files

Files changed (1) hide show

app.py +164 -22

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import time
 import re
 import concurrent.futures
 from functools import lru_cache
-from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 # Set page title and hide sidebar
 st.set_page_config(
@@ -35,14 +35,21 @@ def load_models():
     """Load models at startup - using smaller/faster models"""
     with st.spinner("Loading AI models... This may take a minute on first run."):
         models = {}
-        # Load smaller summarization model for speed
-        models['summarizer'] = pipeline("summarization", model="facebook/bart-large-cnn", max_length=130)
-        # Load T5-small model for evaluation
         models['evaluator'] = pipeline(
             "text2text-generation",
             model="google-t5/t5-small",
-            max_length=200
         )
         return models
@@ -93,7 +100,9 @@ def extract_text_from_file(file_obj):
             text = f"Error processing TXT file: {e}"
     else:
         text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
-    return text
 #####################################
 # Functions for Information Extraction - Optimized
@@ -384,13 +393,124 @@ def calculate_google_match_score(candidate_summary):
     return overall_score, category_scores, score_breakdown
 #####################################
-# Function: Generate Aspect-Based Feedback with T5
 #####################################
 @st.cache_data(show_spinner=False)
 def generate_aspect_feedback(candidate_summary, category_scores, _evaluator=None):
     """
-    Use T5-small model to generate feedback on the candidate's strongest and weakest areas
-    for Google, based on the category scores.
     """
     start_time = time.time()
@@ -401,19 +521,31 @@ def generate_aspect_feedback(candidate_summary, category_scores, _evaluator=None
     top_categories = sorted_categories[:2]
     bottom_categories = sorted_categories[-2:]
-    # Create a prompt for T5
     prompt = f"""
-Generate specific third-person feedback on the candidate's fit for Google.
-Focus on these strengths: {', '.join([cat for cat, _ in top_categories])}.
-And these improvement areas: {', '.join([cat for cat, _ in bottom_categories])}.
 """
-    # Generate focused feedback
-    feedback = evaluator(prompt)[0]['generated_text']
-    # Ensure third-person tone
-    if not any(feedback.lower().startswith(start) for start in ["the candidate", "this candidate"]):
-        feedback = f"This candidate {feedback}"
     execution_time = time.time() - start_time
@@ -439,6 +571,10 @@ with st.expander("Google's Requirements", expanded=False):
 # File uploader
 uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
 # Process button with optimized flow
 if uploaded_file is not None and st.button("Analyze My Google Fit"):
     # Create a placeholder for the progress bar
@@ -466,9 +602,15 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
         # Step 3: Calculate scores and generate feedback
         status_text.text("Step 3/3: Calculating Google fit scores...")
         overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
-        feedback, feedback_time = generate_aspect_feedback(
-            summary, category_scores, _evaluator=models['evaluator']
-        )
         progress_bar.progress(100)
         # Clear status messages

 import re
 import concurrent.futures
 from functools import lru_cache
+from transformers import pipeline
 # Set page title and hide sidebar
 st.set_page_config(
     """Load models at startup - using smaller/faster models"""
     with st.spinner("Loading AI models... This may take a minute on first run."):
         models = {}
+        # Use bart-base instead of bart-large-cnn for faster processing
+        models['summarizer'] = pipeline(
+            "summarization",
+            model="facebook/bart-base",
+            max_length=100,
+            truncation=True
+        )
+        # Load T5-small model for evaluation with optimized settings
         models['evaluator'] = pipeline(
             "text2text-generation",
             model="google-t5/t5-small",
+            max_length=200,
+            num_beams=2,
+            early_stopping=True
         )
         return models
             text = f"Error processing TXT file: {e}"
     else:
         text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
+    # Limit text size for faster processing
+    return text[:15000] if text else text
 #####################################
 # Functions for Information Extraction - Optimized
     return overall_score, category_scores, score_breakdown
 #####################################
+# Function: Generate Robust Feedback - Template-Based
+#####################################
+def generate_template_feedback(category_scores):
+    """
+    Generate comprehensive template-based feedback without using ML model for speed.
+
+    Builds a short third-person paragraph from fixed sentence templates:
+    one sentence for the highest-scoring category, an optional sentence for
+    the runner-up (only when its score is at least 0.6), one sentence for the
+    lowest-scoring category, and a closing verdict chosen from a weighted
+    overall score. The template for each sentence is picked at random, so
+    repeated calls with the same scores may return different wording.
+
+    Args:
+        category_scores: Mapping of category name to numeric score. Category
+            names without a template fall back to a generic phrase.
+            NOTE(review): the overall score pairs the mapping's values with a
+            fixed weight list by position, so it presumably expects the
+            mapping to be ordered like the templates below (Technical Skills,
+            Advanced Technologies, Problem Solving, Innovation & Creativity,
+            Teamwork & Leadership) - confirm against the code that builds
+            category_scores.
+
+    Returns:
+        Tuple of (feedback text, seconds spent building it).
+    """
+    import random
+    start_time = time.time()
+    # With no scores there is nothing to rank; return a neutral message
+    # instead of failing on the index lookups below.
+    if not category_scores:
+        return "No category scores were available to generate feedback.", time.time() - start_time
+    # Sort categories by score, highest first
+    sorted_categories = sorted(category_scores.items(), key=lambda x: x[1], reverse=True)
+    top_category = sorted_categories[0][0]
+    # The weakest category is the LAST entry of the descending sort
+    bottom_category = sorted_categories[-1][0]
+    # More detailed template-based feedback for top category
+    top_feedback_templates = {
+        "Technical Skills": [
+            "demonstrates strong technical skills with proficiency in programming languages and technical tools that Google values.",
+            "shows excellent technical capabilities that align well with Google's engineering requirements.",
+            "possesses the technical expertise needed for Google's development environment."
+        ],
+        "Advanced Technologies": [
+            "has valuable experience with cutting-edge technologies that Google prioritizes in its innovation efforts.",
+            "demonstrates knowledge in advanced technological areas that align with Google's future direction.",
+            "shows proficiency in modern technologies that Google uses in its products and services."
+        ],
+        "Problem Solving": [
+            "exhibits strong problem-solving abilities which are fundamental to Google's engineering culture.",
+            "demonstrates analytical thinking and problem-solving skills that Google seeks in candidates.",
+            "shows the problem-solving aptitude that would be valuable in Google's collaborative environment."
+        ],
+        "Innovation & Creativity": [
+            "shows the creative thinking and innovation mindset that Google values in its workforce.",
+            "demonstrates the innovative approach that would fit well with Google's creative culture.",
+            "exhibits creativity that could contribute to Google's product development process."
+        ],
+        "Teamwork & Leadership": [
+            "demonstrates leadership qualities and teamwork skills that Google looks for in potential employees.",
+            "shows collaborative abilities that would integrate well with Google's team-based structure.",
+            "exhibits the interpersonal skills needed to thrive in Google's collaborative environment."
+        ]
+    }
+    # More detailed template-based feedback for bottom categories
+    bottom_feedback_templates = {
+        "Technical Skills": [
+            "should strengthen their technical skills, particularly in programming languages commonly used at Google such as Python, Java, or C++.",
+            "would benefit from developing more depth in technical tools and programming capabilities to meet Google's standards.",
+            "needs to enhance their technical expertise to better align with Google's engineering requirements."
+        ],
+        "Advanced Technologies": [
+            "would benefit from gaining more experience with AI, machine learning, or cloud technologies that Google prioritizes.",
+            "should develop more expertise in advanced technologies like machine learning or data science to increase their value to Google.",
+            "needs more exposure to the cutting-edge technologies that drive Google's innovation."
+        ],
+        "Problem Solving": [
+            "should strengthen their problem-solving abilities, particularly with algorithms and data structures that are crucial for Google interviews.",
+            "would benefit from developing stronger analytical and problem-solving skills to match Google's expectations.",
+            "needs to improve their approach to complex problem-solving to meet Google's standards."
+        ],
+        "Innovation & Creativity": [
+            "could develop a more innovative mindset to better align with Google's creative culture.",
+            "should work on demonstrating more creative thinking in their approach to match Google's innovation focus.",
+            "would benefit from cultivating more creativity and out-of-the-box thinking valued at Google."
+        ],
+        "Teamwork & Leadership": [
+            "should focus on developing stronger leadership and teamwork skills to thrive in Google's collaborative environment.",
+            "would benefit from more experience in collaborative settings to match Google's team-oriented culture.",
+            "needs to strengthen their interpersonal and leadership capabilities to align with Google's expectations."
+        ]
+    }
+    # Get top strength feedback
+    top_feedback = random.choice(top_feedback_templates.get(top_category, ["shows notable skills"]))
+    # Construct full feedback
+    feedback = f"This candidate {top_feedback} "
+    # Add second strength if it exists and is good
+    if len(sorted_categories) > 1 and sorted_categories[1][1] >= 0.6:
+        second_top = sorted_categories[1][0]
+        second_top_feedback = random.choice(top_feedback_templates.get(second_top, ["has good abilities"]))
+        feedback += f"The candidate also {second_top_feedback} "
+    # Add improvement feedback; skipped when the only category supplied is
+    # also the strongest, to avoid praising and criticising the same area.
+    if bottom_category != top_category:
+        bottom_feedback = random.choice(bottom_feedback_templates.get(bottom_category, ["could improve their skills"]))
+        feedback += f"However, the candidate {bottom_feedback} "
+    # Add conclusion based on overall score. Weights are applied by position;
+    # zip() stops at the shorter input, so extra categories are ignored and
+    # missing ones simply contribute nothing.
+    overall_score = sum(score * weight for (_, score), weight in
+                       zip(category_scores.items(), [0.35, 0.25, 0.20, 0.10, 0.10]))
+    if overall_score >= 0.75:
+        feedback += "Overall, this candidate shows strong potential for success at Google."
+    elif overall_score >= 0.6:
+        feedback += "With these improvements, the candidate could be a good fit for Google."
+    else:
+        feedback += "The candidate would need significant development to meet Google's standards."
+    execution_time = time.time() - start_time
+    return feedback, execution_time
+#####################################
+# Function: Generate Aspect-Based Feedback with T5 - Enhanced with Fallback
 #####################################
 @st.cache_data(show_spinner=False)
 def generate_aspect_feedback(candidate_summary, category_scores, _evaluator=None):
     """
+    Use T5-small model to generate feedback with robust fallback to template-based feedback.
     """
     start_time = time.time()
     top_categories = sorted_categories[:2]
     bottom_categories = sorted_categories[-2:]
+    # Create a more explicit prompt for T5
     prompt = f"""
+Generate a complete paragraph evaluating a job candidate for Google.
+The candidate is strong in: {', '.join([cat for cat, _ in top_categories])}.
+The candidate needs improvement in: {', '.join([cat for cat, _ in bottom_categories])}.
+Start with 'This candidate' and write at least 3 sentences about their fit for Google.
 """
+    # Generate focused feedback with error handling
+    try:
+        feedback_result = evaluator(prompt, max_length=200, do_sample=False)
+        feedback = feedback_result[0]['generated_text']
+        # Validate the response - ensure it's not empty or too short
+        if len(feedback.strip()) < 20 or feedback.strip() == "This candidate" or feedback.strip() == "This candidate.":
+            # Fall back to template-based if T5 output is too short
+            return generate_template_feedback(category_scores)
+        # Ensure third-person tone
+        if not any(feedback.lower().startswith(start) for start in ["the candidate", "this candidate"]):
+            feedback = f"This candidate {feedback}"
+    except Exception as e:
+        # Fall back to template if there's an error
+        print(f"Error generating T5 feedback: {e}")
+        return generate_template_feedback(category_scores)
     execution_time = time.time() - start_time
 # File uploader
 uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
+# Add a checkbox for template-based feedback (faster)
+use_template_feedback = st.checkbox("Use faster template-based feedback (no ML)", value=False,
+                                   help="Generate feedback using pre-defined templates instead of T5 model")
 # Process button with optimized flow
 if uploaded_file is not None and st.button("Analyze My Google Fit"):
     # Create a placeholder for the progress bar
         # Step 3: Calculate scores and generate feedback
         status_text.text("Step 3/3: Calculating Google fit scores...")
         overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
+        # Choose feedback generation method based on checkbox
+        if use_template_feedback:
+            feedback, feedback_time = generate_template_feedback(category_scores)
+        else:
+            feedback, feedback_time = generate_aspect_feedback(
+                summary, category_scores, _evaluator=models['evaluator']
+            )
         progress_bar.progress(100)
         # Clear status messages