Update app.py
app.py CHANGED

@@ -8,7 +8,20 @@ import time
 import re
 import pandas as pd
 from functools import lru_cache
-
+
+# Try different import approaches
+try:
+    from transformers import pipeline
+    has_pipeline = True
+except ImportError:
+    try:
+        from transformers import AutoModelForSequenceClassification, AutoTokenizer
+        import torch
+        has_pipeline = False
+        st.warning("Using basic transformers functionality instead of pipeline API")
+    except ImportError:
+        st.error("Transformers library not properly installed. Some features will be limited.")
+        has_pipeline = False
 
 # Set page title and hide sidebar
 st.set_page_config(
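Note: the added block above degrades gracefully when the high-level `pipeline` API cannot be imported, and records the outcome in `has_pipeline` so later code can branch on it. A minimal standalone sketch of the same pattern follows; the `summarize` helper and its fallback heuristic are illustrative only, not part of the commit.

try:
    from transformers import pipeline
    has_pipeline = True
except ImportError:
    pipeline = None
    has_pipeline = False

def summarize(text):
    """Use the pipeline when it imported cleanly, otherwise fall back to a crude heuristic."""
    if has_pipeline:
        # Mirrors the model choice used elsewhere in this app
        summarizer = pipeline("summarization", model="facebook/bart-base", truncation=True)
        return summarizer(text, max_length=100)[0]['summary_text']
    # Fallback: keep roughly the first two sentences
    return ". ".join(text.split(". ")[:2])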
@@ -32,24 +45,116 @@ def load_models():
     """Load models at startup"""
     with st.spinner("Loading AI models... This may take a minute on first run."):
         models = {}
-        # Use bart-base for summarization
-        models['summarizer'] = pipeline(
-            "summarization",
-            model="facebook/bart-base",
-            max_length=100,
-            truncation=True
-        )
 
         # Load sentiment model for evaluation
-
-
-
-
+        if has_pipeline:
+            # Use pipeline if available
+            models['evaluator'] = pipeline(
+                "sentiment-analysis",
+                model="distilbert/distilbert-base-uncased-finetuned-sst-2-english"
+            )
+        else:
+            # Fall back to basic model loading
+            try:
+                models['evaluator_model'] = AutoModelForSequenceClassification.from_pretrained(
+                    "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
+                )
+                models['evaluator_tokenizer'] = AutoTokenizer.from_pretrained(
+                    "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
+                )
+            except Exception as e:
+                st.error(f"Error loading models: {e}")
+                models['evaluator_model'] = None
+                models['evaluator_tokenizer'] = None
 
     return models
 
-#
-
+# Manual implementation of text summarization
+def basic_summarize(text, max_length=100):
+    """Basic text summarization by extracting key sentences"""
+    # Split into sentences
+    sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
+
+    # Score sentences by position (earlier is better) and length
+    scored_sentences = []
+    for i, sentence in enumerate(sentences):
+        # Skip very short sentences
+        if len(sentence.split()) < 4:
+            continue
+
+        # Simple scoring: earlier sentences get higher scores, penalize very long sentences
+        score = 1.0 / (i + 1) - (0.01 * max(0, len(sentence.split()) - 20))
+        scored_sentences.append((score, sentence))
+
+    # Sort by score
+    scored_sentences.sort(reverse=True)
+
+    # Get top sentences until we reach max_length
+    summary_sentences = []
+    current_length = 0
+
+    for _, sentence in scored_sentences:
+        if current_length + len(sentence.split()) <= max_length:
+            summary_sentences.append(sentence)
+            current_length += len(sentence.split())
+        else:
+            break
+
+    # Re-order sentences to match original order if we have more than one
+    if summary_sentences:
+        original_order = []
+        for sentence in summary_sentences:
+            original_order.append((sentences.index(sentence), sentence))
+        original_order.sort()
+        summary_sentences = [s for _, s in original_order]
+
+    # Combine into a summary
+    summary = " ".join(summary_sentences)
+    return summary
+
+# Custom sentiment analysis function as fallback
+def analyze_sentiment(text, models):
+    """Analyze sentiment using available models"""
+
+    if has_pipeline and 'evaluator' in models:
+        # Use pipeline if available
+        try:
+            result = models['evaluator'](text)
+            return result[0]['label'] == 'POSITIVE'
+        except Exception as e:
+            st.warning(f"Error in pipeline sentiment analysis: {e}")
+
+    # Fall back to manual model inference
+    if 'evaluator_model' in models and 'evaluator_tokenizer' in models and models['evaluator_model']:
+        try:
+            tokenizer = models['evaluator_tokenizer']
+            model = models['evaluator_model']
+
+            # Truncate to avoid exceeding model's max length
+            max_length = tokenizer.model_max_length if hasattr(tokenizer, 'model_max_length') else 512
+            truncated_text = " ".join(text.split()[:max_length])
+
+            inputs = tokenizer(truncated_text, return_tensors="pt", truncation=True, max_length=max_length)
+            with torch.no_grad():
+                outputs = model(**inputs)
+
+            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
+            prediction = torch.argmax(probabilities, dim=-1).item()
+
+            # Usually for sentiment models, 1 = positive, 0 = negative
+            return prediction == 1
+        except Exception as e:
+            st.warning(f"Error in manual sentiment analysis: {e}")
+
+    # If all else fails, use a simple keyword approach
+    positive_words = ["match", "fit", "qualified", "skilled", "experienced", "suitable", "aligned", "good", "strong"]
+    negative_words = ["mismatch", "gap", "insufficient", "lacking", "inadequate", "limited", "missing", "poor", "weak"]
+
+    text_lower = text.lower()
+    positive_count = sum(text_lower.count(word) for word in positive_words)
+    negative_count = sum(text_lower.count(word) for word in negative_words)
+
+    return positive_count > negative_count
 
 #####################################
 # Function: Extract Text from File
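For reference, the added `basic_summarize` is a purely extractive fallback: it scores sentences by position and length, keeps the highest-scoring ones within the word budget, and re-emits them in their original order. A small illustrative call, with made-up input text, assuming the function above is in scope:

resume_text = (
    "Experienced data engineer with five years building ETL pipelines. "
    "Led a team of four on a cloud migration project. "
    "Enjoys hiking. "
    "Skilled in Python, SQL, and Spark."
)
print(basic_summarize(resume_text, max_length=20))
# The first two sentences score highest and fit within the 20-word budget, so they
# are returned in their original order; the two-word sentence is skipped outright.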
@@ -215,18 +320,14 @@ def extract_skills_and_work(text):
 #####################################
 # Function: Summarize Resume Text
 #####################################
-def summarize_resume_text(resume_text):
+def summarize_resume_text(resume_text, models):
     """
     Generates a structured summary of the resume text
     """
     start_time = time.time()
 
-    #
-
-
-    # Only summarize the first portion of text for speed
-    text_to_summarize = resume_text[:min(len(resume_text), max_input_length)]
-    base_summary = models['summarizer'](text_to_summarize)[0]['summary_text']
+    # Create a basic summary using our custom function
+    base_summary = basic_summarize(resume_text, max_length=100)
 
     # Extract name from the beginning of the resume
     name = extract_name(resume_text[:500])
@@ -308,7 +409,7 @@ def extract_job_requirements(job_description):
     required_skills = [skill for skill in tech_skills if re.search(r'\b' + re.escape(skill.lower()) + r'\b', clean_job_text)]
 
     # Create a simple summary of the job
-    job_summary =
+    job_summary = basic_summarize(job_description, max_length=100)
 
     # Format the job requirements
     job_requirements = {
@@ -323,9 +424,9 @@ def extract_job_requirements(job_description):
 #####################################
 # Function: Analyze Job Fit
 #####################################
-def analyze_job_fit(resume_summary, job_description):
+def analyze_job_fit(resume_summary, job_description, models):
     """
-    Analyze how well the candidate fits the job requirements
+    Analyze how well the candidate fits the job requirements.
     Returns a fit score (0-2) and an assessment.
     """
     start_time = time.time()
@@ -406,23 +507,20 @@ def analyze_job_fit(resume_summary, job_description):
     Overall assessment: The candidate's skills and experience {"appear to match well with" if skills_match_percentage >= 60 and experience_match == "sufficient" else "have some gaps compared to"} the job requirements.
     """
 
-    # Use sentiment analysis
-
-
-    # Map sentiment to score: NEGATIVE = 0, POSITIVE = 1
-    sentiment_score = 1 if sentiment_result[0]['label'] == 'POSITIVE' else 0
+    # Use sentiment analysis function to evaluate the comparison
+    is_positive = analyze_sentiment(comparison_text, models)
 
     # Derive final score based on sentiment and match metrics
-    if
-        final_score = 2 #
-    elif
+    if is_positive and skills_match_percentage >= 70 and experience_match == "sufficient":
+        final_score = 2 # Strong fit
+    elif is_positive and skills_match_percentage >= 50:
         final_score = 1 # Potential fit
     else:
         final_score = 0 # Not fit
 
     # Generate assessment text based on the score
     if final_score == 2:
-        assessment = f"{final_score}: The candidate is a
+        assessment = f"{final_score}: The candidate is a strong match for this {job_requirements['title']} position. They have the required {experience_years} years of experience and demonstrate proficiency in key skills including {', '.join(skills_in_resume[:5])}. Their background aligns well with the job requirements."
     elif final_score == 1:
         assessment = f"{final_score}: The candidate shows potential for this {job_requirements['title']} position, but has some skill gaps. They match on {skills_match_percentage}% of required skills including {', '.join(skills_in_resume[:3]) if skills_in_resume else 'minimal required skills'}, and their experience is {experience_match}."
     else:
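The new thresholds above combine the sentiment signal with the match metrics. A compact sketch of just that decision rule, exercised on hypothetical inputs (not part of the commit):

def score_fit(is_positive, skills_match_percentage, experience_match):
    # Same rule as the hunk above, isolated for illustration
    if is_positive and skills_match_percentage >= 70 and experience_match == "sufficient":
        return 2  # Strong fit
    elif is_positive and skills_match_percentage >= 50:
        return 1  # Potential fit
    return 0  # Not fit

assert score_fit(True, 80, "sufficient") == 2    # strong skills plus enough experience
assert score_fit(True, 55, "insufficient") == 1  # decent skills, experience falls short
assert score_fit(False, 90, "sufficient") == 0   # negative overall signal wins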
@@ -432,6 +530,9 @@ def analyze_job_fit(resume_summary, job_description):
 
     return assessment, final_score, execution_time
 
+# Load models at startup
+models = load_models()
+
 #####################################
 # Main Streamlit Interface
 #####################################
@@ -464,7 +565,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
     else:
         # Step 2: Generate summary
        status_text.text("Step 2/3: Analyzing resume and generating summary...")
-        summary, summarization_time = summarize_resume_text(resume_text)
+        summary, summarization_time = summarize_resume_text(resume_text, models)
        progress_bar.progress(50)
 
        # Display summary
@@ -473,7 +574,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
 
        # Step 3: Generate job fit assessment
        status_text.text("Step 3/3: Evaluating job fit...")
-        assessment, fit_score, assessment_time = analyze_job_fit(summary, job_description)
+        assessment, fit_score, assessment_time = analyze_job_fit(summary, job_description, models)
        progress_bar.progress(100)
 
        # Clear status messages
@@ -486,7 +587,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
        fit_labels = {
            0: "NOT FIT ❌",
            1: "POTENTIAL FIT ⚠️",
-            2: "
+            2: "STRONG FIT ✅"
        }
 
        # Show the score prominently
@@ -502,7 +603,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
 
        if fit_score == 2:
            st.markdown("""
-            - Apply for this position as you appear to be a
+            - Apply for this position as you appear to be a strong match
            - Prepare for interviews by focusing on your relevant experience
            - Highlight your matching skills in your cover letter
            """)