Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,11 +4,13 @@ import streamlit as st
|
|
4 |
import docx
|
5 |
import docx2txt
|
6 |
import tempfile
|
7 |
-
from transformers import pipeline
|
8 |
import numpy as np
|
9 |
from scipy.spatial.distance import cosine
|
10 |
import time
|
11 |
import re
|
|
|
|
|
|
|
12 |
|
13 |
# Set page title and hide sidebar
|
14 |
st.set_page_config(
|
@@ -25,18 +27,18 @@ st.markdown("""
|
|
25 |
""", unsafe_allow_html=True)
|
26 |
|
27 |
#####################################
|
28 |
-
# Preload Models
|
29 |
#####################################
|
30 |
@st.cache_resource(show_spinner=True)
|
31 |
def load_models():
|
32 |
-
"""Load models at startup"""
|
33 |
with st.spinner("Loading AI models... This may take a minute on first run."):
|
34 |
models = {}
|
35 |
-
# Load summarization model
|
36 |
-
models['summarizer'] = pipeline("summarization", model="
|
37 |
|
38 |
-
# Load feature extraction model for
|
39 |
-
models['feature_extractor'] = pipeline("feature-extraction", model="
|
40 |
|
41 |
return models
|
42 |
|
@@ -46,6 +48,7 @@ models = load_models()
|
|
46 |
#####################################
|
47 |
# Function: Extract Text from File
|
48 |
#####################################
|
|
|
49 |
def extract_text_from_file(file_obj):
|
50 |
"""
|
51 |
Extract text from .docx and .doc files.
|
@@ -88,73 +91,68 @@ def extract_text_from_file(file_obj):
|
|
88 |
return text
|
89 |
|
90 |
#####################################
|
91 |
-
# Functions for Information Extraction
|
92 |
#####################################
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
97 |
|
98 |
# Check first few non-empty lines for potential names
|
99 |
potential_name_lines = [line.strip() for line in lines[:5] if line.strip()]
|
100 |
|
101 |
if potential_name_lines:
|
102 |
-
# First line is often the name if it's short and doesn't contain common
|
103 |
first_line = potential_name_lines[0]
|
104 |
if 5 <= len(first_line) <= 40 and not any(x in first_line.lower() for x in ["resume", "cv", "curriculum", "vitae", "profile"]):
|
105 |
return first_line
|
106 |
|
107 |
-
# Look for lines that might contain a name
|
108 |
for line in potential_name_lines[:3]:
|
109 |
if len(line.split()) <= 4 and not any(x in line.lower() for x in ["address", "phone", "email", "resume", "cv"]):
|
110 |
return line
|
111 |
|
112 |
-
# If we couldn't find a clear name
|
113 |
return "Unknown (please extract from resume)"
|
114 |
|
115 |
def extract_age(text):
|
116 |
"""Extract candidate age from resume text"""
|
117 |
-
#
|
118 |
-
|
119 |
-
# Look for patterns like "Age: XX" or "XX years old"
|
120 |
age_patterns = [
|
121 |
r'age:?\s*(\d{1,2})',
|
122 |
r'(\d{1,2})\s*years\s*old',
|
123 |
-
r'DOB:?\s*(\d{1,2})[/-](\d{1,2})[/-](\d{2,4})'
|
124 |
]
|
125 |
|
|
|
126 |
for pattern in age_patterns:
|
127 |
-
matches = re.search(pattern,
|
128 |
if matches:
|
129 |
-
|
130 |
-
# Calculate age from DOB - simplified
|
131 |
-
return "Mentioned in DOB format"
|
132 |
-
else:
|
133 |
-
return matches.group(1)
|
134 |
|
135 |
return "Not specified"
|
136 |
|
137 |
-
def extract_industry(text,
|
138 |
"""Extract expected job industry from resume"""
|
139 |
-
#
|
140 |
industry_keywords = {
|
141 |
-
"technology": ["software", "programming", "developer", "IT", "tech", "computer"
|
142 |
-
"finance": ["banking", "
|
143 |
-
"healthcare": ["medical", "health", "hospital", "clinical", "nurse", "doctor"
|
144 |
-
"education": ["teaching", "teacher", "professor", "
|
145 |
-
"marketing": ["marketing", "advertising", "
|
146 |
-
"engineering": ["
|
147 |
-
"
|
148 |
-
"
|
149 |
-
"information systems": ["information systems", "ERP", "CRM", "database", "systems management"]
|
150 |
}
|
151 |
|
152 |
-
# Count occurrences of industry keywords
|
153 |
-
|
154 |
-
text_lower = text.lower()
|
155 |
|
|
|
156 |
for industry, keywords in industry_keywords.items():
|
157 |
-
counts[industry] = sum(
|
158 |
|
159 |
# Get the industry with the highest count
|
160 |
if counts:
|
@@ -163,229 +161,131 @@ def extract_industry(text, summary):
|
|
163 |
return likely_industry[0].capitalize()
|
164 |
|
165 |
# Check for educational background that might indicate industry
|
166 |
-
degrees = ["computer science", "business", "engineering", "medicine", "
|
167 |
-
"finance", "marketing", "information systems"]
|
168 |
|
169 |
for degree in degrees:
|
170 |
-
if degree in
|
171 |
return f"{degree.capitalize()}-related field"
|
172 |
|
173 |
-
return "Not clearly specified
|
174 |
|
175 |
-
def
|
176 |
-
"""Extract
|
177 |
-
# Common skill categories
|
178 |
skill_categories = {
|
179 |
-
"Programming": ["Python", "Java", "
|
180 |
-
|
181 |
-
"
|
182 |
-
|
183 |
-
|
184 |
-
"
|
185 |
-
|
186 |
-
"
|
187 |
-
"Frontend", "Backend", "Full-Stack", "Responsive Design"],
|
188 |
-
"Software Development": ["Agile", "Scrum", "Kanban", "Git", "CI/CD", "TDD", "OOP", "Design Patterns",
|
189 |
-
"Microservices", "DevOps", "Docker", "Kubernetes"],
|
190 |
-
"Cloud": ["AWS", "Azure", "Google Cloud", "Cloud Computing", "S3", "EC2", "Lambda", "Serverless",
|
191 |
-
"Cloud Architecture", "IaaS", "PaaS", "SaaS"],
|
192 |
-
"Business": ["Project Management", "Business Analysis", "Communication", "Teamwork", "Leadership",
|
193 |
-
"Strategy", "Negotiation", "Presentation", "Time Management"],
|
194 |
-
"Tools": ["Excel", "PowerPoint", "Tableau", "Power BI", "JIRA", "Confluence", "Slack", "Microsoft Office",
|
195 |
-
"Adobe", "Photoshop", "Salesforce"]
|
196 |
}
|
197 |
|
198 |
-
#
|
199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
text_lower = text.lower()
|
201 |
|
|
|
|
|
202 |
for category, skills in skill_categories.items():
|
203 |
category_skills = []
|
204 |
for skill in skills:
|
205 |
-
# Check for case-insensitive match but preserve original case in output
|
206 |
if skill.lower() in text_lower:
|
207 |
category_skills.append(skill)
|
208 |
|
209 |
if category_skills:
|
210 |
found_skills.append(f"{category}: {', '.join(category_skills)}")
|
211 |
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
return "No specific technical skills clearly identified (review resume for details)"
|
216 |
-
|
217 |
-
def extract_work_experience(text):
|
218 |
-
"""Extract work experience from resume"""
|
219 |
-
# Common section headers for work experience
|
220 |
-
work_headers = [
|
221 |
-
"work experience", "professional experience", "employment history",
|
222 |
-
"work history", "experience", "professional background", "career history"
|
223 |
-
]
|
224 |
-
|
225 |
-
# Common section headers that might come after work experience
|
226 |
-
next_section_headers = [
|
227 |
-
"education", "skills", "certifications", "projects", "achievements",
|
228 |
-
"languages", "interests", "references", "additional information"
|
229 |
-
]
|
230 |
-
|
231 |
-
text_lower = text.lower()
|
232 |
-
lines = text.split('\n')
|
233 |
-
|
234 |
-
# Find the start of work experience section
|
235 |
-
work_start_idx = -1
|
236 |
-
work_header_used = ""
|
237 |
|
238 |
for idx, line in enumerate(lines):
|
239 |
line_lower = line.lower().strip()
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
for idx, line in enumerate(lines):
|
250 |
-
if re.search(date_pattern, line.lower()):
|
251 |
-
# Check surrounding lines for job titles or company names
|
252 |
-
context = " ".join(lines[max(0, idx-2):min(len(lines), idx+3)])
|
253 |
-
if any(title.lower() in context.lower() for title in ["manager", "developer", "engineer", "analyst", "assistant", "director", "coordinator"]):
|
254 |
-
work_start_idx = max(0, idx-2)
|
255 |
-
break
|
256 |
-
|
257 |
-
if work_start_idx == -1:
|
258 |
-
return "No clear work experience section found"
|
259 |
-
|
260 |
-
# Find the end of work experience section
|
261 |
-
work_end_idx = len(lines)
|
262 |
-
for idx in range(work_start_idx + 1, len(lines)):
|
263 |
-
line_lower = lines[idx].lower().strip()
|
264 |
-
if any(header in line_lower for header in next_section_headers):
|
265 |
-
if any(header == line_lower or header + ":" == line_lower for header in next_section_headers):
|
266 |
-
work_end_idx = idx
|
267 |
break
|
268 |
-
|
269 |
-
# Extract the work experience section
|
270 |
-
work_section = lines[work_start_idx + 1:work_end_idx]
|
271 |
-
|
272 |
-
# Process the work experience to make it more concise
|
273 |
-
# Look for companies, positions, dates, and key responsibilities
|
274 |
-
companies = []
|
275 |
-
current_company = {"name": "", "position": "", "dates": "", "description": []}
|
276 |
-
|
277 |
-
for line in work_section:
|
278 |
-
line = line.strip()
|
279 |
-
if not line:
|
280 |
-
continue
|
281 |
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
parts = re.split(r'(19|20)\d{2}', line, 1)
|
294 |
-
if len(parts) > 1:
|
295 |
-
current_company["position"] = parts[0].strip()
|
296 |
-
elif current_company["dates"] and not current_company["name"]:
|
297 |
-
# This line might be the company name or the continuation of position details
|
298 |
-
current_company["name"] = line
|
299 |
-
else:
|
300 |
-
# This is likely a responsibility or detail
|
301 |
-
current_company["description"].append(line)
|
302 |
-
|
303 |
-
# Add the last company if it exists
|
304 |
-
if current_company["name"] or current_company["position"]:
|
305 |
-
companies.append(current_company)
|
306 |
-
|
307 |
-
# Format the work experience
|
308 |
-
if not companies:
|
309 |
-
# Try a different approach - just extract text blocks that might be jobs
|
310 |
-
job_blocks = []
|
311 |
-
current_block = []
|
312 |
|
313 |
for line in work_section:
|
314 |
-
|
315 |
-
if
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
if job_blocks:
|
326 |
-
return "\n• " + "\n• ".join(job_blocks[:3]) # Limit to top 3 entries
|
327 |
-
else:
|
328 |
-
return "Work experience information could not be clearly structured"
|
329 |
-
|
330 |
-
# Format the companies into a readable output
|
331 |
-
formatted_experience = []
|
332 |
-
for company in companies[:3]: # Limit to top 3 most recent positions
|
333 |
-
entry = []
|
334 |
-
if company["position"]:
|
335 |
-
entry.append(f"**{company['position']}**")
|
336 |
-
if company["name"]:
|
337 |
-
entry.append(f"at {company['name']}")
|
338 |
-
if company["dates"]:
|
339 |
-
entry.append(f"({company['dates']})")
|
340 |
-
|
341 |
-
position_line = " ".join(entry)
|
342 |
|
343 |
-
if
|
344 |
-
# Limit to first 2-3 bullet points for conciseness
|
345 |
-
description = company["description"][:3]
|
346 |
-
description_text = "; ".join(description)
|
347 |
-
formatted_experience.append(f"{position_line} - {description_text}")
|
348 |
-
else:
|
349 |
-
formatted_experience.append(position_line)
|
350 |
|
351 |
-
if
|
352 |
-
|
353 |
-
|
354 |
-
return "Work experience information could not be clearly structured"
|
355 |
|
356 |
#####################################
|
357 |
-
# Function: Summarize Resume Text
|
358 |
#####################################
|
359 |
def summarize_resume_text(resume_text, models):
|
360 |
"""
|
361 |
-
Generates a structured summary of the resume text
|
362 |
-
expected job industry, skills, and work experience of the candidate.
|
363 |
"""
|
364 |
start_time = time.time()
|
365 |
|
366 |
summarizer = models['summarizer']
|
367 |
|
368 |
-
# First, generate a
|
369 |
max_input_length = 1024 # Model limit
|
370 |
|
371 |
-
|
372 |
-
|
373 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
else:
|
381 |
-
base_summary = summarizer(resume_text, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
|
382 |
-
|
383 |
-
# Extract specific information using custom extraction logic
|
384 |
-
name = extract_name(resume_text)
|
385 |
-
age = extract_age(resume_text)
|
386 |
-
industry = extract_industry(resume_text, base_summary)
|
387 |
-
skills = extract_skills(resume_text, base_summary)
|
388 |
-
work_experience = extract_work_experience(resume_text)
|
389 |
|
390 |
# Format the structured summary
|
391 |
formatted_summary = f"Name: {name}\n"
|
@@ -399,8 +299,9 @@ def summarize_resume_text(resume_text, models):
|
|
399 |
return formatted_summary, execution_time
|
400 |
|
401 |
#####################################
|
402 |
-
# Function: Compare Candidate Summary to Company Prompt
|
403 |
#####################################
|
|
|
404 |
def compute_suitability(candidate_summary, company_prompt, models):
|
405 |
"""
|
406 |
Compute the similarity between candidate summary and company prompt.
|
@@ -410,9 +311,13 @@ def compute_suitability(candidate_summary, company_prompt, models):
|
|
410 |
|
411 |
feature_extractor = models['feature_extractor']
|
412 |
|
413 |
-
# Extract features (embeddings)
|
414 |
-
|
415 |
-
|
|
|
|
|
|
|
|
|
416 |
|
417 |
# Convert to numpy arrays and flatten if needed
|
418 |
candidate_vec = np.mean(np.array(candidate_features[0]), axis=0)
|
@@ -426,7 +331,7 @@ def compute_suitability(candidate_summary, company_prompt, models):
|
|
426 |
return similarity, execution_time
|
427 |
|
428 |
#####################################
|
429 |
-
# Main Streamlit Interface
|
430 |
#####################################
|
431 |
st.title("Resume Analyzer and Company Suitability Checker")
|
432 |
st.markdown(
|
@@ -448,38 +353,49 @@ company_prompt = st.text_area(
|
|
448 |
help="Enter a detailed description of the company culture, role requirements, and desired skills.",
|
449 |
)
|
450 |
|
451 |
-
# Process button
|
452 |
if uploaded_file is not None and company_prompt and st.button("Analyze Resume"):
|
453 |
-
|
454 |
-
|
455 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
456 |
|
457 |
-
|
458 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
459 |
else:
|
460 |
-
|
461 |
-
summary, summarization_time = summarize_resume_text(resume_text, models)
|
462 |
-
|
463 |
-
# Display summary
|
464 |
-
st.subheader("Candidate Summary")
|
465 |
-
st.markdown(summary)
|
466 |
-
st.info(f"Summarization completed in {summarization_time:.2f} seconds")
|
467 |
-
|
468 |
-
# Only compute similarity if company description is provided
|
469 |
-
if company_prompt:
|
470 |
-
similarity_score, similarity_time = compute_suitability(summary, company_prompt, models)
|
471 |
-
|
472 |
-
# Display similarity score
|
473 |
-
st.subheader("Suitability Assessment")
|
474 |
-
st.markdown(f"**Matching Score:** {similarity_score:.2%}")
|
475 |
-
st.info(f"Similarity computation completed in {similarity_time:.2f} seconds")
|
476 |
-
|
477 |
-
# Provide interpretation
|
478 |
-
if similarity_score >= 0.85:
|
479 |
-
st.success("Excellent match! This candidate's profile is strongly aligned with the company requirements.")
|
480 |
-
elif similarity_score >= 0.70:
|
481 |
-
st.success("Good match! This candidate shows strong potential for the position.")
|
482 |
-
elif similarity_score >= 0.50:
|
483 |
-
st.warning("Moderate match. The candidate meets some requirements but there may be gaps.")
|
484 |
-
else:
|
485 |
-
st.error("Low match. The candidate's profile may not align well with the requirements.")
|
|
|
4 |
import docx
|
5 |
import docx2txt
|
6 |
import tempfile
|
|
|
7 |
import numpy as np
|
8 |
from scipy.spatial.distance import cosine
|
9 |
import time
|
10 |
import re
|
11 |
+
import concurrent.futures
|
12 |
+
from functools import lru_cache
|
13 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
14 |
|
15 |
# Set page title and hide sidebar
|
16 |
st.set_page_config(
|
|
|
27 |
""", unsafe_allow_html=True)
|
28 |
|
29 |
#####################################
|
30 |
+
# Preload Models - Optimized
|
31 |
#####################################
|
32 |
@st.cache_resource(show_spinner=True)
def load_models():
    """Load the NLP pipelines once at startup and cache them for reuse.

    Returns a dict with:
      'summarizer'        - abstractive summarization pipeline (BART CNN, max_length=130)
      'feature_extractor' - embedding pipeline (DistilBERT) used for similarity scoring
    """
    with st.spinner("Loading AI models... This may take a minute on first run."):
        # Build both pipelines inside the spinner so the user gets visual
        # feedback during the (slow) first download/initialization.
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn", max_length=130)
        feature_extractor = pipeline("feature-extraction", model="distilbert-base-uncased")
        loaded = {
            'summarizer': summarizer,
            'feature_extractor': feature_extractor,
        }
    return loaded
|
44 |
|
|
|
48 |
#####################################
|
49 |
# Function: Extract Text from File
|
50 |
#####################################
|
51 |
+
@st.cache_data(show_spinner=False)
|
52 |
def extract_text_from_file(file_obj):
|
53 |
"""
|
54 |
Extract text from .docx and .doc files.
|
|
|
91 |
return text
|
92 |
|
93 |
#####################################
|
94 |
+
# Functions for Information Extraction - Optimized
|
95 |
#####################################
|
96 |
+
|
97 |
+
# Cache the extraction functions to avoid reprocessing
|
98 |
+
@lru_cache(maxsize=32)
def extract_name(text_start):
    """Guess the candidate's name from the opening lines of a resume.

    `text_start` is expected to be only the beginning of the resume text
    (the caller passes a short prefix) so the lru_cache stays cheap.
    Returns the best-guess name line, or a placeholder string when no
    plausible name is found.
    """
    # Candidate lines: the first five lines, stripped, blanks dropped.
    candidates = [ln.strip() for ln in text_start.split('\n')[:5] if ln.strip()]

    if candidates:
        # Heuristic 1: a short-ish first line that is not a generic header
        # ("Resume", "Curriculum Vitae", ...) is very likely the name.
        header_words = ("resume", "cv", "curriculum", "vitae", "profile")
        top = candidates[0]
        if 5 <= len(top) <= 40 and not any(w in top.lower() for w in header_words):
            return top

        # Heuristic 2: any of the first three lines with at most four words
        # and no contact-info keywords could be the name.
        contact_words = ("address", "phone", "email", "resume", "cv")
        for candidate in candidates[:3]:
            if len(candidate.split()) <= 4 and not any(w in candidate.lower() for w in contact_words):
                return candidate

    return "Unknown (please extract from resume)"
|
119 |
|
120 |
def extract_age(text):
    """Extract the candidate's age from resume text.

    Scans (case-insensitively) for simple patterns such as "Age: 27" or
    "27 years old" and returns the first captured age as a string.
    Returns "Not specified" when no pattern matches.
    """
    # \b word boundaries prevent false positives from words that merely
    # contain "age" (e.g. "usage: 12", "page: 3") and from 1-2 digit
    # substrings of longer numbers (e.g. "1234 years").
    age_patterns = [
        r'\bage:?\s*(\d{1,2})\b',
        r'\b(\d{1,2})\s*years\s*old\b',
    ]

    text_lower = text.lower()
    for pattern in age_patterns:
        match = re.search(pattern, text_lower)
        if match:
            return match.group(1)

    return "Not specified"
|
135 |
|
136 |
+
def extract_industry(text, base_summary):
|
137 |
"""Extract expected job industry from resume"""
|
138 |
+
# Simplified industry keywords focused on the most common ones
|
139 |
industry_keywords = {
|
140 |
+
"technology": ["software", "programming", "developer", "IT", "tech", "computer"],
|
141 |
+
"finance": ["banking", "financial", "accounting", "finance", "analyst"],
|
142 |
+
"healthcare": ["medical", "health", "hospital", "clinical", "nurse", "doctor"],
|
143 |
+
"education": ["teaching", "teacher", "professor", "education", "university"],
|
144 |
+
"marketing": ["marketing", "advertising", "digital marketing", "social media"],
|
145 |
+
"engineering": ["engineer", "engineering"],
|
146 |
+
"data science": ["data science", "machine learning", "AI", "analytics"],
|
147 |
+
"information systems": ["information systems", "ERP", "systems management"]
|
|
|
148 |
}
|
149 |
|
150 |
+
# Count occurrences of industry keywords - using the summary to speed up
|
151 |
+
combined_text = base_summary.lower()
|
|
|
152 |
|
153 |
+
counts = {}
|
154 |
for industry, keywords in industry_keywords.items():
|
155 |
+
counts[industry] = sum(combined_text.count(keyword.lower()) for keyword in keywords)
|
156 |
|
157 |
# Get the industry with the highest count
|
158 |
if counts:
|
|
|
161 |
return likely_industry[0].capitalize()
|
162 |
|
163 |
# Check for educational background that might indicate industry
|
164 |
+
degrees = ["computer science", "business", "engineering", "medicine", "education", "finance", "marketing"]
|
|
|
165 |
|
166 |
for degree in degrees:
|
167 |
+
if degree in combined_text:
|
168 |
return f"{degree.capitalize()}-related field"
|
169 |
|
170 |
+
return "Not clearly specified"
|
171 |
|
172 |
+
def extract_skills_and_work(text):
    """Extract both a skills summary and a work-experience summary in one pass.

    Returns a (skills_formatted, work_experience) tuple of display-ready
    strings (bulleted with "•"), each falling back to an explanatory
    message when nothing is found.
    """
    # Common skill categories - reduced keyword list for speed.
    skill_categories = {
        "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#"],
        "Data Science": ["Machine Learning", "Data Analysis", "Statistics", "TensorFlow", "PyTorch"],
        "Database": ["SQL", "MySQL", "MongoDB", "Database"],
        "Web Development": ["React", "Angular", "Node.js", "Frontend", "Backend"],
        "Software Development": ["Agile", "Scrum", "Git", "DevOps", "Docker"],
        "Cloud": ["AWS", "Azure", "Google Cloud", "Cloud"],
        "Business": ["Project Management", "Business Analysis", "Leadership"],
        "Tools": ["Excel", "PowerPoint", "Tableau", "Power BI", "JIRA"]
    }

    # Headers that open a work-experience section ...
    work_headers = [
        "work experience", "professional experience", "employment history",
        "work history", "experience"
    ]
    # ... and headers that typically follow it (used to stop collecting).
    next_section_headers = [
        "education", "skills", "certifications", "projects", "achievements"
    ]

    lines = text.split('\n')
    text_lower = text.lower()

    # --- Skills: case-insensitive substring match per keyword ---
    found_skills = []
    for category, skills in skill_categories.items():
        category_skills = [skill for skill in skills if skill.lower() in text_lower]
        if category_skills:
            found_skills.append(f"{category}: {', '.join(category_skills)}")

    # --- Work experience: collect non-blank lines between a work header
    # and the first line that looks like the next section ---
    work_section = []
    in_work_section = False

    for line in lines:
        line_lower = line.lower().strip()

        if not in_work_section:
            # Look for the start of the work section; the header line
            # itself is not collected.
            if any(header in line_lower for header in work_headers):
                in_work_section = True
            continue

        # Stop at the first line matching a follow-on section header.
        if any(header in line_lower for header in next_section_headers):
            break

        if line.strip():
            work_section.append(line.strip())

    if not work_section:
        work_experience = "Work experience not clearly identified"
    else:
        # Keep up to 3 dated entries (bolded) plus a few detail lines.
        work_lines = []
        company_count = 0

        for line in work_section:
            # A line containing a year (19xx/20xx) marks a new position entry.
            if re.search(r'(19|20)\d{2}', line):
                company_count += 1
                if company_count <= 3:  # Limit to 3 most recent positions
                    work_lines.append(f"**{line}**")
                else:
                    break
            elif company_count <= 3 and len(work_lines) < 10:  # Limit total lines
                work_lines.append(line)

        work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"

    skills_formatted = "\n• " + "\n• ".join(found_skills) if found_skills else "No specific technical skills clearly identified"

    return skills_formatted, work_experience
|
|
|
257 |
|
258 |
#####################################
|
259 |
+
# Function: Summarize Resume Text - Optimized
|
260 |
#####################################
|
261 |
def summarize_resume_text(resume_text, models):
|
262 |
"""
|
263 |
+
Generates a structured summary of the resume text - optimized for speed
|
|
|
264 |
"""
|
265 |
start_time = time.time()
|
266 |
|
267 |
summarizer = models['summarizer']
|
268 |
|
269 |
+
# First, generate a quick summary
|
270 |
max_input_length = 1024 # Model limit
|
271 |
|
272 |
+
# Only summarize the first portion of text for speed
|
273 |
+
text_to_summarize = resume_text[:min(len(resume_text), max_input_length)]
|
274 |
+
base_summary = summarizer(text_to_summarize)[0]['summary_text']
|
275 |
+
|
276 |
+
# Extract information in parallel where possible
|
277 |
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
278 |
+
# These can run in parallel
|
279 |
+
name_future = executor.submit(extract_name, resume_text[:500]) # Only use start of text
|
280 |
+
age_future = executor.submit(extract_age, resume_text)
|
281 |
+
industry_future = executor.submit(extract_industry, resume_text, base_summary)
|
282 |
+
skills_work_future = executor.submit(extract_skills_and_work, resume_text)
|
283 |
|
284 |
+
# Get results
|
285 |
+
name = name_future.result()
|
286 |
+
age = age_future.result()
|
287 |
+
industry = industry_future.result()
|
288 |
+
skills, work_experience = skills_work_future.result()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
289 |
|
290 |
# Format the structured summary
|
291 |
formatted_summary = f"Name: {name}\n"
|
|
|
299 |
return formatted_summary, execution_time
|
300 |
|
301 |
#####################################
|
302 |
+
# Function: Compare Candidate Summary to Company Prompt - Optimized
|
303 |
#####################################
|
304 |
+
@st.cache_data(show_spinner=False)
|
305 |
def compute_suitability(candidate_summary, company_prompt, models):
|
306 |
"""
|
307 |
Compute the similarity between candidate summary and company prompt.
|
|
|
311 |
|
312 |
feature_extractor = models['feature_extractor']
|
313 |
|
314 |
+
# Extract features (embeddings) - parallelize this
|
315 |
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
316 |
+
candidate_future = executor.submit(feature_extractor, candidate_summary)
|
317 |
+
company_future = executor.submit(feature_extractor, company_prompt)
|
318 |
+
|
319 |
+
candidate_features = candidate_future.result()
|
320 |
+
company_features = company_future.result()
|
321 |
|
322 |
# Convert to numpy arrays and flatten if needed
|
323 |
candidate_vec = np.mean(np.array(candidate_features[0]), axis=0)
|
|
|
331 |
return similarity, execution_time
|
332 |
|
333 |
#####################################
|
334 |
+
# Main Streamlit Interface - with Progress Reporting
|
335 |
#####################################
|
336 |
st.title("Resume Analyzer and Company Suitability Checker")
|
337 |
st.markdown(
|
|
|
353 |
help="Enter a detailed description of the company culture, role requirements, and desired skills.",
|
354 |
)
|
355 |
|
356 |
+
# Process button with optimized flow
|
357 |
# Process button with optimized flow.
# Runs only when a file is uploaded, a company description is entered,
# AND the button is clicked (st.button returns True for one rerun).
if uploaded_file is not None and company_prompt and st.button("Analyze Resume"):
    # Placeholders so progress/status can be updated in place, then cleared.
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Step 1: Extract text from the uploaded document.
    status_text.text("Step 1/3: Extracting text from resume...")
    resume_text = extract_text_from_file(uploaded_file)
    progress_bar.progress(25)

    # extract_text_from_file signals failure via sentinel strings rather
    # than exceptions, so match on them here.
    if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx, .doc, or .txt file.":
        st.error(resume_text)
    else:
        # Step 2: Generate the structured candidate summary.
        status_text.text("Step 2/3: Analyzing resume and generating summary...")
        summary, summarization_time = summarize_resume_text(resume_text, models)
        progress_bar.progress(75)

        # Display summary
        st.subheader("Candidate Summary")
        st.markdown(summary)
        st.info(f"Summary generated in {summarization_time:.2f} seconds")

        # Step 3: Compute similarity between summary and company prompt.
        status_text.text("Step 3/3: Calculating compatibility with company profile...")
        similarity_score, similarity_time = compute_suitability(summary, company_prompt, models)
        progress_bar.progress(100)

        # Clear status messages
        status_text.empty()

        # Display similarity score
        st.subheader("Suitability Assessment")
        st.markdown(f"**Matching Score:** {similarity_score:.2%}")
        st.info(f"Compatibility assessment completed in {similarity_time:.2f} seconds")

        # Provide interpretation: thresholds 0.85 / 0.70 / 0.50 map the
        # cosine-similarity score to a qualitative verdict.
        if similarity_score >= 0.85:
            st.success("Excellent match! This candidate's profile is strongly aligned with the company requirements.")
        elif similarity_score >= 0.70:
            st.success("Good match! This candidate shows strong potential for the position.")
        elif similarity_score >= 0.50:
            st.warning("Moderate match. The candidate meets some requirements but there may be gaps.")
        else:
            st.error("Low match. The candidate's profile may not align well with the requirements.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|