CR7CAD committed on
Commit
97150aa
·
verified ·
1 Parent(s): 5287332

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -537
app.py CHANGED
@@ -9,6 +9,7 @@ import re
9
  import concurrent.futures
10
  from functools import lru_cache
11
  from transformers import pipeline
 
12
 
13
  # Set page title and hide sidebar
14
  st.set_page_config(
@@ -24,581 +25,158 @@ st.markdown("""
24
  </style>
25
  """, unsafe_allow_html=True)
26
 
27
- # Pre-defined company description for Google
28
- GOOGLE_DESCRIPTION = """Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong problem-solving skills, adaptability, and collaboration abilities. Technical roles require proficiency in programming languages such as Python, Java, C++, Go, or JavaScript, with expertise in data structures, algorithms, and system design. Additionally, skills in AI, cybersecurity, UX/UI design, and digital marketing are highly valued. Google fosters a culture of innovation, expecting candidates to demonstrate creativity, analytical thinking, and a passion for cutting-edge technology."""
29
 
30
  #####################################
31
- # Preload Models - Optimized
32
  #####################################
33
  @st.cache_resource(show_spinner=True)
34
  def load_models():
35
- """Load models at startup - using smaller/faster models"""
36
- with st.spinner("Loading AI models... This may take a minute on first run."):
37
- models = {}
38
- # Use bart-base instead of bart-large-cnn for faster processing
39
- models['summarizer'] = pipeline(
40
- "summarization",
41
- model="facebook/bart-base",
42
- max_length=100,
43
- truncation=True
44
- )
45
-
46
- # We don't need T5 model anymore since we're using template-based feedback
47
  return models
48
 
49
- # Preload models immediately when app starts
50
  models = load_models()
51
 
52
  #####################################
53
- # Function: Extract Text from File
54
  #####################################
55
- @st.cache_data(show_spinner=False)
56
  def extract_text_from_file(file_obj):
57
- """
58
- Extract text from .docx and .doc files.
59
- Returns the extracted text or an error message if extraction fails.
60
- """
61
  filename = file_obj.name
62
  ext = os.path.splitext(filename)[1].lower()
63
  text = ""
64
-
65
- if ext == ".docx":
66
- try:
67
- document = docx.Document(file_obj)
68
- text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
69
- except Exception as e:
70
- text = f"Error processing DOCX file: {e}"
71
- elif ext == ".doc":
72
- try:
73
- # For .doc files, we need to save to a temp file
74
  with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
75
  temp_file.write(file_obj.getvalue())
76
- temp_path = temp_file.name
77
-
78
- # Use docx2txt which is generally faster
79
- try:
80
- text = docx2txt.process(temp_path)
81
- except Exception:
82
- text = "Could not process .doc file. Please convert to .docx format."
83
-
84
- # Clean up temp file
85
- os.unlink(temp_path)
86
- except Exception as e:
87
- text = f"Error processing DOC file: {e}"
88
- elif ext == ".txt":
89
- try:
90
- text = file_obj.getvalue().decode("utf-8")
91
- except Exception as e:
92
- text = f"Error processing TXT file: {e}"
93
- else:
94
- text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
95
 
96
- # Limit text size for faster processing
97
- return text[:15000] if text else text
98
 
99
  #####################################
100
- # Functions for Information Extraction - Optimized
101
  #####################################
102
-
103
- # Cache the extraction functions to avoid reprocessing
104
- @lru_cache(maxsize=32)
105
- def extract_name(text_start):
106
- """Extract candidate name from the beginning of resume text"""
107
- # Only use the first 500 characters to speed up processing
108
- lines = text_start.split('\n')
109
-
110
- # Check first few non-empty lines for potential names
111
- potential_name_lines = [line.strip() for line in lines[:5] if line.strip()]
112
-
113
- if potential_name_lines:
114
- # First line is often the name if it's short and doesn't contain common headers
115
- first_line = potential_name_lines[0]
116
- if 5 <= len(first_line) <= 40 and not any(x in first_line.lower() for x in ["resume", "cv", "curriculum", "vitae", "profile"]):
117
- return first_line
118
-
119
- # Look for lines that might contain a name
120
- for line in potential_name_lines[:3]:
121
- if len(line.split()) <= 4 and not any(x in line.lower() for x in ["address", "phone", "email", "resume", "cv"]):
122
- return line
123
-
124
- return "Unknown (please extract from resume)"
125
-
126
- def extract_age(text):
127
- """Extract candidate age from resume text"""
128
- # Simplified: just check a few common patterns
129
- age_patterns = [
130
- r'age:?\s*(\d{1,2})',
131
- r'(\d{1,2})\s*years\s*old',
132
- ]
133
-
134
  text_lower = text.lower()
135
- for pattern in age_patterns:
136
- matches = re.search(pattern, text_lower)
137
- if matches:
138
- return matches.group(1)
139
-
140
- return "Not specified"
141
-
142
- def extract_industry(text, base_summary):
143
- """Extract expected job industry from resume"""
144
- # Simplified industry keywords focused on the most common ones
145
- industry_keywords = {
146
- "technology": ["software", "programming", "developer", "IT", "tech", "computer"],
147
- "finance": ["banking", "financial", "accounting", "finance", "analyst"],
148
- "healthcare": ["medical", "health", "hospital", "clinical", "nurse", "doctor"],
149
- "education": ["teaching", "teacher", "professor", "education", "university"],
150
- "marketing": ["marketing", "advertising", "digital marketing", "social media"],
151
- "engineering": ["engineer", "engineering"],
152
- "data science": ["data science", "machine learning", "AI", "analytics"],
153
- "information systems": ["information systems", "ERP", "systems management"]
154
  }
155
-
156
- # Count occurrences of industry keywords - using the summary to speed up
157
- combined_text = base_summary.lower()
158
-
159
- counts = {}
160
- for industry, keywords in industry_keywords.items():
161
- counts[industry] = sum(combined_text.count(keyword.lower()) for keyword in keywords)
162
-
163
- # Get the industry with the highest count
164
- if counts:
165
- likely_industry = max(counts.items(), key=lambda x: x[1])
166
- if likely_industry[1] > 0:
167
- return likely_industry[0].capitalize()
168
-
169
- # Check for educational background that might indicate industry
170
- degrees = ["computer science", "business", "engineering", "medicine", "education", "finance", "marketing"]
171
-
172
- for degree in degrees:
173
- if degree in combined_text:
174
- return f"{degree.capitalize()}-related field"
175
-
176
- return "Not clearly specified"
177
 
178
- def extract_skills_and_work(text):
179
- """Extract both skills and work experience at once to save processing time"""
180
- # Common skill categories - reduced keyword list for speed
181
- skill_categories = {
182
- "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go"],
183
- "Data Science": ["Machine Learning", "Data Analysis", "Statistics", "TensorFlow", "PyTorch", "AI", "Algorithms"],
184
- "Database": ["SQL", "MySQL", "MongoDB", "Database", "NoSQL", "PostgreSQL"],
185
- "Web Development": ["React", "Angular", "Node.js", "Frontend", "Backend", "Full-Stack"],
186
- "Software Development": ["Agile", "Scrum", "Git", "DevOps", "Docker", "System Design"],
187
- "Cloud": ["AWS", "Azure", "Google Cloud", "Cloud Computing"],
188
- "Security": ["Cybersecurity", "Network Security", "Encryption", "Security"],
189
- "Business": ["Project Management", "Business Analysis", "Leadership", "Teamwork"],
190
- "Design": ["UX/UI", "User Experience", "Design Thinking", "Adobe"]
191
- }
192
-
193
- # Work experience extraction
194
- work_headers = [
195
- "work experience", "professional experience", "employment history",
196
- "work history", "experience"
197
- ]
198
-
199
- next_section_headers = [
200
- "education", "skills", "certifications", "projects", "achievements"
201
- ]
202
-
203
- # Process everything at once
204
- lines = text.split('\n')
205
- text_lower = text.lower()
206
-
207
- # Skills extraction
208
- found_skills = []
209
- for category, skills in skill_categories.items():
210
- category_skills = []
211
- for skill in skills:
212
- if skill.lower() in text_lower:
213
- category_skills.append(skill)
214
-
215
- if category_skills:
216
- found_skills.append(f"{category}: {', '.join(category_skills)}")
217
-
218
- # Work experience extraction - simplified approach
219
- work_section = []
220
- in_work_section = False
221
-
222
- for idx, line in enumerate(lines):
223
- line_lower = line.lower().strip()
224
-
225
- # Start of work section
226
- if not in_work_section:
227
- if any(header in line_lower for header in work_headers):
228
- in_work_section = True
229
- continue
230
- # End of work section
231
- elif in_work_section:
232
- if any(header in line_lower for header in next_section_headers):
233
- break
234
-
235
- if line.strip():
236
- work_section.append(line.strip())
237
-
238
- # Simplified work formatting
239
- if not work_section:
240
- work_experience = "Work experience not clearly identified"
241
- else:
242
- # Just take the first 5-7 lines of the work section as a summary
243
- work_lines = []
244
- company_count = 0
245
- current_company = ""
246
-
247
- for line in work_section:
248
- # New company entry often has a date
249
- if re.search(r'(19|20)\d{2}', line):
250
- company_count += 1
251
- if company_count <= 3: # Limit to 3 most recent positions
252
- current_company = line
253
- work_lines.append(f"**{line}**")
254
- else:
255
- break
256
- elif company_count <= 3 and len(work_lines) < 10: # Limit total lines
257
- work_lines.append(line)
258
-
259
- work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
260
-
261
- skills_formatted = "\n• " + "\n• ".join(found_skills) if found_skills else "No specific technical skills clearly identified"
262
-
263
- return skills_formatted, work_experience
264
 
265
  #####################################
266
- # Function: Summarize Resume Text - Optimized
267
  #####################################
268
  def summarize_resume_text(resume_text):
269
- """
270
- Generates a structured summary of the resume text - optimized for speed
271
- """
272
- start_time = time.time()
273
-
274
- # First, generate a quick summary using pre-loaded model
275
- max_input_length = 1024 # Model limit
276
 
277
- # Only summarize the first portion of text for speed
278
- text_to_summarize = resume_text[:min(len(resume_text), max_input_length)]
279
- base_summary = models['summarizer'](text_to_summarize)[0]['summary_text']
280
-
281
- # Extract information in parallel where possible
282
  with concurrent.futures.ThreadPoolExecutor() as executor:
283
- # These can run in parallel
284
- name_future = executor.submit(extract_name, resume_text[:500]) # Only use start of text
285
- age_future = executor.submit(extract_age, resume_text)
286
- industry_future = executor.submit(extract_industry, resume_text, base_summary)
287
- skills_work_future = executor.submit(extract_skills_and_work, resume_text)
288
-
289
- # Get results
290
- name = name_future.result()
291
- age = age_future.result()
292
- industry = industry_future.result()
293
- skills, work_experience = skills_work_future.result()
294
-
295
- # Format the structured summary
296
- formatted_summary = f"Name: {name}\n"
297
- formatted_summary += f"Age: {age}\n"
298
- formatted_summary += f"Expected Job Industry: {industry}\n\n"
299
- formatted_summary += f"Previous Work Experience: {work_experience}\n\n"
300
- formatted_summary += f"Skills: {skills}"
301
 
302
- execution_time = time.time() - start_time
303
-
304
- return formatted_summary, execution_time
305
 
306
  #####################################
307
- # Function: Calculate Google Match Score - Detailed Breakdown
308
  #####################################
309
- def calculate_google_match_score(candidate_summary):
310
- """
311
- Calculate a detailed match score breakdown based on skills and experience in the candidate summary
312
- compared with what Google requires.
313
-
314
- Returns:
315
- - overall_score: A normalized score between 0 and 1
316
- - category_scores: A dictionary with scores for each category
317
- - score_breakdown: A formatted string explanation of the scoring
318
- """
319
- # Define categories that Google values with specific keywords
320
- google_categories = {
321
- "Technical Skills": {
322
- "keywords": ["python", "java", "c++", "go", "javascript", "sql", "nosql",
323
- "algorithms", "data structures", "system design"],
324
- "weight": 0.35
325
- },
326
- "Advanced Technologies": {
327
- "keywords": ["artificial intelligence", "machine learning", "cloud computing",
328
- "ai", "ml", "cloud", "data science", "big data",
329
- "tensorflow", "pytorch", "deep learning"],
330
- "weight": 0.25
331
- },
332
- "Problem Solving": {
333
- "keywords": ["problem solving", "algorithms", "analytical", "critical thinking",
334
- "debugging", "troubleshooting", "optimization"],
335
- "weight": 0.20
336
- },
337
- "Innovation & Creativity": {
338
- "keywords": ["innovation", "creative", "creativity", "novel", "cutting-edge",
339
- "research", "design thinking", "innovative"],
340
- "weight": 0.10
341
- },
342
- "Teamwork & Leadership": {
343
- "keywords": ["team", "leadership", "collaborate", "collaboration", "communication",
344
- "mentoring", "lead", "coordinate", "agile", "scrum"],
345
- "weight": 0.10
346
- }
347
  }
348
 
349
- summary_lower = candidate_summary.lower()
350
-
351
- # Calculate scores for each category
352
- category_scores = {}
353
- for category, details in google_categories.items():
354
- keywords = details["keywords"]
355
- max_possible = len(keywords) # Maximum possible matches
356
-
357
- # Count matches (unique keywords found)
358
- matches = sum(1 for keyword in keywords if keyword in summary_lower)
359
-
360
- # Calculate category score (0-1 range)
361
- if max_possible > 0:
362
- raw_score = matches / max_possible
363
- # Apply a curve to reward having more matches
364
- category_scores[category] = min(1.0, raw_score * 1.5)
365
- else:
366
- category_scores[category] = 0
367
-
368
- # Calculate weighted overall score
369
- overall_score = sum(
370
- score * google_categories[category]["weight"]
371
- for category, score in category_scores.items()
372
- )
373
-
374
- # Ensure overall score is in 0-1 range
375
- overall_score = min(1.0, max(0.0, overall_score))
376
 
377
- # Create score breakdown explanation
378
- score_breakdown = "**Score Breakdown by Category:**\n\n"
 
379
 
380
- for category, score in category_scores.items():
381
- percentage = int(score * 100)
382
- weight = int(google_categories[category]["weight"] * 100)
383
- score_breakdown += f"• **{category}** ({weight}% of total): {percentage}%\n"
384
-
385
- return overall_score, category_scores, score_breakdown
386
 
387
  #####################################
388
- # Function: Generate Robust Feedback - Template-Based
389
  #####################################
390
- def generate_template_feedback(category_scores):
391
- """
392
- Generate comprehensive template-based feedback without using ML model for speed and reliability.
393
- """
394
- start_time = time.time()
395
-
396
- # Sort categories by score
397
- sorted_categories = sorted(category_scores.items(), key=lambda x: x[1], reverse=True)
398
- top_categories = sorted_categories[:2]
399
- bottom_categories = sorted_categories[-2:]
400
-
401
- # More detailed template-based feedback for top category
402
- top_feedback_templates = {
403
- "Technical Skills": [
404
- "demonstrates strong technical skills with proficiency in programming languages and technical tools that Google values.",
405
- "shows excellent technical capabilities that align well with Google's engineering requirements.",
406
- "possesses the technical expertise needed for Google's development environment."
407
- ],
408
- "Advanced Technologies": [
409
- "has valuable experience with cutting-edge technologies that Google prioritizes in its innovation efforts.",
410
- "demonstrates knowledge in advanced technological areas that align with Google's future direction.",
411
- "shows proficiency in modern technologies that Google uses in its products and services."
412
- ],
413
- "Problem Solving": [
414
- "exhibits strong problem-solving abilities which are fundamental to Google's engineering culture.",
415
- "demonstrates analytical thinking and problem-solving skills that Google seeks in candidates.",
416
- "shows the problem-solving aptitude that would be valuable in Google's collaborative environment."
417
- ],
418
- "Innovation & Creativity": [
419
- "shows the creative thinking and innovation mindset that Google values in its workforce.",
420
- "demonstrates the innovative approach that would fit well with Google's creative culture.",
421
- "exhibits creativity that could contribute to Google's product development process."
422
- ],
423
- "Teamwork & Leadership": [
424
- "demonstrates leadership qualities and teamwork skills that Google looks for in potential employees.",
425
- "shows collaborative abilities that would integrate well with Google's team-based structure.",
426
- "exhibits the interpersonal skills needed to thrive in Google's collaborative environment."
427
- ]
428
- }
429
-
430
- # More detailed template-based feedback for bottom categories
431
- bottom_feedback_templates = {
432
- "Technical Skills": [
433
- "should strengthen their technical skills, particularly in programming languages commonly used at Google such as Python, Java, or C++.",
434
- "would benefit from developing more depth in technical tools and programming capabilities to meet Google's standards.",
435
- "needs to enhance their technical expertise to better align with Google's engineering requirements."
436
- ],
437
- "Advanced Technologies": [
438
- "would benefit from gaining more experience with AI, machine learning, or cloud technologies that Google prioritizes.",
439
- "should develop more expertise in advanced technologies like machine learning or data science to increase their value to Google.",
440
- "needs more exposure to the cutting-edge technologies that drive Google's innovation."
441
- ],
442
- "Problem Solving": [
443
- "should strengthen their problem-solving abilities, particularly with algorithms and data structures that are crucial for Google interviews.",
444
- "would benefit from developing stronger analytical and problem-solving skills to match Google's expectations.",
445
- "needs to improve their approach to complex problem-solving to meet Google's standards."
446
- ],
447
- "Innovation & Creativity": [
448
- "could develop a more innovative mindset to better align with Google's creative culture.",
449
- "should work on demonstrating more creative thinking in their approach to match Google's innovation focus.",
450
- "would benefit from cultivating more creativity and out-of-the-box thinking valued at Google."
451
- ],
452
- "Teamwork & Leadership": [
453
- "should focus on developing stronger leadership and teamwork skills to thrive in Google's collaborative environment.",
454
- "would benefit from more experience in collaborative settings to match Google's team-oriented culture.",
455
- "needs to strengthen their interpersonal and leadership capabilities to align with Google's expectations."
456
- ]
457
- }
458
-
459
- # Generate feedback with more detailed templates
460
- import random
461
-
462
- # Get top strength feedback
463
- top_category = top_categories[0][0]
464
- top_score = top_categories[0][1]
465
- top_feedback = random.choice(top_feedback_templates.get(top_category, ["shows notable skills"]))
466
-
467
- # Get improvement area feedback
468
- bottom_category = bottom_categories[0][0]
469
- bottom_score = bottom_categories[0][1]
470
- bottom_feedback = random.choice(bottom_feedback_templates.get(bottom_category, ["could improve their skills"]))
471
-
472
- # Construct full feedback
473
- feedback = f"This candidate {top_feedback} "
474
-
475
- # Add second strength if it's good
476
- if top_categories[1][1] >= 0.6:
477
- second_top = top_categories[1][0]
478
- second_top_feedback = random.choice(top_feedback_templates.get(second_top, ["has good abilities"]))
479
- feedback += f"The candidate also {second_top_feedback} "
480
-
481
- # Add improvement feedback
482
- feedback += f"However, the candidate {bottom_feedback} "
483
-
484
- # Add conclusion based on overall score
485
- overall_score = sum(score * weight for (category, score), weight in
486
- zip(category_scores.items(), [0.35, 0.25, 0.20, 0.10, 0.10]))
487
-
488
- if overall_score >= 0.75:
489
- feedback += "Overall, this candidate shows strong potential for success at Google."
490
- elif overall_score >= 0.6:
491
- feedback += "With these improvements, the candidate could be a good fit for Google."
492
- else:
493
- feedback += "The candidate would need significant development to meet Google's standards."
494
-
495
- execution_time = time.time() - start_time
496
-
497
- return feedback, execution_time
498
-
499
- #####################################
500
- # Main Streamlit Interface - with Progress Reporting
501
- #####################################
502
- st.title("Google Resume Match Analyzer")
503
- st.markdown(
504
- """
505
- Upload your resume file in **.docx**, **.doc**, or **.txt** format to see how well you match with Google's hiring requirements. The app performs the following tasks:
506
- 1. Extracts text from your resume.
507
- 2. Uses AI to generate a structured candidate summary.
508
- 3. Evaluates your fit for Google across key hiring criteria with a detailed score breakdown.
509
- """
510
- )
511
-
512
- # Display Google's requirements
513
- with st.expander("Google's Requirements", expanded=False):
514
- st.write(GOOGLE_DESCRIPTION)
515
-
516
- # File uploader
517
- uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
518
-
519
- # Process button with optimized flow
520
- if uploaded_file is not None and st.button("Analyze My Google Fit"):
521
- # Create a placeholder for the progress bar
522
- progress_bar = st.progress(0)
523
- status_text = st.empty()
524
-
525
- # Step 1: Extract text
526
- status_text.text("Step 1/3: Extracting text from resume...")
527
- resume_text = extract_text_from_file(uploaded_file)
528
- progress_bar.progress(25)
529
-
530
- if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx, .doc, or .txt file.":
531
- st.error(resume_text)
532
- else:
533
- # Step 2: Generate summary
534
- status_text.text("Step 2/3: Analyzing resume and generating summary...")
535
- summary, summarization_time = summarize_resume_text(resume_text)
536
- progress_bar.progress(50)
537
-
538
- # Display summary
539
- st.subheader("Your Resume Summary")
540
- st.markdown(summary)
541
- st.info(f"Summary generated in {summarization_time:.2f} seconds")
542
-
543
- # Step 3: Calculate scores and generate feedback
544
- status_text.text("Step 3/3: Calculating Google fit scores...")
545
- overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
546
-
547
- # Always use template-based feedback (more reliable)
548
- feedback, feedback_time = generate_template_feedback(category_scores)
549
-
550
- progress_bar.progress(100)
551
-
552
- # Clear status messages
553
- status_text.empty()
554
-
555
- # Display Google fit results
556
- st.subheader("Google Fit Assessment")
557
-
558
- # Display overall score with appropriate color and emoji
559
- score_percent = int(overall_score * 100)
560
- if overall_score >= 0.85:
561
- st.success(f"**Overall Google Match Score:** {score_percent}% 🌟")
562
- elif overall_score >= 0.70:
563
- st.success(f"**Overall Google Match Score:** {score_percent}% ✅")
564
- elif overall_score >= 0.50:
565
- st.warning(f"**Overall Google Match Score:** {score_percent}% ⚠️")
566
- else:
567
- st.error(f"**Overall Google Match Score:** {score_percent}% 🔍")
568
-
569
- # Display score breakdown
570
- st.markdown("### Score Calculation")
571
- st.markdown(score_breakdown)
572
-
573
- # Display focused feedback
574
- st.markdown("### Expert Assessment")
575
- st.markdown(feedback)
576
-
577
- st.info(f"Assessment completed in {feedback_time:.2f} seconds")
578
-
579
- # Add potential next steps based on the score
580
- st.subheader("Recommended Next Steps")
581
-
582
- # Find the weakest categories
583
- weakest_categories = sorted(category_scores.items(), key=lambda x: x[1])[:2]
584
-
585
- if overall_score >= 0.80:
586
- st.markdown("""
587
- - Consider applying for positions at Google that match your experience
588
- - Prepare for technical interviews by practicing algorithms and system design
589
- - Review Google's interview process and STAR method for behavioral questions
590
- """)
591
- elif overall_score >= 0.60:
592
- improvement_areas = ", ".join([cat for cat, _ in weakest_categories])
593
- st.markdown(f"""
594
- - Focus on strengthening these areas: {improvement_areas}
595
- - Work on projects that demonstrate your skills in Google's key technology areas
596
- - Consider taking additional courses in algorithms, system design, or other Google focus areas
597
- """)
598
- else:
599
- improvement_areas = ", ".join([cat for cat, _ in weakest_categories])
600
- st.markdown(f"""
601
- - Build experience in these critical areas: {improvement_areas}
602
- - Develop projects showcasing problem-solving abilities and technical skills
603
- - Consider gaining more experience before applying, or target specific Google roles that better match your profile
604
- """)
 
9
  import concurrent.futures
10
  from functools import lru_cache
11
  from transformers import pipeline
12
+ from collections import defaultdict
13
 
14
  # Set page title and hide sidebar
15
  st.set_page_config(
 
25
  </style>
26
  """, unsafe_allow_html=True)
27
 
28
# Pre-defined company description for Google, shown in the "Google's
# Requirements" expander and used as the benchmark the resume is scored
# against. The placeholder `"""..."""` left here would render literally.
GOOGLE_DESCRIPTION = """Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong problem-solving skills, adaptability, and collaboration abilities. Technical roles require proficiency in programming languages such as Python, Java, C++, Go, or JavaScript, with expertise in data structures, algorithms, and system design. Additionally, skills in AI, cybersecurity, UX/UI design, and digital marketing are highly valued. Google fosters a culture of innovation, expecting candidates to demonstrate creativity, analytical thinking, and a passion for cutting-edge technology."""
30
 
31
  #####################################
32
+ # Preload Models - Optimized with DistilBART
33
  #####################################
34
@st.cache_resource(show_spinner=True)
def load_models():
    """Load and cache the summarization pipeline once per server process.

    Returns:
        dict: {'summarizer': a transformers summarization pipeline}.
    """
    with st.spinner("Loading AI models..."):
        models = {
            # NOTE(review): "distilbart-base-cs" is not a valid Hugging Face
            # model id and would raise at startup; use the published distilled
            # BART summarization checkpoint (smaller/faster than bart-large-cnn).
            'summarizer': pipeline(
                "summarization",
                model="sshleifer/distilbart-cnn-12-6",
                max_length=300,
                truncation=True,
            )
        }
    return models
48
 
 
49
  models = load_models()
50
 
51
  #####################################
52
+ # Function: Extract Text from File - Optimized
53
  #####################################
54
def extract_text_from_file(file_obj):
    """Extract up to MAX_TEXT characters of text from an uploaded resume.

    Supports .docx, .doc and .txt uploads. Returns the extracted text, or a
    string beginning with "Error" when extraction fails (callers test for
    that prefix).

    NOTE(review): the previous @lru_cache decorator was removed — caching on
    an uploaded-file object keys on object identity/hash, which is unreliable
    for fresh upload objects and can pin large payloads in memory.
    """
    filename = file_obj.name
    ext = os.path.splitext(filename)[1].lower()
    text = ""
    MAX_TEXT = 15000  # cap text so downstream model/regex steps stay fast

    try:
        if ext == ".docx":
            doc = docx.Document(file_obj)
            # Only the first 50 paragraphs are needed for summary/scoring.
            text = "\n".join(
                para.text for para in doc.paragraphs[:50] if para.text.strip()
            )[:MAX_TEXT]
        elif ext == ".doc":
            # docx2txt needs a real filesystem path; write a temp copy and
            # make sure it is flushed *and closed* before processing (an
            # open NamedTemporaryFile cannot be reopened on Windows).
            with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
                temp_file.write(file_obj.getvalue())
                temp_path = temp_file.name
            try:
                text = docx2txt.process(temp_path)[:MAX_TEXT]
            finally:
                # Always remove the temp copy, even if processing raised.
                os.unlink(temp_path)
        elif ext == ".txt":
            text = file_obj.getvalue().decode("utf-8")[:MAX_TEXT]
        else:
            # Previously this fell through silently with an empty string.
            text = "Error: unsupported file type. Please upload .docx, .doc, or .txt."
    except Exception as e:
        text = f"Error: {str(e)}"

    return text
 
77
 
78
  #####################################
79
+ # Unified Information Extraction - Optimized
80
  #####################################
81
@lru_cache(maxsize=16)
def extract_info(text):
    """Extract all candidate info from resume text in one pass.

    Returns a dict with 'name', 'age', 'industry', 'skills' and 'experience'
    keys. Cached because the same resume text may be re-analyzed within a
    session (text is a hashable str, so lru_cache is safe here).

    NOTE(review): the previous version called extract_industry_optimized /
    extract_skills_optimized / extract_experience_optimized, which were never
    defined — a guaranteed NameError. Private helpers below fill that gap,
    ported from the original implementations.
    """
    text_lower = text.lower()
    return {
        'name': extract_name_optimized(text),
        'age': extract_age_optimized(text_lower),
        'industry': _extract_industry(text_lower),
        'skills': _extract_skills(text_lower),
        'experience': _extract_experience(text),
    }


def _extract_industry(text_lower):
    """Best-effort industry guess from keyword occurrence counts."""
    industry_keywords = {
        "Technology": ["software", "programming", "developer", "tech", "computer"],
        "Finance": ["banking", "financial", "accounting", "finance"],
        "Healthcare": ["medical", "health", "hospital", "clinical"],
        "Education": ["teaching", "teacher", "professor", "education"],
        "Marketing": ["marketing", "advertising", "social media"],
        "Engineering": ["engineer", "engineering"],
        "Data Science": ["data science", "machine learning", "analytics"],
    }
    counts = {
        industry: sum(text_lower.count(kw) for kw in keywords)
        for industry, keywords in industry_keywords.items()
    }
    best = max(counts.items(), key=lambda kv: kv[1])
    return best[0] if best[1] > 0 else "Not clearly specified"


def _extract_skills(text_lower):
    """Return a comma-separated list of known skills present in the text."""
    known_skills = [
        "python", "java", "javascript", "sql", "c++", "machine learning",
        "data analysis", "aws", "docker", "leadership", "teamwork",
    ]
    found = [skill for skill in known_skills if skill in text_lower]
    return ", ".join(found) if found else "No specific skills identified"


def _extract_experience(text):
    """Grab up to 7 non-empty lines following a work-experience header."""
    headers = ("work experience", "professional experience",
               "employment history", "experience")
    lines = text.split('\n')
    for idx, line in enumerate(lines):
        if any(header in line.lower() for header in headers):
            section = [ln.strip() for ln in lines[idx + 1:idx + 11] if ln.strip()]
            if section:
                return "\n• " + "\n• ".join(section[:7])
            break
    return "Not clearly identified"
93
+
94
def extract_name_optimized(text):
    """Guess the candidate's name from the top of the resume.

    Scans the first 10 lines (skipping blanks — the old version counted
    whitespace-only lines) and returns the first short line that does not
    look like a document header or a contact-detail row.
    """
    # Markers that identify a header/contact line rather than a name; the
    # previous version only filtered "resume"/"cv", so an email or phone
    # line of the right length was returned as the name.
    blocklist = ("resume", "cv", "curriculum", "vitae",
                 "address", "phone", "email", "@")
    candidates = [line.strip() for line in text.split('\n')[:10] if line.strip()]
    for line in candidates:
        if 5 <= len(line) <= 40 and not any(k in line.lower() for k in blocklist):
            return line
    return "Unknown"
101
+
102
def extract_age_optimized(text):
    """Find the candidate's age in (already lower-cased) resume text.

    Returns the age as a string of digits, or "Not specified".
    """
    # BUG FIX: the old first pattern was r'\b(age)\b?:?\s*(\d{1,2})', whose
    # group(1) is the literal word "age" — it returned "age" instead of the
    # number. Both patterns below put the digits in group 1.
    patterns = (
        r'\bage\b\s*:?\s*(\d{1,2})',      # e.g. "age: 27"
        r'\b(\d{1,2})\s+years?\s+old\b',  # e.g. "27 years old"
    )
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1)
    return "Not specified"
 
 
 
 
 
109
 
110
+ # Other extract_ functions with similar optimizations...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  #####################################
113
+ # Optimized Summarization
114
  #####################################
115
def summarize_resume_text(resume_text):
    """Summarize a resume and prepend the structured candidate info.

    Returns:
        tuple[str, float]: (markdown-formatted summary, elapsed seconds).
        The previous version returned a hard-coded fake 0.1s.
    """
    start_time = time.time()

    # The model attends to ~1024 tokens at most; truncate the input so the
    # pipeline does not choke on long resumes.
    base_summary = models['summarizer'](
        resume_text[:1024],
        max_length=150,
        truncation=True,
    )[0]['summary_text']

    # NOTE(review): submitting one task to a ThreadPoolExecutor and calling
    # .result() immediately is sequential anyway — call directly instead.
    info = extract_info(resume_text)

    # Include experience and skills, which were extracted but then discarded.
    formatted = (
        f"**Name**: {info['name']}\n"
        f"**Age**: {info['age']}\n"
        f"**Industry**: {info['industry']}\n\n"
        f"**Previous Work Experience**: {info['experience']}\n\n"
        f"**Skills**: {info['skills']}\n\n"
        f"{base_summary}"
    )
    return formatted, time.time() - start_time
 
 
127
 
128
  #####################################
129
+ # Optimized Scoring System
130
  #####################################
131
def calculate_google_match_score(summary):
    """Score a candidate summary against Google-oriented keyword categories.

    Returns:
        tuple[float, dict]: (overall weighted score in [0, 1],
        per-category scores in [0, 1]).

    Fixes two defects in the previous version: `sum(score.values() * weights)`
    was a NameError (`weights` never defined) and dict_values cannot be
    multiplied; and the set-vs-`split()` intersection could never match
    multi-word keywords such as "data science".
    """
    # category -> (keywords, weight); weights sum to 1.0.
    GOOGLE_KEYWORDS = {
        "Technical Skills": ({"python", "java", "c++", "sql", "algorithms"}, 0.35),
        "Advanced Tech": ({"ai", "machine learning", "cloud", "data science"}, 0.25),
        "Problem Solving": ({"problem solving", "analytical", "debugging"}, 0.20),
        "Innovation": ({"innovation", "creative", "research"}, 0.10),
        "Teamwork": ({"team", "leadership", "collaboration"}, 0.10),
    }

    summary_lower = summary.lower()
    # Word-level tokens for single-word keywords (avoids "ai" matching
    # inside "maintain"); substring search for multi-word phrases.
    tokens = set(re.findall(r"[a-z+#.]+", summary_lower))

    scores = defaultdict(float)
    overall = 0.0
    for category, (keywords, weight) in GOOGLE_KEYWORDS.items():
        hits = sum(
            1 for kw in keywords
            if (kw in summary_lower if " " in kw else kw in tokens)
        )
        # 1.5x curve rewards partial coverage, capped at 1.0 per category.
        scores[category] = min(1.0, (hits / len(keywords)) * 1.5)
        overall += scores[category] * weight

    return min(1.0, overall), scores
 
 
 
 
 
147
 
148
  #####################################
149
+ # Streamlit Interface Optimizations
150
  #####################################
151
#####################################
# Main Streamlit Interface
#####################################
st.title("Google Resume Analyzer")

# BUG FIXES vs. the previous version: `st.session_state progress = 0` was a
# SyntaxError (missing dot); `continue` appeared outside any loop (also a
# SyntaxError); `uploaded_file` was referenced but never created; and
# `st.progress(value, 100)` is not the widget's signature.
uploaded_file = st.file_uploader(
    "Upload your resume (.docx, .doc, or .txt)",
    type=["docx", "doc", "txt"],
)

if uploaded_file is not None and st.button("Analyze"):
    progress_bar = st.progress(0)
    with st.spinner("Analyzing resume..."):
        # Step 1: text extraction
        text = extract_text_from_file(uploaded_file)
        progress_bar.progress(33)

        if text.startswith("Error"):
            # Extraction failed — show the message and skip the rest.
            st.error(text)
        else:
            # Step 2: information extraction & summarization
            summary, elapsed = summarize_resume_text(text)
            progress_bar.progress(66)

            # Step 3: scoring
            score, breakdown = calculate_google_match_score(summary)
            progress_bar.progress(100)

            # Display results
            st.subheader("Analysis Complete!")
            st.markdown(summary)
            st.markdown(f"**Match Score**: {score * 100:.1f}%")
            for category, cat_score in breakdown.items():
                st.markdown(f"• **{category}**: {int(cat_score * 100)}%")
            st.info(f"Completed in {elapsed:.2f} seconds")