import os
import io
import streamlit as st
import docx
import docx2txt
import tempfile
import time
import re
import concurrent.futures
from functools import lru_cache
from transformers import pipeline
from collections import defaultdict

# Set page title and hide sidebar
st.set_page_config(
    page_title="Resume-Google Job Match Analyzer",
    initial_sidebar_state="collapsed"
)

# Hide sidebar completely with custom CSS
st.markdown("""
<style>
    [data-testid="collapsedControl"] {display: none;}
    section[data-testid="stSidebar"] {display: none;}
</style>
""", unsafe_allow_html=True)

# Pre-defined company description for Google (unchanged)
GOOGLE_DESCRIPTION = """..."""  # Keep your original content here

#####################################
# Preload Models - Optimized with DistilBART
#####################################
@st.cache_resource(show_spinner=True)
def load_models():
    """Load optimized models at startup"""
    with st.spinner("Loading AI models..."):
        models = {
            'summarizer': pipeline(
                "summarization", 
                model="distilbart-base-cs",  # Faster smaller model
                max_length=300,
                truncation=True,
                num_return_sequences=1
            )
        }
        return models

models = load_models()

#####################################
# Function: Extract Text from File - Optimized
#####################################
# Note: no lru_cache here. Streamlit creates a fresh UploadedFile object on
# every rerun, so caching on the object never hits and only pins memory.
def extract_text_from_file(file_obj):
    """Optimized text extraction with early exit"""
    filename = file_obj.name
    ext = os.path.splitext(filename)[1].lower()
    text = ""
    MAX_TEXT = 15000  # Reduced processing limit
    
    try:
        if ext == ".docx":
            doc = docx.Document(file_obj)
            text = "\n".join(para.text for para in doc.paragraphs[:50] if para.text.strip())[:MAX_TEXT]
        elif ext == ".doc":
            with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
                temp_file.write(file_obj.getvalue())
                text = docx2txt.process(temp_file.name)[:MAX_TEXT]
                os.unlink(temp_file.name)
        elif ext == ".txt":
            text = file_obj.getvalue().decode("utf-8")[:MAX_TEXT]
    except Exception as e:
        text = f"Error: {str(e)}"
    
    return text
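
# Sketch of a rerun-safe cache keyed on the file's bytes rather than on the
# UploadedFile object itself (hypothetical helper, not in the original script;
# assumes the caller passes uploaded_file.getvalue() and uploaded_file.name).
@st.cache_data(max_entries=16, show_spinner=False)
def extract_text_cached(file_bytes: bytes, filename: str) -> str:
    buf = io.BytesIO(file_bytes)
    buf.name = filename  # extract_text_from_file reads .name for the extension
    return extract_text_from_file(buf)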

#####################################
# Unified Information Extraction - Optimized
#####################################
@lru_cache(maxsize=16, typed=False)
def extract_info(text):
    """Combined extraction of all candidate info in one pass"""
    text_lower = text.lower()
    info = {
        'name': extract_name_optimized(text),
        'age': extract_age_optimized(text_lower),
        'industry': extract_industry_optimized(text_lower),
        'skills': extract_skills_optimized(text_lower),
        'experience': extract_experience_optimized(text)
    }
    return info

def extract_name_optimized(text):
    """Faster name extraction with reduced checks"""
    for line in text.split('\n')[:10]:
        candidate = line.strip()
        if 5 <= len(candidate) <= 40 and not any(kw in candidate.lower() for kw in ("resume", "cv")):
            return candidate
    return "Unknown"

def extract_age_optimized(text):
    """Simplified age pattern matching"""
    # Capture group 1 holds the digits in both patterns
    patterns = [r'\bage\b\s*:?\s*(\d{1,2})', r'\b(\d{1,2})\s+years?\s+old\b']
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1)
    return "Not specified"

# Other extract_ functions follow with similar optimizations; since the
# originals were elided, minimal placeholder sketches are provided below.
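
# The three extractors below are minimal sketches added so that extract_info()
# runs end to end. Their keyword lists and patterns are illustrative
# assumptions, not the original author's logic.
def extract_industry_optimized(text_lower):
    """Best-effort industry guess from a tiny keyword map (placeholder)."""
    INDUSTRY_KEYWORDS = {
        "software": "Technology",
        "finance": "Finance",
        "marketing": "Marketing",
        "healthcare": "Healthcare",
    }
    for keyword, industry in INDUSTRY_KEYWORDS.items():
        if keyword in text_lower:
            return industry
    return "Not specified"

def extract_skills_optimized(text_lower):
    """Intersect the text with a small, illustrative skill vocabulary (placeholder)."""
    SKILLS = {"python", "java", "c++", "sql", "excel", "communication"}
    return sorted(skill for skill in SKILLS if skill in text_lower)

def extract_experience_optimized(text):
    """Pull the first 'N years' mention as a rough experience signal (placeholder)."""
    match = re.search(r'(\d{1,2})\+?\s+years', text.lower())
    return f"{match.group(1)} years" if match else "Not specified"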

#####################################
# Optimized Summarization
#####################################
def summarize_resume_text(resume_text):
    """Faster summarization with input truncation"""
    start_time = time.time()

    # Submit the rule-based extraction first so it runs in a worker thread
    # while the summarizer occupies the main thread
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        future_info = executor.submit(extract_info, resume_text)
        base_summary = models['summarizer'](
            resume_text[:1024],
            max_length=150,
            truncation=True
        )[0]['summary_text']
        info = future_info.result()

    summary = (
        f"**Name**: {info['name']}\n**Age**: {info['age']}\n"
        f"**Industry**: {info['industry']}\n\n{base_summary}"
    )
    return summary, time.time() - start_time

#####################################
# Optimized Scoring System
#####################################
def calculate_google_match_score(summary):
    """Precomputed keyword matching for faster scoring"""
    GOOGLE_KEYWORDS = {
        "Technical Skills": {"python", "java", "c++", "sql", "algorithms"},
        "Advanced Tech": {"ai", "ml", "cloud", "data science"},
        # Add other categories...
    }
    # Evenly split weights as an illustrative assumption; the original
    # weighting scheme was elided
    WEIGHTS = {category: 1 / len(GOOGLE_KEYWORDS) for category in GOOGLE_KEYWORDS}

    score = defaultdict(float)
    summary_lower = summary.lower()

    for category, keywords in GOOGLE_KEYWORDS.items():
        # Substring search so multi-word keywords like "data science" match too
        count = sum(1 for keyword in keywords if keyword in summary_lower)
        score[category] = min(1.0, (count / len(keywords)) * 1.5) if keywords else 0.0

    total = sum(score[category] * WEIGHTS[category] for category in score)
    return total, score

#####################################
# Streamlit Interface Optimizations
#####################################
st.title("Google Resume Analyzer")

# Initialize session state once so reruns don't clobber in-flight progress
if 'progress' not in st.session_state:
    st.session_state.progress = 0
if 'last_update' not in st.session_state:
    st.session_state.last_update = time.time()

# The analyze block below reads `uploaded_file`, so the uploader must exist
uploaded_file = st.file_uploader("Upload a resume", type=["docx", "doc", "txt"])

if uploaded_file and st.button("Analyze"):
    with st.spinner("Analyzing resume..."):
        start_time = time.time()

        # Step 1: Text extraction
        text = extract_text_from_file(uploaded_file)
        st.session_state.progress = 33
        if text.startswith("Error"):
            st.error(text)
            st.stop()  # halt this rerun; `continue` is invalid outside a loop

        # Step 2: Information extraction & summarization
        summary, _ = summarize_resume_text(text)
        st.session_state.progress = 66

        # Step 3: Scoring
        score, breakdown = calculate_google_match_score(summary)
        st.session_state.progress = 100

        # Display results
        st.subheader("Analysis Complete!")
        st.markdown(f"**Match Score**: {score*100:.1f}%")
        st.caption(f"Processed in {time.time() - start_time:.1f}s")
        # Add other displays...

if st.session_state.progress < 100:
    # st.progress takes a single value: an int in 0-100 or a float in 0.0-1.0
    st.progress(st.session_state.progress)