import os
import io
import streamlit as st
import docx
import docx2txt
import tempfile
import time
import re
import concurrent.futures
from functools import lru_cache
from transformers import pipeline
# ... [Keep your existing configurations and constants] ...
#####################################
# Preload Models - Optimized with DistilBART
#####################################
@st.cache_resource(show_spinner=True)
def load_models():
    """Load the summarization model once at startup."""
    with st.spinner("Loading AI models..."):
        # Distilled BART: smaller and faster than the full BART checkpoint
        models = {
            'summarizer': pipeline(
                "summarization",
                model="sshleifer/distilbart-cnn-12-6",
                max_length=300,   # cap on generated length, not the context window
                truncation=True,
                num_return_sequences=1
            )
        }
    return models
models = load_models()
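# Illustrative call (sketch): the summarization pipeline returns a list of
# dicts, one per returned sequence, each carrying a 'summary_text' key, e.g.:
#   models['summarizer']("<some long text>", max_length=60, truncation=True)[0]['summary_text']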
#####################################
# Function: Extract Text from File - Optimized
#####################################
def extract_text_from_file(file_obj):
    """Optimized text extraction with early size limits."""
    # NOTE: no lru_cache here -- Streamlit yields a fresh UploadedFile object
    # on every rerun, so an identity-keyed cache would never hit
    # (a bytes-keyed alternative is sketched below)
    filename = file_obj.name
    ext = os.path.splitext(filename)[1].lower()
    text = ""
    MAX_TEXT = 15000  # processing limit in characters
    try:
        if ext == ".docx":
            doc = docx.Document(file_obj)
            # Only process the first 50 paragraphs (roughly 10 pages)
            text = "\n".join(
                para.text for para in doc.paragraphs[:50] if para.text.strip()
            )[:MAX_TEXT]
        elif ext == ".doc":
            # docx2txt unzips the OOXML container, so this succeeds only when
            # the ".doc" is really a renamed .docx; true legacy binary .doc
            # files need an external converter such as antiword
            text = docx2txt.process(file_obj)[:MAX_TEXT]
        elif ext == ".txt":
            text = file_obj.read().decode("utf-8")[:MAX_TEXT]
    except Exception as e:
        text = f"Error: {str(e)}"
    return text
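# A minimal bytes-keyed cache, assuming st.cache_data is acceptable here:
# hashing the raw bytes means re-uploading identical content reuses the
# extraction result, which an identity-keyed cache cannot do. The wrapper
# name and signature are illustrative, not part of the original app.
@st.cache_data(show_spinner=False)
def extract_text_cached(file_bytes: bytes, filename: str) -> str:
    buffer = io.BytesIO(file_bytes)
    buffer.name = filename  # BytesIO accepts a .name attribute, which extract_text_from_file reads
    return extract_text_from_file(buffer)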
# ... [Keep your existing extraction functions] ...
#####################################
# Optimized Summarization
#####################################
def summarize_resume_text(resume_text):
    """Faster summarization with input truncation."""
    start_time = time.time()
    # Truncate the input; the pipeline's truncation flag handles the
    # model's token limit on top of this character cut
    text_to_summarize = resume_text[:1024]
    base_summary = models['summarizer'](
        text_to_summarize,
        max_length=150,  # shorter summary
        truncation=True
    )[0]['summary_text']
    # Run the lightweight extractors in a thread pool
    with concurrent.futures.ThreadPoolExecutor() as executor:
        name_future = executor.submit(extract_name, resume_text[:200])
        age_future = executor.submit(extract_age, resume_text)
        industry_future = executor.submit(extract_industry, resume_text, base_summary)
        # Collect results
        name = name_future.result()
        age = age_future.result()
        industry = industry_future.result()
    skills, work = extract_skills_and_work(resume_text)  # sequential; unused in the simplified format below
    # Format summary (simplified) and report the actual elapsed time
    summary = f"**Name**: {name}\n**Age**: {age}\n**Industry**: {industry}\n\n{base_summary}"
    return summary, time.time() - start_time
# ... [Keep your scoring and feedback functions] ...
#####################################
# Helper: Score Display
#####################################
def show_score(score):
    """Render the match score with traffic-light formatting."""
    # Defined ahead of the interface code: Streamlit executes the script
    # top-to-bottom on every run, so the Analyze block below can only call
    # functions that already exist
    score_percent = int(score * 100)
    if score >= 0.85:
        st.success(f"**Match Score**: {score_percent}% 🎉")
    elif score >= 0.70:
        st.success(f"**Match Score**: {score_percent}% ✅")
    elif score >= 0.50:
        st.warning(f"**Match Score**: {score_percent}% ⚠️")
    else:
        st.error(f"**Match Score**: {score_percent}% 📉")
#####################################
# Optimized Streamlit Interface
#####################################
st.title("Google Resume Analyzer")
# Initialize session state properly
if 'progress' not in st.session_state:
    st.session_state['progress'] = 0
if 'last_update' not in st.session_state:
    st.session_state['last_update'] = time.time()
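# st.session_state persists across Streamlit's top-to-bottom reruns, so the
# progress value survives each widget interaction re-executing this script.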
uploaded_file = st.file_uploader("Upload your resume", type=["docx", "doc", "txt"])
if uploaded_file and st.button("Analyze"):
    progress_bar = st.progress(0)
    with st.spinner("Analyzing..."):
        # Step 1: Text extraction
        st.session_state['progress'] = 33
        progress_bar.progress(st.session_state['progress'])
        resume_text = extract_text_from_file(uploaded_file)
        if resume_text.startswith("Error"):
            st.error(resume_text)
            st.stop()  # `continue` is only legal inside a loop; st.stop() ends this run
        # Step 2: Summarization
        st.session_state['progress'] = 66
        progress_bar.progress(st.session_state['progress'])
        summary, _ = summarize_resume_text(resume_text)
        # Step 3: Scoring
        st.session_state['progress'] = 100
        progress_bar.progress(st.session_state['progress'])
    # Display results
    st.subheader("Analysis Complete!")
    st.markdown(summary)
    # Display scores
    overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
    show_score(overall_score)
    # Display feedback
    feedback, _ = generate_template_feedback(category_scores)
    st.markdown(feedback)
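# NOTE (Streamlit API): st.progress(value) accepts an int in 0-100 or a float
# in 0.0-1.0 and returns a handle whose .progress() method redraws the bar in
# place; its optional second argument is a string label, not a maximum value.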
# ... [Keep your remaining functions] ...