# Provenance: app.py from CR7CAD's repository, commit 46ff202 ("Update app.py",
# verified), 5.33 kB. The raw / history / blame page links were dropped.
import os
import io
import streamlit as st
import docx
import docx2txt
import tempfile
import time
import re
import concurrent.futures
from functools import lru_cache
from transformers import pipeline
# ... [Keep your existing configurations and constants] ...
#####################################
# Preload Models - Optimized with DistilBART
#####################################
@st.cache_resource(show_spinner=True)
def load_models():
    """Build the model pipelines used by the app and return them in a dict.

    The function is wrapped in ``st.cache_resource`` and shows a spinner
    with the text "Loading AI models..." while the pipeline is constructed.

    Returns:
        dict: A mapping with a single key, ``'summarizer'``, holding a
        ``transformers`` summarization pipeline.
    """
    # NOTE(review): "distilbart-base-cs" does not look like a fully
    # qualified Hub model id -- confirm that it resolves before deploying.
    with st.spinner("Loading AI models..."):
        summarizer = pipeline(
            "summarization",
            model="distilbart-base-cs",
            max_length=300,  # pipeline-level default; callers may override
            truncation=True,
            num_return_sequences=1,
        )
    return {'summarizer': summarizer}


# Module-level handle to the pipelines, read by the summarization code.
models = load_models()
#####################################
# Function: Extract Text from File - Optimized
#####################################
@lru_cache(maxsize=16, typed=False)
def extract_text_from_file(file_obj):
    """Extract at most 15,000 characters of text from an uploaded file.

    The file type is chosen from the extension of ``file_obj.name``
    (case-insensitive): ``.docx``, ``.doc`` or ``.txt``.

    Args:
        file_obj: File-like object exposing ``name`` and ``read()``. It must
            be hashable because results are memoized with ``lru_cache``;
            the cache keeps a reference to each of the last 16 objects.

    Returns:
        str: The extracted text, an empty string for an unsupported
        extension, or ``"Error: <message>"`` if extraction failed.
    """
    max_text = 15000  # cap on the number of characters returned
    ext = os.path.splitext(file_obj.name)[1].lower()

    try:
        # Rewind first so a stream that was already read elsewhere is not
        # silently treated as empty.
        if hasattr(file_obj, "seek"):
            file_obj.seek(0)

        if ext == ".docx":
            doc = docx.Document(file_obj)
            # Only the first 50 paragraphs are inspected; blank ones are
            # skipped.
            kept = (p.text for p in doc.paragraphs[:50] if p.text.strip())
            return "\n".join(kept)[:max_text]

        if ext == ".doc":
            # docx2txt opens its argument as a zip archive, so it needs a
            # path or a file-like object, never raw bytes. Objects that
            # wrap their data in a ``stream`` attribute are still honoured.
            # A file that is not a zip-based document raises here and is
            # reported through the "Error: ..." path below.
            stream = getattr(file_obj, "stream", file_obj)
            return docx2txt.process(stream)[:max_text]

        if ext == ".txt":
            data = file_obj.read()
            if isinstance(data, bytes):
                # Undecodable bytes become U+FFFD instead of failing the
                # whole upload.
                data = data.decode("utf-8", errors="replace")
            return data[:max_text]
    except Exception as e:
        # Boundary handler: callers expect an "Error: ..." string rather
        # than an exception.
        return f"Error: {str(e)}"

    return ""
# ... [Keep your existing extraction functions] ...
#####################################
# Optimized Summarization
#####################################
def summarize_resume_text(resume_text):
    """Summarize a resume and prepend the extracted name, age and industry.

    Only the first 1024 characters are passed to the summarizer. The name
    is extracted from the first 200 characters; age and industry are
    extracted from the full text.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        tuple[str, float]: The markdown-formatted summary and the
        wall-clock seconds spent producing it.
    """
    start_time = time.time()

    base_summary = models['summarizer'](
        resume_text[:1024],
        max_length=150,
        truncation=True,
    )[0]['summary_text']

    # The three extractors run concurrently; all results are collected
    # before the pool is shut down.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        name_future = executor.submit(extract_name, resume_text[:200])
        age_future = executor.submit(extract_age, resume_text)
        industry_future = executor.submit(
            extract_industry, resume_text, base_summary
        )
        name = name_future.result()
        age = age_future.result()
        industry = industry_future.result()

    # NOTE(review): the simplified summary below does not use skills or
    # work history. The call is kept so any error it raises still
    # surfaces; drop it if it has no other purpose.
    _skills, _work = extract_skills_and_work(resume_text)

    summary = (
        f"**Name**: {name}\n**Age**: {age}\n**Industry**: {industry}"
        f"\n\n{base_summary}"
    )
    # Report the measured duration; the timer was previously started but a
    # fixed 0.1 was returned instead.
    return summary, time.time() - start_time
# ... [Keep your scoring and feedback functions] ...
#####################################
# Optimized Streamlit Interface
#####################################
def show_score(score):
    """Render the match score as a percentage in a colour-coded box.

    Defined ahead of the page code below because a Streamlit script runs
    top to bottom and the page code calls this function.

    Args:
        score: Match score as a fraction (0.85 is shown as 85%).
    """
    score_percent = int(score * 100)
    if score >= 0.85:
        st.success(f"**Match Score**: {score_percent}% 🌟")
    elif score >= 0.70:
        st.success(f"**Match Score**: {score_percent}% ✅")
    elif score >= 0.50:
        st.warning(f"**Match Score**: {score_percent}% ⚠️")
    else:
        st.error(f"**Match Score**: {score_percent}% 🔍")


st.title("Google Resume Analyzer")

# Seed session state on the first run only.
if 'progress' not in st.session_state:
    st.session_state['progress'] = 0
if 'last_update' not in st.session_state:
    st.session_state['last_update'] = time.time()

uploaded_file = st.file_uploader(
    "Upload your resume", type=["docx", "doc", "txt"]
)

if uploaded_file and st.button("Analyze"):
    # One progress bar element, advanced in place after each step.
    progress_bar = st.progress(0)

    with st.spinner():
        # Step 1: text extraction
        st.session_state['progress'] = 33
        progress_bar.progress(33)
        resume_text = extract_text_from_file(uploaded_file)

        # Extraction failures come back as "Error: <message>". Match the
        # prefix so a resume that merely contains the word "Error" is not
        # rejected.
        if resume_text.startswith("Error:"):
            st.error(resume_text)
            st.session_state['progress'] = 100
            progress_bar.progress(100)
        elif not resume_text.strip():
            st.warning("No text could be extracted from the uploaded file.")
            st.session_state['progress'] = 100
            progress_bar.progress(100)
        else:
            # Step 2: summarization
            st.session_state['progress'] = 66
            progress_bar.progress(66)
            summary, _ = summarize_resume_text(resume_text)

            # Step 3: scoring and display
            st.session_state['progress'] = 100
            progress_bar.progress(100)

            st.subheader("Analysis Complete!")
            st.markdown(summary)

            overall_score, category_scores, score_breakdown = (
                calculate_google_match_score(summary)
            )
            show_score(overall_score)

            feedback, _ = generate_template_feedback(category_scores)
            st.markdown(feedback)
# ... [Keep your remaining functions] ...