import os
import io
import streamlit as st
import docx
import docx2txt
import tempfile
import time
import re
import concurrent.futures
from functools import lru_cache
from transformers import pipeline

# ... [Keep your existing configurations and constants] ...
#####################################
# Preload Models - Optimized with DistilBART
#####################################
@st.cache_resource
def load_models():
    """Load optimized models once and cache them across Streamlit reruns."""
    with st.spinner("Loading AI models..."):
        # Use a smaller, faster distilled checkpoint (public DistilBART summarization
        # model); generation limits such as max_length and truncation are applied
        # at call time in summarize_resume_text.
        models = {
            'summarizer': pipeline(
                "summarization",
                model="sshleifer/distilbart-cnn-12-6"  # DistilBART, faster than full BART
            )
        }
    return models

models = load_models()
#####################################
# Function: Extract Text from File - Optimized
#####################################
def extract_text_from_file(file_obj):
    """Optimized text extraction with early exit"""
    filename = file_obj.name
    ext = os.path.splitext(filename)[1].lower()
    text = ""
    MAX_TEXT = 15000  # Reduced processing limit

    try:
        if ext == ".docx":
            doc = docx.Document(file_obj)
            # Only process the first 50 paragraphs (roughly 10 pages)
            text = "\n".join(para.text for para in doc.paragraphs[:50] if para.text.strip())[:MAX_TEXT]
        elif ext == ".doc":
            # docx2txt expects a file path, so write the upload to a temporary file first.
            # Note: docx2txt targets .docx; a legacy binary .doc may need a separate
            # converter (e.g. antiword) if this step fails.
            with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
                tmp.write(file_obj.read())
                tmp_path = tmp.name
            text = docx2txt.process(tmp_path)[:MAX_TEXT]
            os.remove(tmp_path)
        elif ext == ".txt":
            text = file_obj.read().decode("utf-8", errors="ignore")[:MAX_TEXT]
    except Exception as e:
        text = f"Error: {str(e)}"
    return text
# ... [Keep your existing extraction functions] ...
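#####################################
# Illustrative stand-ins (assumptions, NOT the original implementations).
# Minimal regex/keyword sketches of the helpers that summarize_resume_text
# expects, so this listing runs end-to-end. Delete these and keep your
# existing extraction functions if you already have them.
#####################################
def extract_name(text):
    """Heuristic sketch: take the first short non-empty line as the name."""
    for line in text.splitlines():
        line = line.strip()
        if line and len(line.split()) <= 5:
            return line
    return "Not found"

def extract_age(text):
    """Heuristic sketch: look for an explicit 'Age: NN' style mention."""
    match = re.search(r"\bage\s*[:\-]?\s*(\d{2})\b", text, re.IGNORECASE)
    return match.group(1) if match else "Not specified"

def extract_industry(text, summary):
    """Heuristic sketch: rough keyword-based industry guess."""
    keywords = {
        "software": "Technology", "engineer": "Technology",
        "finance": "Finance", "marketing": "Marketing",
        "nurse": "Healthcare", "teacher": "Education",
    }
    haystack = (text + " " + summary).lower()
    for word, industry in keywords.items():
        if word in haystack:
            return industry
    return "Unknown"

def extract_skills_and_work(text):
    """Heuristic sketch: match a few known skills and 'experience'-like lines."""
    known_skills = ["python", "java", "sql", "aws", "docker", "leadership"]
    skills = [s for s in known_skills if s in text.lower()]
    work = [line.strip() for line in text.splitlines() if "experience" in line.lower()][:3]
    return skills, work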
#####################################
# Optimized Summarization
#####################################
def summarize_resume_text(resume_text):
    """Faster summarization with input truncation"""
    start_time = time.time()

    # Truncate text for summarization
    text_to_summarize = resume_text[:1024]
    base_summary = models['summarizer'](
        text_to_summarize,
        max_length=150,  # Smaller summary
        truncation=True
    )[0]['summary_text']

    # Parallel extraction with a thread pool
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Reduced number of parallel tasks
        name_future = executor.submit(extract_name, resume_text[:200])
        age_future = executor.submit(extract_age, resume_text)
        industry_future = executor.submit(extract_industry, resume_text, base_summary)

        # Get results
        name = name_future.result()
        age = age_future.result()
        industry = industry_future.result()

    skills, work = extract_skills_and_work(resume_text)  # Sequential

    # Format summary (simplified); "  \n" forces markdown line breaks in st.markdown
    formatted = "  \n".join([
        f"**Name**: {name}",
        f"**Age**: {age}",
        f"**Industry**: {industry}",
        f"**Skills**: {', '.join(skills) if skills else 'Not detected'}",
        f"**Recent Work**: {'; '.join(work) if work else 'Not detected'}",
    ]) + f"\n\n{base_summary}"
    return formatted, time.time() - start_time
# ... [Keep your scoring and feedback functions] ...
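#####################################
# Illustrative stand-ins (assumptions, NOT the original scoring logic).
# Keyword-count placeholders with the same return shapes the interface
# below expects. Delete these and keep your existing scoring and
# feedback functions if you already have them.
#####################################
def calculate_google_match_score(summary):
    """Toy keyword scorer returning (overall, per-category scores, breakdown)."""
    categories = {
        "technical": ["python", "algorithms", "system design", "sql"],
        "leadership": ["led", "managed", "mentored"],
        "impact": ["improved", "launched", "reduced", "increased"],
    }
    text = summary.lower()
    category_scores = {}
    score_breakdown = {}
    for category, words in categories.items():
        hits = [w for w in words if w in text]
        category_scores[category] = len(hits) / len(words)
        score_breakdown[category] = hits
    overall_score = sum(category_scores.values()) / len(category_scores)
    return overall_score, category_scores, score_breakdown

def generate_template_feedback(category_scores):
    """Toy feedback generator returning (markdown_text, elapsed_placeholder)."""
    lines = ["### Feedback", ""]
    for category, value in category_scores.items():
        if value >= 0.5:
            lines.append(f"- **{category.title()}**: solid coverage ({int(value * 100)}%).")
        else:
            lines.append(f"- **{category.title()}**: consider adding more evidence ({int(value * 100)}%).")
    return "\n".join(lines), 0.0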
#####################################
# Optimized Streamlit Interface
#####################################
# Helper used in the analyze flow below (defined before it runs, since the
# script executes top to bottom on every rerun).
def show_score(score):
    """Display score with appropriate formatting"""
    score_percent = int(score * 100)
    if score >= 0.85:
        st.success(f"**Match Score**: {score_percent}% 🎉")
    elif score >= 0.70:
        st.success(f"**Match Score**: {score_percent}% ✅")
    elif score >= 0.50:
        st.warning(f"**Match Score**: {score_percent}% ⚠️")
    else:
        st.error(f"**Match Score**: {score_percent}% 📉")

st.title("Google Resume Analyzer")

# Initialize session state properly
if 'progress' not in st.session_state:
    st.session_state['progress'] = 0
if 'last_update' not in st.session_state:
    st.session_state['last_update'] = time.time()

uploaded_file = st.file_uploader("Upload your resume", type=["docx", "doc", "txt"])

if uploaded_file and st.button("Analyze"):
    progress_interval = 0.1  # short pause between updates so the bar is visible
    progress_bar = st.progress(0)

    with st.spinner("Analyzing resume..."):
        # Step 1: Text extraction
        st.session_state['progress'] = 33
        progress_bar.progress(st.session_state['progress'])
        resume_text = extract_text_from_file(uploaded_file)
        if resume_text.startswith("Error"):
            st.error(resume_text)
            st.stop()
        time.sleep(progress_interval)

        # Step 2: Summarization
        st.session_state['progress'] = 66
        progress_bar.progress(st.session_state['progress'])
        summary, _ = summarize_resume_text(resume_text)
        time.sleep(progress_interval)

        # Step 3: Scoring
        st.session_state['progress'] = 100
        progress_bar.progress(st.session_state['progress'])
        overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)

    # Display results
    st.subheader("Analysis Complete!")
    st.markdown(summary)
    show_score(overall_score)

    # Display feedback
    feedback, _ = generate_template_feedback(category_scores)
    st.markdown(feedback)
# ... [Keep your remaining functions] ...
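# To run locally (script name assumed; adjust to your actual file name):
#   streamlit run app.py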