import os
import io
import streamlit as st
import docx
import docx2txt
import tempfile
import time
import re
import concurrent.futures
from functools import lru_cache
from transformers import pipeline
# ... [Keep your existing configurations and constants] ...
#####################################
# Preload Models - Optimized with DistilBART
#####################################
@st.cache_resource(show_spinner=True)
def load_models():
    """Load the summarization model once at startup."""
    with st.spinner("Loading AI models..."):
        # Distilled BART: smaller and faster than the full BART checkpoint
        models = {
            'summarizer': pipeline(
                "summarization",
                model="sshleifer/distilbart-cnn-12-6",
                max_length=300,   # cap on generated length, not the context window
                truncation=True,
                num_return_sequences=1
            )
        }
    return models
models = load_models()
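# Illustrative call (sketch): the summarization pipeline returns a list of
# dicts, one per returned sequence, each carrying a 'summary_text' key, e.g.:
#   models['summarizer']("<some long text>", max_length=60, truncation=True)[0]['summary_text']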
#####################################
# Function: Extract Text from File - Optimized
#####################################
def extract_text_from_file(file_obj):
    """Optimized text extraction with early size limits."""
    # NOTE: no lru_cache here -- Streamlit yields a fresh UploadedFile object
    # on every rerun, so an identity-keyed cache would never hit
    # (a bytes-keyed alternative is sketched below)
    filename = file_obj.name
    ext = os.path.splitext(filename)[1].lower()
    text = ""
    MAX_TEXT = 15000  # processing limit in characters
    try:
        if ext == ".docx":
            doc = docx.Document(file_obj)
            # Only process the first 50 paragraphs (roughly 10 pages)
            text = "\n".join(
                para.text for para in doc.paragraphs[:50] if para.text.strip()
            )[:MAX_TEXT]
        elif ext == ".doc":
            # docx2txt unzips the OOXML container, so this succeeds only when
            # the ".doc" is really a renamed .docx; true legacy binary .doc
            # files need an external converter such as antiword
            text = docx2txt.process(file_obj)[:MAX_TEXT]
        elif ext == ".txt":
            text = file_obj.read().decode("utf-8")[:MAX_TEXT]
    except Exception as e:
        text = f"Error: {str(e)}"
    return text
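# A minimal bytes-keyed cache, assuming st.cache_data is acceptable here:
# hashing the raw bytes means re-uploading identical content reuses the
# extraction result, which an identity-keyed cache cannot do. The wrapper
# name and signature are illustrative, not part of the original app.
@st.cache_data(show_spinner=False)
def extract_text_cached(file_bytes: bytes, filename: str) -> str:
    buffer = io.BytesIO(file_bytes)
    buffer.name = filename  # BytesIO accepts a .name attribute, which extract_text_from_file reads
    return extract_text_from_file(buffer)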
# ... [Keep your existing extraction functions] ...
#####################################
# Optimized Summarization
#####################################
def summarize_resume_text(resume_text):
    """Faster summarization with input truncation."""
    start_time = time.time()
    # Truncate the input; the pipeline's truncation flag handles the
    # model's token limit on top of this character cut
    text_to_summarize = resume_text[:1024]
    base_summary = models['summarizer'](
        text_to_summarize,
        max_length=150,  # shorter summary
        truncation=True
    )[0]['summary_text']
    # Run the lightweight extractors in a thread pool
    with concurrent.futures.ThreadPoolExecutor() as executor:
        name_future = executor.submit(extract_name, resume_text[:200])
        age_future = executor.submit(extract_age, resume_text)
        industry_future = executor.submit(extract_industry, resume_text, base_summary)
        # Collect results
        name = name_future.result()
        age = age_future.result()
        industry = industry_future.result()
    skills, work = extract_skills_and_work(resume_text)  # sequential; unused in the simplified format below
    # Format summary (simplified) and report the actual elapsed time
    summary = f"**Name**: {name}\n**Age**: {age}\n**Industry**: {industry}\n\n{base_summary}"
    return summary, time.time() - start_time
# ... [Keep your scoring and feedback functions] ...
#####################################
# Helper: Score Display
#####################################
def show_score(score):
    """Render the match score with traffic-light formatting."""
    # Defined ahead of the interface code: Streamlit executes the script
    # top-to-bottom on every run, so the Analyze block below can only call
    # functions that already exist
    score_percent = int(score * 100)
    if score >= 0.85:
        st.success(f"**Match Score**: {score_percent}% 🎉")
    elif score >= 0.70:
        st.success(f"**Match Score**: {score_percent}% ✅")
    elif score >= 0.50:
        st.warning(f"**Match Score**: {score_percent}% ⚠️")
    else:
        st.error(f"**Match Score**: {score_percent}% 📉")
#####################################
# Optimized Streamlit Interface
#####################################
st.title("Google Resume Analyzer")
# Initialize session state properly
if 'progress' not in st.session_state:
    st.session_state['progress'] = 0
if 'last_update' not in st.session_state:
    st.session_state['last_update'] = time.time()
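# st.session_state persists across Streamlit's top-to-bottom reruns, so the
# progress value survives each widget interaction re-executing this script.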
uploaded_file = st.file_uploader("Upload your resume", type=["docx", "doc", "txt"])
if uploaded_file and st.button("Analyze"):
    progress_bar = st.progress(0)
    with st.spinner("Analyzing..."):
        # Step 1: Text extraction
        st.session_state['progress'] = 33
        progress_bar.progress(st.session_state['progress'])
        resume_text = extract_text_from_file(uploaded_file)
        if resume_text.startswith("Error"):
            st.error(resume_text)
            st.stop()  # `continue` is only legal inside a loop; st.stop() ends this run
        # Step 2: Summarization
        st.session_state['progress'] = 66
        progress_bar.progress(st.session_state['progress'])
        summary, _ = summarize_resume_text(resume_text)
        # Step 3: Scoring
        st.session_state['progress'] = 100
        progress_bar.progress(st.session_state['progress'])
    # Display results
    st.subheader("Analysis Complete!")
    st.markdown(summary)
    # Display scores
    overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
    show_score(overall_score)
    # Display feedback
    feedback, _ = generate_template_feedback(category_scores)
    st.markdown(feedback)
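# NOTE (Streamlit API): st.progress(value) accepts an int in 0-100 or a float
# in 0.0-1.0 and returns a handle whose .progress() method redraws the bar in
# place; its optional second argument is a string label, not a maximum value.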
# ... [Keep your remaining functions] ...