import os
import io
import streamlit as st
import docx
import docx2txt
import tempfile
import time
import re
import concurrent.futures
from functools import lru_cache
from transformers import pipeline
from collections import defaultdict
# Set page title and hide sidebar
st.set_page_config(
    page_title="Resume-Google Job Match Analyzer",
    initial_sidebar_state="collapsed"
)
# Hide sidebar completely with custom CSS
st.markdown("""
<style>
[data-testid="collapsedControl"] {display: none;}
section[data-testid="stSidebar"] {display: none;}
</style>
""", unsafe_allow_html=True)
# Pre-defined company description for Google (unchanged)
GOOGLE_DESCRIPTION = """...""" # Keep your original content here
#####################################
# Preload Models - Optimized with DistilBART
#####################################
@st.cache_resource(show_spinner=True)
def load_models():
    """Load optimized models at startup"""
    with st.spinner("Loading AI models..."):
        models = {
            'summarizer': pipeline(
                "summarization",
                model="sshleifer/distilbart-cnn-12-6",  # distilled BART: smaller and faster than bart-large
                max_length=300,
                truncation=True,
                num_return_sequences=1
            )
        }
    return models
models = load_models()
#####################################
# Function: Extract Text from File - Optimized
#####################################
@lru_cache(maxsize=16, typed=False)  # NB: caches by object identity; a re-uploaded file is a new object
def extract_text_from_file(file_obj):
    """Optimized text extraction with early exit"""
    filename = file_obj.name
    ext = os.path.splitext(filename)[1].lower()
    text = ""
    MAX_TEXT = 15000  # processing limit keeps extraction fast
    try:
        if ext == ".docx":
            doc = docx.Document(file_obj)
            text = "\n".join(para.text for para in doc.paragraphs[:50] if para.text.strip())[:MAX_TEXT]
        elif ext == ".doc":
            # docx2txt needs a path on disk, so spill the upload to a temp file first
            with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
                temp_file.write(file_obj.getvalue())
            text = docx2txt.process(temp_file.name)[:MAX_TEXT]
            os.unlink(temp_file.name)
        elif ext == ".txt":
            text = file_obj.getvalue().decode("utf-8")[:MAX_TEXT]
    except Exception as e:
        text = f"Error: {str(e)}"
    return text
#####################################
# Unified Information Extraction - Optimized
#####################################
@lru_cache(maxsize=16, typed=False)
def extract_info(text):
    """Combined extraction of all candidate info in one pass"""
    text_lower = text.lower()
    info = {
        'name': extract_name_optimized(text),
        'age': extract_age_optimized(text_lower),
        'industry': extract_industry_optimized(text_lower),
        'skills': extract_skills_optimized(text_lower),
        'experience': extract_experience_optimized(text)
    }
    return info
def extract_name_optimized(text):
    """Faster name extraction with reduced checks"""
    lines = text.split('\n')[:10]
    for line in lines:
        if 5 <= len(line) <= 40 and not any(keyword in line.lower() for keyword in ["resume", "cv"]):
            return line.strip()
    return "Unknown"
def extract_age_optimized(text):
    """Simplified age pattern matching"""
    patterns = [r'\bage\b\s*:?\s*(\d{1,2})', r'(\d{1,2})\s+years?\s+old']
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1)  # the digits are the only capture group in each pattern
    return "Not specified"
# Other extract_ functions with similar optimizations...
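# Minimal sketches of the remaining extractors so the module runs end to end.
# The keyword lists and heuristics below are illustrative assumptions, not a
# vetted taxonomy -- swap in lists tuned to your own resume corpus.
def extract_industry_optimized(text_lower):
    """Guess an industry from a small assumed keyword map."""
    INDUSTRY_KEYWORDS = {  # assumed example mapping
        "software": ["developer", "engineer", "programming"],
        "finance": ["banking", "accounting", "trading"],
        "healthcare": ["clinical", "medical", "nursing"],
    }
    for industry, keywords in INDUSTRY_KEYWORDS.items():
        if any(kw in text_lower for kw in keywords):
            return industry
    return "Not specified"
def extract_skills_optimized(text_lower):
    """Collect whichever entries of an assumed skill list appear in the text."""
    SKILLS = ["python", "java", "c++", "sql", "cloud", "ml"]  # assumed example list
    return [skill for skill in SKILLS if skill in text_lower]
def extract_experience_optimized(text):
    """Pull the first 'N years' figure as a rough experience estimate."""
    match = re.search(r'(\d{1,2})\+?\s+years', text, re.IGNORECASE)
    return f"{match.group(1)} years" if match else "Not specified"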
#####################################
# Optimized Summarization
#####################################
def summarize_resume_text(resume_text):
    """Faster summarization with input truncation"""
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Run info extraction in a worker thread while the summarizer model runs,
        # and only block on the result once the summary is ready
        info_future = executor.submit(extract_info, resume_text)
        base_summary = models['summarizer'](
            resume_text[:1024],
            max_length=150,
            truncation=True
        )[0]['summary_text']
        info = info_future.result()
    summary = (
        f"**Name**: {info['name']}\n**Age**: {info['age']}\n"
        f"**Industry**: {info['industry']}\n\n{base_summary}"
    )
    return summary, 0.1
#####################################
# Optimized Scoring System
#####################################
def calculate_google_match_score(summary):
    """Precomputed keyword matching for faster scoring"""
    GOOGLE_KEYWORDS = {
        "Technical Skills": {"python", "java", "c++", "sql", "algorithms"},
        "Advanced Tech": {"ai", "ml", "cloud", "data science"},
        # Add other categories...
    }
    # Assumed example weights; they should sum to 1 across all categories
    WEIGHTS = {"Technical Skills": 0.6, "Advanced Tech": 0.4}
    score = defaultdict(float)
    summary_lower = summary.lower()
    for category, keywords in GOOGLE_KEYWORDS.items():
        # Substring search keeps multi-word keywords like "data science" matching;
        # note it can over-match short tokens such as "ai"
        count = sum(1 for kw in keywords if kw in summary_lower)
        score[category] = min(1, (count / len(keywords)) * 1.5) if keywords else 0
    total = sum(score[cat] * WEIGHTS.get(cat, 0) for cat in GOOGLE_KEYWORDS)
    return total, score
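# Quick sanity check under the assumed weights above: a summary containing only
# "python" and "sql" gives Technical Skills = min(1, 2/5 * 1.5) = 0.6 and
# Advanced Tech = 0, so the weighted total is 0.6 * 0.6 = 0.36.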
#####################################
# Streamlit Interface Optimizations
#####################################
st.title("Google Resume Analyzer")
uploaded_file = st.file_uploader("Upload a resume", type=["docx", "doc", "txt"])
if 'progress' not in st.session_state:
    st.session_state.progress = 0
    st.session_state.last_update = time.time()
if uploaded_file and st.button("Analyze"):
    with st.spinner("Analyzing resume..."):
        # Use session state for progress tracking
        start_time = time.time()
        # Step 1: Text extraction
        text = extract_text_from_file(uploaded_file)
        st.session_state.progress = 33
        if text.startswith("Error"):
            st.error(text)
            st.stop()
        # Step 2: Information extraction & summarization
        summary, _ = summarize_resume_text(text)
        st.session_state.progress = 66
        # Step 3: Scoring
        score, breakdown = calculate_google_match_score(summary)
        st.session_state.progress = 100
        # Display results
        st.subheader("Analysis Complete!")
        st.markdown(f"**Match Score**: {score*100:.1f}%")
        # Add other displays...
if st.session_state.progress < 100:
    st.progress(st.session_state.progress)
    time.sleep(0.1)  # Simulate progress update
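# To try this locally (assuming the script is saved as app.py):
#   streamlit run app.py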