Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -9,24 +9,8 @@ import re
|
|
9 |
import concurrent.futures
|
10 |
from functools import lru_cache
|
11 |
from transformers import pipeline
|
12 |
-
from collections import defaultdict
|
13 |
|
14 |
-
#
|
15 |
-
st.set_page_config(
|
16 |
-
page_title="Resume-Google Job Match Analyzer",
|
17 |
-
initial_sidebar_state="collapsed"
|
18 |
-
)
|
19 |
-
|
20 |
-
# Hide sidebar completely with custom CSS
|
21 |
-
st.markdown("""
|
22 |
-
<style>
|
23 |
-
[data-testid="collapsedControl"] {display: none;}
|
24 |
-
section[data-testid="stSidebar"] {display: none;}
|
25 |
-
</style>
|
26 |
-
""", unsafe_allow_html=True)
|
27 |
-
|
28 |
-
# Pre-defined company description for Google (unchanged)
|
29 |
-
GOOGLE_DESCRIPTION = """...""" # Keep your original content here
|
30 |
|
31 |
#####################################
|
32 |
# Preload Models - Optimized with DistilBART
|
@@ -35,11 +19,12 @@ GOOGLE_DESCRIPTION = """...""" # Keep your original content here
|
|
35 |
def load_models():
|
36 |
"""Load optimized models at startup"""
|
37 |
with st.spinner("Loading AI models..."):
|
|
|
38 |
models = {
|
39 |
'summarizer': pipeline(
|
40 |
"summarization",
|
41 |
-
model="distilbart-base-cs", # Faster
|
42 |
-
max_length=300,
|
43 |
truncation=True,
|
44 |
num_return_sequences=1
|
45 |
)
|
@@ -62,121 +47,114 @@ def extract_text_from_file(file_obj):
|
|
62 |
try:
|
63 |
if ext == ".docx":
|
64 |
doc = docx.Document(file_obj)
|
|
|
65 |
text = "\n".join(para.text for para in doc.paragraphs[:50] if para.text.strip())[:MAX_TEXT]
|
66 |
elif ext == ".doc":
|
67 |
-
|
68 |
-
|
69 |
-
text = docx2txt.process(temp_file.name)[:MAX_TEXT]
|
70 |
-
os.unlink(temp_file.name)
|
71 |
elif ext == ".txt":
|
72 |
-
text = file_obj.
|
73 |
except Exception as e:
|
74 |
text = f"Error: {str(e)}"
|
75 |
|
76 |
return text
|
77 |
|
78 |
-
|
79 |
-
# Unified Information Extraction - Optimized
|
80 |
-
#####################################
|
81 |
-
@lru_cache(maxsize=16, typed=False)
|
82 |
-
def extract_info(text):
|
83 |
-
"""Combined extraction of all candidate info in one pass"""
|
84 |
-
text_lower = text.lower()
|
85 |
-
info = {
|
86 |
-
'name': extract_name_optimized(text),
|
87 |
-
'age': extract_age_optimized(text_lower),
|
88 |
-
'industry': extract_industry_optimized(text_lower),
|
89 |
-
'skills': extract_skills_optimized(text_lower),
|
90 |
-
'experience': extract_experience_optimized(text)
|
91 |
-
}
|
92 |
-
return info
|
93 |
-
|
94 |
-
def extract_name_optimized(text):
|
95 |
-
"""Faster name extraction with reduced checks"""
|
96 |
-
lines = text.split('\n')[:10]
|
97 |
-
for line in lines:
|
98 |
-
if 5 <= len(line) <= 40 and not any(keyword in line.lower() for keyword in ["resume", "cv"]):
|
99 |
-
return line.strip()
|
100 |
-
return "Unknown"
|
101 |
-
|
102 |
-
def extract_age_optimized(text):
|
103 |
-
"""Simplified age pattern matching"""
|
104 |
-
patterns = [r'\b(age)\b?:?\s*(\d{1,2})', r'(\d{1,2})\s+years? old']
|
105 |
-
for pattern in patterns:
|
106 |
-
match = re.search(pattern, text)
|
107 |
-
if match: return match.group(1)
|
108 |
-
return "Not specified"
|
109 |
-
|
110 |
-
# Other extract_ functions with similar optimizations...
|
111 |
|
112 |
#####################################
|
113 |
# Optimized Summarization
|
114 |
#####################################
|
115 |
def summarize_resume_text(resume_text):
|
116 |
"""Faster summarization with input truncation"""
|
|
|
|
|
|
|
|
|
117 |
base_summary = models['summarizer'](
|
118 |
-
|
119 |
-
max_length=150,
|
120 |
truncation=True
|
121 |
)[0]['summary_text']
|
122 |
|
|
|
123 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
-
|
|
|
127 |
|
128 |
-
|
129 |
-
# Optimized Scoring System
|
130 |
-
#####################################
|
131 |
-
def calculate_google_match_score(summary):
|
132 |
-
"""Precomputed keyword matching for faster scoring"""
|
133 |
-
GOOGLE_KEYWORDS = {
|
134 |
-
"Technical Skills": {"python", "java", "c++", "sql", "algorithms"},
|
135 |
-
"Advanced Tech": {"ai", "ml", "cloud", "data science"},
|
136 |
-
# Add other categories...
|
137 |
-
}
|
138 |
-
|
139 |
-
score = defaultdict(float)
|
140 |
-
summary_lower = summary.lower()
|
141 |
-
|
142 |
-
for category, keywords in GOOGLE_KEYWORDS.items():
|
143 |
-
count = len(keywords & set(summary_lower.split()))
|
144 |
-
score[category] = min(1, (count / len(keywords)) * 1.5 if keywords else 0)
|
145 |
-
|
146 |
-
return sum(score.values() * weights), score # weights defined accordingly
|
147 |
|
148 |
#####################################
|
149 |
-
# Streamlit Interface
|
150 |
#####################################
|
151 |
st.title("Google Resume Analyzer")
|
152 |
-
|
153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
|
155 |
if uploaded_file and st.button("Analyze"):
|
|
|
|
|
|
|
|
|
156 |
with st.spinner():
|
157 |
-
# Use session state for progress tracking
|
158 |
-
start_time = time.time()
|
159 |
-
|
160 |
# Step 1: Text extraction
|
161 |
-
|
162 |
-
|
163 |
-
if "Error" in
|
164 |
-
st.error(
|
|
|
165 |
continue
|
166 |
|
167 |
-
# Step 2:
|
168 |
-
|
169 |
-
|
170 |
|
171 |
# Step 3: Scoring
|
172 |
-
|
173 |
-
st.session_state.progress = 100
|
174 |
|
175 |
# Display results
|
176 |
st.subheader("Analysis Complete!")
|
177 |
-
st.markdown(
|
178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
|
180 |
-
|
181 |
-
st.progress(st.session_state.progress, 100)
|
182 |
-
time.sleep(0.1) # Simulate progress update
|
|
|
9 |
import concurrent.futures
|
10 |
from functools import lru_cache
|
11 |
from transformers import pipeline
|
|
|
12 |
|
13 |
+
# ... [Keep your existing configurations and constants] ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
#####################################
|
16 |
# Preload Models - Optimized with DistilBART
|
|
|
19 |
def load_models():
|
20 |
"""Load optimized models at startup"""
|
21 |
with st.spinner("Loading AI models..."):
|
22 |
+
# Use smaller, faster model
|
23 |
models = {
|
24 |
'summarizer': pipeline(
|
25 |
"summarization",
|
26 |
+
model="distilbart-base-cs", # Faster than BART
|
27 |
+
max_length=300, # Reduced context window
|
28 |
truncation=True,
|
29 |
num_return_sequences=1
|
30 |
)
|
|
|
47 |
try:
|
48 |
if ext == ".docx":
|
49 |
doc = docx.Document(file_obj)
|
50 |
+
# Only process first 50 paragraphs (approx 10 pages)
|
51 |
text = "\n".join(para.text for para in doc.paragraphs[:50] if para.text.strip())[:MAX_TEXT]
|
52 |
elif ext == ".doc":
|
53 |
+
# Direct conversion using docx2txt
|
54 |
+
text = docx2txt.process(file_obj.stream.read())[:MAX_TEXT]
|
|
|
|
|
55 |
elif ext == ".txt":
|
56 |
+
text = file_obj.read().decode("utf-8")[:MAX_TEXT]
|
57 |
except Exception as e:
|
58 |
text = f"Error: {str(e)}"
|
59 |
|
60 |
return text
|
61 |
|
62 |
+
# ... [Keep your existing extraction functions] ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
#####################################
|
65 |
# Optimized Summarization
|
66 |
#####################################
|
67 |
def summarize_resume_text(resume_text):
|
68 |
"""Faster summarization with input truncation"""
|
69 |
+
start_time = time.time()
|
70 |
+
|
71 |
+
# Truncate text for summarization
|
72 |
+
text_to_summarize = resume_text[:1024]
|
73 |
base_summary = models['summarizer'](
|
74 |
+
text_to_summarize,
|
75 |
+
max_length=150, # Smaller summary
|
76 |
truncation=True
|
77 |
)[0]['summary_text']
|
78 |
|
79 |
+
# Parallel extraction with thread pool
|
80 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
81 |
+
# Reduced number of parallel tasks
|
82 |
+
name_future = executor.submit(extract_name, resume_text[:200])
|
83 |
+
age_future = executor.submit(extract_age, resume_text)
|
84 |
+
industry_future = executor.submit(extract_industry, resume_text, base_summary)
|
85 |
+
|
86 |
+
# Get results
|
87 |
+
name = name_future.result()
|
88 |
+
age = age_future.result()
|
89 |
+
industry = industry_future.result()
|
90 |
+
skills, work = extract_skills_and_work(resume_text) # Sequential
|
91 |
|
92 |
+
# Format summary (simplified)
|
93 |
+
return f"**Name**: {name}\n**Age**: {age}\n**Industry**: {industry}\n\n{base_summary}", 0.1
|
94 |
|
95 |
+
# ... [Keep your scoring and feedback functions] ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
#####################################
|
98 |
+
# Optimized Streamlit Interface
|
99 |
#####################################
|
100 |
st.title("Google Resume Analyzer")
|
101 |
+
|
102 |
+
# Initialize session state properly
|
103 |
+
if 'progress' not in st.session_state:
|
104 |
+
st.session_state['progress'] = 0
|
105 |
+
if 'last_update' not in st.session_state:
|
106 |
+
st.session_state['last_update'] = time.time()
|
107 |
+
|
108 |
+
uploaded_file = st.file_uploader("Upload your resume", type=["docx", "doc", "txt"])
|
109 |
|
110 |
if uploaded_file and st.button("Analyze"):
|
111 |
+
# Use exponential backoff for progress updates
|
112 |
+
progress_interval = 0.1
|
113 |
+
max_retries = 10
|
114 |
+
|
115 |
with st.spinner():
|
|
|
|
|
|
|
116 |
# Step 1: Text extraction
|
117 |
+
st.session_state['progress'] = 33
|
118 |
+
resume_text = extract_text_from_file(uploaded_file)
|
119 |
+
if "Error" in resume_text:
|
120 |
+
st.error(resume_text)
|
121 |
+
st.session_state['progress'] = 100
|
122 |
continue
|
123 |
|
124 |
+
# Step 2: Summarization
|
125 |
+
st.session_state['progress'] = 66
|
126 |
+
summary, _ = summarize_resume_text(resume_text)
|
127 |
|
128 |
# Step 3: Scoring
|
129 |
+
st.session_state['progress'] = 100
|
|
|
130 |
|
131 |
# Display results
|
132 |
st.subheader("Analysis Complete!")
|
133 |
+
st.markdown(summary)
|
134 |
+
|
135 |
+
# Display scores
|
136 |
+
overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
|
137 |
+
show_score(overall_score)
|
138 |
+
|
139 |
+
# Display feedback
|
140 |
+
feedback, _ = generate_template_feedback(category_scores)
|
141 |
+
st.markdown(feedback)
|
142 |
+
|
143 |
+
# Progress bar implementation
|
144 |
+
if st.session_state['progress'] < 100:
|
145 |
+
st.progress(st.session_state['progress'], 100)
|
146 |
+
time.sleep(progress_interval)
|
147 |
+
|
148 |
+
def show_score(score):
|
149 |
+
"""Display score with appropriate formatting"""
|
150 |
+
score_percent = int(score * 100)
|
151 |
+
if score >= 0.85:
|
152 |
+
st.success(f"**Match Score**: {score_percent}% π")
|
153 |
+
elif score >= 0.70:
|
154 |
+
st.success(f"**Match Score**: {score_percent}% ✅")
|
155 |
+
elif score >= 0.50:
|
156 |
+
st.warning(f"**Match Score**: {score_percent}% ⚠️")
|
157 |
+
else:
|
158 |
+
st.error(f"**Match Score**: {score_percent}% π")
|
159 |
|
160 |
+
# ... [Keep your remaining functions] ...
|
|
|
|