CR7CAD committed
Commit d3c5eab · verified · 1 Parent(s): 46ff202

Update app.py

Files changed (1)
  1. app.py +503 -106
app.py CHANGED
@@ -10,151 +10,548 @@ import concurrent.futures
  from functools import lru_cache
  from transformers import pipeline

- # ... [Keep your existing configurations and constants] ...

  #####################################
- # Preload Models - Optimized with DistilBART
  #####################################
  @st.cache_resource(show_spinner=True)
  def load_models():
-     """Load optimized models at startup"""
-     with st.spinner("Loading AI models..."):
-         # Use smaller, faster model
-         models = {
-             'summarizer': pipeline(
-                 "summarization",
-                 model="distilbart-base-cs",  # Faster than BART
-                 max_length=300,  # Reduced context window
-                 truncation=True,
-                 num_return_sequences=1
-             )
-         }
      return models

  models = load_models()

  #####################################
- # Function: Extract Text from File - Optimized
  #####################################
- @lru_cache(maxsize=16, typed=False)
  def extract_text_from_file(file_obj):
-     """Optimized text extraction with early exit"""
      filename = file_obj.name
      ext = os.path.splitext(filename)[1].lower()
      text = ""
-     MAX_TEXT = 15000  # Reduced processing limit

-     try:
-         if ext == ".docx":
-             doc = docx.Document(file_obj)
-             # Only process first 50 paragraphs (approx 10 pages)
-             text = "\n".join(para.text for para in doc.paragraphs[:50] if para.text.strip())[:MAX_TEXT]
-         elif ext == ".doc":
-             # Direct conversion using docx2txt
-             text = docx2txt.process(file_obj.stream.read())[:MAX_TEXT]
-         elif ext == ".txt":
-             text = file_obj.read().decode("utf-8")[:MAX_TEXT]
-     except Exception as e:
-         text = f"Error: {str(e)}"

-     return text

- # ... [Keep your existing extraction functions] ...

  #####################################
- # Optimized Summarization
  #####################################
  def summarize_resume_text(resume_text):
-     """Faster summarization with input truncation"""
      start_time = time.time()

-     # Truncate text for summarization
-     text_to_summarize = resume_text[:1024]
-     base_summary = models['summarizer'](
-         text_to_summarize,
-         max_length=150,  # Smaller summary
-         truncation=True
-     )[0]['summary_text']
-
-     # Parallel extraction with thread pool
-     with concurrent.futures.ThreadPoolExecutor() as executor:
-         # Reduced number of parallel tasks
-         name_future = executor.submit(extract_name, resume_text[:200])
          age_future = executor.submit(extract_age, resume_text)
          industry_future = executor.submit(extract_industry, resume_text, base_summary)

          # Get results
          name = name_future.result()
          age = age_future.result()
          industry = industry_future.result()
-         skills, work = extract_skills_and_work(resume_text)  # Sequential

-     # Format summary (simplified)
-     return f"**Name**: {name}\n**Age**: {age}\n**Industry**: {industry}\n\n{base_summary}", 0.1
-
- # ... [Keep your scoring and feedback functions] ...

  #####################################
- # Optimized Streamlit Interface
  #####################################
- st.title("Google Resume Analyzer")
-
- # Initialize session state properly
- if 'progress' not in st.session_state:
-     st.session_state['progress'] = 0
- if 'last_update' not in st.session_state:
-     st.session_state['last_update'] = time.time()
-
- uploaded_file = st.file_uploader("Upload your resume", type=["docx", "doc", "txt"])
-
- if uploaded_file and st.button("Analyze"):
-     # Use exponential backoff for progress updates
-     progress_interval = 0.1
-     max_retries = 10
-
-     with st.spinner():
-         # Step 1: Text extraction
-         st.session_state['progress'] = 33
-         resume_text = extract_text_from_file(uploaded_file)
-         if "Error" in resume_text:
-             st.error(resume_text)
-             st.session_state['progress'] = 100
-             continue
-
-         # Step 2: Summarization
-         st.session_state['progress'] = 66
-         summary, _ = summarize_resume_text(resume_text)

-         # Step 3: Scoring
-         st.session_state['progress'] = 100

-         # Display results
-         st.subheader("Analysis Complete!")
          st.markdown(summary)

-         # Display scores
          overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
-         show_score(overall_score)

-         # Display feedback
-         feedback, _ = generate_template_feedback(category_scores)
          st.markdown(feedback)
-
-     # Progress bar implementation
-     if st.session_state['progress'] < 100:
-         st.progress(st.session_state['progress'], 100)
-         time.sleep(progress_interval)
-
- def show_score(score):
-     """Display score with appropriate formatting"""
-     score_percent = int(score * 100)
-     if score >= 0.85:
-         st.success(f"**Match Score**: {score_percent}% 🌟")
-     elif score >= 0.70:
-         st.success(f"**Match Score**: {score_percent}% ✅")
-     elif score >= 0.50:
-         st.warning(f"**Match Score**: {score_percent}% ⚠️")
-     else:
-         st.error(f"**Match Score**: {score_percent}% 🔍")
-
- # ... [Keep your remaining functions] ...

  from functools import lru_cache
  from transformers import pipeline

+ # Set page title and hide sidebar
+ st.set_page_config(
+     page_title="Resume-Google Job Match Analyzer",
+     initial_sidebar_state="collapsed"
+ )
+
+ # Hide sidebar completely with custom CSS
+ st.markdown("""
+ <style>
+ [data-testid="collapsedControl"] {display: none;}
+ section[data-testid="stSidebar"] {display: none;}
+ </style>
+ """, unsafe_allow_html=True)
+
+ # Pre-defined company description for Google
+ GOOGLE_DESCRIPTION = """Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong problem-solving skills, adaptability, and collaboration abilities. Technical roles require proficiency in programming languages such as Python, Java, C++, Go, or JavaScript, with expertise in data structures, algorithms, and system design. Additionally, skills in AI, cybersecurity, UX/UI design, and digital marketing are highly valued. Google fosters a culture of innovation, expecting candidates to demonstrate creativity, analytical thinking, and a passion for cutting-edge technology."""

  #####################################
+ # Preload Models - Optimized
  #####################################
  @st.cache_resource(show_spinner=True)
  def load_models():
+     """Load models at startup - using smaller/faster models"""
+     with st.spinner("Loading AI models... This may take a minute on first run."):
+         models = {}
+         # Use bart-base instead of bart-large-cnn for faster processing
+         models['summarizer'] = pipeline(
+             "summarization",
+             model="facebook/bart-base",
+             max_length=100,
+             truncation=True
+         )
+         # We don't need T5 model anymore since we're using template-based feedback
      return models

+ # Preload models immediately when app starts
  models = load_models()

  #####################################
+ # Function: Extract Text from File
  #####################################
+ @st.cache_data(show_spinner=False)
  def extract_text_from_file(file_obj):
+     """
+     Extract text from .docx and .doc files.
+     Returns the extracted text or an error message if extraction fails.
+     """
      filename = file_obj.name
      ext = os.path.splitext(filename)[1].lower()
      text = ""
+
+     if ext == ".docx":
+         try:
+             document = docx.Document(file_obj)
+             text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
+         except Exception as e:
+             text = f"Error processing DOCX file: {e}"
+     elif ext == ".doc":
+         try:
+             # For .doc files, we need to save to a temp file
+             with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
+                 temp_file.write(file_obj.getvalue())
+                 temp_path = temp_file.name
+
+             # Use docx2txt which is generally faster
+             try:
+                 text = docx2txt.process(temp_path)
+             except Exception:
+                 text = "Could not process .doc file. Please convert to .docx format."
+
+             # Clean up temp file
+             os.unlink(temp_path)
+         except Exception as e:
+             text = f"Error processing DOC file: {e}"
+     elif ext == ".txt":
+         try:
+             text = file_obj.getvalue().decode("utf-8")
+         except Exception as e:
+             text = f"Error processing TXT file: {e}"
+     else:
+         text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
+
+     # Limit text size for faster processing
+     return text[:15000] if text else text
+
+ #####################################
+ # Functions for Information Extraction - Optimized
+ #####################################
+
+ # Cache the extraction functions to avoid reprocessing
+ @lru_cache(maxsize=32)
+ def extract_name(text_start):
+     """Extract candidate name from the beginning of resume text"""
+     # Only use the first 500 characters to speed up processing
+     lines = text_start.split('\n')
+
+     # Check first few non-empty lines for potential names
+     potential_name_lines = [line.strip() for line in lines[:5] if line.strip()]
+
+     if potential_name_lines:
+         # First line is often the name if it's short and doesn't contain common headers
+         first_line = potential_name_lines[0]
+         if 5 <= len(first_line) <= 40 and not any(x in first_line.lower() for x in ["resume", "cv", "curriculum", "vitae", "profile"]):
+             return first_line
+
+         # Look for lines that might contain a name
+         for line in potential_name_lines[:3]:
+             if len(line.split()) <= 4 and not any(x in line.lower() for x in ["address", "phone", "email", "resume", "cv"]):
+                 return line
+
+     return "Unknown (please extract from resume)"
+
+ def extract_age(text):
+     """Extract candidate age from resume text"""
+     # Simplified: just check a few common patterns
+     age_patterns = [
+         r'age:?\s*(\d{1,2})',
+         r'(\d{1,2})\s*years\s*old',
+     ]
+
+     text_lower = text.lower()
+     for pattern in age_patterns:
+         matches = re.search(pattern, text_lower)
+         if matches:
+             return matches.group(1)
+
+     return "Not specified"
+
+ def extract_industry(text, base_summary):
+     """Extract expected job industry from resume"""
+     # Simplified industry keywords focused on the most common ones
+     industry_keywords = {
+         "technology": ["software", "programming", "developer", "IT", "tech", "computer"],
+         "finance": ["banking", "financial", "accounting", "finance", "analyst"],
+         "healthcare": ["medical", "health", "hospital", "clinical", "nurse", "doctor"],
+         "education": ["teaching", "teacher", "professor", "education", "university"],
+         "marketing": ["marketing", "advertising", "digital marketing", "social media"],
+         "engineering": ["engineer", "engineering"],
+         "data science": ["data science", "machine learning", "AI", "analytics"],
+         "information systems": ["information systems", "ERP", "systems management"]
+     }

+     # Use the base summary (already lowercased) to speed up matching
+     combined_text = base_summary.lower()

+     counts = {}
+     for industry, keywords in industry_keywords.items():
+         counts[industry] = sum(combined_text.count(keyword.lower()) for keyword in keywords)
+
+     # Get the industry with the highest count
+     if counts:
+         likely_industry = max(counts.items(), key=lambda x: x[1])
+         if likely_industry[1] > 0:
+             return likely_industry[0].capitalize()
+
+     # Check for educational background that might indicate industry
+     degrees = ["computer science", "business", "engineering", "medicine", "education", "finance", "marketing"]
+
+     for degree in degrees:
+         if degree in combined_text:
+             return f"{degree.capitalize()}-related field"
+
+     return "Not clearly specified"

+ def extract_skills_and_work(text):
+     """Extract both skills and work experience at once to save processing time"""
+     # Common skill categories - reduced keyword list for speed
+     skill_categories = {
+         "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go"],
+         "Data Science": ["Machine Learning", "Data Analysis", "Statistics", "TensorFlow", "PyTorch", "AI", "Algorithms"],
+         "Database": ["SQL", "MySQL", "MongoDB", "Database", "NoSQL", "PostgreSQL"],
+         "Web Development": ["React", "Angular", "Node.js", "Frontend", "Backend", "Full-Stack"],
+         "Software Development": ["Agile", "Scrum", "Git", "DevOps", "Docker", "System Design"],
+         "Cloud": ["AWS", "Azure", "Google Cloud", "Cloud Computing"],
+         "Security": ["Cybersecurity", "Network Security", "Encryption", "Security"],
+         "Business": ["Project Management", "Business Analysis", "Leadership", "Teamwork"],
+         "Design": ["UX/UI", "User Experience", "Design Thinking", "Adobe"]
+     }
+
+     # Work experience extraction
+     work_headers = [
+         "work experience", "professional experience", "employment history",
+         "work history", "experience"
+     ]
+
+     next_section_headers = [
+         "education", "skills", "certifications", "projects", "achievements"
+     ]
+
+     # Process everything at once
+     lines = text.split('\n')
+     text_lower = text.lower()
+
+     # Skills extraction
+     found_skills = []
+     for category, skills in skill_categories.items():
+         category_skills = []
+         for skill in skills:
+             if skill.lower() in text_lower:
+                 category_skills.append(skill)
+         if category_skills:
+             found_skills.append(f"{category}: {', '.join(category_skills)}")
+
+     # Work experience extraction - simplified approach
+     work_section = []
+     in_work_section = False
+
+     for idx, line in enumerate(lines):
+         line_lower = line.lower().strip()
+         # Start of work section
+         if not in_work_section:
+             if any(header in line_lower for header in work_headers):
+                 in_work_section = True
+                 continue
+         # End of work section
+         elif in_work_section:
+             if any(header in line_lower for header in next_section_headers):
+                 break
+             if line.strip():
+                 work_section.append(line.strip())
+
+     # Simplified work formatting
+     if not work_section:
+         work_experience = "Work experience not clearly identified"
+     else:
+         work_lines = []
+         company_count = 0
+         for line in work_section:
+             if re.search(r'(19|20)\d{2}', line):
+                 company_count += 1
+                 if company_count <= 3:  # Limit to 3 most recent positions
+                     work_lines.append(f"**{line}**")
+                 else:
+                     break
+             elif company_count <= 3 and len(work_lines) < 10:
+                 work_lines.append(line)
+
+         work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
+
+     skills_formatted = "\n• " + "\n• ".join(found_skills) if found_skills else "No specific technical skills clearly identified"
+
+     return skills_formatted, work_experience

  #####################################
+ # Function: Summarize Resume Text - Optimized
  #####################################
  def summarize_resume_text(resume_text):
+     """
+     Generates a structured summary of the resume text - optimized for speed
+     """
      start_time = time.time()

+     # First, generate a quick summary using the preloaded model
+     max_input_length = 1024  # Model limit
+     # Only summarize the first 1024 characters for speed
+     text_to_summarize = resume_text[:max_input_length]
+     base_summary = models['summarizer'](text_to_summarize, truncation=True)[0]['summary_text']
+
+     # Extract information in parallel where possible
+     # Limit the number of workers to reduce overhead
+     with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
+         name_future = executor.submit(extract_name, resume_text[:500])  # Only use the start of text
          age_future = executor.submit(extract_age, resume_text)
          industry_future = executor.submit(extract_industry, resume_text, base_summary)
+         skills_work_future = executor.submit(extract_skills_and_work, resume_text)

          # Get results
          name = name_future.result()
          age = age_future.result()
          industry = industry_future.result()
+         skills, work_experience = skills_work_future.result()

+     # Format the structured summary
+     formatted_summary = f"Name: {name}\n"
+     formatted_summary += f"Age: {age}\n"
+     formatted_summary += f"Expected Job Industry: {industry}\n\n"
+     formatted_summary += f"Previous Work Experience: {work_experience}\n\n"
+     formatted_summary += f"Skills: {skills}"
+
+     execution_time = time.time() - start_time
+     return formatted_summary, execution_time

  #####################################
+ # Function: Calculate Google Match Score - Detailed Breakdown
  #####################################
+ def calculate_google_match_score(candidate_summary):
+     """
+     Calculate a detailed match score breakdown based on skills and experience in the candidate summary
+     compared with what Google requires.
+     Returns:
+     - overall_score: A normalized score between 0 and 1
+     - category_scores: A dictionary with scores for each category
+     - score_breakdown: A formatted string explanation of the scoring
+     """
+     # Define categories that Google values with specific keywords
+     google_categories = {
+         "Technical Skills": {
+             "keywords": ["python", "java", "c++", "go", "javascript", "sql", "nosql",
+                          "algorithms", "data structures", "system design"],
+             "weight": 0.35
+         },
+         "Advanced Technologies": {
+             "keywords": ["artificial intelligence", "machine learning", "cloud computing",
+                          "ai", "ml", "cloud", "data science", "big data",
+                          "tensorflow", "pytorch", "deep learning"],
+             "weight": 0.25
+         },
+         "Problem Solving": {
+             "keywords": ["problem solving", "algorithms", "analytical", "critical thinking",
+                          "debugging", "troubleshooting", "optimization"],
+             "weight": 0.20
+         },
+         "Innovation & Creativity": {
+             "keywords": ["innovation", "creative", "creativity", "novel", "cutting-edge",
+                          "research", "design thinking", "innovative"],
+             "weight": 0.10
+         },
+         "Teamwork & Leadership": {
+             "keywords": ["team", "leadership", "collaborate", "collaboration", "communication",
+                          "mentoring", "lead", "coordinate", "agile", "scrum"],
+             "weight": 0.10
+         }
+     }
+
+     summary_lower = candidate_summary.lower()
+
+     # Calculate scores for each category
+     category_scores = {}
+     for category, details in google_categories.items():
+         keywords = details["keywords"]
+         max_possible = len(keywords)
+         matches = sum(1 for keyword in keywords if keyword in summary_lower)

+         if max_possible > 0:
+             raw_score = matches / max_possible
+             category_scores[category] = min(1.0, raw_score * 1.5)
+         else:
+             category_scores[category] = 0
+
+     overall_score = sum(
+         score * google_categories[category]["weight"]
+         for category, score in category_scores.items()
+     )
+     overall_score = min(1.0, max(0.0, overall_score))
+
+     # Create score breakdown explanation
+     score_breakdown = "**Score Breakdown by Category:**\n\n"
+
+     for category, score in category_scores.items():
+         percentage = int(score * 100)
+         weight = int(google_categories[category]["weight"] * 100)
+         score_breakdown += f"• **{category}** ({weight}% of total): {percentage}%\n"
+
+     return overall_score, category_scores, score_breakdown
+
+ #####################################
+ # Function: Generate Robust Feedback - Template-Based
+ #####################################
+ def generate_template_feedback(category_scores):
+     """
+     Generate comprehensive template-based feedback without using ML model for speed and reliability.
+     """
+     start_time = time.time()
+     import random
+
+     sorted_categories = sorted(category_scores.items(), key=lambda x: x[1], reverse=True)
+     top_categories = sorted_categories[:2]
+     bottom_categories = sorted(category_scores.items(), key=lambda x: x[1])[:2]
+
+     top_feedback_templates = {
+         "Technical Skills": [
+             "demonstrates strong technical skills with proficiency in programming languages and technical tools that Google values.",
+             "shows excellent technical capabilities that align well with Google's engineering requirements.",
+             "possesses the technical expertise needed for Google's development environment."
+         ],
+         "Advanced Technologies": [
+             "has valuable experience with cutting-edge technologies that Google prioritizes in its innovation efforts.",
+             "demonstrates knowledge in advanced technological areas that align with Google's future direction.",
+             "shows proficiency in modern technologies that Google uses in its products and services."
+         ],
+         "Problem Solving": [
+             "exhibits strong problem-solving abilities which are fundamental to Google's engineering culture.",
+             "demonstrates analytical thinking and problem-solving skills that Google seeks in candidates.",
+             "shows the problem-solving aptitude that would be valuable in Google's collaborative environment."
+         ],
+         "Innovation & Creativity": [
+             "shows the creative thinking and innovation mindset that Google values in its workforce.",
+             "demonstrates the innovative approach that would fit well with Google's creative culture.",
+             "exhibits creativity that could contribute to Google's product development process."
+         ],
+         "Teamwork & Leadership": [
+             "demonstrates leadership qualities and teamwork skills that Google looks for in potential employees.",
+             "shows collaborative abilities that would integrate well with Google's team-based structure.",
+             "exhibits the interpersonal skills needed to thrive in Google's collaborative environment."
+         ]
+     }
+
+     bottom_feedback_templates = {
+         "Technical Skills": [
+             "should strengthen their technical skills, particularly in programming languages commonly used at Google such as Python, Java, or C++.",
+             "would benefit from developing more depth in technical tools and programming capabilities to meet Google's standards.",
+             "needs to enhance their technical expertise to better align with Google's engineering requirements."
+         ],
+         "Advanced Technologies": [
+             "would benefit from gaining more experience with AI, machine learning, or cloud technologies that Google prioritizes.",
+             "should develop more expertise in advanced technologies like machine learning or data science to increase their value to Google.",
+             "needs more exposure to the cutting-edge technologies that drive Google's innovation."
+         ],
+         "Problem Solving": [
+             "should strengthen their problem-solving abilities, particularly with algorithms and data structures that are crucial for Google interviews.",
+             "would benefit from developing stronger analytical and problem-solving skills to match Google's expectations.",
+             "needs to improve their approach to complex problem-solving to meet Google's standards."
+         ],
+         "Innovation & Creativity": [
+             "could develop a more innovative mindset to better align with Google's creative culture.",
+             "should work on demonstrating more creative thinking in their approach to match Google's innovation focus.",
+             "would benefit from cultivating more creativity and out-of-the-box thinking valued at Google."
+         ],
+         "Teamwork & Leadership": [
+             "should focus on developing stronger leadership and teamwork skills to thrive in Google's collaborative environment.",
+             "would benefit from more experience in collaborative settings to match Google's team-oriented culture.",
+             "needs to strengthen their interpersonal and leadership capabilities to align with Google's expectations."
+         ]
+     }
+
+     top_category = top_categories[0][0]
+     top_feedback = random.choice(top_feedback_templates.get(top_category, ["shows notable skills"]))
+
+     bottom_category = bottom_categories[0][0]
+     bottom_feedback = random.choice(bottom_feedback_templates.get(bottom_category, ["could improve their skills"]))
+
+     feedback = f"This candidate {top_feedback} "
+
+     if top_categories[1][1] >= 0.6:
+         second_top = top_categories[1][0]
+         second_top_feedback = random.choice(top_feedback_templates.get(second_top, ["has good abilities"]))
+         feedback += f"The candidate also {second_top_feedback} "
+
+     feedback += f"However, the candidate {bottom_feedback} "
+
+     overall_score = sum(score * weight for (category, score), weight in
+                         zip(category_scores.items(), [0.35, 0.25, 0.20, 0.10, 0.10]))
+
+     if overall_score >= 0.75:
+         feedback += "Overall, this candidate shows strong potential for success at Google."
+     elif overall_score >= 0.6:
+         feedback += "With these improvements, the candidate could be a good fit for Google."
+     else:
+         feedback += "The candidate would need significant development to meet Google's standards."
+
+     execution_time = time.time() - start_time
+     return feedback, execution_time
+
+ #####################################
+ # Main Streamlit Interface - with Progress Reporting
+ #####################################
+ st.title("Google Resume Match Analyzer")
+ st.markdown(
+     """
+ Upload your resume file in **.docx**, **.doc**, or **.txt** format to see how well you match with Google's hiring requirements. The app performs the following tasks:
+ 1. Extracts text from your resume.
+ 2. Uses AI to generate a structured candidate summary.
+ 3. Evaluates your fit for Google across key hiring criteria with a detailed score breakdown.
+     """
+ )
+
+ # Display Google's requirements
+ with st.expander("Google's Requirements", expanded=False):
+     st.write(GOOGLE_DESCRIPTION)
+
+ # File uploader
+ uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
+
+ # Process button with optimized flow
+ if uploaded_file is not None and st.button("Analyze My Google Fit"):
+     progress_bar = st.progress(0)
+     status_text = st.empty()
+
+     # Step 1: Extract text
+     status_text.text("Step 1/3: Extracting text from resume...")
+     resume_text = extract_text_from_file(uploaded_file)
+     progress_bar.progress(25)
+
+     if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx, .doc, or .txt file.":
+         st.error(resume_text)
+     else:
+         # Step 2: Generate summary
+         status_text.text("Step 2/3: Analyzing resume and generating summary...")
+         summary, summarization_time = summarize_resume_text(resume_text)
+         progress_bar.progress(50)

+         st.subheader("Your Resume Summary")
          st.markdown(summary)
+         st.info(f"Summary generated in {summarization_time:.2f} seconds")

+         # Step 3: Calculate scores and generate feedback
+         status_text.text("Step 3/3: Calculating Google fit scores...")
          overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
+         feedback, feedback_time = generate_template_feedback(category_scores)
+
+         progress_bar.progress(100)
+         status_text.empty()
+
+         st.subheader("Google Fit Assessment")
+         score_percent = int(overall_score * 100)
+         if overall_score >= 0.85:
+             st.success(f"**Overall Google Match Score:** {score_percent}% 🌟")
+         elif overall_score >= 0.70:
+             st.success(f"**Overall Google Match Score:** {score_percent}% ✅")
+         elif overall_score >= 0.50:
+             st.warning(f"**Overall Google Match Score:** {score_percent}% ⚠️")
+         else:
+             st.error(f"**Overall Google Match Score:** {score_percent}% 🔍")
+
+         st.markdown("### Score Calculation")
+         st.markdown(score_breakdown)

+         st.markdown("### Expert Assessment")
          st.markdown(feedback)
+
+         st.info(f"Assessment completed in {feedback_time:.2f} seconds")
+
+         st.subheader("Recommended Next Steps")
+         weakest_categories = sorted(category_scores.items(), key=lambda x: x[1])[:2]
+
+         if overall_score >= 0.80:
+             st.markdown("""
+ - Consider applying for positions at Google that match your experience
+ - Prepare for technical interviews by practicing algorithms and system design
+ - Review Google's interview process and STAR method for behavioral questions
+ """)
+         elif overall_score >= 0.60:
+             improvement_areas = ", ".join([cat for cat, _ in weakest_categories])
+             st.markdown(f"""
+ - Focus on strengthening these areas: {improvement_areas}
+ - Work on projects that demonstrate your skills in Google's key technology areas
+ - Consider taking additional courses in algorithms, system design, or other Google focus areas
+ """)
+         else:
+             improvement_areas = ", ".join([cat for cat, _ in weakest_categories])
+             st.markdown(f"""
+ - Build experience in these critical areas: {improvement_areas}
+ - Develop projects showcasing problem-solving abilities and technical skills
+ - Consider gaining more experience before applying, or target specific Google roles that better match your profile
+ """)