CR7CAD committed on
Commit
02338c4
·
verified ·
1 Parent(s): 5b94bbe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -145
app.py CHANGED
@@ -115,53 +115,19 @@ def extract_text_from_file(file_obj):
115
 
116
  # Information extraction functions
117
  def extract_skills(text):
118
- """Extract skills from text - expanded for better matching"""
119
- # Expanded skill keywords dictionary
120
  skill_keywords = {
121
- "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "React", "Angular", "Vue",
122
- "PHP", "Ruby", "Swift", "Kotlin", "Go", "TypeScript", "Node.js", "jQuery", "Bootstrap"],
123
- "Data Science": ["Machine Learning", "Data Analysis", "Statistics", "TensorFlow", "PyTorch", "AI", "NLP",
124
- "Data Mining", "Big Data", "Data Visualization", "Statistical Analysis", "R", "SPSS", "SAS",
125
- "Regression", "Classification", "Clustering", "Neural Networks", "Deep Learning"],
126
- "Database": ["SQL", "MySQL", "MongoDB", "PostgreSQL", "Oracle", "Redis", "DynamoDB", "SQLite", "NoSQL",
127
- "Database Design", "SQL Server", "Database Administration", "ETL", "Data Warehousing"],
128
- "Web Dev": ["React", "Angular", "Node.js", "Frontend", "Backend", "Full-Stack", "REST API", "GraphQL",
129
- "Web Development", "WordPress", "Drupal", "CMS", "SEO", "UI/UX", "Responsive Design", "AJAX"],
130
- "Software Dev": ["Agile", "Scrum", "Git", "DevOps", "Docker", "CI/CD", "Jenkins", "Software Development",
131
- "Object-Oriented Programming", "Design Patterns", "Testing", "QA", "Software Architecture",
132
- "Version Control", "JIRA", "Microservices", "Code Review", "Debugging"],
133
- "Cloud": ["AWS", "Azure", "Google Cloud", "Lambda", "S3", "EC2", "Cloud Computing", "Serverless",
134
- "Infrastructure as Code", "Cloud Architecture", "Cloud Security", "Kubernetes", "Load Balancing"],
135
- "Business": ["Project Management", "Leadership", "Teamwork", "Agile", "Scrum", "Business Analysis",
136
- "Requirements Gathering", "Client Relations", "Communication", "Presentation", "Meeting Facilitation",
137
- "Strategic Planning", "Process Improvement", "Problem Solving", "Decision Making", "Stakeholder Management"]
138
  }
139
 
140
  text_lower = text.lower()
141
-
142
- # Method 1: Look for exact matches
143
- exact_skills = [skill for _, skills in skill_keywords.items() for skill in skills if skill.lower() in text_lower]
144
-
145
- # Method 2: Use regex for more flexible matching (accounts for variations)
146
- more_skills = []
147
- for category, skills in skill_keywords.items():
148
- for skill in skills:
149
- # This handles cases like "Python developer" or "experienced in Python"
150
- if re.search(r'\b' + re.escape(skill.lower()) + r'(?:\s|\b|ing|er|ed)', text_lower):
151
- more_skills.append(skill)
152
-
153
- # Combine both methods and remove duplicates
154
- all_skills = list(set(exact_skills + more_skills))
155
-
156
- # Add soft skill detection
157
- soft_skills = ["Communication", "Teamwork", "Problem Solving", "Critical Thinking",
158
- "Leadership", "Organization", "Time Management", "Flexibility", "Adaptability"]
159
-
160
- for skill in soft_skills:
161
- if skill.lower() in text_lower or re.search(r'\b' + re.escape(skill.lower()) + r'(?:\s|$)', text_lower):
162
- all_skills.append(skill)
163
-
164
- return all_skills
165
 
166
  @lru_cache(maxsize=32)
167
  def extract_name(text_start):
@@ -252,19 +218,9 @@ def summarize_resume_text(resume_text, models):
252
  return summary, time.time() - start
253
 
254
  def extract_job_requirements(job_description, models):
255
- # Expanded technical skills list for better matching
256
  tech_skills = [
257
- "Python", "Java", "JavaScript", "SQL", "HTML", "CSS", "React", "Angular", "Vue", "Node.js",
258
- "Machine Learning", "Data Science", "AI", "Deep Learning", "NLP", "Statistics", "TensorFlow",
259
- "AWS", "Azure", "Google Cloud", "Docker", "Kubernetes", "CI/CD", "DevOps",
260
- "MySQL", "MongoDB", "PostgreSQL", "Oracle", "NoSQL", "Database", "Data Analysis",
261
- "Project Management", "Agile", "Scrum", "Leadership", "Communication", "Teamwork",
262
- "Git", "Software Development", "Full Stack", "Frontend", "Backend", "RESTful API",
263
- "Mobile Development", "Android", "iOS", "Swift", "Kotlin", "React Native", "Flutter",
264
- "Business Analysis", "Requirements", "UX/UI", "Design", "Product Management",
265
- "Testing", "QA", "Security", "Cloud Computing", "Networking", "System Administration",
266
- "Linux", "Windows", "Excel", "PowerPoint", "Word", "Microsoft Office",
267
- "Problem Solving", "Critical Thinking", "Analytical Skills"
268
  ]
269
 
270
  clean_text = job_description.lower()
@@ -308,108 +264,86 @@ def extract_job_requirements(job_description, models):
308
  }
309
 
310
  def evaluate_job_fit(resume_summary, job_requirements, models):
311
- start = time.time()
 
 
 
312
 
313
- # Basic extraction
314
  required_skills = job_requirements["required_skills"]
315
  years_required = job_requirements["years_experience"]
316
  job_title = job_requirements["title"]
 
 
 
317
  skills_mentioned = extract_skills(resume_summary)
318
 
319
- # Calculate matches - IMPROVED MATCHING ALGORITHM
320
  matching_skills = [skill for skill in required_skills if skill in skills_mentioned]
 
321
 
322
- # More balanced skill match calculation:
323
- # - If no required skills, default to 0.5 (neutral)
324
- # - Otherwise calculate percentage but with diminishing returns
325
- if not required_skills:
326
- skill_match = 0.5
327
- else:
328
- raw_match = len(matching_skills) / len(required_skills)
329
- # Apply a more gradual scaling to avoid big jumps
330
- skill_match = raw_match ** 0.7 # Using power < 1 gives more weight to partial matches
331
-
332
- # Extract experience
333
  years_experience = 0
334
- exp_match = re.search(r'(\d+)\+?\s*years?\s*(?:of)?\s*experience', resume_summary, re.IGNORECASE)
335
- if exp_match:
336
- try: years_experience = int(exp_match.group(1))
337
- except: pass
 
338
 
339
- # Calculate scores with smoother transitions
340
- # Experience matching: more balanced, handles the case where job requires no experience
341
- if years_required == 0:
342
- # If no experience required, having 1+ years is good, 0 is neutral
343
- exp_match_ratio = min(1.0, years_experience / 2 + 0.5)
344
- else:
345
- # For jobs requiring experience, use a more gradual scale
346
- exp_match_ratio = min(1.0, (years_experience / max(1, years_required)) ** 0.8)
347
 
348
- # Title matching - improved to find partial matches
349
- title_words = [w for w in job_title.lower().split() if len(w) > 3]
350
- if not title_words:
351
- title_match = 0.5 # Neutral if no meaningful title words
352
- else:
353
- matches = 0
354
- for word in title_words:
355
- if word in resume_summary.lower():
356
- matches += 1
357
- # Look for similar words (prefixes) for partial matching
358
- elif any(w.startswith(word[:4]) for w in resume_summary.lower().split() if len(w) > 3):
359
- matches += 0.5
360
- title_match = matches / len(title_words)
361
-
362
- # Calculate final scores with more reasonable ranges
363
- skill_score = skill_match * 2.0 # 0-2 scale
364
- exp_score = exp_match_ratio * 2.0 # 0-2 scale
365
- title_score = title_match * 2.0 # 0-2 scale
366
-
367
- # Extract candidate info
368
- name = re.search(r'Name:\s*(.*?)(?=\n|\Z)', resume_summary)
369
- name = name.group(1).strip() if name else "The candidate"
370
-
371
- industry = re.search(r'Expected Industry:\s*(.*?)(?=\n|\Z)', resume_summary)
372
- industry = industry.group(1).strip() if industry else "unspecified industry"
373
-
374
- # Calculate weighted score - ADJUSTED WEIGHTS
375
- weighted_score = (skill_score * 0.45) + (exp_score * 0.35) + (title_score * 0.20)
376
-
377
- # IMPROVED THRESHOLDS to get more "Potential Fit" results
378
- # Good Fit: 1.25+ (was 1.5)
379
- # Potential Fit: 0.6-1.25 (was 0.8-1.5)
380
- # No Fit: <0.6 (was <0.8)
381
- if weighted_score >= 1.25:
382
  fit_score = 2 # Good fit
383
- elif weighted_score >= 0.6:
384
- fit_score = 1 # Potential fit - wider range
385
  else:
386
  fit_score = 0 # Not a fit
387
 
388
- # Add logging to help debug the scoring
389
- st.session_state['debug_scores'] = {
390
- 'skill_match': skill_match,
391
- 'skill_score': skill_score,
392
- 'exp_match_ratio': exp_match_ratio,
393
- 'exp_score': exp_score,
394
- 'title_match': title_match,
395
- 'title_score': title_score,
396
- 'weighted_score': weighted_score,
397
- 'fit_score': fit_score,
398
- 'matching_skills': matching_skills,
399
- 'required_skills': required_skills
400
- }
401
-
402
- # Generate assessment
403
- missing = [skill for skill in required_skills if skill not in skills_mentioned]
404
 
405
  if fit_score == 2:
406
- assessment = f"{fit_score}: GOOD FIT - {name} demonstrates strong alignment with the {job_title} position. Their background in {industry} appears well-suited for this role's requirements."
407
  elif fit_score == 1:
408
- assessment = f"{fit_score}: POTENTIAL FIT - {name} shows potential for the {job_title} role but has gaps in certain areas. Additional training might be needed in {', '.join(missing[:2])}."
409
  else:
410
- assessment = f"{fit_score}: NO FIT - {name}'s background shows limited alignment with this {job_title} position. Their experience and skills differ significantly from the requirements."
411
 
412
- return assessment, fit_score, time.time() - start
 
 
413
 
414
  def analyze_job_fit(resume_summary, job_description, models):
415
  start = time.time()
@@ -421,10 +355,6 @@ def analyze_job_fit(resume_summary, job_description, models):
421
  # Main Function
422
  #####################################
423
  def main():
424
- # Initialize session state for debug info
425
- if 'debug_scores' not in st.session_state:
426
- st.session_state['debug_scores'] = {}
427
-
428
  st.title("Resume-Job Fit Analyzer")
429
  st.markdown("Upload your resume file in **.docx**, **.doc**, or **.txt** format and enter a job description to see how well you match.")
430
 
@@ -487,10 +417,6 @@ def main():
487
  - If interested in this field, focus on developing the required skills
488
  - Consider similar roles with fewer experience requirements
489
  """)
490
-
491
- # Show debug scores if needed (uncomment this to debug scoring)
492
- # st.subheader("Debug Information")
493
- # st.json(st.session_state['debug_scores'])
494
 
495
  if __name__ == "__main__":
496
  main()
 
115
 
116
  # Information extraction functions
117
  def extract_skills(text):
118
+ """Extract skills from text"""
 
119
  skill_keywords = {
120
+ "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "React", "Angular"],
121
+ "Data Science": ["Machine Learning", "Data Analysis", "Statistics", "TensorFlow", "PyTorch", "AI", "NLP"],
122
+ "Database": ["SQL", "MySQL", "MongoDB", "PostgreSQL", "Oracle", "Redis"],
123
+ "Web Dev": ["React", "Angular", "Node.js", "Frontend", "Backend", "Full-Stack", "REST API"],
124
+ "Software Dev": ["Agile", "Scrum", "Git", "DevOps", "Docker", "CI/CD", "Jenkins"],
125
+ "Cloud": ["AWS", "Azure", "Google Cloud", "Lambda", "S3", "EC2"],
126
+ "Business": ["Project Management", "Leadership", "Teamwork", "Agile", "Scrum"]
 
 
 
 
 
 
 
 
 
 
127
  }
128
 
129
  text_lower = text.lower()
130
+ return [skill for _, skills in skill_keywords.items() for skill in skills if skill.lower() in text_lower]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  @lru_cache(maxsize=32)
133
  def extract_name(text_start):
 
218
  return summary, time.time() - start
219
 
220
  def extract_job_requirements(job_description, models):
 
221
  tech_skills = [
222
+ "Python", "Java", "JavaScript", "SQL", "HTML", "CSS", "React", "Angular", "Machine Learning", "AWS",
223
+ "Azure", "Docker", "MySQL", "MongoDB", "Project Management", "Agile", "Leadership", "Git", "DevOps"
 
 
 
 
 
 
 
 
 
224
  ]
225
 
226
  clean_text = job_description.lower()
 
264
  }
265
 
266
  def evaluate_job_fit(resume_summary, job_requirements, models):
267
+ """
268
+ Use a more direct method to evaluate job fit, rather than relying solely on sentiment analysis
269
+ """
270
+ start_time = time.time()
271
 
272
+ # Extract basic information for context
273
  required_skills = job_requirements["required_skills"]
274
  years_required = job_requirements["years_experience"]
275
  job_title = job_requirements["title"]
276
+ job_summary = job_requirements["summary"]
277
+
278
+ # Extract skills from resume
279
  skills_mentioned = extract_skills(resume_summary)
280
 
281
+ # Calculate skill match percentage
282
  matching_skills = [skill for skill in required_skills if skill in skills_mentioned]
283
+ skill_match_percentage = len(matching_skills) / len(required_skills) if required_skills else 0
284
 
285
+ # Extract experience level from resume
286
+ experience_pattern = r'(\d+)\+?\s*years?\s*(?:of)?\s*experience'
287
+ experience_match = re.search(experience_pattern, resume_summary, re.IGNORECASE)
 
 
 
 
 
 
 
 
288
  years_experience = 0
289
+ if experience_match:
290
+ try:
291
+ years_experience = int(experience_match.group(1))
292
+ except:
293
+ years_experience = 0
294
 
295
+ # Experience match
296
+ exp_match_ratio = min(1.0, years_experience / max(1, years_required)) if years_required > 0 else 0.5
 
 
 
 
 
 
297
 
298
+ # Check job title match
299
+ job_title_lower = job_title.lower()
300
+ title_match = 0
301
+
302
+ # Look for job title words in resume
303
+ title_words = [word for word in job_title_lower.split() if len(word) > 3]
304
+ title_matches = sum(1 for word in title_words if word in resume_summary.lower())
305
+ title_match = title_matches / len(title_words) if title_words else 0
306
+
307
+ # Calculate scores for each dimension
308
+ skill_score = min(2, skill_match_percentage * 3) # 0-2 scale
309
+ exp_score = min(2, exp_match_ratio * 2) # 0-2 scale
310
+ title_score = min(2, title_match * 2) # 0-2 scale
311
+
312
+ # Extract name, age, industry from resume summary
313
+ name_match = re.search(r'Name:\s*(.*?)(?=\n|\Z)', resume_summary)
314
+ name = name_match.group(1).strip() if name_match else "The candidate"
315
+
316
+ age_match = re.search(r'Age:\s*(.*?)(?=\n|\Z)', resume_summary)
317
+ age = age_match.group(1).strip() if age_match else "unspecified age"
318
+
319
+ industry_match = re.search(r'Expected Industry:\s*(.*?)(?=\n|\Z)', resume_summary)
320
+ industry = industry_match.group(1).strip() if industry_match else "unspecified industry"
321
+
322
+ # Calculate weighted final score
323
+ # Skills: 50%, Experience: 30%, Title match: 20%
324
+ weighted_score = (skill_score * 0.5) + (exp_score * 0.3) + (title_score * 0.2)
325
+
326
+ # Convert to integer score (0-2)
327
+ if weighted_score >= 1.5:
 
 
 
 
328
  fit_score = 2 # Good fit
329
+ elif weighted_score >= 0.8:
330
+ fit_score = 1 # Potential fit
331
  else:
332
  fit_score = 0 # Not a fit
333
 
334
+ # Generate assessment text based on score
335
+ missing_skills = [skill for skill in required_skills if skill not in skills_mentioned]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
 
337
  if fit_score == 2:
338
+ fit_assessment = f"{fit_score}: GOOD FIT - {name} demonstrates strong alignment with the {job_title} position. Their background in {industry} and professional experience appear well-suited for this role's requirements. The technical expertise matches what the position demands."
339
  elif fit_score == 1:
340
+ fit_assessment = f"{fit_score}: POTENTIAL FIT - {name} shows potential for the {job_title} role with some relevant experience, though there are gaps in certain technical areas. Their {industry} background provides partial alignment with the position requirements. Additional training might be needed in {', '.join(missing_skills[:2])} if pursuing this opportunity."
341
  else:
342
+ fit_assessment = f"{fit_score}: NO FIT - {name}'s current background shows limited alignment with this {job_title} position. Their experience level and technical background differ significantly from the role requirements. A position better matching their {industry} expertise might be more suitable."
343
 
344
+ execution_time = time.time() - start_time
345
+
346
+ return fit_assessment, fit_score, execution_time
347
 
348
  def analyze_job_fit(resume_summary, job_description, models):
349
  start = time.time()
 
355
  # Main Function
356
  #####################################
357
  def main():
 
 
 
 
358
  st.title("Resume-Job Fit Analyzer")
359
  st.markdown("Upload your resume file in **.docx**, **.doc**, or **.txt** format and enter a job description to see how well you match.")
360
 
 
417
  - If interested in this field, focus on developing the required skills
418
  - Consider similar roles with fewer experience requirements
419
  """)
 
 
 
 
420
 
421
  if __name__ == "__main__":
422
  main()