CR7CAD committed on
Commit
986332a
·
verified ·
1 Parent(s): 88107c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -37
app.py CHANGED
@@ -8,7 +8,20 @@ import time
8
  import re
9
  import pandas as pd
10
  from functools import lru_cache
11
- from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # Set page title and hide sidebar
14
  st.set_page_config(
@@ -32,24 +45,116 @@ def load_models():
32
  """Load models at startup"""
33
  with st.spinner("Loading AI models... This may take a minute on first run."):
34
  models = {}
35
- # Use bart-base for summarization
36
- models['summarizer'] = pipeline(
37
- "summarization",
38
- model="facebook/bart-base",
39
- max_length=100,
40
- truncation=True
41
- )
42
 
43
  # Load sentiment model for evaluation
44
- models['evaluator'] = pipeline(
45
- "sentiment-analysis",
46
- model="distilbert/distilbert-base-uncased-finetuned-sst-2-english"
47
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  return models
50
 
51
- # Preload models immediately when app starts
52
- models = load_models()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  #####################################
55
  # Function: Extract Text from File
@@ -215,18 +320,14 @@ def extract_skills_and_work(text):
215
  #####################################
216
  # Function: Summarize Resume Text
217
  #####################################
218
- def summarize_resume_text(resume_text):
219
  """
220
  Generates a structured summary of the resume text
221
  """
222
  start_time = time.time()
223
 
224
- # First, generate a quick summary using pre-loaded model
225
- max_input_length = 1024 # Model limit
226
-
227
- # Only summarize the first portion of text for speed
228
- text_to_summarize = resume_text[:min(len(resume_text), max_input_length)]
229
- base_summary = models['summarizer'](text_to_summarize)[0]['summary_text']
230
 
231
  # Extract name from the beginning of the resume
232
  name = extract_name(resume_text[:500])
@@ -308,7 +409,7 @@ def extract_job_requirements(job_description):
308
  required_skills = [skill for skill in tech_skills if re.search(r'\b' + re.escape(skill.lower()) + r'\b', clean_job_text)]
309
 
310
  # Create a simple summary of the job
311
- job_summary = models['summarizer'](job_description[:1024])[0]['summary_text']
312
 
313
  # Format the job requirements
314
  job_requirements = {
@@ -323,9 +424,9 @@ def extract_job_requirements(job_description):
323
  #####################################
324
  # Function: Analyze Job Fit
325
  #####################################
326
- def analyze_job_fit(resume_summary, job_description):
327
  """
328
- Analyze how well the candidate fits the job requirements with the DistilBERT sentiment model.
329
  Returns a fit score (0-2) and an assessment.
330
  """
331
  start_time = time.time()
@@ -406,23 +507,20 @@ def analyze_job_fit(resume_summary, job_description):
406
  Overall assessment: The candidate's skills and experience {"appear to match well with" if skills_match_percentage >= 60 and experience_match == "sufficient" else "have some gaps compared to"} the job requirements.
407
  """
408
 
409
- # Use sentiment analysis model to evaluate the comparison
410
- sentiment_result = models['evaluator'](comparison_text)
411
-
412
- # Map sentiment to score: NEGATIVE = 0, POSITIVE = 1
413
- sentiment_score = 1 if sentiment_result[0]['label'] == 'POSITIVE' else 0
414
 
415
  # Derive final score based on sentiment and match metrics
416
- if sentiment_score == 1 and skills_match_percentage >= 70 and experience_match == "sufficient":
417
- final_score = 2 # Good fit
418
- elif sentiment_score == 1 and skills_match_percentage >= 50:
419
  final_score = 1 # Potential fit
420
  else:
421
  final_score = 0 # Not fit
422
 
423
  # Generate assessment text based on the score
424
  if final_score == 2:
425
- assessment = f"{final_score}: The candidate is a good match for this {job_requirements['title']} position. They have the required {experience_years} years of experience and demonstrate proficiency in key skills including {', '.join(skills_in_resume[:5])}. Their background aligns well with the job requirements."
426
  elif final_score == 1:
427
  assessment = f"{final_score}: The candidate shows potential for this {job_requirements['title']} position, but has some skill gaps. They match on {skills_match_percentage}% of required skills including {', '.join(skills_in_resume[:3]) if skills_in_resume else 'minimal required skills'}, and their experience is {experience_match}."
428
  else:
@@ -432,6 +530,9 @@ def analyze_job_fit(resume_summary, job_description):
432
 
433
  return assessment, final_score, execution_time
434
 
 
 
 
435
  #####################################
436
  # Main Streamlit Interface
437
  #####################################
@@ -464,7 +565,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
464
  else:
465
  # Step 2: Generate summary
466
  status_text.text("Step 2/3: Analyzing resume and generating summary...")
467
- summary, summarization_time = summarize_resume_text(resume_text)
468
  progress_bar.progress(50)
469
 
470
  # Display summary
@@ -473,7 +574,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
473
 
474
  # Step 3: Generate job fit assessment
475
  status_text.text("Step 3/3: Evaluating job fit...")
476
- assessment, fit_score, assessment_time = analyze_job_fit(summary, job_description)
477
  progress_bar.progress(100)
478
 
479
  # Clear status messages
@@ -486,7 +587,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
486
  fit_labels = {
487
  0: "NOT FIT ❌",
488
  1: "POTENTIAL FIT ⚠️",
489
- 2: "GOOD FIT ✅"
490
  }
491
 
492
  # Show the score prominently
@@ -502,7 +603,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
502
 
503
  if fit_score == 2:
504
  st.markdown("""
505
- - Apply for this position as you appear to be a good match
506
  - Prepare for interviews by focusing on your relevant experience
507
  - Highlight your matching skills in your cover letter
508
  """)
 
8
  import re
9
  import pandas as pd
10
  from functools import lru_cache
11
+
12
+ # Try different import approaches
13
+ try:
14
+ from transformers import pipeline
15
+ has_pipeline = True
16
+ except ImportError:
17
+ try:
18
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
19
+ import torch
20
+ has_pipeline = False
21
+ st.warning("Using basic transformers functionality instead of pipeline API")
22
+ except ImportError:
23
+ st.error("Transformers library not properly installed. Some features will be limited.")
24
+ has_pipeline = False
25
 
26
  # Set page title and hide sidebar
27
  st.set_page_config(
 
45
  """Load models at startup"""
46
  with st.spinner("Loading AI models... This may take a minute on first run."):
47
  models = {}
 
 
 
 
 
 
 
48
 
49
  # Load sentiment model for evaluation
50
+ if has_pipeline:
51
+ # Use pipeline if available
52
+ models['evaluator'] = pipeline(
53
+ "sentiment-analysis",
54
+ model="distilbert/distilbert-base-uncased-finetuned-sst-2-english"
55
+ )
56
+ else:
57
+ # Fall back to basic model loading
58
+ try:
59
+ models['evaluator_model'] = AutoModelForSequenceClassification.from_pretrained(
60
+ "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
61
+ )
62
+ models['evaluator_tokenizer'] = AutoTokenizer.from_pretrained(
63
+ "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
64
+ )
65
+ except Exception as e:
66
+ st.error(f"Error loading models: {e}")
67
+ models['evaluator_model'] = None
68
+ models['evaluator_tokenizer'] = None
69
 
70
  return models
71
 
72
+ # Manual implementation of text summarization
73
def basic_summarize(text, max_length=100):
    """Build a short extractive summary by picking the best-scoring sentences.

    Sentences are scored by position (earlier is better) with a penalty for
    every word beyond 20. The highest-scoring sentences are taken until the
    next one would push the summary past ``max_length`` words, and the chosen
    sentences are then emitted in their original order.

    Args:
        text: The text to summarize. Empty or ``None`` yields ``""``.
        max_length: Maximum number of words allowed in the summary.

    Returns:
        The summary string. If even the best-scoring sentence is longer than
        ``max_length`` words, that sentence truncated to ``max_length`` words
        is returned, so poorly punctuated text still gets a summary.
    """
    if not text:
        return ""

    # Split on whitespace that follows "." or "?", skipping abbreviation-like
    # patterns such as "e.g." or "Mr.".
    sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)

    # Keep each sentence's position so the original order can be restored
    # later without a list.index() search, which is quadratic and resolves
    # repeated sentences to the first occurrence.
    candidates = []
    for position, sentence in enumerate(sentences):
        word_count = len(sentence.split())
        # Skip very short sentences
        if word_count < 4:
            continue

        # Earlier sentences score higher; very long sentences are penalized
        score = 1.0 / (position + 1) - (0.01 * max(0, word_count - 20))
        candidates.append((score, position, sentence, word_count))

    if not candidates:
        return ""

    # Best score first; ties are broken by sentence text
    candidates.sort(key=lambda c: (c[0], c[2]), reverse=True)

    # Take top sentences until the next one would exceed the word budget
    selected = []
    current_length = 0
    for _, position, sentence, word_count in candidates:
        if current_length + word_count > max_length:
            break
        selected.append((position, sentence))
        current_length += word_count

    if not selected:
        # The best sentence alone is over budget: truncate it rather than
        # returning an empty summary.
        best_sentence = candidates[0][2]
        return " ".join(best_sentence.split()[:max(max_length, 0)])

    # Restore original document order
    selected.sort()
    return " ".join(sentence for _, sentence in selected)
114
+
115
+ # Custom sentiment analysis function as fallback
116
def analyze_sentiment(text, models):
    """Classify ``text`` as positive (True) or not (False).

    Three strategies are tried in order, each falling through to the next
    when unavailable or when it raises:

    1. The ``models['evaluator']`` pipeline, when ``has_pipeline`` is set.
    2. Manual inference with ``models['evaluator_model']`` and
       ``models['evaluator_tokenizer']``.
    3. A keyword-count heuristic.

    Args:
        text: The text to evaluate.
        models: Dict of loaded models; any of the keys above may be absent
            or ``None``.

    Returns:
        True if the text is judged positive, False otherwise.
    """
    # Look the evaluator up first so that a missing entry skips this branch.
    evaluator = models.get('evaluator')
    if evaluator is not None and has_pipeline:
        # Use pipeline if available
        try:
            result = evaluator(text)
            return result[0]['label'] == 'POSITIVE'
        except Exception as e:
            st.warning(f"Error in pipeline sentiment analysis: {e}")

    # Fall back to manual model inference. Test with "is not None" instead
    # of truthiness: container-style modules define __len__, so an object
    # that loaded fine could still evaluate as falsy.
    model = models.get('evaluator_model')
    tokenizer = models.get('evaluator_tokenizer')
    if model is not None and tokenizer is not None:
        try:
            # Tokenizers with no configured limit report a very large
            # sentinel for model_max_length; treat that as "unset".
            max_length = getattr(tokenizer, 'model_max_length', 512)
            if not isinstance(max_length, int) or not 0 < max_length <= 1_000_000:
                max_length = 512

            # Cheap word-level pre-truncation; the tokenizer call below
            # enforces the real token limit.
            truncated_text = " ".join(text.split()[:max_length])

            inputs = tokenizer(truncated_text, return_tensors="pt", truncation=True, max_length=max_length)
            with torch.no_grad():
                outputs = model(**inputs)

            # argmax over logits equals argmax over softmax probabilities.
            prediction = torch.argmax(outputs.logits, dim=-1).item()

            # Usually for sentiment models, 1 = positive, 0 = negative
            return prediction == 1
        except Exception as e:
            st.warning(f"Error in manual sentiment analysis: {e}")

    # If all else fails, use a simple keyword approach
    positive_words = ["match", "fit", "qualified", "skilled", "experienced", "suitable", "aligned", "good", "strong"]
    negative_words = ["mismatch", "gap", "insufficient", "lacking", "inadequate", "limited", "missing", "poor", "weak"]

    text_lower = text.lower()

    def count_hits(words):
        # Anchor at a word start so "mismatch" is not counted as "match",
        # "unqualified" as "qualified", or "benefit" as "fit". The end is
        # left open so inflections ("gaps", "matches") still count.
        pattern = r'\b(?:' + '|'.join(re.escape(word) for word in words) + r')'
        return len(re.findall(pattern, text_lower))

    return count_hits(positive_words) > count_hits(negative_words)
158
 
159
  #####################################
160
  # Function: Extract Text from File
 
320
  #####################################
321
  # Function: Summarize Resume Text
322
  #####################################
323
+ def summarize_resume_text(resume_text, models):
324
  """
325
  Generates a structured summary of the resume text
326
  """
327
  start_time = time.time()
328
 
329
+ # Create a basic summary using our custom function
330
+ base_summary = basic_summarize(resume_text, max_length=100)
 
 
 
 
331
 
332
  # Extract name from the beginning of the resume
333
  name = extract_name(resume_text[:500])
 
409
  required_skills = [skill for skill in tech_skills if re.search(r'\b' + re.escape(skill.lower()) + r'\b', clean_job_text)]
410
 
411
  # Create a simple summary of the job
412
+ job_summary = basic_summarize(job_description, max_length=100)
413
 
414
  # Format the job requirements
415
  job_requirements = {
 
424
  #####################################
425
  # Function: Analyze Job Fit
426
  #####################################
427
+ def analyze_job_fit(resume_summary, job_description, models):
428
  """
429
+ Analyze how well the candidate fits the job requirements.
430
  Returns a fit score (0-2) and an assessment.
431
  """
432
  start_time = time.time()
 
507
  Overall assessment: The candidate's skills and experience {"appear to match well with" if skills_match_percentage >= 60 and experience_match == "sufficient" else "have some gaps compared to"} the job requirements.
508
  """
509
 
510
+ # Use sentiment analysis function to evaluate the comparison
511
+ is_positive = analyze_sentiment(comparison_text, models)
 
 
 
512
 
513
  # Derive final score based on sentiment and match metrics
514
+ if is_positive and skills_match_percentage >= 70 and experience_match == "sufficient":
515
+ final_score = 2 # Strong fit
516
+ elif is_positive and skills_match_percentage >= 50:
517
  final_score = 1 # Potential fit
518
  else:
519
  final_score = 0 # Not fit
520
 
521
  # Generate assessment text based on the score
522
  if final_score == 2:
523
+ assessment = f"{final_score}: The candidate is a strong match for this {job_requirements['title']} position. They have the required {experience_years} years of experience and demonstrate proficiency in key skills including {', '.join(skills_in_resume[:5])}. Their background aligns well with the job requirements."
524
  elif final_score == 1:
525
  assessment = f"{final_score}: The candidate shows potential for this {job_requirements['title']} position, but has some skill gaps. They match on {skills_match_percentage}% of required skills including {', '.join(skills_in_resume[:3]) if skills_in_resume else 'minimal required skills'}, and their experience is {experience_match}."
526
  else:
 
530
 
531
  return assessment, final_score, execution_time
532
 
533
+ # Load models at startup
534
+ models = load_models()
535
+
536
  #####################################
537
  # Main Streamlit Interface
538
  #####################################
 
565
  else:
566
  # Step 2: Generate summary
567
  status_text.text("Step 2/3: Analyzing resume and generating summary...")
568
+ summary, summarization_time = summarize_resume_text(resume_text, models)
569
  progress_bar.progress(50)
570
 
571
  # Display summary
 
574
 
575
  # Step 3: Generate job fit assessment
576
  status_text.text("Step 3/3: Evaluating job fit...")
577
+ assessment, fit_score, assessment_time = analyze_job_fit(summary, job_description, models)
578
  progress_bar.progress(100)
579
 
580
  # Clear status messages
 
587
  fit_labels = {
588
  0: "NOT FIT ❌",
589
  1: "POTENTIAL FIT ⚠️",
590
+ 2: "STRONG FIT ✅"
591
  }
592
 
593
  # Show the score prominently
 
603
 
604
  if fit_score == 2:
605
  st.markdown("""
606
+ - Apply for this position as you appear to be a strong match
607
  - Prepare for interviews by focusing on your relevant experience
608
  - Highlight your matching skills in your cover letter
609
  """)