Update app.py
app.py CHANGED

@@ -8,7 +8,20 @@ import time
 import re
 import pandas as pd
 from functools import lru_cache
-
+
+# Try different import approaches
+try:
+    from transformers import pipeline
+    has_pipeline = True
+except ImportError:
+    try:
+        from transformers import AutoModelForSequenceClassification, AutoTokenizer
+        import torch
+        has_pipeline = False
+        st.warning("Using basic transformers functionality instead of pipeline API")
+    except ImportError:
+        st.error("Transformers library not properly installed. Some features will be limited.")
+        has_pipeline = False
 
 # Set page title and hide sidebar
 st.set_page_config(
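Note: the added block above degrades gracefully when the high-level `pipeline` API cannot be imported, and records the outcome in `has_pipeline` so later code can branch on it. A minimal standalone sketch of the same pattern follows; the `summarize` helper and its fallback heuristic are illustrative only, not part of the commit.

try:
    from transformers import pipeline
    has_pipeline = True
except ImportError:
    pipeline = None
    has_pipeline = False

def summarize(text):
    """Use the pipeline when it imported cleanly, otherwise fall back to a crude heuristic."""
    if has_pipeline:
        # Mirrors the model choice used elsewhere in this app
        summarizer = pipeline("summarization", model="facebook/bart-base", truncation=True)
        return summarizer(text, max_length=100)[0]['summary_text']
    # Fallback: keep roughly the first two sentences
    return ". ".join(text.split(". ")[:2])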
@@ -32,24 +45,116 @@ def load_models():
     """Load models at startup"""
     with st.spinner("Loading AI models... This may take a minute on first run."):
         models = {}
-        # Use bart-base for summarization
-        models['summarizer'] = pipeline(
-            "summarization",
-            model="facebook/bart-base",
-            max_length=100,
-            truncation=True
-        )
 
         # Load sentiment model for evaluation
-
-
-
-
+        if has_pipeline:
+            # Use pipeline if available
+            models['evaluator'] = pipeline(
+                "sentiment-analysis",
+                model="distilbert/distilbert-base-uncased-finetuned-sst-2-english"
+            )
+        else:
+            # Fall back to basic model loading
+            try:
+                models['evaluator_model'] = AutoModelForSequenceClassification.from_pretrained(
+                    "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
+                )
+                models['evaluator_tokenizer'] = AutoTokenizer.from_pretrained(
+                    "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
+                )
+            except Exception as e:
+                st.error(f"Error loading models: {e}")
+                models['evaluator_model'] = None
+                models['evaluator_tokenizer'] = None
 
     return models
 
-#
-
+# Manual implementation of text summarization
+def basic_summarize(text, max_length=100):
+    """Basic text summarization by extracting key sentences"""
+    # Split into sentences
+    sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
+
+    # Score sentences by position (earlier is better) and length
+    scored_sentences = []
+    for i, sentence in enumerate(sentences):
+        # Skip very short sentences
+        if len(sentence.split()) < 4:
+            continue
+
+        # Simple scoring: earlier sentences get higher scores, penalize very long sentences
+        score = 1.0 / (i + 1) - (0.01 * max(0, len(sentence.split()) - 20))
+        scored_sentences.append((score, sentence))
+
+    # Sort by score
+    scored_sentences.sort(reverse=True)
+
+    # Get top sentences until we reach max_length
+    summary_sentences = []
+    current_length = 0
+
+    for _, sentence in scored_sentences:
+        if current_length + len(sentence.split()) <= max_length:
+            summary_sentences.append(sentence)
+            current_length += len(sentence.split())
+        else:
+            break
+
+    # Re-order sentences to match original order if we have more than one
+    if summary_sentences:
+        original_order = []
+        for sentence in summary_sentences:
+            original_order.append((sentences.index(sentence), sentence))
+        original_order.sort()
+        summary_sentences = [s for _, s in original_order]
+
+    # Combine into a summary
+    summary = " ".join(summary_sentences)
+    return summary
+
+# Custom sentiment analysis function as fallback
+def analyze_sentiment(text, models):
+    """Analyze sentiment using available models"""
+
+    if has_pipeline and 'evaluator' in models:
+        # Use pipeline if available
+        try:
+            result = models['evaluator'](text)
+            return result[0]['label'] == 'POSITIVE'
+        except Exception as e:
+            st.warning(f"Error in pipeline sentiment analysis: {e}")
+
+    # Fall back to manual model inference
+    if 'evaluator_model' in models and 'evaluator_tokenizer' in models and models['evaluator_model']:
+        try:
+            tokenizer = models['evaluator_tokenizer']
+            model = models['evaluator_model']
+
+            # Truncate to avoid exceeding model's max length
+            max_length = tokenizer.model_max_length if hasattr(tokenizer, 'model_max_length') else 512
+            truncated_text = " ".join(text.split()[:max_length])
+
+            inputs = tokenizer(truncated_text, return_tensors="pt", truncation=True, max_length=max_length)
+            with torch.no_grad():
+                outputs = model(**inputs)
+
+            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
+            prediction = torch.argmax(probabilities, dim=-1).item()
+
+            # Usually for sentiment models, 1 = positive, 0 = negative
+            return prediction == 1
+        except Exception as e:
+            st.warning(f"Error in manual sentiment analysis: {e}")
+
+    # If all else fails, use a simple keyword approach
+    positive_words = ["match", "fit", "qualified", "skilled", "experienced", "suitable", "aligned", "good", "strong"]
+    negative_words = ["mismatch", "gap", "insufficient", "lacking", "inadequate", "limited", "missing", "poor", "weak"]
+
+    text_lower = text.lower()
+    positive_count = sum(text_lower.count(word) for word in positive_words)
+    negative_count = sum(text_lower.count(word) for word in negative_words)
+
+    return positive_count > negative_count
 
 #####################################
 # Function: Extract Text from File
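For reference, the added `basic_summarize` is a purely extractive fallback: it scores sentences by position and length, keeps the highest-scoring ones within the word budget, and re-emits them in their original order. A small illustrative call, with made-up input text, assuming the function above is in scope:

resume_text = (
    "Experienced data engineer with five years building ETL pipelines. "
    "Led a team of four on a cloud migration project. "
    "Enjoys hiking. "
    "Skilled in Python, SQL, and Spark."
)
print(basic_summarize(resume_text, max_length=20))
# The first two sentences score highest and fit within the 20-word budget, so they
# are returned in their original order; the two-word sentence is skipped outright.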
@@ -215,18 +320,14 @@ def extract_skills_and_work(text):
 #####################################
 # Function: Summarize Resume Text
 #####################################
-def summarize_resume_text(resume_text):
+def summarize_resume_text(resume_text, models):
     """
     Generates a structured summary of the resume text
     """
     start_time = time.time()
 
-    #
-
-
-    # Only summarize the first portion of text for speed
-    text_to_summarize = resume_text[:min(len(resume_text), max_input_length)]
-    base_summary = models['summarizer'](text_to_summarize)[0]['summary_text']
+    # Create a basic summary using our custom function
+    base_summary = basic_summarize(resume_text, max_length=100)
 
     # Extract name from the beginning of the resume
     name = extract_name(resume_text[:500])
@@ -308,7 +409,7 @@ def extract_job_requirements(job_description):
     required_skills = [skill for skill in tech_skills if re.search(r'\b' + re.escape(skill.lower()) + r'\b', clean_job_text)]
 
     # Create a simple summary of the job
-    job_summary =
+    job_summary = basic_summarize(job_description, max_length=100)
 
     # Format the job requirements
     job_requirements = {
@@ -323,9 +424,9 @@ def extract_job_requirements(job_description):
 #####################################
 # Function: Analyze Job Fit
 #####################################
-def analyze_job_fit(resume_summary, job_description):
+def analyze_job_fit(resume_summary, job_description, models):
     """
-    Analyze how well the candidate fits the job requirements
+    Analyze how well the candidate fits the job requirements.
     Returns a fit score (0-2) and an assessment.
     """
     start_time = time.time()
@@ -406,23 +507,20 @@ def analyze_job_fit(resume_summary, job_description):
     Overall assessment: The candidate's skills and experience {"appear to match well with" if skills_match_percentage >= 60 and experience_match == "sufficient" else "have some gaps compared to"} the job requirements.
     """
 
-    # Use sentiment analysis
-
-
-    # Map sentiment to score: NEGATIVE = 0, POSITIVE = 1
-    sentiment_score = 1 if sentiment_result[0]['label'] == 'POSITIVE' else 0
+    # Use sentiment analysis function to evaluate the comparison
+    is_positive = analyze_sentiment(comparison_text, models)
 
     # Derive final score based on sentiment and match metrics
-    if
-        final_score = 2 #
-    elif
+    if is_positive and skills_match_percentage >= 70 and experience_match == "sufficient":
+        final_score = 2 # Strong fit
+    elif is_positive and skills_match_percentage >= 50:
         final_score = 1 # Potential fit
     else:
         final_score = 0 # Not fit
 
     # Generate assessment text based on the score
     if final_score == 2:
-        assessment = f"{final_score}: The candidate is a
+        assessment = f"{final_score}: The candidate is a strong match for this {job_requirements['title']} position. They have the required {experience_years} years of experience and demonstrate proficiency in key skills including {', '.join(skills_in_resume[:5])}. Their background aligns well with the job requirements."
     elif final_score == 1:
         assessment = f"{final_score}: The candidate shows potential for this {job_requirements['title']} position, but has some skill gaps. They match on {skills_match_percentage}% of required skills including {', '.join(skills_in_resume[:3]) if skills_in_resume else 'minimal required skills'}, and their experience is {experience_match}."
     else:
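The new thresholds above combine the sentiment signal with the match metrics. A compact sketch of just that decision rule, exercised on hypothetical inputs (not part of the commit):

def score_fit(is_positive, skills_match_percentage, experience_match):
    # Same rule as the hunk above, isolated for illustration
    if is_positive and skills_match_percentage >= 70 and experience_match == "sufficient":
        return 2  # Strong fit
    elif is_positive and skills_match_percentage >= 50:
        return 1  # Potential fit
    return 0  # Not fit

assert score_fit(True, 80, "sufficient") == 2    # strong skills plus enough experience
assert score_fit(True, 55, "insufficient") == 1  # decent skills, experience falls short
assert score_fit(False, 90, "sufficient") == 0   # negative overall signal wins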
@@ -432,6 +530,9 @@ def analyze_job_fit(resume_summary, job_description):
 
     return assessment, final_score, execution_time
 
+# Load models at startup
+models = load_models()
+
 #####################################
 # Main Streamlit Interface
 #####################################
@@ -464,7 +565,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
     else:
         # Step 2: Generate summary
        status_text.text("Step 2/3: Analyzing resume and generating summary...")
-        summary, summarization_time = summarize_resume_text(resume_text)
+        summary, summarization_time = summarize_resume_text(resume_text, models)
        progress_bar.progress(50)
 
        # Display summary
@@ -473,7 +574,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
 
        # Step 3: Generate job fit assessment
        status_text.text("Step 3/3: Evaluating job fit...")
-        assessment, fit_score, assessment_time = analyze_job_fit(summary, job_description)
+        assessment, fit_score, assessment_time = analyze_job_fit(summary, job_description, models)
        progress_bar.progress(100)
 
        # Clear status messages
@@ -486,7 +587,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
        fit_labels = {
            0: "NOT FIT ❌",
            1: "POTENTIAL FIT ⚠️",
-            2: "
+            2: "STRONG FIT ✅"
        }
 
        # Show the score prominently
@@ -502,7 +603,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
 
        if fit_score == 2:
            st.markdown("""
-            - Apply for this position as you appear to be a
+            - Apply for this position as you appear to be a strong match
            - Prepare for interviews by focusing on your relevant experience
            - Highlight your matching skills in your cover letter
            """)