Commit 1d59f84 · committed by root · Parent: a83946a · "ss"

app.py CHANGED
@@ -3,22 +3,22 @@ import pandas as pd
  3 |   import numpy as np
  4 |   import torch
  5 |   import nltk
  6 | - import faiss
  7 |   import os
  8 |   import tempfile
  9 |   import base64
 10 | - import re
 11 | - import io
 12 |   from rank_bm25 import BM25Okapi
 13 | - from …
 14 | - from nltk.tokenize import word_tokenize
 15 | - from tqdm import tqdm
 16 |   import pdfplumber
 17 |   import PyPDF2
 18 |   from docx import Document
 19 |   import csv
 20 |   from datasets import load_dataset
 21 |   import gc
 22 |
 23 |   # Download NLTK resources
 24 |   try:
@@ -47,79 +47,93 @@ with st.sidebar:
 47 |       # Advanced options
 48 |       st.subheader("Advanced Options")
 49 |       top_k = st.number_input("Number of results to display", min_value=1, max_value=50, value=10, step=1)
 50 | -     …
 51 |
 52 |       st.markdown("---")
 53 | -     st.markdown("### π€ …
 54 | -     st.markdown("- ** …
 55 | -     st.markdown("- ** …
 56 | -     st.markdown(" …
 57 | -     st.markdown(" …
 58 |
 59 |   # Initialize session state
 60 |   if 'embedding_model' not in st.session_state:
 61 |       st.session_state.embedding_model = None
 62 | - if ' …
 63 | -     st.session_state. …
 64 |   if 'results' not in st.session_state:
 65 |       st.session_state.results = []
 66 |
 67 |   @st.cache_resource
 68 |   def load_embedding_model():
 69 | -     """Load and cache the embedding model"""
 70 |       try:
 71 | -         with st.spinner("π Loading …
 72 | -         …
 73 | -         model = AutoModel.from_pretrained(
 74 | -             "nvidia/NV-Embed-v2",
 75 | -             trust_remote_code=True,
 76 | -             device_map="auto",
 77 | -             torch_dtype=torch.float16
 78 | -         )
 79 |           st.success("✅ Embedding model loaded successfully!")
 80 | -         return model
 81 |       except Exception as e:
 82 |           st.error(f"❌ Error loading embedding model: {str(e)}")
 83 | -         return None
 84 |
 85 |   @st.cache_resource
 86 | - def …
 87 | -     """Load and cache the …
 88 | -     if not use_explanation:
 89 | -         return None, None
 90 | -     …
 91 |       try:
 92 | -         with st.spinner("π Loading …
 93-108 | - … (removed lines; content not preserved in this view)
109 | -         st.success("✅ Explanation model loaded successfully!")
110 | -         return model, tokenizer
111 |       except Exception as e:
112 | -         st.error(f"❌ Error …
113 | -         return None
114 |
115 |   class ResumeScreener:
116 |       def __init__(self):
117 |           # Load models
118 | -         self.embedding_model …
119-122 | - … (removed lines; content not preserved in this view)
123 |
124 |       def extract_text_from_file(self, file_path, file_type):
125 |           """Extract text from various file types"""
@@ -158,48 +172,35 @@ class ResumeScreener:
158 |           return ""
159 |
160 |       def get_embedding(self, text):
161 | -         """Generate embedding for text"""
162 |           if self.embedding_model is None:
163 | -             …
164 |
165 |           try:
166 | -             # …
167-169 | - … (removed lines; content not preserved in this view)
170 | -                 text,
171 | -                 return_tensors="pt",
172 | -                 truncation=True,
173 | -                 max_length=512,
174 | -                 padding=True
175 | -             )
176 | -
177 | -             # Move to same device as model
178 | -             device = next(self.embedding_model.parameters()).device
179 | -             inputs = {k: v.to(device) for k, v in inputs.items()}
180 |
181-183 | - … (removed lines; content not preserved in this view)
184 | -             # Extract embeddings - NV-Embed-v2 specific
185 | -             if hasattr(outputs, 'pooler_output'):
186 | -                 embeddings = outputs.pooler_output
187 | -             elif hasattr(outputs, 'last_hidden_state'):
188 | -                 embeddings = outputs.last_hidden_state.mean(dim=1)
189 | -             else:
190 | -                 embeddings = outputs[0].mean(dim=1)
191 | -
192 | -             return embeddings.cpu().numpy().squeeze()
193 |
194 |           except Exception as e:
195 |               st.error(f"Error generating embedding: {str(e)}")
196 | -             return np.zeros( …
197 |
198 |       def calculate_bm25_scores(self, resume_texts, job_description):
199 |           """Calculate BM25 scores for keyword matching"""
200 |           try:
201 |               job_tokens = word_tokenize(job_description.lower())
202 | -             corpus = [word_tokenize(text.lower()) for text in resume_texts if text.strip()]
203 |
204 |               if not corpus:
205 |                   return [0.0] * len(resume_texts)
@@ -212,146 +213,382 @@ class ResumeScreener:
212 |               st.error(f"Error calculating BM25 scores: {str(e)}")
213 |               return [0.0] * len(resume_texts)
214 |
215 | -     def …
216 | -         """ …
217-218 | - … (removed lines; content not preserved in this view)
219 |
220 | -         # …
221-222 | - … (removed lines; content not preserved in this view)
223 | -         for i, text in enumerate(resume_texts):
224 | -             embedding = self.get_embedding(text)
225 | -             resume_embeddings.append(embedding)
226 | -             progress_bar.progress((i + 1) / len(resume_texts))
227 |
228 | -         # …
229-230 | - … (removed lines; content not preserved in this view)
231 | -             job_norm = job_embedding / (np.linalg.norm(job_embedding) + 1e-8)
232 | -             resume_norm = resume_emb / (np.linalg.norm(resume_emb) + 1e-8)
233 | -             similarity = np.dot(job_norm, resume_norm)
234 | -             semantic_scores.append(float(similarity))
235 |
236 | -         # …
237 | -         …
238 |
239 | -         # …
240-241 | - … (removed lines; content not preserved in this view)
242 | -             bm25_scores = [score / max_bm25 for score in bm25_scores]
243 |
244 | -         # …
245-250 | - … (removed lines; content not preserved in this view)
251 |
252 |       def extract_skills(self, text, job_description):
253 |           """Extract skills from resume based on job description"""
254 | -         …
255 |           common_skills = [
256 | -             "python", "java", "javascript", "react", " …
257 | -             " …
258 | -             " …
259 | -             " …
260 | -             " …
261 |           ]
262 |
263 | -         # Extract skills from job description
264 |           job_words = set(word.lower() for word in word_tokenize(job_description) if len(word) > 2)
265 |
266 |           # Find matching skills
267 |           found_skills = []
268 |           text_lower = text.lower()
269 |
270 | -         # Check common skills
271 |           for skill in common_skills:
272 | -             if skill in text_lower and skill in …
273 |                   found_skills.append(skill)
274 |
275 | -         # Check job …
276 |           for word in job_words:
277 | -             if len(word) > 3 and word in text_lower:
278 | -                 …
279 |
280 | -         return list(set(found_skills))[: …
281 |
282 | -     def …
283 | -         """Generate explanation …
284 | -         if self.explanation_model is None or self.explanation_tokenizer is None:
285 | -             return self._generate_simple_explanation(score, semantic_score, bm25_score, skills)
286 | -
287 | -         try:
288 | -             # Create prompt
289 | -             prompt = f"""As a recruitment AI assistant, explain why this resume scored {score:.2f} for the given job position.
290 | -
291 | - Job Requirements:
292 | - {job_description[:500]}...
293 | -
294 | - Resume Summary:
295 | - {resume_text[:800]}...
296 | -
297 | - Scores:
298 | - - Overall: {score:.2f}/1.0
299 | - - Semantic Match: {semantic_score:.2f}/1.0
300 | - - Keyword Match: {bm25_score:.2f}/1.0
301 | - - Key Skills: {', '.join(skills[:5])}
302 | -
303 | - Provide a concise 2-3 sentence explanation of the match quality and key strengths."""
304 | -
305 | -             # Generate response
306 | -             messages = [{"role": "user", "content": prompt}]
307 | -             text = self.explanation_tokenizer.apply_chat_template(
308 | -                 messages, tokenize=False, add_generation_prompt=True
309 | -             )
310 | -
311 | -             inputs = self.explanation_tokenizer(text, return_tensors="pt").to(self.explanation_model.device)
312 | -
313 | -             with torch.no_grad():
314 | -                 outputs = self.explanation_model.generate(
315 | -                     **inputs,
316 | -                     max_new_tokens=150,
317 | -                     temperature=0.7,
318 | -                     do_sample=True,
319 | -                     pad_token_id=self.explanation_tokenizer.eos_token_id
320 | -                 )
321 | -
322 | -             response = self.explanation_tokenizer.decode(
323 | -                 outputs[0][inputs.input_ids.shape[1]:],
324 | -                 skip_special_tokens=True
325 | -             )
326 | -
327 | -             return response.strip()[:400]  # Limit length
328 | -
329 | -         except Exception as e:
330 | -             st.warning(f"AI explanation failed: {str(e)}")
331 | -             return self._generate_simple_explanation(score, semantic_score, bm25_score, skills)
332 | -
333 | -     def _generate_simple_explanation(self, score, semantic_score, bm25_score, skills):
334 | -         """Fallback explanation generation"""
335 |           if score > 0.8:
336 |               quality = "excellent"
337 |           elif score > 0.6:
338 | -             quality = " …
339 |           elif score > 0.4:
340 |               quality = "moderate"
341 |           else:
342 |               quality = "limited"
343 |
344 | -         explanation = f"This …
345 |
346 |           if semantic_score > bm25_score:
347 | -             explanation += f" …
348 |           else:
349 | -             explanation += f" …
350 |
351 |           if skills:
352 | -             explanation += f"Key matching …
353 |
354 |           return explanation
355 |
356 |   def create_download_link(df, filename="resume_screening_results.csv"):
357 |       """Create download link for results"""
@@ -361,16 +598,22 @@ def create_download_link(df, filename="resume_screening_results.csv"):
361 |
362 |   # Main App Interface
363 |   st.title("π― AI-Powered Resume Screener")
364 | - st.markdown("*Find the perfect candidates using …
365 |   st.markdown("---")
366 |
367 |   # Initialize screener
368-373 | - … (removed lines; content not preserved in this view)
374 |
375 |   # Job Description Input
376 |   st.header("π Step 1: Enter Job Description")
@@ -383,14 +626,25 @@ job_description = st.text_area(
383 |   # Resume Input Options
384 |   st.header("π Step 2: Upload Resumes")
385 |
386 |   input_method = st.radio(
387 |       "Choose input method:",
388 |       ["π Upload Files", "ποΈ Load from CSV Dataset", "π Load from Hugging Face Dataset"]
389 |   )
390 |
391 | - resume_texts = []
392 | - file_names = []
393 | -
394 |   if input_method == "π Upload Files":
395 |       uploaded_files = st.file_uploader(
396 |           "Upload resume files",
@@ -401,23 +655,26 @@ if input_method == "π Upload Files":
401 |
402 |   if uploaded_files:
403 |       with st.spinner(f"π Processing {len(uploaded_files)} files..."):
404 |           for file in uploaded_files:
405 |               file_type = file.name.split('.')[-1].lower()
406 |
407 | -             # Save temporary file
408 |               with tempfile.NamedTemporaryFile(delete=False, suffix=f'.{file_type}') as tmp_file:
409 |                   tmp_file.write(file.getvalue())
410 |                   tmp_path = tmp_file.name
411 |
412 | -             # Extract text
413 |               text = screener.extract_text_from_file(tmp_path, file_type)
414 |               if text.strip():
415 |                   resume_texts.append(text)
416 |                   file_names.append(file.name)
417 |
418 | -             # Cleanup
419 |               os.unlink(tmp_path)
420 | -
421 |       if resume_texts:
422 |           st.success(f"✅ Successfully processed {len(resume_texts)} resumes")
423 |
@@ -442,6 +699,9 @@ elif input_method == "ποΈ Load from CSV Dataset":
442 |
443 |   if st.button("π Process CSV Data"):
444 |       with st.spinner("π Processing CSV data..."):
445 |           for idx, row in df.iterrows():
446 |               text = str(row[text_column])
447 |               if text and text.strip() and text.lower() != 'nan':
@@ -451,6 +711,9 @@ elif input_method == "ποΈ Load from CSV Dataset":
451 |                   file_names.append(f"Resume_{idx}")
452 |               else:
453 |                   file_names.append(str(row[name_column]))
454 |
455 |       if resume_texts:
456 |           st.success(f"✅ Successfully loaded {len(resume_texts)} resumes from CSV")
@@ -459,7 +722,9 @@ elif input_method == "ποΈ Load from CSV Dataset":
459 |       st.error(f"❌ Error processing CSV: {str(e)}")
460 |
461 |   elif input_method == "π Load from Hugging Face Dataset":
462 | -     st.markdown("** …
463 |
464 |       col1, col2 = st.columns([2, 1])
465 |       with col1:
@@ -479,35 +744,35 @@ elif input_method == "π Load from Hugging Face Dataset":
479 |           st.success(f"✅ Loaded dataset with {len(dataset)} entries")
480 |           st.write("**Dataset Preview:**")
481 |
482 | -         # Show first few examples
483 |           preview_df = pd.DataFrame(dataset[:5])
484 |           st.dataframe(preview_df)
485 |
486 | -         # Column selection
487 |           text_column = st.selectbox(
488 |               "Select column with resume text:",
489 |               dataset.column_names,
490 | -             index= …
491 |           )
492 |
493 |           category_column = None
494 |           if 'category' in dataset.column_names:
495 |               category_column = st.selectbox(
496 |                   "Filter by category (optional):",
497 | -                 ["All"] + …
498 |               )
499 |
500 |           max_samples = st.slider("Maximum samples to load:", 10, min(1000, len(dataset)), 100)
501 |
502 |           if st.button("π Process Dataset"):
503 |               with st.spinner("π Processing dataset..."):
504 |                   filtered_dataset = dataset
505 |
506 | -                 # Apply category filter
507 |                   if category_column and category_column != "All":
508 |                       filtered_dataset = dataset.filter(lambda x: x['category'] == category_column)
509 |
510 | -                 # Limit samples
511 |                   sample_indices = list(range(min(max_samples, len(filtered_dataset))))
512 |
513 |                   for idx in sample_indices:
@@ -517,11 +782,13 @@ elif input_method == "π Load from Hugging Face Dataset":
517 |                       if text and text.strip() and text.lower() != 'nan':
518 |                           resume_texts.append(text)
519 |
520 | -                     # Use ID or index for naming
521 |                       if 'id' in item:
522 |                           file_names.append(f"Resume_{item['id']}")
523 |                       else:
524 |                           file_names.append(f"Resume_{idx}")
525 |
526 |       if resume_texts:
527 |           st.success(f"✅ Successfully loaded {len(resume_texts)} resumes")
@@ -530,142 +797,308 @@ elif input_method == "π Load from Hugging Face Dataset":
530 |           st.error(f"❌ Error loading dataset: {str(e)}")
531 |
532 |   # Processing and Results
533-552 | - … (removed lines; content not preserved in this view)
553 | -                 # Generate explanation
554 | -                 explanation = ""
555 | -                 if use_explanation:
556 | -                     explanation = screener.generate_explanation(
557 | -                         text, job_description, hybrid_score, semantic_score, bm25_score, skills
558 |                       )
559-581 | - … (removed lines; content not preserved in this view)
582 |
583 |   # Display Results
584 |   if st.session_state.results:
585 |       st.header("π Top Candidates")
586 |
587 | -     # Create …
588 | -     …
589 | -     for result in st.session_state.results:
590 | -         summary_data.append({
591 | -             "Rank": result['rank'],
592 | -             "Candidate": result['name'],
593 | -             "Overall Score": f"{result['score']:.3f}",
594 | -             "Semantic Score": f"{result['semantic_score']:.3f}",
595 | -             "Keyword Score": f"{result['keyword_score']:.3f}",
596 | -             "Key Skills": ", ".join(result['skills'][:3]) + ("..." if len(result['skills']) > 3 else ""),
597 | -         })
598 | -     …
599 | -     summary_df = pd.DataFrame(summary_data)
600 | -     st.dataframe(summary_df, use_container_width=True)
601 | -     …
602 | -     # Download link
603 | -     detailed_data = []
604 | -     for result in st.session_state.results:
605 | -         detailed_data.append({
606 | -             "Rank": result['rank'],
607 | -             "Candidate": result['name'],
608 | -             "Overall_Score": result['score'],
609 | -             "Semantic_Score": result['semantic_score'],
610 | -             "Keyword_Score": result['keyword_score'],
611 | -             "Skills": "; ".join(result['skills']),
612 | -             "Explanation": result['explanation'],
613 | -             "Resume_Preview": result['text_preview']
614 | -         })
615 |
616-617 | - … (removed lines; content not preserved in this view)
618 |
619-620 | - … (removed lines; content not preserved in this view)
621 |
622-624 | - … (removed lines; content not preserved in this view)
625 |
626 |       with col1:
627 | -         st. …
628-629 | - … (removed lines; content not preserved in this view)
630 |
631-633 | - … (removed lines; content not preserved in this view)
634 |
635 |       with col2:
636-638 | - … (removed lines; content not preserved in this view)
639 |
640 | -         st. …
641 | -         st. …
642-643 | - … (removed lines; content not preserved in this view)
644 | -     if len(st.session_state.results) > 1:
645 | -         st.subheader("π Score Visualization")
646 | -         …
647 | -         chart_data = pd.DataFrame({
648 | -             'Candidate': [r['name'] for r in st.session_state.results],
649 | -             'Overall Score': [r['score'] for r in st.session_state.results],
650 | -             'Semantic Score': [r['semantic_score'] for r in st.session_state.results],
651 | -             'Keyword Score': [r['keyword_score'] for r in st.session_state.results]
652 | -         })
653 | -         …
654 | -         st.bar_chart(chart_data.set_index('Candidate'))
655 |
656 |   # Memory cleanup
657-660 | - … (removed lines; content not preserved in this view)
661 | - st. …
662 |
663 |   # Footer
664 |   st.markdown("---")
665 |   st.markdown(
666 |       """
667 |       <div style='text-align: center; color: #666;'>
668 | -         π Powered by …
669 |       </div>
670 |       """,
671 |       unsafe_allow_html=True
  3 |   import numpy as np
  4 |   import torch
  5 |   import nltk
  6 |   import os
  7 |   import tempfile
  8 |   import base64
  9 |   from rank_bm25 import BM25Okapi
 10 | + from sentence_transformers import SentenceTransformer, CrossEncoder
 11 | + from nltk.tokenize import word_tokenize
 12 |   import pdfplumber
 13 |   import PyPDF2
 14 |   from docx import Document
 15 |   import csv
 16 |   from datasets import load_dataset
 17 |   import gc
 18 | + from huggingface_hub import InferenceClient
 19 | + import time
 20 | + import faiss
 21 | + import re
 22 |
 23 |   # Download NLTK resources
 24 |   try:
 47 |       # Advanced options
 48 |       st.subheader("Advanced Options")
 49 |       top_k = st.number_input("Number of results to display", min_value=1, max_value=50, value=10, step=1)
 50 | +
 51 | +     # LLM Settings
 52 | +     st.subheader("LLM Settings")
 53 | +     use_llm_explanations = st.checkbox("Generate AI Explanations", value=True)
 54 | +     if use_llm_explanations:
 55 | +         hf_token = st.text_input("Hugging Face Token (optional)", type="password",
 56 | +                                  help="Enter your HF token for better rate limits")
 57 |
 58 |       st.markdown("---")
 59 | +     st.markdown("### π€ Advanced Pipeline")
 60 | +     st.markdown("- **Stage 1**: FAISS Recall (Top 50)")
 61 | +     st.markdown("- **Stage 2**: Cross-Encoder Re-ranking (Top 20)")
 62 | +     st.markdown("- **Stage 3**: BM25 Keyword Matching")
 63 | +     st.markdown("- **Stage 4**: LLM Intent Analysis")
 64 | +     st.markdown("- **Final**: Combined Scoring (Top 5)")
 65 | +     st.markdown("### π Models Used")
 66 | +     st.markdown("- **Embedding**: BAAI/bge-large-en-v1.5")
 67 | +     st.markdown("- **Cross-Encoder**: ms-marco-MiniLM-L6-v2")
 68 | +     st.markdown("- **LLM**: Qwen/Qwen3-14B")
 69 | +     st.markdown("### π Scoring Formula")
 70 | +     st.markdown("**Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)**")
 71 |
 72 |   # Initialize session state
 73 |   if 'embedding_model' not in st.session_state:
 74 |       st.session_state.embedding_model = None
 75 | + if 'cross_encoder' not in st.session_state:
 76 | +     st.session_state.cross_encoder = None
 77 |   if 'results' not in st.session_state:
 78 |       st.session_state.results = []
 79 | + if 'resume_texts' not in st.session_state:
 80 | +     st.session_state.resume_texts = []
 81 | + if 'file_names' not in st.session_state:
 82 | +     st.session_state.file_names = []
 83 | + if 'llm_client' not in st.session_state:
 84 | +     st.session_state.llm_client = None
 85 | + if 'explanations_generated' not in st.session_state:
 86 | +     st.session_state.explanations_generated = False
 87 | + if 'current_job_description' not in st.session_state:
 88 | +     st.session_state.current_job_description = ""
 89 |
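Editor's note (not part of the commit): a quick arithmetic check of the scoring formula the new sidebar advertises, with made-up component values.

```python
# Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)
cross_encoder = 0.82   # clamped semantic relevance
bm25 = 0.17            # keyword score after rescaling into [0.1, 0.2]
intent = 0.3           # LLM said "Yes" (0.3); "Maybe" -> 0.1, "No" -> 0.0

final_score = cross_encoder + bm25 + intent
print(final_score)     # 1.29; the achievable range is roughly 0.1 to 1.5
```

That 0.1-1.5 range is what the visualization tab later bins against ("Excellent (>=1.2)", etc.).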
 90 |   @st.cache_resource
 91 |   def load_embedding_model():
 92 | +     """Load and cache the BGE embedding model"""
 93 |       try:
 94 | +         with st.spinner("π Loading BAAI/bge-large-en-v1.5 model..."):
 95 | +             model = SentenceTransformer('BAAI/bge-large-en-v1.5')
 96 |           st.success("✅ Embedding model loaded successfully!")
 97 | +         return model
 98 |       except Exception as e:
 99 |           st.error(f"❌ Error loading embedding model: {str(e)}")
100 | +         return None
101 |
102 |   @st.cache_resource
103 | + def load_cross_encoder():
104 | +     """Load and cache the Cross-Encoder model"""
105 |       try:
106 | +         with st.spinner("π Loading Cross-Encoder ms-marco-MiniLM-L6-v2..."):
107 | +             from sentence_transformers import CrossEncoder
108 | +             model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
109 | +         st.success("✅ Cross-Encoder model loaded successfully!")
110 | +         return model
111 | +     except Exception as e:
112 | +         st.error(f"❌ Error loading Cross-Encoder model: {str(e)}")
113 | +         return None
114 | +
115 | + def initialize_llm_client(hf_token=None):
116 | +     """Initialize the LLM client for Qwen3-14B"""
117 | +     try:
118 | +         client = InferenceClient(
119 | +             model="Qwen/Qwen3-14B",
120 | +             token=hf_token if hf_token else None
121 | +         )
122 | +         return client
123 |       except Exception as e:
124 | +         st.error(f"❌ Error initializing LLM client: {str(e)}")
125 | +         return None
126 |
127 |   class ResumeScreener:
128 |       def __init__(self):
129 |           # Load models
130 | +         self.embedding_model = load_embedding_model()
131 | +         self.cross_encoder = load_cross_encoder()
132 | +         self.llm_client = None
133 | +
134 | +     def set_llm_client(self, client):
135 | +         """Set the LLM client"""
136 | +         self.llm_client = client
137 |
138 |       def extract_text_from_file(self, file_path, file_type):
139 |           """Extract text from various file types"""
172 |           return ""
173 |
174 |       def get_embedding(self, text):
175 | +         """Generate embedding for text using BGE model"""
176 |           if self.embedding_model is None:
177 | +             st.error("No embedding model loaded!")
178 | +             return np.zeros(1024)  # BGE-large dimension
179 |
180 |           try:
181 | +             # BGE models recommend adding instruction for retrieval
182 | +             # For queries (job description)
183 | +             if len(text) < 500:  # Assuming shorter texts are queries
184 | +                 text = "Represent this sentence for searching relevant passages: " + text
185 |
186 | +             # Truncate text to avoid memory issues
187 | +             text = text[:8192] if text else ""
188 |
189 | +             # Generate embedding
190 | +             embedding = self.embedding_model.encode(text,
191 | +                                                     convert_to_numpy=True,
192 | +                                                     normalize_embeddings=True)
193 | +             return embedding
194 | +
195 |           except Exception as e:
196 |               st.error(f"Error generating embedding: {str(e)}")
197 | +             return np.zeros(1024)  # BGE-large dimension
198 |
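Editor's note (not part of the commit): the `len(text) < 500` check above is the commit's own heuristic for telling queries from passages. A minimal sketch of the BGE v1.5 convention it approximates, using the same instruction string the hunk hard-codes (sample texts invented):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('BAAI/bge-large-en-v1.5')

# Queries (here, the job description) get the retrieval instruction; passages do not.
query = ("Represent this sentence for searching relevant passages: "
         "Senior Python developer with AWS experience")
passage = "Built and operated Django services on AWS for eight years."

q = model.encode(query, normalize_embeddings=True)
p = model.encode(passage, normalize_embeddings=True)
print(float(q @ p))  # normalized vectors, so the dot product is cosine similarity
```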
199 |       def calculate_bm25_scores(self, resume_texts, job_description):
200 |           """Calculate BM25 scores for keyword matching"""
201 |           try:
202 |               job_tokens = word_tokenize(job_description.lower())
203 | +             corpus = [word_tokenize(text.lower()) for text in resume_texts if text and text.strip()]
204 |
205 |               if not corpus:
206 |                   return [0.0] * len(resume_texts)
213 |               st.error(f"Error calculating BM25 scores: {str(e)}")
214 |               return [0.0] * len(resume_texts)
215 |
216 | +     def advanced_pipeline_ranking(self, resume_texts, job_description):
217 | +         """Advanced pipeline: FAISS recall -> Cross-encoder -> BM25 -> LLM intent -> Final ranking"""
218 | +         if not resume_texts:
219 | +             return []
220 | +
221 | +         # Stage 1: FAISS Recall (Top 50)
222 | +         st.write("π **Stage 1**: FAISS Recall - Finding top 50 candidates...")
223 | +         top_50_indices = self.faiss_recall(resume_texts, job_description, top_k=50)
224 | +
225 | +         # Stage 2: Cross-Encoder Re-ranking (Top 20)
226 | +         st.write("π― **Stage 2**: Cross-Encoder Re-ranking - Selecting top 20...")
227 | +         top_20_results = self.cross_encoder_rerank(resume_texts, job_description, top_50_indices, top_k=20)
228 | +
229 | +         # Stage 3: BM25 Keyword Matching
230 | +         st.write("π€ **Stage 3**: BM25 Keyword Matching...")
231 | +         top_20_with_bm25 = self.add_bm25_scores(resume_texts, job_description, top_20_results)
232 | +
233 | +         # Stage 4: LLM Intent Analysis
234 | +         st.write("π€ **Stage 4**: LLM Intent Analysis...")
235 | +         top_20_with_intent = self.add_intent_scores(resume_texts, job_description, top_20_with_bm25)
236 | +
237 | +         # Stage 5: Final Combined Ranking (Top 5)
238 | +         st.write("π **Stage 5**: Final Combined Ranking...")
239 | +         final_results = self.calculate_final_scores(top_20_with_intent)
240 | +
241 | +         return final_results[:5]  # Return top 5
242 | +
+
def faiss_recall(self, resume_texts, job_description, top_k=50):
|
244 |
+
"""Stage 1: Use FAISS for initial recall to find top 50 resumes"""
|
245 |
+
try:
|
246 |
+
# Get job embedding
|
247 |
+
job_embedding = self.get_embedding(job_description)
|
248 |
+
|
249 |
+
# Get resume embeddings
|
250 |
+
resume_embeddings = []
|
251 |
+
progress_bar = st.progress(0)
|
252 |
+
|
253 |
+
for i, text in enumerate(resume_texts):
|
254 |
+
if text:
|
255 |
+
embedding = self.embedding_model.encode(text[:8192],
|
256 |
+
convert_to_numpy=True,
|
257 |
+
normalize_embeddings=True)
|
258 |
+
resume_embeddings.append(embedding)
|
259 |
+
else:
|
260 |
+
resume_embeddings.append(np.zeros(1024))
|
261 |
+
progress_bar.progress((i + 1) / len(resume_texts))
|
262 |
+
|
263 |
+
progress_bar.empty()
|
264 |
+
|
265 |
+
# Create FAISS index
|
266 |
+
resume_embeddings = np.array(resume_embeddings).astype('float32')
|
267 |
+
dimension = resume_embeddings.shape[1]
|
268 |
+
index = faiss.IndexFlatIP(dimension) # Inner product for cosine similarity
|
269 |
+
index.add(resume_embeddings)
|
270 |
+
|
271 |
+
# Search for top K
|
272 |
+
job_embedding = job_embedding.reshape(1, -1).astype('float32')
|
273 |
+
scores, indices = index.search(job_embedding, min(top_k, len(resume_texts)))
|
274 |
+
|
275 |
+
return indices[0].tolist()
|
276 |
+
|
277 |
+
except Exception as e:
|
278 |
+
st.error(f"Error in FAISS recall: {str(e)}")
|
279 |
+
# Fallback: return all indices
|
280 |
+
return list(range(min(top_k, len(resume_texts))))
|
281 |
+
|
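Editor's note (not part of the commit): the `IndexFlatIP` choice is sound because on L2-normalized vectors the inner product equals cosine similarity. A standalone toy check:

```python
import numpy as np
import faiss

rng = np.random.default_rng(0)
vecs = rng.normal(size=(100, 1024)).astype('float32')
vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)  # normalize, as the app does

index = faiss.IndexFlatIP(1024)   # exact inner-product search
index.add(vecs)

scores, ids = index.search(vecs[:1], 5)              # query with the first vector
print(ids[0][0], round(float(scores[0][0]), 3))      # 0 1.0 -> exact match ranks first
```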
282 | +     def cross_encoder_rerank(self, resume_texts, job_description, top_50_indices, top_k=20):
283 | +         """Stage 2: Use Cross-Encoder to re-rank top 50 and select top 20"""
284 | +         try:
285 | +             if not self.cross_encoder:
286 | +                 st.error("Cross-encoder not loaded!")
287 | +                 return [(idx, 0.0) for idx in top_50_indices[:top_k]]
288 | +
289 | +             # Prepare pairs for cross-encoder
290 | +             pairs = []
291 | +             valid_indices = []
292 | +
293 | +             for idx in top_50_indices:
294 | +                 if idx < len(resume_texts) and resume_texts[idx]:
295 | +                     # Truncate texts for cross-encoder
296 | +                     job_snippet = job_description[:512]
297 | +                     resume_snippet = resume_texts[idx][:512]
298 | +                     pairs.append([job_snippet, resume_snippet])
299 | +                     valid_indices.append(idx)
300 | +
301 | +             if not pairs:
302 | +                 return [(idx, 0.0) for idx in top_50_indices[:top_k]]
303 | +
304 | +             # Get cross-encoder scores
305 | +             progress_bar = st.progress(0)
306 | +             scores = []
307 | +
308 | +             # Process in batches to avoid memory issues
309 | +             batch_size = 8
310 | +             for i in range(0, len(pairs), batch_size):
311 | +                 batch = pairs[i:i+batch_size]
312 | +                 batch_scores = self.cross_encoder.predict(batch)
313 | +                 scores.extend(batch_scores)
314 | +                 progress_bar.progress(min(1.0, (i + batch_size) / len(pairs)))
315 | +
316 | +             progress_bar.empty()
317 | +
318 | +             # Combine indices with scores and sort
319 | +             indexed_scores = list(zip(valid_indices, scores))
320 | +             indexed_scores.sort(key=lambda x: x[1], reverse=True)
321 | +
322 | +             return indexed_scores[:top_k]
323 | +
324 | +         except Exception as e:
325 | +             st.error(f"Error in cross-encoder re-ranking: {str(e)}")
326 | +             return [(idx, 0.0) for idx in top_50_indices[:top_k]]
327 | +
|
329 |
+
"""Stage 3: Add BM25 scores to top 20 resumes"""
|
330 |
+
try:
|
331 |
+
# Get texts for top 20
|
332 |
+
top_20_texts = [resume_texts[idx] for idx, _ in top_20_results]
|
333 |
+
|
334 |
+
# Calculate BM25 scores
|
335 |
+
bm25_scores = self.calculate_bm25_scores(top_20_texts, job_description)
|
336 |
+
|
337 |
+
# Normalize BM25 scores to 0.1-0.2 range
|
338 |
+
if bm25_scores and max(bm25_scores) > 0:
|
339 |
+
max_bm25 = max(bm25_scores)
|
340 |
+
min_bm25 = min(bm25_scores)
|
341 |
+
if max_bm25 > min_bm25:
|
342 |
+
normalized_bm25 = [
|
343 |
+
0.1 + 0.1 * (score - min_bm25) / (max_bm25 - min_bm25)
|
344 |
+
for score in bm25_scores
|
345 |
+
]
|
346 |
+
else:
|
347 |
+
normalized_bm25 = [0.15] * len(bm25_scores)
|
348 |
+
else:
|
349 |
+
normalized_bm25 = [0.15] * len(top_20_results)
|
350 |
+
|
351 |
+
# Combine with existing results
|
352 |
+
results_with_bm25 = []
|
353 |
+
for i, (idx, cross_score) in enumerate(top_20_results):
|
354 |
+
bm25_score = normalized_bm25[i] if i < len(normalized_bm25) else 0.15
|
355 |
+
results_with_bm25.append((idx, cross_score, bm25_score))
|
356 |
+
|
357 |
+
return results_with_bm25
|
358 |
+
|
359 |
+
except Exception as e:
|
360 |
+
st.error(f"Error adding BM25 scores: {str(e)}")
|
361 |
+
return [(idx, cross_score, 0.15) for idx, cross_score in top_20_results]
|
362 |
+
|
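Editor's note (not part of the commit): a worked example of the rescaling above with invented numbers. Min-max maps the weakest of the twenty resumes to 0.1 and the strongest to 0.2, so BM25 can only nudge the final score, never dominate it.

```python
bm25_scores = [2.0, 5.0, 8.0]
lo, hi = min(bm25_scores), max(bm25_scores)

normalized = [0.1 + 0.1 * (s - lo) / (hi - lo) for s in bm25_scores]
print(normalized)  # [0.1, 0.15, 0.2]
```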
363 | +     def add_intent_scores(self, resume_texts, job_description, top_20_with_bm25):
364 | +         """Stage 4: Add LLM intent analysis scores"""
365 | +         try:
366 | +             if not self.llm_client:
367 | +                 st.warning("LLM client not available. Using default intent scores.")
368 | +                 return [(idx, cross_score, bm25_score, 0.1) for idx, cross_score, bm25_score in top_20_with_bm25]
369 | +
370 | +             results_with_intent = []
371 | +             progress_bar = st.progress(0)
372 | +
373 | +             for i, (idx, cross_score, bm25_score) in enumerate(top_20_with_bm25):
374 | +                 intent_score = self.analyze_intent(resume_texts[idx], job_description)
375 | +                 results_with_intent.append((idx, cross_score, bm25_score, intent_score))
376 | +                 progress_bar.progress((i + 1) / len(top_20_with_bm25))
377 | +
378 | +             progress_bar.empty()
379 | +             return results_with_intent
380 | +
381 | +         except Exception as e:
382 | +             st.error(f"Error adding intent scores: {str(e)}")
383 | +             return [(idx, cross_score, bm25_score, 0.1) for idx, cross_score, bm25_score in top_20_with_bm25]
384 | +
385 | +     def analyze_intent(self, resume_text, job_description):
386 | +         """Analyze candidate's intent using LLM"""
387 | +         try:
388 | +             # Truncate texts
389 | +             resume_snippet = resume_text[:1500] if len(resume_text) > 1500 else resume_text
390 | +             job_snippet = job_description[:800] if len(job_description) > 800 else job_description
391 | +
392 | +             prompt = f"""You are given a job description and a candidate's resume.
393 | + Clearly answer: "Is the candidate likely seeking this job? Respond with 'Yes', 'Maybe', or 'No' and give a brief justification."
394 | +
395 | + Job Description:
396 | + '''
397 | + {job_snippet}
398 | + '''
399 | +
400 | + Candidate Resume:
401 | + '''
402 | + {resume_snippet}
403 | + '''
404 | +
405 | + Response format:
406 | + Intent: [Yes/Maybe/No]
407 | + Reason: [Brief justification]"""
408 | +
409 | +             response = self.llm_client.text_generation(
410 | +                 prompt,
411 | +                 max_new_tokens=100,
412 | +                 temperature=0.3,
413 | +                 top_p=0.9,
414 | +                 do_sample=True
415 | +             )
416 | +
417 | +             # Parse response
418 | +             response_lower = response.lower()
419 | +             if 'intent: yes' in response_lower or 'intent:yes' in response_lower:
420 | +                 return 0.3
421 | +             elif 'intent: maybe' in response_lower or 'intent:maybe' in response_lower:
422 | +                 return 0.1
423 | +             else:
424 | +                 return 0.0
425 | +
426 | +         except Exception as e:
427 | +             st.warning(f"Error analyzing intent: {str(e)}")
428 | +             return 0.1  # Default to "Maybe"
429 | +
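Editor's notes on this hunk (not part of the commit): the snippet delimiters inside the prompt are rendered here as `'''` — nested `"""` would terminate the enclosing f-string early and leave `prompt` holding only its first fragment. And for reference, this is how the substring checks map a model reply onto the 0-0.3 intent component (the reply text is invented):

```python
response = "Intent: Yes\nReason: The resume is clearly targeted at this role."

response_lower = response.lower()
if 'intent: yes' in response_lower or 'intent:yes' in response_lower:
    intent_score = 0.3
elif 'intent: maybe' in response_lower or 'intent:maybe' in response_lower:
    intent_score = 0.1
else:
    intent_score = 0.0
print(intent_score)  # 0.3
```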
430 | +     def calculate_final_scores(self, results_with_all_scores):
431 | +         """Stage 5: Calculate final combined scores"""
432 | +         try:
433 | +             final_results = []
434 | +
435 | +             for idx, cross_score, bm25_score, intent_score in results_with_all_scores:
436 | +                 # Normalize cross-encoder score to 0-1 range
437 | +                 normalized_cross = max(0, min(1, cross_score))
438 | +
439 | +                 # Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)
440 | +                 final_score = normalized_cross + bm25_score + intent_score
441 | +
442 | +                 final_results.append({
443 | +                     'index': idx,
444 | +                     'cross_encoder_score': normalized_cross,
445 | +                     'bm25_score': bm25_score,
446 | +                     'intent_score': intent_score,
447 | +                     'final_score': final_score
448 | +                 })
449 | +
450 | +             # Sort by final score
451 | +             final_results.sort(key=lambda x: x['final_score'], reverse=True)
452 | +
453 | +             return final_results
454 | +
455 | +         except Exception as e:
456 | +             st.error(f"Error calculating final scores: {str(e)}")
457 | +             return []
458 |
459 |       def extract_skills(self, text, job_description):
460 |           """Extract skills from resume based on job description"""
461 | +         if not text:
462 | +             return []
463 | +
464 | +         # Common tech skills
465 |           common_skills = [
466 | +             "python", "java", "javascript", "react", "angular", "vue", "node.js",
467 | +             "express", "django", "flask", "spring", "sql", "nosql", "html", "css",
468 | +             "aws", "azure", "gcp", "docker", "kubernetes", "jenkins", "git", "github",
469 | +             "agile", "scrum", "jira", "ci/cd", "devops", "microservices", "rest", "api",
470 | +             "machine learning", "deep learning", "data science", "artificial intelligence",
471 | +             "tensorflow", "pytorch", "keras", "scikit-learn", "pandas", "numpy",
472 | +             "matplotlib", "seaborn", "jupyter", "r", "sas", "spss", "tableau", "powerbi",
473 | +             "excel", "mysql", "postgresql", "mongodb", "redis", "elasticsearch",
474 | +             "kafka", "rabbitmq", "spark", "hadoop", "hive", "airflow", "linux", "unix"
475 |           ]
476 |
477 | +         # Extract potential skills from job description
478 |           job_words = set(word.lower() for word in word_tokenize(job_description) if len(word) > 2)
479 |
480 |           # Find matching skills
481 |           found_skills = []
482 |           text_lower = text.lower()
483 |
484 | +         # Check common skills that appear in both resume and job description
485 |           for skill in common_skills:
486 | +             if skill in text_lower and any(skill in job_word for job_word in job_words):
487 |                   found_skills.append(skill)
488 |
489 | +         # Check for skills mentioned in job description
490 |           for word in job_words:
491 | +             if len(word) > 3 and word in text_lower and word not in found_skills:
492 | +                 # Basic filter to avoid common words
493 | +                 if word not in ['with', 'have', 'that', 'this', 'from', 'what', 'when', 'where']:
494 | +                     found_skills.append(word)
495 |
496 | +         return list(set(found_skills))[:15]  # Return top 15 unique skills
497 |
498 | +     def generate_simple_explanation(self, score, semantic_score, bm25_score, skills):
499 | +         """Generate simple explanation for the match (fallback)"""
500 |           if score > 0.8:
501 |               quality = "excellent"
502 |           elif score > 0.6:
503 | +             quality = "strong"
504 |           elif score > 0.4:
505 |               quality = "moderate"
506 |           else:
507 |               quality = "limited"
508 |
509 | +         explanation = f"This candidate shows {quality} alignment with the position (score: {score:.2f}). "
510 |
511 |           if semantic_score > bm25_score:
512 | +             explanation += f"The resume demonstrates strong conceptual relevance ({semantic_score:.2f}) suggesting good experience fit. "
513 |           else:
514 | +             explanation += f"The resume has high keyword match ({bm25_score:.2f}) indicating direct skill alignment. "
515 |
516 |           if skills:
517 | +             explanation += f"Key matching competencies include: {', '.join(skills[:5])}."
518 |
519 |           return explanation
521 | +     def generate_llm_explanation(self, resume_text, job_description, score, skills, max_retries=3):
522 | +         """Generate detailed explanation using Qwen3-14B"""
523 | +         if not self.llm_client:
524 | +             return self.generate_simple_explanation(score, score, score, skills)
525 | +
526 | +         # Truncate texts to manage token limits
527 | +         resume_snippet = resume_text[:2000] if len(resume_text) > 2000 else resume_text
528 | +         job_snippet = job_description[:1000] if len(job_description) > 1000 else job_description
529 | +
530 | +         prompt = f"""You are an expert HR analyst. Analyze this individual candidate's resume against the job requirements and write EXACTLY 150 words explaining why this specific candidate is suitable for the position.
531 | +
532 | + Structure your 150-word analysis as follows:
533 | + 1. Experience alignment (40-50 words)
534 | + 2. Key strengths and skills match (40-50 words)
535 | + 3. Unique value proposition (40-50 words)
536 | + 4. Overall recommendation (10-20 words)
537 | +
538 | + Job Requirements:
539 | + {job_snippet}
540 | +
541 | + Candidate's Resume:
542 | + {resume_snippet}
543 | +
544 | + Identified Matching Skills: {', '.join(skills[:10])}
545 | + Compatibility Score: {score:.1%}
546 | +
547 | + Write a professional, detailed 150-word analysis for THIS INDIVIDUAL CANDIDATE:"""
548 | +
549 | +         for attempt in range(max_retries):
550 | +             try:
551 | +                 response = self.llm_client.text_generation(
552 | +                     prompt,
553 | +                     max_new_tokens=200,
554 | +                     temperature=0.7,
555 | +                     top_p=0.9,
556 | +                     do_sample=True
557 | +                 )
558 | +
559 | +                 # Extract the response and ensure it's about 150 words
560 | +                 explanation = response.strip()
561 | +                 word_count = len(explanation.split())
562 | +
563 | +                 # If response is close to 150 words (130-170), accept it
564 | +                 if 130 <= word_count <= 170:
565 | +                     return explanation
566 | +
567 | +                 # If response is too short or too long, try again with adjusted prompt
568 | +                 if word_count < 130:
569 | +                     # Response too short, try again
570 | +                     continue
571 | +                 elif word_count > 170:
572 | +                     # Response too long, truncate to approximately 150 words
573 | +                     words = explanation.split()
574 | +                     truncated = ' '.join(words[:150])
575 | +                     # Add proper ending if truncated
576 | +                     if not truncated.endswith('.'):
577 | +                         truncated += '.'
578 | +                     return truncated
579 | +
580 | +                 return explanation
581 | +
582 | +             except Exception as e:
583 | +                 if attempt < max_retries - 1:
584 | +                     time.sleep(2)  # Wait before retry
585 | +                     continue
586 | +                 else:
587 | +                     # Fallback to simple explanation
588 | +                     return self.generate_simple_explanation(score, score, score, skills)
589 | +
590 | +         # If all retries failed, use simple explanation
591 | +         return self.generate_simple_explanation(score, score, score, skills)
593 |   def create_download_link(df, filename="resume_screening_results.csv"):
594 |       """Create download link for results"""
598 |
599 |   # Main App Interface
600 |   st.title("π― AI-Powered Resume Screener")
601 | + st.markdown("*Find the perfect candidates using BAAI/bge-large-en-v1.5 embeddings and Qwen3-14B explanations*")
602 |   st.markdown("---")
603 |
604 |   # Initialize screener
605 | + screener = ResumeScreener()
606 | +
607 | + # Initialize LLM client if enabled
608 | + if use_llm_explanations:
609 | +     if 'hf_token' in locals() and hf_token:
610 | +         if st.session_state.llm_client is None:
611 | +             st.session_state.llm_client = initialize_llm_client(hf_token)
612 | +     else:
613 | +         if st.session_state.llm_client is None:
614 | +             st.session_state.llm_client = initialize_llm_client()
615 | +
616 | + screener.set_llm_client(st.session_state.llm_client)
617 |
618 |   # Job Description Input
619 |   st.header("π Step 1: Enter Job Description")
626 |   # Resume Input Options
627 |   st.header("π Step 2: Upload Resumes")
628 |
629 | + # Show loaded resumes indicator
630 | + if st.session_state.resume_texts:
631 | +     col1, col2 = st.columns([3, 1])
632 | +     with col1:
633 | +         st.info(f"π {len(st.session_state.resume_texts)} resumes loaded and ready for analysis")
634 | +     with col2:
635 | +         if st.button("ποΈ Clear Resumes", type="secondary", help="Clear all loaded resumes to start fresh"):
636 | +             st.session_state.resume_texts = []
637 | +             st.session_state.file_names = []
638 | +             st.session_state.results = []
639 | +             st.session_state.explanations_generated = False
640 | +             st.session_state.current_job_description = ""
641 | +             st.rerun()
642 | +
643 |   input_method = st.radio(
644 |       "Choose input method:",
645 |       ["π Upload Files", "ποΈ Load from CSV Dataset", "π Load from Hugging Face Dataset"]
646 |   )
647 |
648 |   if input_method == "π Upload Files":
649 |       uploaded_files = st.file_uploader(
650 |           "Upload resume files",
655 |
656 |       if uploaded_files:
657 |           with st.spinner(f"π Processing {len(uploaded_files)} files..."):
658 | +             resume_texts = []
659 | +             file_names = []
660 | +
661 |               for file in uploaded_files:
662 |                   file_type = file.name.split('.')[-1].lower()
663 |
664 |                   with tempfile.NamedTemporaryFile(delete=False, suffix=f'.{file_type}') as tmp_file:
665 |                       tmp_file.write(file.getvalue())
666 |                       tmp_path = tmp_file.name
667 |
668 |                   text = screener.extract_text_from_file(tmp_path, file_type)
669 |                   if text.strip():
670 |                       resume_texts.append(text)
671 |                       file_names.append(file.name)
672 |
673 |                   os.unlink(tmp_path)
674 | +
675 | +             st.session_state.resume_texts = resume_texts
676 | +             st.session_state.file_names = file_names
677 | +
678 |       if resume_texts:
679 |           st.success(f"✅ Successfully processed {len(resume_texts)} resumes")
680 |
699 |
700 |       if st.button("π Process CSV Data"):
701 |           with st.spinner("π Processing CSV data..."):
702 | +             resume_texts = []
703 | +             file_names = []
704 | +
705 |               for idx, row in df.iterrows():
706 |                   text = str(row[text_column])
707 |                   if text and text.strip() and text.lower() != 'nan':
711 |                       file_names.append(f"Resume_{idx}")
712 |                   else:
713 |                       file_names.append(str(row[name_column]))
714 | +
715 | +             st.session_state.resume_texts = resume_texts
716 | +             st.session_state.file_names = file_names
717 |
718 |           if resume_texts:
719 |               st.success(f"✅ Successfully loaded {len(resume_texts)} resumes from CSV")
722 |           st.error(f"❌ Error processing CSV: {str(e)}")
723 |
724 |   elif input_method == "π Load from Hugging Face Dataset":
725 | +     st.markdown("**Popular Resume Datasets:**")
726 | +     st.markdown("- `ahmedheakl/resume-atlas`")
727 | +     st.markdown("- `InferenceFly/Resume-Dataset`")
728 |
729 |       col1, col2 = st.columns([2, 1])
730 |       with col1:
744 |           st.success(f"✅ Loaded dataset with {len(dataset)} entries")
745 |           st.write("**Dataset Preview:**")
746 |
747 |           preview_df = pd.DataFrame(dataset[:5])
748 |           st.dataframe(preview_df)
749 |
750 |           text_column = st.selectbox(
751 |               "Select column with resume text:",
752 |               dataset.column_names,
753 | +             index=dataset.column_names.index('resume_text') if 'resume_text' in dataset.column_names else 0
754 |           )
755 |
756 |           category_column = None
757 |           if 'category' in dataset.column_names:
758 | +             categories = list(set(dataset['category']))
759 |               category_column = st.selectbox(
760 |                   "Filter by category (optional):",
761 | +                 ["All"] + categories
762 |               )
763 |
764 |           max_samples = st.slider("Maximum samples to load:", 10, min(1000, len(dataset)), 100)
765 |
766 |           if st.button("π Process Dataset"):
767 |               with st.spinner("π Processing dataset..."):
768 | +                 resume_texts = []
769 | +                 file_names = []
770 | +
771 |                   filtered_dataset = dataset
772 |
773 |                   if category_column and category_column != "All":
774 |                       filtered_dataset = dataset.filter(lambda x: x['category'] == category_column)
775 |
776 |                   sample_indices = list(range(min(max_samples, len(filtered_dataset))))
777 |
778 |                   for idx in sample_indices:
782 |                       if text and text.strip() and text.lower() != 'nan':
783 |                           resume_texts.append(text)
784 |
785 |                       if 'id' in item:
786 |                           file_names.append(f"Resume_{item['id']}")
787 |                       else:
788 |                           file_names.append(f"Resume_{idx}")
789 | +
790 | +                 st.session_state.resume_texts = resume_texts
791 | +                 st.session_state.file_names = file_names
792 |
793 |               if resume_texts:
794 |                   st.success(f"✅ Successfully loaded {len(resume_texts)} resumes")
797 |               st.error(f"❌ Error loading dataset: {str(e)}")
798 |
799 |   # Processing and Results
800 | + st.header("π Step 3: Analyze Resumes")
801 | +
802 | + # First button: Find top K candidates (fast ranking)
803 | + col1, col2 = st.columns([1, 1])
804 | +
805 | + with col1:
806 | +     if st.button("π Advanced Pipeline Analysis",
807 | +                  disabled=not (job_description and st.session_state.resume_texts),
808 | +                  type="primary",
809 | +                  help="Run the complete 5-stage advanced pipeline"):
810 | +         if len(st.session_state.resume_texts) == 0:
811 | +             st.error("❌ Please upload resumes first!")
812 | +         elif not job_description.strip():
813 | +             st.error("❌ Please enter a job description!")
814 | +         else:
815 | +             with st.spinner("π Running Advanced Pipeline Analysis..."):
816 | +                 try:
817 | +                     # Run the advanced pipeline
818 | +                     pipeline_results = screener.advanced_pipeline_ranking(
819 | +                         st.session_state.resume_texts, job_description
820 | +                     )
821 | +
822 | +                     # Prepare results for display
823 | +                     results = []
824 | +
825 | +                     for rank, result_data in enumerate(pipeline_results, 1):
826 | +                         idx = result_data['index']
827 | +                         name = st.session_state.file_names[idx]
828 | +                         text = st.session_state.resume_texts[idx]
829 | +
830 | +                         # Extract skills
831 | +                         skills = screener.extract_skills(text, job_description)
832 | +
833 | +                         results.append({
834 | +                             'rank': rank,
835 | +                             'name': name,
836 | +                             'final_score': result_data['final_score'],
837 | +                             'cross_encoder_score': result_data['cross_encoder_score'],
838 | +                             'bm25_score': result_data['bm25_score'],
839 | +                             'intent_score': result_data['intent_score'],
840 | +                             'skills': skills,
841 | +                             'text': text,
842 | +                             'text_preview': text[:500] + "..." if len(text) > 500 else text,
843 | +                             'explanation': None  # No detailed explanation yet
844 | +                         })
845 | +
846 | +                     # Add simple explanations for now
847 | +                     for result in results:
848 | +                         result['explanation'] = screener.generate_simple_explanation(
849 | +                             result['final_score'],
850 | +                             result['cross_encoder_score'],
851 | +                             result['bm25_score'],
852 | +                             result['skills']
853 | +                         )
854 | +
855 | +                     # Store in session state
856 | +                     st.session_state.results = results
857 | +                     st.session_state.explanations_generated = False
858 | +                     st.session_state.current_job_description = job_description
859 | +
860 | +                     st.success(f"π Advanced pipeline complete! Found top {len(st.session_state.results)} candidates.")
861 | +
862 | +                 except Exception as e:
863 | +                     st.error(f"❌ Error during analysis: {str(e)}")
864 | +
865 | + # Second button: Generate AI explanations (slower, optional)
866 | + with col2:
867 | +     # Show this button only if we have results and LLM is enabled
868 | +     show_explanation_button = (
869 | +         st.session_state.results and
870 | +         use_llm_explanations and
871 | +         st.session_state.llm_client and
872 | +         not st.session_state.explanations_generated
873 | +     )
874 | +
875 | +     if show_explanation_button:
876 | +         if st.button("π€ Generate AI Explanations",
877 | +                      type="secondary",
878 | +                      help="Generate detailed 150-word explanations using Qwen3-14B (takes longer)"):
879 | +             with st.spinner("π€ Generating detailed AI explanations..."):
880 | +                 try:
881 | +                     explanation_progress = st.progress(0)
882 | +                     explanation_text = st.empty()
883 | +
884 | +                     for i, result in enumerate(st.session_state.results):
885 | +                         explanation_text.text(f"π€ Generating AI explanation for candidate {i+1}/{len(st.session_state.results)}...")
886 | +
887 | +                         llm_explanation = screener.generate_llm_explanation(
888 | +                             result['text'],
889 | +                             st.session_state.current_job_description,
890 | +                             result['final_score'],
891 | +                             result['skills']
892 | +                         )
893 | +                         result['explanation'] = llm_explanation
894 | +
895 | +                         explanation_progress.progress((i + 1) / len(st.session_state.results))
896 | +
897 | +                     explanation_progress.empty()
898 | +                     explanation_text.empty()
899 | +
900 | +                     # Mark explanations as generated
901 | +                     st.session_state.explanations_generated = True
902 | +
903 | +                     st.success(f"π€ AI explanations generated for all {len(st.session_state.results)} candidates!")
904 | +
905 | +                 except Exception as e:
906 | +                     st.error(f"❌ Error generating explanations: {str(e)}")
907 | +
908 | +     elif st.session_state.results and st.session_state.explanations_generated:
909 | +         st.info("✅ AI explanations already generated!")
910 | +
911 | +     elif st.session_state.results and not use_llm_explanations:
912 | +         st.info("π‘ Enable 'Generate AI Explanations' in sidebar to use this feature")
913 | +
914 | +     elif st.session_state.results and not st.session_state.llm_client:
915 | +         st.warning("⚠️ LLM client not available. Check your Hugging Face token.")
916 |
917 |   # Display Results
918 |   if st.session_state.results:
919 |       st.header("π Top Candidates")
920 |
921 | +     # Create tabs for different views
922 | +     tab1, tab2, tab3 = st.tabs(["π Summary", "π Detailed Analysis", "π Visualizations"])
923 |
924 | +     with tab1:
925 | +         # Create summary dataframe with new scoring system
926 | +         summary_data = []
927 | +         for result in st.session_state.results:
928 | +             # Map intent score to text
929 | +             intent_text = "Yes" if result['intent_score'] == 0.3 else "Maybe" if result['intent_score'] == 0.1 else "No"
930 | +
931 | +             summary_data.append({
932 | +                 "Rank": result['rank'],
933 | +                 "Candidate": result['name'],
934 | +                 "Final Score": f"{result['final_score']:.2f}",
935 | +                 "Cross-Encoder": f"{result['cross_encoder_score']:.2f}",
936 | +                 "BM25": f"{result['bm25_score']:.2f}",
937 | +                 "Intent": f"{intent_text} ({result['intent_score']:.1f})",
938 | +                 "Top Skills": ", ".join(result['skills'][:5])
939 | +             })
940 | +
941 | +         summary_df = pd.DataFrame(summary_data)
942 | +
943 | +         # Style the dataframe
944 | +         def color_scores(val):
945 | +             if isinstance(val, str) and any(char.isdigit() for char in val):
946 | +                 try:
947 | +                     # Extract numeric value
948 | +                     numeric_val = float(''.join(c for c in val if c.isdigit() or c == '.'))
949 | +                     if 'Final Score' in val or numeric_val >= 1.0:
950 | +                         if numeric_val >= 1.2:
951 | +                             return 'background-color: #d4edda'
952 | +                         elif numeric_val >= 1.0:
953 | +                             return 'background-color: #fff3cd'
954 | +                         else:
955 | +                             return 'background-color: #f8d7da'
956 | +                     else:
957 | +                         if numeric_val >= 0.7:
958 | +                             return 'background-color: #d4edda'
959 | +                         elif numeric_val >= 0.5:
960 | +                             return 'background-color: #fff3cd'
961 | +                         else:
962 | +                             return 'background-color: #f8d7da'
963 | +                 except:
964 | +                     pass
965 | +             return ''
966 | +
967 | +         styled_df = summary_df.style.applymap(color_scores, subset=['Final Score', 'Cross-Encoder', 'BM25'])
968 | +         st.dataframe(styled_df, use_container_width=True)
969 | +
970 | +         # Download link
971 | +         detailed_data = []
972 | +         for result in st.session_state.results:
973 | +             intent_text = "Yes" if result['intent_score'] == 0.3 else "Maybe" if result['intent_score'] == 0.1 else "No"
974 | +
975 | +             detailed_data.append({
976 | +                 "Rank": result['rank'],
977 | +                 "Candidate": result['name'],
978 | +                 "Final_Score": result['final_score'],
979 | +                 "Cross_Encoder_Score": result['cross_encoder_score'],
980 | +                 "BM25_Score": result['bm25_score'],
981 | +                 "Intent_Score": result['intent_score'],
982 | +                 "Intent_Analysis": intent_text,
983 | +                 "Skills": "; ".join(result['skills']),
984 | +                 "AI_Explanation": result['explanation'],
985 | +                 "Resume_Preview": result['text_preview']
986 | +             })
987 | +
988 | +         download_df = pd.DataFrame(detailed_data)
989 | +         st.markdown(create_download_link(download_df), unsafe_allow_html=True)
990 |
991 | +     with tab2:
992 | +         # Detailed results with new scoring breakdown
993 | +         for result in st.session_state.results:
994 | +             intent_text = "Yes" if result['intent_score'] == 0.3 else "Maybe" if result['intent_score'] == 0.1 else "No"
995 | +
996 | +             with st.expander(f"#{result['rank']}: {result['name']} (Final Score: {result['final_score']:.2f})"):
997 | +                 col1, col2 = st.columns([1, 2])
998 | +
999 | +                 with col1:
1000 | +                     st.metric("π Final Score", f"{result['final_score']:.2f}")
1001 | +
1002 | +                     st.write("**π Score Breakdown:**")
1003 | +                     st.metric("π― Cross-Encoder", f"{result['cross_encoder_score']:.2f}", help="Semantic relevance (0-1)")
1004 | +                     st.metric("π€ BM25 Keywords", f"{result['bm25_score']:.2f}", help="Keyword matching (0.1-0.2)")
1005 | +                     st.metric("π€ Intent Analysis", f"{intent_text} ({result['intent_score']:.1f})", help="Job seeking likelihood (0-0.3)")
1006 | +
1007 | +                     st.write("**π― Matching Skills:**")
1008 | +                     skills_per_column = 5
1009 | +                     skill_cols = st.columns(2)
1010 | +                     for idx, skill in enumerate(result['skills'][:10]):
1011 | +                         with skill_cols[idx % 2]:
1012 | +                             st.write(f"• {skill}")
1013 | +
1014 | +                 with col2:
1015 | +                     st.write("**π‘ AI-Generated Match Analysis:**")
1016 | +                     st.info(result['explanation'])
1017 | +
1018 | +                     st.write("**π Resume Preview:**")
1019 | +                     st.text_area("", result['text_preview'], height=200, disabled=True, key=f"preview_{result['rank']}")
1020 |
1021 | +     with tab3:
1022 | +         # Score visualization
1023 | +         if len(st.session_state.results) > 1:
1024 | +             # Bar chart
1025 | +             st.subheader("Score Comparison")
1026 | +
1027 | +             chart_data = pd.DataFrame({
1028 | +                 'Candidate': [r['name'][:20] + '...' if len(r['name']) > 20 else r['name']
1029 | +                               for r in st.session_state.results],
1030 | +                 'Final Score': [r['final_score'] for r in st.session_state.results],
1031 | +                 'Cross-Encoder': [r['cross_encoder_score'] for r in st.session_state.results],
1032 | +                 'BM25': [r['bm25_score'] for r in st.session_state.results],
1033 | +                 'Intent': [r['intent_score'] for r in st.session_state.results]
1034 | +             })
1035 | +
1036 | +             st.bar_chart(chart_data.set_index('Candidate'))
1037 | +
1038 | +             # Score distribution
1039 | +             col1, col2 = st.columns(2)
1040 |
1041 |               with col1:
1042 | +                 st.subheader("Score Distribution")
1043 | +                 score_ranges = {
1044 | +                     'Excellent (≥1.2)': sum(1 for r in st.session_state.results if r['final_score'] >= 1.2),
1045 | +                     'Good (1.0-1.2)': sum(1 for r in st.session_state.results if 1.0 <= r['final_score'] < 1.2),
1046 | +                     'Fair (0.8-1.0)': sum(1 for r in st.session_state.results if 0.8 <= r['final_score'] < 1.0),
1047 | +                     'Poor (<0.8)': sum(1 for r in st.session_state.results if r['final_score'] < 0.8),
1048 | +                 }
1049 |
1050 | +                 dist_df = pd.DataFrame({
1051 | +                     'Range': score_ranges.keys(),
1052 | +                     'Count': score_ranges.values()
1053 | +                 })
1054 | +                 st.bar_chart(dist_df.set_index('Range'))
1055 |
1056 |               with col2:
1057 | +                 st.subheader("Average Scores")
1058 | +                 avg_final = np.mean([r['final_score'] for r in st.session_state.results])
1059 | +                 avg_cross = np.mean([r['cross_encoder_score'] for r in st.session_state.results])
1060 | +                 avg_bm25 = np.mean([r['bm25_score'] for r in st.session_state.results])
1061 | +                 avg_intent = np.mean([r['intent_score'] for r in st.session_state.results])
1062 |
1063 | +                 st.metric("Average Final Score", f"{avg_final:.2f}")
1064 | +                 st.metric("Average Cross-Encoder", f"{avg_cross:.2f}")
1065 | +                 st.metric("Average BM25", f"{avg_bm25:.2f}")
1066 | +                 st.metric("Average Intent", f"{avg_intent:.2f}")
1068 |   # Memory cleanup
1069 | + st.markdown("---")
1070 | + st.subheader("π§Ή Reset Application")
1071 | + col1, col2, col3 = st.columns([1, 1, 3])
1072 | + with col1:
1073 | +     if st.button("ποΈ Clear Resumes Only", type="secondary", help="Clear only the loaded resumes"):
1074 | +         st.session_state.resume_texts = []
1075 | +         st.session_state.file_names = []
1076 | +         st.session_state.results = []
1077 | +         st.session_state.explanations_generated = False
1078 | +         st.session_state.current_job_description = ""
1079 | +         st.success("✅ Resumes cleared!")
1080 | +         st.rerun()
1081 | +
1082 | + with col2:
1083 | +     if st.button("π§Ή Clear Everything", type="primary", help="Clear all data and free memory"):
1084 | +         st.session_state.resume_texts = []
1085 | +         st.session_state.file_names = []
1086 | +         st.session_state.results = []
1087 | +         st.session_state.explanations_generated = False
1088 | +         st.session_state.current_job_description = ""
1089 | +
1090 | +         if torch.cuda.is_available():
1091 | +             torch.cuda.empty_cache()
1092 | +         gc.collect()
1093 | +         st.success("✅ Everything cleared!")
1094 | +         st.rerun()
1095 |
1096 |   # Footer
1097 |   st.markdown("---")
1098 |   st.markdown(
1099 |       """
1100 |       <div style='text-align: center; color: #666;'>
1101 | +         π Powered by BAAI/bge-large-en-v1.5 & Qwen3-14B | Built with Streamlit
1102 |       </div>
1103 |       """,
1104 |       unsafe_allow_html=True