root
committed on
Commit
·
847b129
1
Parent(s):
19f7d68
ss
Browse files
app.py
CHANGED
@@ -47,14 +47,8 @@ st.set_page_config(
|
|
47 |
with st.sidebar:
|
48 |
st.title("βοΈ Configuration")
|
49 |
|
50 |
-
# Ranking weights
|
51 |
-
st.subheader("Ranking Weights")
|
52 |
-
semantic_weight = st.slider("Semantic Similarity Weight", 0.0, 1.0, 0.7, 0.1)
|
53 |
-
keyword_weight = 1.0 - semantic_weight
|
54 |
-
st.write(f"Keyword Weight: {keyword_weight:.1f}")
|
55 |
-
|
56 |
# Advanced options
|
57 |
-
st.subheader("
|
58 |
top_k = st.selectbox("Number of results to display", options=[1, 2, 3, 4, 5], index=4)
|
59 |
|
60 |
# LLM Settings
|
@@ -62,18 +56,16 @@ with st.sidebar:
|
|
62 |
st.info("π‘ Intent analysis using Qwen3-1.7B is always enabled")
|
63 |
|
64 |
st.markdown("---")
|
65 |
-
st.markdown("### π€
|
66 |
-
st.markdown("-
|
67 |
-
st.markdown("
|
68 |
-
st.markdown("-
|
69 |
-
st.markdown("
|
70 |
-
st.markdown("
|
71 |
-
st.markdown("
|
72 |
-
|
73 |
-
st.markdown("- **Cross-Encoder**: ms-marco-MiniLM-L6-v2")
|
74 |
-
st.markdown("- **Intent Analysis**: Qwen/Qwen3-1.7B")
|
75 |
st.markdown("### π Scoring Formula")
|
76 |
-
st.markdown("**Final Score = Cross-Encoder (0-
|
77 |
|
78 |
# Initialize session state
|
79 |
if 'embedding_model' not in st.session_state:
|
@@ -134,7 +126,7 @@ def load_cross_encoder():
|
|
134 |
try:
|
135 |
with st.spinner("π Loading Cross-Encoder ms-marco-MiniLM-L6-v2..."):
|
136 |
from sentence_transformers import CrossEncoder
|
137 |
-
# Try with explicit device specification
|
138 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
139 |
model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2', device=device)
|
140 |
st.success("β
Cross-Encoder model loaded successfully!")
|
@@ -142,7 +134,7 @@ def load_cross_encoder():
|
|
142 |
except Exception as e:
|
143 |
st.error(f"β Error loading Cross-Encoder model: {str(e)}")
|
144 |
try:
|
145 |
-
# Fallback: try without device specification
|
146 |
st.warning("π Trying Cross-Encoder without device specification...")
|
147 |
model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
|
148 |
st.success("β
Cross-Encoder model loaded (fallback)!")
|
@@ -285,6 +277,8 @@ class ResumeScreener:
|
|
285 |
# Get job embedding
|
286 |
job_embedding = self.get_embedding(job_description)
|
287 |
|
|
|
|
|
288 |
# Get resume embeddings
|
289 |
resume_embeddings = []
|
290 |
progress_bar = st.progress(0)
|
@@ -301,6 +295,8 @@ class ResumeScreener:
|
|
301 |
|
302 |
progress_bar.empty()
|
303 |
|
|
|
|
|
304 |
# Create FAISS index
|
305 |
resume_embeddings = np.array(resume_embeddings).astype('float32')
|
306 |
dimension = resume_embeddings.shape[1]
|
@@ -311,6 +307,9 @@ class ResumeScreener:
|
|
311 |
job_embedding = job_embedding.reshape(1, -1).astype('float32')
|
312 |
scores, indices = index.search(job_embedding, min(top_k, len(resume_texts)))
|
313 |
|
|
|
|
|
|
|
314 |
return indices[0].tolist()
|
315 |
|
316 |
except Exception as e:
|
@@ -325,6 +324,8 @@ class ResumeScreener:
|
|
325 |
st.error("Cross-encoder not loaded!")
|
326 |
return [(idx, 0.0) for idx in top_50_indices[:top_k]]
|
327 |
|
|
|
|
|
328 |
# Prepare pairs for cross-encoder
|
329 |
pairs = []
|
330 |
valid_indices = []
|
@@ -338,18 +339,33 @@ class ResumeScreener:
|
|
338 |
valid_indices.append(idx)
|
339 |
|
340 |
if not pairs:
|
|
|
341 |
return [(idx, 0.0) for idx in top_50_indices[:top_k]]
|
342 |
|
|
|
|
|
343 |
# Get cross-encoder scores
|
344 |
progress_bar = st.progress(0)
|
345 |
scores = []
|
346 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
347 |
# Process in batches to avoid memory issues
|
348 |
batch_size = 8
|
349 |
for i in range(0, len(pairs), batch_size):
|
350 |
batch = pairs[i:i+batch_size]
|
|
|
351 |
batch_scores = self.cross_encoder.predict(batch)
|
352 |
-
|
|
|
|
|
353 |
progress_bar.progress(min(1.0, (i + batch_size) / len(pairs)))
|
354 |
|
355 |
progress_bar.empty()
|
@@ -358,6 +374,26 @@ class ResumeScreener:
|
|
358 |
indexed_scores = list(zip(valid_indices, scores))
|
359 |
indexed_scores.sort(key=lambda x: x[1], reverse=True)
|
360 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
361 |
return indexed_scores[:top_k]
|
362 |
|
363 |
except Exception as e:
|
@@ -367,6 +403,8 @@ class ResumeScreener:
|
|
367 |
def add_bm25_scores(self, resume_texts, job_description, top_20_results):
|
368 |
"""Stage 3: Add BM25 scores to top 20 resumes"""
|
369 |
try:
|
|
|
|
|
370 |
# Get texts for top 20
|
371 |
top_20_texts = [resume_texts[idx] for idx, _ in top_20_results]
|
372 |
|
@@ -393,6 +431,8 @@ class ResumeScreener:
|
|
393 |
bm25_score = normalized_bm25[i] if i < len(normalized_bm25) else 0.15
|
394 |
results_with_bm25.append((idx, cross_score, bm25_score))
|
395 |
|
|
|
|
|
396 |
return results_with_bm25
|
397 |
|
398 |
except Exception as e:
|
@@ -406,7 +446,12 @@ class ResumeScreener:
|
|
406 |
progress_bar = st.progress(0)
|
407 |
|
408 |
for i, (idx, cross_score, bm25_score) in enumerate(top_20_with_bm25):
|
409 |
-
|
|
|
|
|
|
|
|
|
|
|
410 |
results_with_intent.append((idx, cross_score, bm25_score, intent_score))
|
411 |
progress_bar.progress((i + 1) / len(top_20_with_bm25))
|
412 |
|
@@ -424,49 +469,65 @@ class ResumeScreener:
|
|
424 |
resume_snippet = resume_text[:1500] if len(resume_text) > 1500 else resume_text
|
425 |
job_snippet = job_description[:800] if len(job_description) > 800 else job_description
|
426 |
|
427 |
-
prompt = f"""You are
|
428 |
-
Clearly answer: "Is the candidate likely seeking this job? Respond with 'Yes', 'Maybe', or 'No' and give a brief justification."
|
429 |
|
430 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
431 |
{job_snippet}
|
432 |
|
433 |
Candidate Resume:
|
434 |
{resume_snippet}
|
435 |
|
436 |
-
|
437 |
-
Intent: [Yes/Maybe/No]
|
438 |
-
Reason: [Brief justification]"""
|
439 |
|
440 |
response = generate_qwen3_response(
|
441 |
prompt,
|
442 |
st.session_state.qwen3_intent_tokenizer,
|
443 |
st.session_state.qwen3_intent_model,
|
444 |
-
max_new_tokens=
|
445 |
)
|
446 |
|
447 |
-
#
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
|
|
|
|
|
|
453 |
else:
|
454 |
-
|
|
|
|
|
455 |
|
456 |
except Exception as e:
|
457 |
st.warning(f"Error analyzing intent: {str(e)}")
|
458 |
-
return 0.1 # Default to "Maybe"
|
459 |
|
460 |
def calculate_final_scores(self, results_with_all_scores):
|
461 |
"""Stage 5: Calculate final combined scores"""
|
462 |
try:
|
|
|
|
|
463 |
final_results = []
|
464 |
|
465 |
for idx, cross_score, bm25_score, intent_score in results_with_all_scores:
|
466 |
-
#
|
467 |
-
normalized_cross =
|
468 |
|
469 |
-
# Final Score = Cross-Encoder (0-
|
470 |
final_score = normalized_cross + bm25_score + intent_score
|
471 |
|
472 |
final_results.append({
|
@@ -480,52 +541,15 @@ Reason: [Brief justification]"""
|
|
480 |
# Sort by final score
|
481 |
final_results.sort(key=lambda x: x['final_score'], reverse=True)
|
482 |
|
|
|
|
|
483 |
return final_results
|
484 |
|
485 |
except Exception as e:
|
486 |
st.error(f"Error calculating final scores: {str(e)}")
|
487 |
return []
|
488 |
|
489 |
-
def
|
490 |
-
"""Extract skills from resume based on job description"""
|
491 |
-
if not text:
|
492 |
-
return []
|
493 |
-
|
494 |
-
# Common tech skills
|
495 |
-
common_skills = [
|
496 |
-
"python", "java", "javascript", "react", "angular", "vue", "node.js",
|
497 |
-
"express", "django", "flask", "spring", "sql", "nosql", "html", "css",
|
498 |
-
"aws", "azure", "gcp", "docker", "kubernetes", "jenkins", "git", "github",
|
499 |
-
"agile", "scrum", "jira", "ci/cd", "devops", "microservices", "rest", "api",
|
500 |
-
"machine learning", "deep learning", "data science", "artificial intelligence",
|
501 |
-
"tensorflow", "pytorch", "keras", "scikit-learn", "pandas", "numpy",
|
502 |
-
"matplotlib", "seaborn", "jupyter", "r", "sas", "spss", "tableau", "powerbi",
|
503 |
-
"excel", "mysql", "postgresql", "mongodb", "redis", "elasticsearch",
|
504 |
-
"kafka", "rabbitmq", "spark", "hadoop", "hive", "airflow", "linux", "unix"
|
505 |
-
]
|
506 |
-
|
507 |
-
# Extract potential skills from job description
|
508 |
-
job_words = set(word.lower() for word in word_tokenize(job_description) if len(word) > 2)
|
509 |
-
|
510 |
-
# Find matching skills
|
511 |
-
found_skills = []
|
512 |
-
text_lower = text.lower()
|
513 |
-
|
514 |
-
# Check common skills that appear in both resume and job description
|
515 |
-
for skill in common_skills:
|
516 |
-
if skill in text_lower and any(skill in job_word for job_word in job_words):
|
517 |
-
found_skills.append(skill)
|
518 |
-
|
519 |
-
# Check for skills mentioned in job description
|
520 |
-
for word in job_words:
|
521 |
-
if len(word) > 3 and word in text_lower and word not in found_skills:
|
522 |
-
# Basic filter to avoid common words
|
523 |
-
if word not in ['with', 'have', 'that', 'this', 'from', 'what', 'when', 'where']:
|
524 |
-
found_skills.append(word)
|
525 |
-
|
526 |
-
return list(set(found_skills))[:15] # Return top 15 unique skills
|
527 |
-
|
528 |
-
def generate_simple_explanation(self, score, semantic_score, bm25_score, skills):
|
529 |
"""Generate simple explanation for the match (fallback)"""
|
530 |
if score > 0.8:
|
531 |
quality = "excellent"
|
@@ -543,9 +567,6 @@ Reason: [Brief justification]"""
|
|
543 |
else:
|
544 |
explanation += f"The resume has high keyword match ({bm25_score:.2f}) indicating direct skill alignment. "
|
545 |
|
546 |
-
if skills:
|
547 |
-
explanation += f"Key matching competencies include: {', '.join(skills[:5])}."
|
548 |
-
|
549 |
return explanation
|
550 |
|
551 |
|
@@ -772,9 +793,6 @@ if st.button("π Advanced Pipeline Analysis",
|
|
772 |
name = st.session_state.file_names[idx]
|
773 |
text = st.session_state.resume_texts[idx]
|
774 |
|
775 |
-
# Extract skills
|
776 |
-
skills = screener.extract_skills(text, job_description)
|
777 |
-
|
778 |
results.append({
|
779 |
'rank': rank,
|
780 |
'name': name,
|
@@ -782,7 +800,7 @@ if st.button("π Advanced Pipeline Analysis",
|
|
782 |
'cross_encoder_score': result_data['cross_encoder_score'],
|
783 |
'bm25_score': result_data['bm25_score'],
|
784 |
'intent_score': result_data['intent_score'],
|
785 |
-
'skills':
|
786 |
'text': text,
|
787 |
'text_preview': text[:500] + "..." if len(text) > 500 else text,
|
788 |
'explanation': None # Will be filled with simple explanation
|
@@ -793,8 +811,7 @@ if st.button("π Advanced Pipeline Analysis",
|
|
793 |
result['explanation'] = screener.generate_simple_explanation(
|
794 |
result['final_score'],
|
795 |
result['cross_encoder_score'],
|
796 |
-
result['bm25_score']
|
797 |
-
result['skills']
|
798 |
)
|
799 |
|
800 |
# Store in session state
|
@@ -818,7 +835,7 @@ if st.session_state.results:
|
|
818 |
summary_data = []
|
819 |
for result in st.session_state.results:
|
820 |
# Map intent score to text
|
821 |
-
intent_text = "Yes" if result['intent_score'] == 0.
|
822 |
|
823 |
summary_data.append({
|
824 |
"Rank": result['rank'],
|
@@ -826,8 +843,7 @@ if st.session_state.results:
|
|
826 |
"Final Score": f"{result['final_score']:.2f}",
|
827 |
"Cross-Encoder": f"{result['cross_encoder_score']:.2f}",
|
828 |
"BM25": f"{result['bm25_score']:.2f}",
|
829 |
-
"Intent": f"{intent_text} ({result['intent_score']:.1f})"
|
830 |
-
"Top Skills": ", ".join(result['skills'][:5])
|
831 |
})
|
832 |
|
833 |
summary_df = pd.DataFrame(summary_data)
|
@@ -862,7 +878,7 @@ if st.session_state.results:
|
|
862 |
# Download link
|
863 |
detailed_data = []
|
864 |
for result in st.session_state.results:
|
865 |
-
intent_text = "Yes" if result['intent_score'] == 0.
|
866 |
|
867 |
detailed_data.append({
|
868 |
"Rank": result['rank'],
|
@@ -872,7 +888,6 @@ if st.session_state.results:
|
|
872 |
"BM25_Score": result['bm25_score'],
|
873 |
"Intent_Score": result['intent_score'],
|
874 |
"Intent_Analysis": intent_text,
|
875 |
-
"Skills": "; ".join(result['skills']),
|
876 |
"AI_Explanation": result['explanation'],
|
877 |
"Resume_Preview": result['text_preview']
|
878 |
})
|
@@ -883,7 +898,7 @@ if st.session_state.results:
|
|
883 |
with tab2:
|
884 |
# Detailed results with new scoring breakdown
|
885 |
for result in st.session_state.results:
|
886 |
-
intent_text = "Yes" if result['intent_score'] == 0.
|
887 |
|
888 |
with st.expander(f"#{result['rank']}: {result['name']} (Final Score: {result['final_score']:.2f})"):
|
889 |
col1, col2 = st.columns([1, 2])
|
@@ -892,16 +907,9 @@ if st.session_state.results:
|
|
892 |
st.metric("π Final Score", f"{result['final_score']:.2f}")
|
893 |
|
894 |
st.write("**π Score Breakdown:**")
|
895 |
-
st.metric("π― Cross-Encoder", f"{result['cross_encoder_score']:.2f}", help="Semantic relevance (0-
|
896 |
st.metric("π€ BM25 Keywords", f"{result['bm25_score']:.2f}", help="Keyword matching (0.1-0.2)")
|
897 |
-
st.metric("π€ Intent Analysis", f"{intent_text} ({result['intent_score']:.
|
898 |
-
|
899 |
-
st.write("**π― Matching Skills:**")
|
900 |
-
skills_per_column = 5
|
901 |
-
skill_cols = st.columns(2)
|
902 |
-
for idx, skill in enumerate(result['skills'][:10]):
|
903 |
-
with skill_cols[idx % 2]:
|
904 |
-
st.write(f"β’ {skill}")
|
905 |
|
906 |
with col2:
|
907 |
st.write("**π‘ AI-Generated Match Analysis:**")
|
|
|
47 |
with st.sidebar:
|
48 |
st.title("βοΈ Configuration")
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
# Advanced options
|
51 |
+
st.subheader("Display Options")
|
52 |
top_k = st.selectbox("Number of results to display", options=[1, 2, 3, 4, 5], index=4)
|
53 |
|
54 |
# LLM Settings
|
|
|
56 |
st.info("π‘ Intent analysis using Qwen3-1.7B is always enabled")
|
57 |
|
58 |
st.markdown("---")
|
59 |
+
st.markdown("### π€ Pipeline Overview")
|
60 |
+
st.markdown("**5-Stage Advanced Pipeline:**")
|
61 |
+
st.markdown("1. FAISS Recall (Top 50)")
|
62 |
+
st.markdown("2. Cross-Encoder Re-ranking (Top 20)")
|
63 |
+
st.markdown("3. BM25 Keyword Matching")
|
64 |
+
st.markdown("4. LLM Intent Analysis")
|
65 |
+
st.markdown("5. Combined Scoring")
|
66 |
+
|
|
|
|
|
67 |
st.markdown("### π Scoring Formula")
|
68 |
+
st.markdown("**Final Score = Cross-Encoder (0-0.7) + BM25 (0.1-0.2) + Intent (0-0.1)**")
|
69 |
|
70 |
# Initialize session state
|
71 |
if 'embedding_model' not in st.session_state:
|
|
|
126 |
try:
|
127 |
with st.spinner("π Loading Cross-Encoder ms-marco-MiniLM-L6-v2..."):
|
128 |
from sentence_transformers import CrossEncoder
|
129 |
+
# Try with explicit device specification and logistic scoring
|
130 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
131 |
model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2', device=device)
|
132 |
st.success("β
Cross-Encoder model loaded successfully!")
|
|
|
134 |
except Exception as e:
|
135 |
st.error(f"β Error loading Cross-Encoder model: {str(e)}")
|
136 |
try:
|
137 |
+
# Fallback: try without device specification but with logistic scoring
|
138 |
st.warning("π Trying Cross-Encoder without device specification...")
|
139 |
model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
|
140 |
st.success("β
Cross-Encoder model loaded (fallback)!")
|
|
|
277 |
# Get job embedding
|
278 |
job_embedding = self.get_embedding(job_description)
|
279 |
|
280 |
+
st.write(f"π Generating embeddings for {len(resume_texts)} resumes...")
|
281 |
+
|
282 |
# Get resume embeddings
|
283 |
resume_embeddings = []
|
284 |
progress_bar = st.progress(0)
|
|
|
295 |
|
296 |
progress_bar.empty()
|
297 |
|
298 |
+
st.write("π Building FAISS index and searching...")
|
299 |
+
|
300 |
# Create FAISS index
|
301 |
resume_embeddings = np.array(resume_embeddings).astype('float32')
|
302 |
dimension = resume_embeddings.shape[1]
|
|
|
307 |
job_embedding = job_embedding.reshape(1, -1).astype('float32')
|
308 |
scores, indices = index.search(job_embedding, min(top_k, len(resume_texts)))
|
309 |
|
310 |
+
# Show completion message
|
311 |
+
st.write(f"β
FAISS recall completed! Found top {min(top_k, len(resume_texts))} candidates.")
|
312 |
+
|
313 |
return indices[0].tolist()
|
314 |
|
315 |
except Exception as e:
|
|
|
324 |
st.error("Cross-encoder not loaded!")
|
325 |
return [(idx, 0.0) for idx in top_50_indices[:top_k]]
|
326 |
|
327 |
+
st.write(f"π Processing {len(top_50_indices)} candidates with Cross-Encoder...")
|
328 |
+
|
329 |
# Prepare pairs for cross-encoder
|
330 |
pairs = []
|
331 |
valid_indices = []
|
|
|
339 |
valid_indices.append(idx)
|
340 |
|
341 |
if not pairs:
|
342 |
+
st.warning("No valid pairs found for cross-encoder!")
|
343 |
return [(idx, 0.0) for idx in top_50_indices[:top_k]]
|
344 |
|
345 |
+
st.write(f"π Cross-Encoder analyzing {len(pairs)} resume-job pairs...")
|
346 |
+
|
347 |
# Get cross-encoder scores
|
348 |
progress_bar = st.progress(0)
|
349 |
scores = []
|
350 |
|
351 |
+
def safe_sigmoid(x):
    """Return the logistic sigmoid of ``x`` without overflowing.

    Accepts a scalar or any array-like of raw logits. The original version
    handled scalars only (``if x >= 0`` is ambiguous for arrays); this one
    applies the same branch-wise formula element-wise, so it can be called
    on a single score or on a whole batch.

    Args:
        x: A number or array-like of numbers.

    Returns:
        A Python ``float`` for scalar input, otherwise a float64 NumPy array
        with the same shape as ``x``. Every value lies in [0, 1].
    """
    values = np.asarray(x, dtype=np.float64)
    # exp(-|x|) is always in (0, 1], so neither branch below can overflow:
    #   x >= 0 -> 1 / (1 + exp(-x))
    #   x <  0 -> exp(x) / (1 + exp(x))
    decay = np.exp(-np.abs(values))
    result = np.where(values >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Scalar in, scalar out; arrays keep their shape.
    return float(result) if result.ndim == 0 else result
|
359 |
+
|
360 |
# Process in batches to avoid memory issues
|
361 |
batch_size = 8
|
362 |
for i in range(0, len(pairs), batch_size):
|
363 |
batch = pairs[i:i+batch_size]
|
364 |
+
# Get raw logits from cross-encoder
|
365 |
batch_scores = self.cross_encoder.predict(batch)
|
366 |
+
# Apply sigmoid to convert logits to [0,1] range
|
367 |
+
batch_scores_sigmoid = [safe_sigmoid(score) for score in batch_scores]
|
368 |
+
scores.extend(batch_scores_sigmoid)
|
369 |
progress_bar.progress(min(1.0, (i + batch_size) / len(pairs)))
|
370 |
|
371 |
progress_bar.empty()
|
|
|
374 |
indexed_scores = list(zip(valid_indices, scores))
|
375 |
indexed_scores.sort(key=lambda x: x[1], reverse=True)
|
376 |
|
377 |
+
# Normalize scores to 0-0.7 range (highest score becomes 0.7)
|
378 |
+
if scores and len(scores) > 0:
|
379 |
+
max_score = max(scores)
|
380 |
+
min_score = min(scores)
|
381 |
+
|
382 |
+
if max_score > min_score:
|
383 |
+
# Scale to 0-0.7 range
|
384 |
+
normalized_indexed_scores = []
|
385 |
+
for idx, score in indexed_scores:
|
386 |
+
normalized_score = 0.7 * (score - min_score) / (max_score - min_score)
|
387 |
+
normalized_indexed_scores.append((idx, normalized_score))
|
388 |
+
indexed_scores = normalized_indexed_scores
|
389 |
+
else:
|
390 |
+
# All scores are the same, give them all 0.35 (middle value)
|
391 |
+
indexed_scores = [(idx, 0.35) for idx, _ in indexed_scores]
|
392 |
+
|
393 |
+
# Show completion message
|
394 |
+
st.write(f"β
Cross-Encoder completed! Selected top {min(top_k, len(indexed_scores))} candidates.")
|
395 |
+
st.write(f"π Cross-Encoder scores normalized to 0-0.7 range (highest: {indexed_scores[0][1]:.3f})")
|
396 |
+
|
397 |
return indexed_scores[:top_k]
|
398 |
|
399 |
except Exception as e:
|
|
|
403 |
def add_bm25_scores(self, resume_texts, job_description, top_20_results):
|
404 |
"""Stage 3: Add BM25 scores to top 20 resumes"""
|
405 |
try:
|
406 |
+
st.write(f"π Calculating BM25 keyword scores for {len(top_20_results)} candidates...")
|
407 |
+
|
408 |
# Get texts for top 20
|
409 |
top_20_texts = [resume_texts[idx] for idx, _ in top_20_results]
|
410 |
|
|
|
431 |
bm25_score = normalized_bm25[i] if i < len(normalized_bm25) else 0.15
|
432 |
results_with_bm25.append((idx, cross_score, bm25_score))
|
433 |
|
434 |
+
st.write(f"β
BM25 keyword matching completed!")
|
435 |
+
|
436 |
return results_with_bm25
|
437 |
|
438 |
except Exception as e:
|
|
|
446 |
progress_bar = st.progress(0)
|
447 |
|
448 |
for i, (idx, cross_score, bm25_score) in enumerate(top_20_with_bm25):
|
449 |
+
candidate_name = st.session_state.file_names[idx] if idx < len(st.session_state.file_names) else f"Resume_{idx}"
|
450 |
+
intent_score, intent_text = self.analyze_intent(resume_texts[idx], job_description)
|
451 |
+
|
452 |
+
# Print the intent analysis result
|
453 |
+
st.write(f"π **{candidate_name}**: Intent = **{intent_text}** (Score: {intent_score:.1f})")
|
454 |
+
|
455 |
results_with_intent.append((idx, cross_score, bm25_score, intent_score))
|
456 |
progress_bar.progress((i + 1) / len(top_20_with_bm25))
|
457 |
|
|
|
469 |
resume_snippet = resume_text[:1500] if len(resume_text) > 1500 else resume_text
|
470 |
job_snippet = job_description[:800] if len(job_description) > 800 else job_description
|
471 |
|
472 |
+
prompt = f"""You are a helpful HR assistant. Look at this candidate's resume and job posting.
|
|
|
473 |
|
474 |
+
The candidate is likely a good fit if they have ANY of these:
|
475 |
+
- Related work experience (even if different industry)
|
476 |
+
- Relevant technical skills
|
477 |
+
- Educational background that could apply
|
478 |
+
- Any transferable skills
|
479 |
+
- Similar job titles or responsibilities
|
480 |
+
|
481 |
+
Be generous in your assessment. Most candidates who made it this far are potentially suitable.
|
482 |
+
|
483 |
+
Answer "Yes" for most candidates unless they are completely unrelated.
|
484 |
+
Answer "No" only if absolutely no connection exists.
|
485 |
+
|
486 |
+
Job Posting:
|
487 |
{job_snippet}
|
488 |
|
489 |
Candidate Resume:
|
490 |
{resume_snippet}
|
491 |
|
492 |
+
Is this candidate suitable? Answer:"""
|
|
|
|
|
493 |
|
494 |
response = generate_qwen3_response(
|
495 |
prompt,
|
496 |
st.session_state.qwen3_intent_tokenizer,
|
497 |
st.session_state.qwen3_intent_model,
|
498 |
+
max_new_tokens=20
|
499 |
)
|
500 |
|
501 |
+
# Debug: print the raw response
|
502 |
+
print(f"Raw LLM response: '{response}'")
|
503 |
+
|
504 |
+
# Parse response - look for the answer directly
|
505 |
+
response_lower = response.lower().strip()
|
506 |
+
if 'yes' in response_lower:
|
507 |
+
return 0.1, "Yes"
|
508 |
+
elif 'no' in response_lower:
|
509 |
+
return 0.0, "No"
|
510 |
else:
|
511 |
+
# If no clear answer, default to "Yes" to be more lenient
|
512 |
+
print(f"Unclear response, defaulting to Yes: '{response}'")
|
513 |
+
return 0.1, "Yes"
|
514 |
|
515 |
except Exception as e:
|
516 |
st.warning(f"Error analyzing intent: {str(e)}")
|
517 |
+
return 0.1, "Yes" # Default to "Yes" instead of "Maybe"
|
518 |
|
519 |
def calculate_final_scores(self, results_with_all_scores):
|
520 |
"""Stage 5: Calculate final combined scores"""
|
521 |
try:
|
522 |
+
st.write(f"π Computing final combined scores for {len(results_with_all_scores)} candidates...")
|
523 |
+
|
524 |
final_results = []
|
525 |
|
526 |
for idx, cross_score, bm25_score, intent_score in results_with_all_scores:
|
527 |
+
# Cross-encoder scores are already in [0,1] range with logistic scoring
|
528 |
+
normalized_cross = cross_score
|
529 |
|
530 |
+
# Final Score = Cross-Encoder (0-0.7) + BM25 (0.1-0.2) + Intent (0-0.1)
|
531 |
final_score = normalized_cross + bm25_score + intent_score
|
532 |
|
533 |
final_results.append({
|
|
|
541 |
# Sort by final score
|
542 |
final_results.sort(key=lambda x: x['final_score'], reverse=True)
|
543 |
|
544 |
+
st.write(f"β
Final ranking completed! Candidates sorted by combined score.")
|
545 |
+
|
546 |
return final_results
|
547 |
|
548 |
except Exception as e:
|
549 |
st.error(f"Error calculating final scores: {str(e)}")
|
550 |
return []
|
551 |
|
552 |
+
def generate_simple_explanation(self, score, semantic_score, bm25_score):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
553 |
"""Generate simple explanation for the match (fallback)"""
|
554 |
if score > 0.8:
|
555 |
quality = "excellent"
|
|
|
567 |
else:
|
568 |
explanation += f"The resume has high keyword match ({bm25_score:.2f}) indicating direct skill alignment. "
|
569 |
|
|
|
|
|
|
|
570 |
return explanation
|
571 |
|
572 |
|
|
|
793 |
name = st.session_state.file_names[idx]
|
794 |
text = st.session_state.resume_texts[idx]
|
795 |
|
|
|
|
|
|
|
796 |
results.append({
|
797 |
'rank': rank,
|
798 |
'name': name,
|
|
|
800 |
'cross_encoder_score': result_data['cross_encoder_score'],
|
801 |
'bm25_score': result_data['bm25_score'],
|
802 |
'intent_score': result_data['intent_score'],
|
803 |
+
'skills': [],
|
804 |
'text': text,
|
805 |
'text_preview': text[:500] + "..." if len(text) > 500 else text,
|
806 |
'explanation': None # Will be filled with simple explanation
|
|
|
811 |
result['explanation'] = screener.generate_simple_explanation(
|
812 |
result['final_score'],
|
813 |
result['cross_encoder_score'],
|
814 |
+
result['bm25_score']
|
|
|
815 |
)
|
816 |
|
817 |
# Store in session state
|
|
|
835 |
summary_data = []
|
836 |
for result in st.session_state.results:
|
837 |
# Map intent score to text
|
838 |
+
intent_text = "Yes" if result['intent_score'] == 0.1 else "No"
|
839 |
|
840 |
summary_data.append({
|
841 |
"Rank": result['rank'],
|
|
|
843 |
"Final Score": f"{result['final_score']:.2f}",
|
844 |
"Cross-Encoder": f"{result['cross_encoder_score']:.2f}",
|
845 |
"BM25": f"{result['bm25_score']:.2f}",
|
846 |
+
"Intent": f"{intent_text} ({result['intent_score']:.1f})"
|
|
|
847 |
})
|
848 |
|
849 |
summary_df = pd.DataFrame(summary_data)
|
|
|
878 |
# Download link
|
879 |
detailed_data = []
|
880 |
for result in st.session_state.results:
|
881 |
+
intent_text = "Yes" if result['intent_score'] == 0.1 else "No"
|
882 |
|
883 |
detailed_data.append({
|
884 |
"Rank": result['rank'],
|
|
|
888 |
"BM25_Score": result['bm25_score'],
|
889 |
"Intent_Score": result['intent_score'],
|
890 |
"Intent_Analysis": intent_text,
|
|
|
891 |
"AI_Explanation": result['explanation'],
|
892 |
"Resume_Preview": result['text_preview']
|
893 |
})
|
|
|
898 |
with tab2:
|
899 |
# Detailed results with new scoring breakdown
|
900 |
for result in st.session_state.results:
|
901 |
+
intent_text = "Yes" if result['intent_score'] == 0.1 else "No"
|
902 |
|
903 |
with st.expander(f"#{result['rank']}: {result['name']} (Final Score: {result['final_score']:.2f})"):
|
904 |
col1, col2 = st.columns([1, 2])
|
|
|
907 |
st.metric("π Final Score", f"{result['final_score']:.2f}")
|
908 |
|
909 |
st.write("**π Score Breakdown:**")
|
910 |
+
st.metric("π― Cross-Encoder", f"{result['cross_encoder_score']:.2f}", help="Semantic relevance (0-0.7)")
|
911 |
st.metric("π€ BM25 Keywords", f"{result['bm25_score']:.2f}", help="Keyword matching (0.1-0.2)")
|
912 |
+
st.metric("π€ Intent Analysis", f"{intent_text} ({result['intent_score']:.2f})", help="Job seeking likelihood (0-0.1)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
913 |
|
914 |
with col2:
|
915 |
st.write("**π‘ AI-Generated Match Analysis:**")
|