root committed on
Commit
847b129
·
1 Parent(s): 19f7d68
Files changed (1) hide show
  1. app.py +112 -104
app.py CHANGED
@@ -47,14 +47,8 @@ st.set_page_config(
47
  with st.sidebar:
48
  st.title("βš™οΈ Configuration")
49
 
50
- # Ranking weights
51
- st.subheader("Ranking Weights")
52
- semantic_weight = st.slider("Semantic Similarity Weight", 0.0, 1.0, 0.7, 0.1)
53
- keyword_weight = 1.0 - semantic_weight
54
- st.write(f"Keyword Weight: {keyword_weight:.1f}")
55
-
56
  # Advanced options
57
- st.subheader("Advanced Options")
58
  top_k = st.selectbox("Number of results to display", options=[1, 2, 3, 4, 5], index=4)
59
 
60
  # LLM Settings
@@ -62,18 +56,16 @@ with st.sidebar:
62
  st.info("πŸ’‘ Intent analysis using Qwen3-1.7B is always enabled")
63
 
64
  st.markdown("---")
65
- st.markdown("### πŸ€– Advanced Pipeline")
66
- st.markdown("- **Stage 1**: FAISS Recall (Top 50)")
67
- st.markdown("- **Stage 2**: Cross-Encoder Re-ranking (Top 20)")
68
- st.markdown("- **Stage 3**: BM25 Keyword Matching")
69
- st.markdown("- **Stage 4**: LLM Intent Analysis")
70
- st.markdown("- **Final**: Combined Scoring (Top 5)")
71
- st.markdown("### πŸ“Š Models Used")
72
- st.markdown("- **Embedding**: BAAI/bge-large-en-v1.5")
73
- st.markdown("- **Cross-Encoder**: ms-marco-MiniLM-L6-v2")
74
- st.markdown("- **Intent Analysis**: Qwen/Qwen3-1.7B")
75
  st.markdown("### πŸ“ˆ Scoring Formula")
76
- st.markdown("**Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)**")
77
 
78
  # Initialize session state
79
  if 'embedding_model' not in st.session_state:
@@ -134,7 +126,7 @@ def load_cross_encoder():
134
  try:
135
  with st.spinner("πŸ”„ Loading Cross-Encoder ms-marco-MiniLM-L6-v2..."):
136
  from sentence_transformers import CrossEncoder
137
- # Try with explicit device specification
138
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
139
  model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2', device=device)
140
  st.success("βœ… Cross-Encoder model loaded successfully!")
@@ -142,7 +134,7 @@ def load_cross_encoder():
142
  except Exception as e:
143
  st.error(f"❌ Error loading Cross-Encoder model: {str(e)}")
144
  try:
145
- # Fallback: try without device specification
146
  st.warning("πŸ”„ Trying Cross-Encoder without device specification...")
147
  model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
148
  st.success("βœ… Cross-Encoder model loaded (fallback)!")
@@ -285,6 +277,8 @@ class ResumeScreener:
285
  # Get job embedding
286
  job_embedding = self.get_embedding(job_description)
287
 
 
 
288
  # Get resume embeddings
289
  resume_embeddings = []
290
  progress_bar = st.progress(0)
@@ -301,6 +295,8 @@ class ResumeScreener:
301
 
302
  progress_bar.empty()
303
 
 
 
304
  # Create FAISS index
305
  resume_embeddings = np.array(resume_embeddings).astype('float32')
306
  dimension = resume_embeddings.shape[1]
@@ -311,6 +307,9 @@ class ResumeScreener:
311
  job_embedding = job_embedding.reshape(1, -1).astype('float32')
312
  scores, indices = index.search(job_embedding, min(top_k, len(resume_texts)))
313
 
 
 
 
314
  return indices[0].tolist()
315
 
316
  except Exception as e:
@@ -325,6 +324,8 @@ class ResumeScreener:
325
  st.error("Cross-encoder not loaded!")
326
  return [(idx, 0.0) for idx in top_50_indices[:top_k]]
327
 
 
 
328
  # Prepare pairs for cross-encoder
329
  pairs = []
330
  valid_indices = []
@@ -338,18 +339,33 @@ class ResumeScreener:
338
  valid_indices.append(idx)
339
 
340
  if not pairs:
 
341
  return [(idx, 0.0) for idx in top_50_indices[:top_k]]
342
 
 
 
343
  # Get cross-encoder scores
344
  progress_bar = st.progress(0)
345
  scores = []
346
 
 
 
 
 
 
 
 
 
 
347
  # Process in batches to avoid memory issues
348
  batch_size = 8
349
  for i in range(0, len(pairs), batch_size):
350
  batch = pairs[i:i+batch_size]
 
351
  batch_scores = self.cross_encoder.predict(batch)
352
- scores.extend(batch_scores)
 
 
353
  progress_bar.progress(min(1.0, (i + batch_size) / len(pairs)))
354
 
355
  progress_bar.empty()
@@ -358,6 +374,26 @@ class ResumeScreener:
358
  indexed_scores = list(zip(valid_indices, scores))
359
  indexed_scores.sort(key=lambda x: x[1], reverse=True)
360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  return indexed_scores[:top_k]
362
 
363
  except Exception as e:
@@ -367,6 +403,8 @@ class ResumeScreener:
367
  def add_bm25_scores(self, resume_texts, job_description, top_20_results):
368
  """Stage 3: Add BM25 scores to top 20 resumes"""
369
  try:
 
 
370
  # Get texts for top 20
371
  top_20_texts = [resume_texts[idx] for idx, _ in top_20_results]
372
 
@@ -393,6 +431,8 @@ class ResumeScreener:
393
  bm25_score = normalized_bm25[i] if i < len(normalized_bm25) else 0.15
394
  results_with_bm25.append((idx, cross_score, bm25_score))
395
 
 
 
396
  return results_with_bm25
397
 
398
  except Exception as e:
@@ -406,7 +446,12 @@ class ResumeScreener:
406
  progress_bar = st.progress(0)
407
 
408
  for i, (idx, cross_score, bm25_score) in enumerate(top_20_with_bm25):
409
- intent_score = self.analyze_intent(resume_texts[idx], job_description)
 
 
 
 
 
410
  results_with_intent.append((idx, cross_score, bm25_score, intent_score))
411
  progress_bar.progress((i + 1) / len(top_20_with_bm25))
412
 
@@ -424,49 +469,65 @@ class ResumeScreener:
424
  resume_snippet = resume_text[:1500] if len(resume_text) > 1500 else resume_text
425
  job_snippet = job_description[:800] if len(job_description) > 800 else job_description
426
 
427
- prompt = f"""You are given a job description and a candidate's resume.
428
- Clearly answer: "Is the candidate likely seeking this job? Respond with 'Yes', 'Maybe', or 'No' and give a brief justification."
429
 
430
- Job Description:
 
 
 
 
 
 
 
 
 
 
 
 
431
  {job_snippet}
432
 
433
  Candidate Resume:
434
  {resume_snippet}
435
 
436
- Response format:
437
- Intent: [Yes/Maybe/No]
438
- Reason: [Brief justification]"""
439
 
440
  response = generate_qwen3_response(
441
  prompt,
442
  st.session_state.qwen3_intent_tokenizer,
443
  st.session_state.qwen3_intent_model,
444
- max_new_tokens=100
445
  )
446
 
447
- # Parse response
448
- response_lower = response.lower()
449
- if 'intent: yes' in response_lower or 'intent:yes' in response_lower:
450
- return 0.3
451
- elif 'intent: maybe' in response_lower or 'intent:maybe' in response_lower:
452
- return 0.1
 
 
 
453
  else:
454
- return 0.0
 
 
455
 
456
  except Exception as e:
457
  st.warning(f"Error analyzing intent: {str(e)}")
458
- return 0.1 # Default to "Maybe"
459
 
460
  def calculate_final_scores(self, results_with_all_scores):
461
  """Stage 5: Calculate final combined scores"""
462
  try:
 
 
463
  final_results = []
464
 
465
  for idx, cross_score, bm25_score, intent_score in results_with_all_scores:
466
- # Normalize cross-encoder score to 0-1 range
467
- normalized_cross = max(0, min(1, cross_score))
468
 
469
- # Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)
470
  final_score = normalized_cross + bm25_score + intent_score
471
 
472
  final_results.append({
@@ -480,52 +541,15 @@ Reason: [Brief justification]"""
480
  # Sort by final score
481
  final_results.sort(key=lambda x: x['final_score'], reverse=True)
482
 
 
 
483
  return final_results
484
 
485
  except Exception as e:
486
  st.error(f"Error calculating final scores: {str(e)}")
487
  return []
488
 
489
- def extract_skills(self, text, job_description):
490
- """Extract skills from resume based on job description"""
491
- if not text:
492
- return []
493
-
494
- # Common tech skills
495
- common_skills = [
496
- "python", "java", "javascript", "react", "angular", "vue", "node.js",
497
- "express", "django", "flask", "spring", "sql", "nosql", "html", "css",
498
- "aws", "azure", "gcp", "docker", "kubernetes", "jenkins", "git", "github",
499
- "agile", "scrum", "jira", "ci/cd", "devops", "microservices", "rest", "api",
500
- "machine learning", "deep learning", "data science", "artificial intelligence",
501
- "tensorflow", "pytorch", "keras", "scikit-learn", "pandas", "numpy",
502
- "matplotlib", "seaborn", "jupyter", "r", "sas", "spss", "tableau", "powerbi",
503
- "excel", "mysql", "postgresql", "mongodb", "redis", "elasticsearch",
504
- "kafka", "rabbitmq", "spark", "hadoop", "hive", "airflow", "linux", "unix"
505
- ]
506
-
507
- # Extract potential skills from job description
508
- job_words = set(word.lower() for word in word_tokenize(job_description) if len(word) > 2)
509
-
510
- # Find matching skills
511
- found_skills = []
512
- text_lower = text.lower()
513
-
514
- # Check common skills that appear in both resume and job description
515
- for skill in common_skills:
516
- if skill in text_lower and any(skill in job_word for job_word in job_words):
517
- found_skills.append(skill)
518
-
519
- # Check for skills mentioned in job description
520
- for word in job_words:
521
- if len(word) > 3 and word in text_lower and word not in found_skills:
522
- # Basic filter to avoid common words
523
- if word not in ['with', 'have', 'that', 'this', 'from', 'what', 'when', 'where']:
524
- found_skills.append(word)
525
-
526
- return list(set(found_skills))[:15] # Return top 15 unique skills
527
-
528
- def generate_simple_explanation(self, score, semantic_score, bm25_score, skills):
529
  """Generate simple explanation for the match (fallback)"""
530
  if score > 0.8:
531
  quality = "excellent"
@@ -543,9 +567,6 @@ Reason: [Brief justification]"""
543
  else:
544
  explanation += f"The resume has high keyword match ({bm25_score:.2f}) indicating direct skill alignment. "
545
 
546
- if skills:
547
- explanation += f"Key matching competencies include: {', '.join(skills[:5])}."
548
-
549
  return explanation
550
 
551
 
@@ -772,9 +793,6 @@ if st.button("πŸš€ Advanced Pipeline Analysis",
772
  name = st.session_state.file_names[idx]
773
  text = st.session_state.resume_texts[idx]
774
 
775
- # Extract skills
776
- skills = screener.extract_skills(text, job_description)
777
-
778
  results.append({
779
  'rank': rank,
780
  'name': name,
@@ -782,7 +800,7 @@ if st.button("πŸš€ Advanced Pipeline Analysis",
782
  'cross_encoder_score': result_data['cross_encoder_score'],
783
  'bm25_score': result_data['bm25_score'],
784
  'intent_score': result_data['intent_score'],
785
- 'skills': skills,
786
  'text': text,
787
  'text_preview': text[:500] + "..." if len(text) > 500 else text,
788
  'explanation': None # Will be filled with simple explanation
@@ -793,8 +811,7 @@ if st.button("πŸš€ Advanced Pipeline Analysis",
793
  result['explanation'] = screener.generate_simple_explanation(
794
  result['final_score'],
795
  result['cross_encoder_score'],
796
- result['bm25_score'],
797
- result['skills']
798
  )
799
 
800
  # Store in session state
@@ -818,7 +835,7 @@ if st.session_state.results:
818
  summary_data = []
819
  for result in st.session_state.results:
820
  # Map intent score to text
821
- intent_text = "Yes" if result['intent_score'] == 0.3 else "Maybe" if result['intent_score'] == 0.1 else "No"
822
 
823
  summary_data.append({
824
  "Rank": result['rank'],
@@ -826,8 +843,7 @@ if st.session_state.results:
826
  "Final Score": f"{result['final_score']:.2f}",
827
  "Cross-Encoder": f"{result['cross_encoder_score']:.2f}",
828
  "BM25": f"{result['bm25_score']:.2f}",
829
- "Intent": f"{intent_text} ({result['intent_score']:.1f})",
830
- "Top Skills": ", ".join(result['skills'][:5])
831
  })
832
 
833
  summary_df = pd.DataFrame(summary_data)
@@ -862,7 +878,7 @@ if st.session_state.results:
862
  # Download link
863
  detailed_data = []
864
  for result in st.session_state.results:
865
- intent_text = "Yes" if result['intent_score'] == 0.3 else "Maybe" if result['intent_score'] == 0.1 else "No"
866
 
867
  detailed_data.append({
868
  "Rank": result['rank'],
@@ -872,7 +888,6 @@ if st.session_state.results:
872
  "BM25_Score": result['bm25_score'],
873
  "Intent_Score": result['intent_score'],
874
  "Intent_Analysis": intent_text,
875
- "Skills": "; ".join(result['skills']),
876
  "AI_Explanation": result['explanation'],
877
  "Resume_Preview": result['text_preview']
878
  })
@@ -883,7 +898,7 @@ if st.session_state.results:
883
  with tab2:
884
  # Detailed results with new scoring breakdown
885
  for result in st.session_state.results:
886
- intent_text = "Yes" if result['intent_score'] == 0.3 else "Maybe" if result['intent_score'] == 0.1 else "No"
887
 
888
  with st.expander(f"#{result['rank']}: {result['name']} (Final Score: {result['final_score']:.2f})"):
889
  col1, col2 = st.columns([1, 2])
@@ -892,16 +907,9 @@ if st.session_state.results:
892
  st.metric("πŸ† Final Score", f"{result['final_score']:.2f}")
893
 
894
  st.write("**πŸ“Š Score Breakdown:**")
895
- st.metric("🎯 Cross-Encoder", f"{result['cross_encoder_score']:.2f}", help="Semantic relevance (0-1)")
896
  st.metric("πŸ”€ BM25 Keywords", f"{result['bm25_score']:.2f}", help="Keyword matching (0.1-0.2)")
897
- st.metric("πŸ€– Intent Analysis", f"{intent_text} ({result['intent_score']:.1f})", help="Job seeking likelihood (0-0.3)")
898
-
899
- st.write("**🎯 Matching Skills:**")
900
- skills_per_column = 5
901
- skill_cols = st.columns(2)
902
- for idx, skill in enumerate(result['skills'][:10]):
903
- with skill_cols[idx % 2]:
904
- st.write(f"β€’ {skill}")
905
 
906
  with col2:
907
  st.write("**πŸ’‘ AI-Generated Match Analysis:**")
 
47
  with st.sidebar:
48
  st.title("βš™οΈ Configuration")
49
 
 
 
 
 
 
 
50
  # Advanced options
51
+ st.subheader("Display Options")
52
  top_k = st.selectbox("Number of results to display", options=[1, 2, 3, 4, 5], index=4)
53
 
54
  # LLM Settings
 
56
  st.info("πŸ’‘ Intent analysis using Qwen3-1.7B is always enabled")
57
 
58
  st.markdown("---")
59
+ st.markdown("### πŸ€– Pipeline Overview")
60
+ st.markdown("**5-Stage Advanced Pipeline:**")
61
+ st.markdown("1. FAISS Recall (Top 50)")
62
+ st.markdown("2. Cross-Encoder Re-ranking (Top 20)")
63
+ st.markdown("3. BM25 Keyword Matching")
64
+ st.markdown("4. LLM Intent Analysis")
65
+ st.markdown("5. Combined Scoring")
66
+
 
 
67
  st.markdown("### πŸ“ˆ Scoring Formula")
68
+ st.markdown("**Final Score = Cross-Encoder (0-0.7) + BM25 (0.1-0.2) + Intent (0-0.1)**")
69
 
70
  # Initialize session state
71
  if 'embedding_model' not in st.session_state:
 
126
  try:
127
  with st.spinner("πŸ”„ Loading Cross-Encoder ms-marco-MiniLM-L6-v2..."):
128
  from sentence_transformers import CrossEncoder
129
+ # Try with explicit device specification and logistic scoring
130
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
131
  model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2', device=device)
132
  st.success("βœ… Cross-Encoder model loaded successfully!")
 
134
  except Exception as e:
135
  st.error(f"❌ Error loading Cross-Encoder model: {str(e)}")
136
  try:
137
+ # Fallback: try without device specification but with logistic scoring
138
  st.warning("πŸ”„ Trying Cross-Encoder without device specification...")
139
  model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
140
  st.success("βœ… Cross-Encoder model loaded (fallback)!")
 
277
  # Get job embedding
278
  job_embedding = self.get_embedding(job_description)
279
 
280
+ st.write(f"πŸ”„ Generating embeddings for {len(resume_texts)} resumes...")
281
+
282
  # Get resume embeddings
283
  resume_embeddings = []
284
  progress_bar = st.progress(0)
 
295
 
296
  progress_bar.empty()
297
 
298
+ st.write("πŸ” Building FAISS index and searching...")
299
+
300
  # Create FAISS index
301
  resume_embeddings = np.array(resume_embeddings).astype('float32')
302
  dimension = resume_embeddings.shape[1]
 
307
  job_embedding = job_embedding.reshape(1, -1).astype('float32')
308
  scores, indices = index.search(job_embedding, min(top_k, len(resume_texts)))
309
 
310
+ # Show completion message
311
+ st.write(f"βœ… FAISS recall completed! Found top {min(top_k, len(resume_texts))} candidates.")
312
+
313
  return indices[0].tolist()
314
 
315
  except Exception as e:
 
324
  st.error("Cross-encoder not loaded!")
325
  return [(idx, 0.0) for idx in top_50_indices[:top_k]]
326
 
327
+ st.write(f"πŸ”„ Processing {len(top_50_indices)} candidates with Cross-Encoder...")
328
+
329
  # Prepare pairs for cross-encoder
330
  pairs = []
331
  valid_indices = []
 
339
  valid_indices.append(idx)
340
 
341
  if not pairs:
342
+ st.warning("No valid pairs found for cross-encoder!")
343
  return [(idx, 0.0) for idx in top_50_indices[:top_k]]
344
 
345
+ st.write(f"πŸ“Š Cross-Encoder analyzing {len(pairs)} resume-job pairs...")
346
+
347
  # Get cross-encoder scores
348
  progress_bar = st.progress(0)
349
  scores = []
350
 
351
+ def safe_sigmoid(x):
352
+ """Safe sigmoid function that handles overflow"""
353
+ if x >= 0:
354
+ exp_neg_x = np.exp(-x)
355
+ return 1 / (1 + exp_neg_x)
356
+ else:
357
+ exp_x = np.exp(x)
358
+ return exp_x / (1 + exp_x)
359
+
360
  # Process in batches to avoid memory issues
361
  batch_size = 8
362
  for i in range(0, len(pairs), batch_size):
363
  batch = pairs[i:i+batch_size]
364
+ # Get raw logits from cross-encoder
365
  batch_scores = self.cross_encoder.predict(batch)
366
+ # Apply sigmoid to convert logits to [0,1] range
367
+ batch_scores_sigmoid = [safe_sigmoid(score) for score in batch_scores]
368
+ scores.extend(batch_scores_sigmoid)
369
  progress_bar.progress(min(1.0, (i + batch_size) / len(pairs)))
370
 
371
  progress_bar.empty()
 
374
  indexed_scores = list(zip(valid_indices, scores))
375
  indexed_scores.sort(key=lambda x: x[1], reverse=True)
376
 
377
+ # Normalize scores to 0-0.7 range (highest score becomes 0.7)
378
+ if scores and len(scores) > 0:
379
+ max_score = max(scores)
380
+ min_score = min(scores)
381
+
382
+ if max_score > min_score:
383
+ # Scale to 0-0.7 range
384
+ normalized_indexed_scores = []
385
+ for idx, score in indexed_scores:
386
+ normalized_score = 0.7 * (score - min_score) / (max_score - min_score)
387
+ normalized_indexed_scores.append((idx, normalized_score))
388
+ indexed_scores = normalized_indexed_scores
389
+ else:
390
+ # All scores are the same, give them all 0.35 (middle value)
391
+ indexed_scores = [(idx, 0.35) for idx, _ in indexed_scores]
392
+
393
+ # Show completion message
394
+ st.write(f"βœ… Cross-Encoder completed! Selected top {min(top_k, len(indexed_scores))} candidates.")
395
+ st.write(f"πŸ“Š Cross-Encoder scores normalized to 0-0.7 range (highest: {indexed_scores[0][1]:.3f})")
396
+
397
  return indexed_scores[:top_k]
398
 
399
  except Exception as e:
 
403
  def add_bm25_scores(self, resume_texts, job_description, top_20_results):
404
  """Stage 3: Add BM25 scores to top 20 resumes"""
405
  try:
406
+ st.write(f"πŸ”„ Calculating BM25 keyword scores for {len(top_20_results)} candidates...")
407
+
408
  # Get texts for top 20
409
  top_20_texts = [resume_texts[idx] for idx, _ in top_20_results]
410
 
 
431
  bm25_score = normalized_bm25[i] if i < len(normalized_bm25) else 0.15
432
  results_with_bm25.append((idx, cross_score, bm25_score))
433
 
434
+ st.write(f"βœ… BM25 keyword matching completed!")
435
+
436
  return results_with_bm25
437
 
438
  except Exception as e:
 
446
  progress_bar = st.progress(0)
447
 
448
  for i, (idx, cross_score, bm25_score) in enumerate(top_20_with_bm25):
449
+ candidate_name = st.session_state.file_names[idx] if idx < len(st.session_state.file_names) else f"Resume_{idx}"
450
+ intent_score, intent_text = self.analyze_intent(resume_texts[idx], job_description)
451
+
452
+ # Print the intent analysis result
453
+ st.write(f"πŸ“‹ **{candidate_name}**: Intent = **{intent_text}** (Score: {intent_score:.1f})")
454
+
455
  results_with_intent.append((idx, cross_score, bm25_score, intent_score))
456
  progress_bar.progress((i + 1) / len(top_20_with_bm25))
457
 
 
469
  resume_snippet = resume_text[:1500] if len(resume_text) > 1500 else resume_text
470
  job_snippet = job_description[:800] if len(job_description) > 800 else job_description
471
 
472
+ prompt = f"""You are a helpful HR assistant. Look at this candidate's resume and job posting.
 
473
 
474
+ The candidate is likely a good fit if they have ANY of these:
475
+ - Related work experience (even if different industry)
476
+ - Relevant technical skills
477
+ - Educational background that could apply
478
+ - Any transferable skills
479
+ - Similar job titles or responsibilities
480
+
481
+ Be generous in your assessment. Most candidates who made it this far are potentially suitable.
482
+
483
+ Answer "Yes" for most candidates unless they are completely unrelated.
484
+ Answer "No" only if absolutely no connection exists.
485
+
486
+ Job Posting:
487
  {job_snippet}
488
 
489
  Candidate Resume:
490
  {resume_snippet}
491
 
492
+ Is this candidate suitable? Answer:"""
 
 
493
 
494
  response = generate_qwen3_response(
495
  prompt,
496
  st.session_state.qwen3_intent_tokenizer,
497
  st.session_state.qwen3_intent_model,
498
+ max_new_tokens=20
499
  )
500
 
501
+ # Debug: print the raw response
502
+ print(f"Raw LLM response: '{response}'")
503
+
504
+ # Parse response - look for the answer directly
505
+ response_lower = response.lower().strip()
506
+ if 'yes' in response_lower:
507
+ return 0.1, "Yes"
508
+ elif 'no' in response_lower:
509
+ return 0.0, "No"
510
  else:
511
+ # If no clear answer, default to "Yes" to be more lenient
512
+ print(f"Unclear response, defaulting to Yes: '{response}'")
513
+ return 0.1, "Yes"
514
 
515
  except Exception as e:
516
  st.warning(f"Error analyzing intent: {str(e)}")
517
+ return 0.1, "Yes" # Default to "Yes" instead of "Maybe"
518
 
519
  def calculate_final_scores(self, results_with_all_scores):
520
  """Stage 5: Calculate final combined scores"""
521
  try:
522
+ st.write(f"πŸ”„ Computing final combined scores for {len(results_with_all_scores)} candidates...")
523
+
524
  final_results = []
525
 
526
  for idx, cross_score, bm25_score, intent_score in results_with_all_scores:
527
+ # Cross-encoder scores are already in [0,1] range with logistic scoring
528
+ normalized_cross = cross_score
529
 
530
+ # Final Score = Cross-Encoder (0-0.7) + BM25 (0.1-0.2) + Intent (0-0.1)
531
  final_score = normalized_cross + bm25_score + intent_score
532
 
533
  final_results.append({
 
541
  # Sort by final score
542
  final_results.sort(key=lambda x: x['final_score'], reverse=True)
543
 
544
+ st.write(f"βœ… Final ranking completed! Candidates sorted by combined score.")
545
+
546
  return final_results
547
 
548
  except Exception as e:
549
  st.error(f"Error calculating final scores: {str(e)}")
550
  return []
551
 
552
+ def generate_simple_explanation(self, score, semantic_score, bm25_score):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553
  """Generate simple explanation for the match (fallback)"""
554
  if score > 0.8:
555
  quality = "excellent"
 
567
  else:
568
  explanation += f"The resume has high keyword match ({bm25_score:.2f}) indicating direct skill alignment. "
569
 
 
 
 
570
  return explanation
571
 
572
 
 
793
  name = st.session_state.file_names[idx]
794
  text = st.session_state.resume_texts[idx]
795
 
 
 
 
796
  results.append({
797
  'rank': rank,
798
  'name': name,
 
800
  'cross_encoder_score': result_data['cross_encoder_score'],
801
  'bm25_score': result_data['bm25_score'],
802
  'intent_score': result_data['intent_score'],
803
+ 'skills': [],
804
  'text': text,
805
  'text_preview': text[:500] + "..." if len(text) > 500 else text,
806
  'explanation': None # Will be filled with simple explanation
 
811
  result['explanation'] = screener.generate_simple_explanation(
812
  result['final_score'],
813
  result['cross_encoder_score'],
814
+ result['bm25_score']
 
815
  )
816
 
817
  # Store in session state
 
835
  summary_data = []
836
  for result in st.session_state.results:
837
  # Map intent score to text
838
+ intent_text = "Yes" if result['intent_score'] == 0.1 else "No"
839
 
840
  summary_data.append({
841
  "Rank": result['rank'],
 
843
  "Final Score": f"{result['final_score']:.2f}",
844
  "Cross-Encoder": f"{result['cross_encoder_score']:.2f}",
845
  "BM25": f"{result['bm25_score']:.2f}",
846
+ "Intent": f"{intent_text} ({result['intent_score']:.1f})"
 
847
  })
848
 
849
  summary_df = pd.DataFrame(summary_data)
 
878
  # Download link
879
  detailed_data = []
880
  for result in st.session_state.results:
881
+ intent_text = "Yes" if result['intent_score'] == 0.1 else "No"
882
 
883
  detailed_data.append({
884
  "Rank": result['rank'],
 
888
  "BM25_Score": result['bm25_score'],
889
  "Intent_Score": result['intent_score'],
890
  "Intent_Analysis": intent_text,
 
891
  "AI_Explanation": result['explanation'],
892
  "Resume_Preview": result['text_preview']
893
  })
 
898
  with tab2:
899
  # Detailed results with new scoring breakdown
900
  for result in st.session_state.results:
901
+ intent_text = "Yes" if result['intent_score'] == 0.1 else "No"
902
 
903
  with st.expander(f"#{result['rank']}: {result['name']} (Final Score: {result['final_score']:.2f})"):
904
  col1, col2 = st.columns([1, 2])
 
907
  st.metric("πŸ† Final Score", f"{result['final_score']:.2f}")
908
 
909
  st.write("**πŸ“Š Score Breakdown:**")
910
+ st.metric("🎯 Cross-Encoder", f"{result['cross_encoder_score']:.2f}", help="Semantic relevance (0-0.7)")
911
  st.metric("πŸ”€ BM25 Keywords", f"{result['bm25_score']:.2f}", help="Keyword matching (0.1-0.2)")
912
+ st.metric("πŸ€– Intent Analysis", f"{intent_text} ({result['intent_score']:.2f})", help="Job seeking likelihood (0-0.1)")
 
 
 
 
 
 
 
913
 
914
  with col2:
915
  st.write("**πŸ’‘ AI-Generated Match Analysis:**")