Spaces:

jacob-c
/

Resume_Screener_and_Skill_Extractor

Paused

App Files Files Community

root committed on May 26

Commit

1d59f84

1 Parent(s): a83946a

ss

Browse files

Files changed (1) hide show

app.py +756 -323

app.py CHANGED Viewed

@@ -3,22 +3,22 @@ import pandas as pd
 import numpy as np
 import torch
 import nltk
-import faiss
 import os
 import tempfile
 import base64
-import re
-import io
 from rank_bm25 import BM25Okapi
-from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-from nltk.tokenize import word_tokenize, sent_tokenize
-from tqdm import tqdm
 import pdfplumber
 import PyPDF2
 from docx import Document
 import csv
 from datasets import load_dataset
 import gc
 # Download NLTK resources
 try:
@@ -47,79 +47,93 @@ with st.sidebar:
     # Advanced options
     st.subheader("Advanced Options")
     top_k = st.number_input("Number of results to display", min_value=1, max_value=50, value=10, step=1)
-    use_explanation = st.checkbox("Generate AI Explanations", value=True)
     st.markdown("---")
-    st.markdown("### 🤖 Models Used")
-    st.markdown("- **Embedding**: NVIDIA NV-Embed-v2")
-    st.markdown("- **Explanation**: Qwen3-14B (4-bit)")
-    st.markdown("### 📊 About")
-    st.markdown("This app uses hybrid ranking combining semantic similarity with keyword matching to find the best candidates for job positions.")
 # Initialize session state
 if 'embedding_model' not in st.session_state:
     st.session_state.embedding_model = None
-if 'explanation_model' not in st.session_state:
-    st.session_state.explanation_model = None
 if 'results' not in st.session_state:
     st.session_state.results = []
 @st.cache_resource
 def load_embedding_model():
-    """Load and cache the embedding model"""
     try:
-        with st.spinner("🔄 Loading NVIDIA NV-Embed-v2 model..."):
-            tokenizer = AutoTokenizer.from_pretrained("nvidia/NV-Embed-v2", trust_remote_code=True)
-            model = AutoModel.from_pretrained(
-                "nvidia/NV-Embed-v2",
-                trust_remote_code=True,
-                device_map="auto",
-                torch_dtype=torch.float16
-            )
             st.success("✅ Embedding model loaded successfully!")
-            return model, tokenizer
     except Exception as e:
         st.error(f"❌ Error loading embedding model: {str(e)}")
-        return None, None
 @st.cache_resource
-def load_explanation_model():
-    """Load and cache the explanation model with quantization"""
-    if not use_explanation:
-        return None, None
     try:
-        with st.spinner("🔄 Loading Qwen3-14B model with 4-bit quantization..."):
-            # Configure 4-bit quantization
-            quantization_config = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_quant_type="nf4",
-                bnb_4bit_compute_dtype=torch.float16,
-                bnb_4bit_use_double_quant=True
-            )
-            tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-14B-Instruct", trust_remote_code=True)
-            model = AutoModelForCausalLM.from_pretrained(
-                "Qwen/Qwen2.5-14B-Instruct",
-                quantization_config=quantization_config,
-                device_map="auto",
-                trust_remote_code=True,
-                torch_dtype=torch.float16
-            )
-            st.success("✅ Explanation model loaded successfully!")
-            return model, tokenizer
     except Exception as e:
-        st.error(f"❌ Error loading explanation model: {str(e)}")
-        return None, None
 class ResumeScreener:
     def __init__(self):
         # Load models
-        self.embedding_model, self.embedding_tokenizer = load_embedding_model()
-        if use_explanation:
-            self.explanation_model, self.explanation_tokenizer = load_explanation_model()
-        else:
-            self.explanation_model, self.explanation_tokenizer = None, None
     def extract_text_from_file(self, file_path, file_type):
         """Extract text from various file types"""
@@ -158,48 +172,35 @@ class ResumeScreener:
             return ""
     def get_embedding(self, text):
-        """Generate embedding for text"""
         if self.embedding_model is None:
-            return np.zeros(4096)  # NV-Embed-v2 dimension
         try:
-            # Truncate text to avoid memory issues
-            text = text[:8192]  # Reasonable limit for NV-Embed-v2
-            inputs = self.embedding_tokenizer(
-                text,
-                return_tensors="pt",
-                truncation=True,
-                max_length=512,
-                padding=True
-            )
-            # Move to same device as model
-            device = next(self.embedding_model.parameters()).device
-            inputs = {k: v.to(device) for k, v in inputs.items()}
-            with torch.no_grad():
-                outputs = self.embedding_model(**inputs)
-            # Extract embeddings - NV-Embed-v2 specific
-            if hasattr(outputs, 'pooler_output'):
-                embeddings = outputs.pooler_output
-            elif hasattr(outputs, 'last_hidden_state'):
-                embeddings = outputs.last_hidden_state.mean(dim=1)
-            else:
-                embeddings = outputs[0].mean(dim=1)
-            return embeddings.cpu().numpy().squeeze()
         except Exception as e:
             st.error(f"Error generating embedding: {str(e)}")
-            return np.zeros(4096)
     def calculate_bm25_scores(self, resume_texts, job_description):
         """Calculate BM25 scores for keyword matching"""
         try:
             job_tokens = word_tokenize(job_description.lower())
-            corpus = [word_tokenize(text.lower()) for text in resume_texts if text.strip()]
             if not corpus:
                 return [0.0] * len(resume_texts)
@@ -212,146 +213,382 @@ class ResumeScreener:
             st.error(f"Error calculating BM25 scores: {str(e)}")
             return [0.0] * len(resume_texts)
-    def calculate_hybrid_scores(self, resume_texts, job_description):
-        """Calculate hybrid scores combining semantic and keyword matching"""
-        # Get job embedding
-        job_embedding = self.get_embedding(job_description)
-        # Get resume embeddings
-        resume_embeddings = []
-        progress_bar = st.progress(0)
-        for i, text in enumerate(resume_texts):
-            embedding = self.get_embedding(text)
-            resume_embeddings.append(embedding)
-            progress_bar.progress((i + 1) / len(resume_texts))
-        # Calculate semantic scores (cosine similarity)
-        semantic_scores = []
-        for resume_emb in resume_embeddings:
-            job_norm = job_embedding / (np.linalg.norm(job_embedding) + 1e-8)
-            resume_norm = resume_emb / (np.linalg.norm(resume_emb) + 1e-8)
-            similarity = np.dot(job_norm, resume_norm)
-            semantic_scores.append(float(similarity))
-        # Calculate BM25 scores
-        bm25_scores = self.calculate_bm25_scores(resume_texts, job_description)
-        # Normalize BM25 scores
-        if bm25_scores and max(bm25_scores) > 0:
-            max_bm25 = max(bm25_scores)
-            bm25_scores = [score / max_bm25 for score in bm25_scores]
-        # Calculate hybrid scores
-        hybrid_scores = [
-            (semantic_weight * sem_score) + (keyword_weight * bm25_score)
-            for sem_score, bm25_score in zip(semantic_scores, bm25_scores)
-        ]
-        return hybrid_scores, semantic_scores, bm25_scores
     def extract_skills(self, text, job_description):
         """Extract skills from resume based on job description"""
-        # Common tech skills and job-related terms
         common_skills = [
-            "python", "java", "javascript", "react", "node.js", "sql", "html", "css",
-            "aws", "azure", "docker", "kubernetes", "git", "agile", "scrum", "ci/cd",
-            "machine learning", "data science", "artificial intelligence", "tensorflow",
-            "pytorch", "pandas", "numpy", "scikit-learn", "mysql", "postgresql",
-            "mongodb", "redis", "elasticsearch", "spark", "hadoop", "tableau", "powerbi"
         ]
-        # Extract skills from job description
         job_words = set(word.lower() for word in word_tokenize(job_description) if len(word) > 2)
         # Find matching skills
         found_skills = []
         text_lower = text.lower()
-        # Check common skills
         for skill in common_skills:
-            if skill in text_lower and skill in " ".join(job_words):
                 found_skills.append(skill)
-        # Check job-specific terms
         for word in job_words:
-            if len(word) > 3 and word in text_lower:
-                found_skills.append(word)
-        return list(set(found_skills))[:10]  # Return top 10 unique skills
-    def generate_explanation(self, resume_text, job_description, score, semantic_score, bm25_score, skills):
-        """Generate explanation using Qwen model"""
-        if self.explanation_model is None or self.explanation_tokenizer is None:
-            return self._generate_simple_explanation(score, semantic_score, bm25_score, skills)
-        try:
-            # Create prompt
-            prompt = f"""As a recruitment AI assistant, explain why this resume scored {score:.2f} for the given job position.
-Job Requirements:
-{job_description[:500]}...
-Resume Summary:
-{resume_text[:800]}...
-Scores:
-- Overall: {score:.2f}/1.0
-- Semantic Match: {semantic_score:.2f}/1.0
-- Keyword Match: {bm25_score:.2f}/1.0
-- Key Skills: {', '.join(skills[:5])}
-Provide a concise 2-3 sentence explanation of the match quality and key strengths."""
-            # Generate response
-            messages = [{"role": "user", "content": prompt}]
-            text = self.explanation_tokenizer.apply_chat_template(
-                messages, tokenize=False, add_generation_prompt=True
-            )
-            inputs = self.explanation_tokenizer(text, return_tensors="pt").to(self.explanation_model.device)
-            with torch.no_grad():
-                outputs = self.explanation_model.generate(
-                    **inputs,
-                    max_new_tokens=150,
-                    temperature=0.7,
-                    do_sample=True,
-                    pad_token_id=self.explanation_tokenizer.eos_token_id
-                )
-            response = self.explanation_tokenizer.decode(
-                outputs[0][inputs.input_ids.shape[1]:],
-                skip_special_tokens=True
-            )
-            return response.strip()[:400]  # Limit length
-        except Exception as e:
-            st.warning(f"AI explanation failed: {str(e)}")
-            return self._generate_simple_explanation(score, semantic_score, bm25_score, skills)
-    def _generate_simple_explanation(self, score, semantic_score, bm25_score, skills):
-        """Fallback explanation generation"""
         if score > 0.8:
             quality = "excellent"
         elif score > 0.6:
-            quality = "good"
         elif score > 0.4:
             quality = "moderate"
         else:
             quality = "limited"
-        explanation = f"This resume shows {quality} alignment with the job requirements (score: {score:.2f}). "
         if semantic_score > bm25_score:
-            explanation += f"Strong conceptual match ({semantic_score:.2f}) with relevant experience. "
         else:
-            explanation += f"Good keyword coverage ({bm25_score:.2f}) of job requirements. "
         if skills:
-            explanation += f"Key matching skills: {', '.join(skills[:3])}."
         return explanation
 def create_download_link(df, filename="resume_screening_results.csv"):
     """Create download link for results"""
@@ -361,16 +598,22 @@ def create_download_link(df, filename="resume_screening_results.csv"):
 # Main App Interface
 st.title("🎯 AI-Powered Resume Screener")
-st.markdown("*Find the perfect candidates using advanced AI matching*")
 st.markdown("---")
 # Initialize screener
-if st.session_state.embedding_model is None:
-    screener = ResumeScreener()
-    st.session_state.embedding_model = screener.embedding_model
-    st.session_state.explanation_model = screener.explanation_model
-else:
-    screener = ResumeScreener()
 # Job Description Input
 st.header("📝 Step 1: Enter Job Description")
@@ -383,14 +626,25 @@ job_description = st.text_area(
 # Resume Input Options
 st.header("📄 Step 2: Upload Resumes")
 input_method = st.radio(
     "Choose input method:",
     ["📁 Upload Files", "🗂️ Load from CSV Dataset", "🔗 Load from Hugging Face Dataset"]
 )
-resume_texts = []
-file_names = []
 if input_method == "📁 Upload Files":
     uploaded_files = st.file_uploader(
         "Upload resume files",
@@ -401,23 +655,26 @@ if input_method == "📁 Upload Files":
     if uploaded_files:
         with st.spinner(f"🔄 Processing {len(uploaded_files)} files..."):
             for file in uploaded_files:
                 file_type = file.name.split('.')[-1].lower()
-                # Save temporary file
                 with tempfile.NamedTemporaryFile(delete=False, suffix=f'.{file_type}') as tmp_file:
                     tmp_file.write(file.getvalue())
                     tmp_path = tmp_file.name
-                # Extract text
                 text = screener.extract_text_from_file(tmp_path, file_type)
                 if text.strip():
                     resume_texts.append(text)
                     file_names.append(file.name)
-                # Cleanup
                 os.unlink(tmp_path)
         if resume_texts:
             st.success(f"✅ Successfully processed {len(resume_texts)} resumes")
@@ -442,6 +699,9 @@ elif input_method == "🗂️ Load from CSV Dataset":
             if st.button("🚀 Process CSV Data"):
                 with st.spinner("🔄 Processing CSV data..."):
                     for idx, row in df.iterrows():
                         text = str(row[text_column])
                         if text and text.strip() and text.lower() != 'nan':
@@ -451,6 +711,9 @@ elif input_method == "🗂️ Load from CSV Dataset":
                                 file_names.append(f"Resume_{idx}")
                             else:
                                 file_names.append(str(row[name_column]))
                 if resume_texts:
                     st.success(f"✅ Successfully loaded {len(resume_texts)} resumes from CSV")
@@ -459,7 +722,9 @@ elif input_method == "🗂️ Load from CSV Dataset":
             st.error(f"❌ Error processing CSV: {str(e)}")
 elif input_method == "🔗 Load from Hugging Face Dataset":
-    st.markdown("**Quick Load:** [Resume Atlas Dataset](https://huggingface.co/datasets/ahmedheakl/resume-atlas)")
     col1, col2 = st.columns([2, 1])
     with col1:
@@ -479,35 +744,35 @@ elif input_method == "🔗 Load from Hugging Face Dataset":
             st.success(f"✅ Loaded dataset with {len(dataset)} entries")
             st.write("**Dataset Preview:**")
-            # Show first few examples
             preview_df = pd.DataFrame(dataset[:5])
             st.dataframe(preview_df)
-            # Column selection
             text_column = st.selectbox(
                 "Select column with resume text:",
                 dataset.column_names,
-                index=0 if 'resume_text' in dataset.column_names else 0
             )
             category_column = None
             if 'category' in dataset.column_names:
                 category_column = st.selectbox(
                     "Filter by category (optional):",
-                    ["All"] + list(set(dataset['category']))
                 )
             max_samples = st.slider("Maximum samples to load:", 10, min(1000, len(dataset)), 100)
             if st.button("🚀 Process Dataset"):
                 with st.spinner("🔄 Processing dataset..."):
                     filtered_dataset = dataset
-                    # Apply category filter
                     if category_column and category_column != "All":
                         filtered_dataset = dataset.filter(lambda x: x['category'] == category_column)
-                    # Limit samples
                     sample_indices = list(range(min(max_samples, len(filtered_dataset))))
                     for idx in sample_indices:
@@ -517,11 +782,13 @@ elif input_method == "🔗 Load from Hugging Face Dataset":
                         if text and text.strip() and text.lower() != 'nan':
                             resume_texts.append(text)
-                            # Use ID or index for naming
                             if 'id' in item:
                                 file_names.append(f"Resume_{item['id']}")
                             else:
                                 file_names.append(f"Resume_{idx}")
                 if resume_texts:
                     st.success(f"✅ Successfully loaded {len(resume_texts)} resumes")
@@ -530,142 +797,308 @@ elif input_method == "🔗 Load from Hugging Face Dataset":
             st.error(f"❌ Error loading dataset: {str(e)}")
 # Processing and Results
-if st.button("🔍 Find Best Candidates", disabled=not (job_description and resume_texts)):
-    if len(resume_texts) == 0:
-        st.error("❌ Please upload resumes first!")
-    elif not job_description.strip():
-        st.error("❌ Please enter a job description!")
-    else:
-        with st.spinner("🧠 AI is analyzing resumes..."):
-            # Calculate scores
-            hybrid_scores, semantic_scores, bm25_scores = screener.calculate_hybrid_scores(
-                resume_texts, job_description
-            )
-            # Prepare results
-            results = []
-            for i, (name, text, hybrid_score, semantic_score, bm25_score) in enumerate(
-                zip(file_names, resume_texts, hybrid_scores, semantic_scores, bm25_scores)
-            ):
-                # Extract skills
-                skills = screener.extract_skills(text, job_description)
-                # Generate explanation
-                explanation = ""
-                if use_explanation:
-                    explanation = screener.generate_explanation(
-                        text, job_description, hybrid_score, semantic_score, bm25_score, skills
                     )
-                results.append({
-                    'rank': i + 1,
-                    'name': name,
-                    'score': hybrid_score,
-                    'semantic_score': semantic_score,
-                    'keyword_score': bm25_score,
-                    'skills': skills,
-                    'explanation': explanation,
-                    'text_preview': text[:300] + "..." if len(text) > 300 else text
-                })
-            # Sort by score
-            results.sort(key=lambda x: x['score'], reverse=True)
-            # Update ranks
-            for i, result in enumerate(results):
-                result['rank'] = i + 1
-            # Store in session state
-            st.session_state.results = results[:top_k]
-        st.success(f"🎉 Analysis complete! Found top {len(st.session_state.results)} candidates")
 # Display Results
 if st.session_state.results:
     st.header("🏆 Top Candidates")
-    # Create summary dataframe
-    summary_data = []
-    for result in st.session_state.results:
-        summary_data.append({
-            "Rank": result['rank'],
-            "Candidate": result['name'],
-            "Overall Score": f"{result['score']:.3f}",
-            "Semantic Score": f"{result['semantic_score']:.3f}",
-            "Keyword Score": f"{result['keyword_score']:.3f}",
-            "Key Skills": ", ".join(result['skills'][:3]) + ("..." if len(result['skills']) > 3 else ""),
-        })
-    summary_df = pd.DataFrame(summary_data)
-    st.dataframe(summary_df, use_container_width=True)
-    # Download link
-    detailed_data = []
-    for result in st.session_state.results:
-        detailed_data.append({
-            "Rank": result['rank'],
-            "Candidate": result['name'],
-            "Overall_Score": result['score'],
-            "Semantic_Score": result['semantic_score'],
-            "Keyword_Score": result['keyword_score'],
-            "Skills": "; ".join(result['skills']),
-            "Explanation": result['explanation'],
-            "Resume_Preview": result['text_preview']
-        })
-    download_df = pd.DataFrame(detailed_data)
-    st.markdown(create_download_link(download_df), unsafe_allow_html=True)
-    # Detailed results
-    st.subheader("📋 Detailed Analysis")
-    for result in st.session_state.results:
-        with st.expander(f"🥇 #{result['rank']}: {result['name']} (Score: {result['score']:.3f})"):
-            col1, col2 = st.columns([1, 2])
             with col1:
-                st.metric("Overall Score", f"{result['score']:.3f}")
-                st.metric("Semantic Match", f"{result['semantic_score']:.3f}")
-                st.metric("Keyword Match", f"{result['keyword_score']:.3f}")
-                st.write("**🎯 Key Skills:**")
-                for skill in result['skills'][:8]:
-                    st.write(f"• {skill}")
             with col2:
-                if result['explanation']:
-                    st.write("**🤖 AI Analysis:**")
-                    st.info(result['explanation'])
-                st.write("**📄 Resume Preview:**")
-                st.text_area("", result['text_preview'], height=150, disabled=True, key=f"preview_{result['rank']}")
-    # Score visualization
-    if len(st.session_state.results) > 1:
-        st.subheader("📊 Score Visualization")
-        chart_data = pd.DataFrame({
-            'Candidate': [r['name'] for r in st.session_state.results],
-            'Overall Score': [r['score'] for r in st.session_state.results],
-            'Semantic Score': [r['semantic_score'] for r in st.session_state.results],
-            'Keyword Score': [r['keyword_score'] for r in st.session_state.results]
-        })
-        st.bar_chart(chart_data.set_index('Candidate'))
 # Memory cleanup
-if st.button("🧹 Clear Memory"):
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-    gc.collect()
-    st.success("✅ Memory cleared!")
 # Footer
 st.markdown("---")
 st.markdown(
     """
     <div style='text-align: center; color: #666;'>
-        🚀 Powered by NVIDIA NV-Embed-v2 & Qwen3-14B | Built with Streamlit
     </div>
     """,
     unsafe_allow_html=True

 import numpy as np
 import torch
 import nltk
 import os
 import tempfile
 import base64
 from rank_bm25 import BM25Okapi
+from sentence_transformers import SentenceTransformer, CrossEncoder
+from nltk.tokenize import word_tokenize
 import pdfplumber
 import PyPDF2
 from docx import Document
 import csv
 from datasets import load_dataset
 import gc
+from huggingface_hub import InferenceClient
+import time
+import faiss
+import re
 # Download NLTK resources
 try:
     # Advanced options
     st.subheader("Advanced Options")
     top_k = st.number_input("Number of results to display", min_value=1, max_value=50, value=10, step=1)
+    # LLM Settings
+    st.subheader("LLM Settings")
+    use_llm_explanations = st.checkbox("Generate AI Explanations", value=True)
+    if use_llm_explanations:
+        hf_token = st.text_input("Hugging Face Token (optional)", type="password",
+                                help="Enter your HF token for better rate limits")
     st.markdown("---")
+    st.markdown("### 🤖 Advanced Pipeline")
+    st.markdown("- **Stage 1**: FAISS Recall (Top 50)")
+    st.markdown("- **Stage 2**: Cross-Encoder Re-ranking (Top 20)")
+    st.markdown("- **Stage 3**: BM25 Keyword Matching")
+    st.markdown("- **Stage 4**: LLM Intent Analysis")
+    st.markdown("- **Final**: Combined Scoring (Top 5)")
+    st.markdown("### 📊 Models Used")
+    st.markdown("- **Embedding**: BAAI/bge-large-en-v1.5")
+    st.markdown("- **Cross-Encoder**: ms-marco-MiniLM-L6-v2")
+    st.markdown("- **LLM**: Qwen/Qwen3-14B")
+    st.markdown("### 📈 Scoring Formula")
+    st.markdown("**Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)**")
 # Initialize session state
 if 'embedding_model' not in st.session_state:
     st.session_state.embedding_model = None
+if 'cross_encoder' not in st.session_state:  # each default below is set only when the key is absent
+    st.session_state.cross_encoder = None
 if 'results' not in st.session_state:
     st.session_state.results = []
+if 'resume_texts' not in st.session_state:
+    st.session_state.resume_texts = []
+if 'file_names' not in st.session_state:
+    st.session_state.file_names = []
+if 'llm_client' not in st.session_state:
+    st.session_state.llm_client = None
+if 'explanations_generated' not in st.session_state:
+    st.session_state.explanations_generated = False
+if 'current_job_description' not in st.session_state:
+    st.session_state.current_job_description = ""
 @st.cache_resource
 def load_embedding_model():
+    """Load the BAAI/bge-large-en-v1.5 SentenceTransformer; return None (after st.error) if loading raises."""
     try:
+        with st.spinner("🔄 Loading BAAI/bge-large-en-v1.5 model..."):
+            model = SentenceTransformer('BAAI/bge-large-en-v1.5')
             st.success("✅ Embedding model loaded successfully!")
+            return model
     except Exception as e:
         st.error(f"❌ Error loading embedding model: {str(e)}")
+        return None  # callers must handle a missing model
 @st.cache_resource
+def load_cross_encoder():
+    """Load the ms-marco MiniLM cross-encoder used for re-ranking.
+
+    Returns:
+        The CrossEncoder instance, or None when loading raises (the error
+        is reported through st.error instead of propagating).
+    """
     try:
+        with st.spinner("🔄 Loading Cross-Encoder ms-marco-MiniLM-L6-v2..."):
+            # CrossEncoder is already imported at module level from
+            # sentence_transformers, so no function-local import is needed.
+            model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
+            st.success("✅ Cross-Encoder model loaded successfully!")
+            return model
+    except Exception as e:
+        st.error(f"❌ Error loading Cross-Encoder model: {str(e)}")
+        return None
+def initialize_llm_client(hf_token=None):
+    """Build a Hugging Face InferenceClient for Qwen/Qwen3-14B.
+
+    An empty or missing token is forwarded as None. If constructing the
+    client raises, the error is shown with st.error and None is returned.
+    """
+    try:
+        return InferenceClient(model="Qwen/Qwen3-14B", token=hf_token or None)
     except Exception as e:
+        st.error(f"❌ Error initializing LLM client: {str(e)}")
+        return None
 class ResumeScreener:
     def __init__(self):
         # Load models
+        self.embedding_model = load_embedding_model()  # None if loading failed
+        self.cross_encoder = load_cross_encoder()  # None if loading failed
+        self.llm_client = None  # assigned later through set_llm_client()
+    def set_llm_client(self, client):
+        """Store `client` on the instance as self.llm_client."""
+        self.llm_client = client
     def extract_text_from_file(self, file_path, file_type):
         """Extract text from various file types"""
             return ""
     def get_embedding(self, text):
+        """Return the BGE embedding of `text`, or a 1024-dim zero vector when no model is loaded or encoding raises."""
         if self.embedding_model is None:
+            st.error("No embedding model loaded!")
+            return np.zeros(1024)  # BGE-large dimension
         try:
+            # The BGE retrieval instruction is prepended only to texts shorter than 500 characters.
+            # NOTE(review): length is used as a proxy for "this is a query"; a job description of 500+ characters gets no instruction — confirm this is intended.
+            if len(text) < 500:  # Assuming shorter texts are queries
+                text = "Represent this sentence for searching relevant passages: " + text
+            # Cap the input at 8192 characters (applied after the instruction is prepended)
+            text = text[:8192] if text else ""
+            # Encode to a NumPy vector with normalize_embeddings=True
+            embedding = self.embedding_model.encode(text,
+                                                  convert_to_numpy=True,
+                                                  normalize_embeddings=True)
+            return embedding
         except Exception as e:
             st.error(f"Error generating embedding: {str(e)}")
+            return np.zeros(1024)  # BGE-large dimension
     def calculate_bm25_scores(self, resume_texts, job_description):
         """Calculate BM25 scores for keyword matching"""
         try:
             job_tokens = word_tokenize(job_description.lower())
+            corpus = [word_tokenize(text.lower()) for text in resume_texts if text and text.strip()]
             if not corpus:
                 return [0.0] * len(resume_texts)
             st.error(f"Error calculating BM25 scores: {str(e)}")
             return [0.0] * len(resume_texts)
+    def advanced_pipeline_ranking(self, resume_texts, job_description,
+                                  recall_k=50, rerank_k=20, final_k=5):
+        """Run the staged ranking pipeline and return the best candidates.
+
+        Stages: FAISS recall -> cross-encoder re-rank -> BM25 scores ->
+        LLM intent scores -> final combined ranking. The intent-scoring and
+        final-scoring helpers are defined outside this view, so the shape of
+        each returned entry is whatever calculate_final_scores produces.
+
+        Args:
+            resume_texts: Resume texts to rank.
+            job_description: Job description the resumes are matched against.
+            recall_k: Number of candidates kept by the FAISS recall stage.
+            rerank_k: Number of candidates kept after cross-encoder re-ranking.
+            final_k: Number of results returned from the final ranking.
+
+        Returns:
+            The first `final_k` entries of the final ranking, or an empty
+            list when `resume_texts` is empty.
+        """
+        if not resume_texts:
+            return []
+        # Stage 1: FAISS recall
+        st.write(f"🔍 **Stage 1**: FAISS Recall - Finding top {recall_k} candidates...")
+        recalled_indices = self.faiss_recall(resume_texts, job_description, top_k=recall_k)
+        # Stage 2: cross-encoder re-ranking of the recalled candidates
+        st.write(f"🎯 **Stage 2**: Cross-Encoder Re-ranking - Selecting top {rerank_k}...")
+        reranked = self.cross_encoder_rerank(resume_texts, job_description, recalled_indices, top_k=rerank_k)
+        # Stage 3: BM25 keyword matching
+        st.write("🔤 **Stage 3**: BM25 Keyword Matching...")
+        with_bm25 = self.add_bm25_scores(resume_texts, job_description, reranked)
+        # Stage 4: LLM intent analysis
+        st.write("🤖 **Stage 4**: LLM Intent Analysis...")
+        with_intent = self.add_intent_scores(resume_texts, job_description, with_bm25)
+        # Stage 5: final combined ranking, truncated to the requested size
+        st.write("🏆 **Stage 5**: Final Combined Ranking...")
+        final_results = self.calculate_final_scores(with_intent)
+        return final_results[:final_k]
+    def faiss_recall(self, resume_texts, job_description, top_k=50):
+        """Stage 1: recall the resumes most similar to the job description.
+
+        Each non-empty resume is encoded (first 8192 characters, normalized
+        embeddings); empty entries get a 1024-dim zero vector. The vectors
+        are indexed in a FAISS inner-product index and searched with the
+        job-description embedding from get_embedding.
+
+        Args:
+            resume_texts: Resume texts to search.
+            job_description: Job description used as the query.
+            top_k: Maximum number of resume indices to return.
+
+        Returns:
+            Indices into `resume_texts` in the order returned by the FAISS
+            search. On any error the first min(top_k, len(resume_texts))
+            indices are returned instead; an empty input yields [].
+        """
+        # Nothing to index: return directly instead of going through the error path.
+        if not resume_texts:
+            return []
+        try:
+            job_embedding = self.get_embedding(job_description)
+            total = len(resume_texts)
+            resume_embeddings = []
+            progress_bar = st.progress(0)
+            try:
+                for i, text in enumerate(resume_texts):
+                    if text:
+                        embedding = self.embedding_model.encode(text[:8192],
+                                                              convert_to_numpy=True,
+                                                              normalize_embeddings=True)
+                    else:
+                        # Keep positions aligned with resume_texts for empty entries.
+                        embedding = np.zeros(1024)
+                    resume_embeddings.append(embedding)
+                    progress_bar.progress((i + 1) / total)
+            finally:
+                # Remove the progress bar even when encoding raises.
+                progress_bar.empty()
+            # Inner product over normalized vectors is used as cosine similarity.
+            matrix = np.array(resume_embeddings).astype('float32')
+            index = faiss.IndexFlatIP(matrix.shape[1])
+            index.add(matrix)
+            query = job_embedding.reshape(1, -1).astype('float32')
+            _, indices = index.search(query, min(top_k, total))
+            return indices[0].tolist()
+        except Exception as e:
+            st.error(f"Error in FAISS recall: {str(e)}")
+            # Fallback: return the first top_k indices in input order
+            return list(range(min(top_k, len(resume_texts))))
+    def cross_encoder_rerank(self, resume_texts, job_description, top_50_indices, top_k=20):
+        """Stage 2: re-rank recalled resumes with the cross-encoder.
+
+        Each candidate is scored on the pair (first 512 characters of the
+        job description, first 512 characters of the resume), in batches
+        of 8.
+
+        Args:
+            resume_texts: All resume texts.
+            job_description: Job description to score against.
+            top_50_indices: Candidate indices into `resume_texts`.
+            top_k: Maximum number of (index, score) pairs to return.
+
+        Returns:
+            Up to `top_k` (index, score) tuples sorted by descending score.
+            When the cross-encoder is missing, no candidate has text, or an
+            error occurs, the first `top_k` candidates are returned with a
+            score of 0.0.
+        """
+        try:
+            if self.cross_encoder is None:
+                st.error("Cross-encoder not loaded!")
+                return [(idx, 0.0) for idx in top_50_indices[:top_k]]
+            # The job snippet is the same for every pair, so build it once.
+            job_snippet = job_description[:512]
+            valid_indices = [
+                idx for idx in top_50_indices
+                if idx < len(resume_texts) and resume_texts[idx]
+            ]
+            pairs = [[job_snippet, resume_texts[idx][:512]] for idx in valid_indices]
+            if not pairs:
+                return [(idx, 0.0) for idx in top_50_indices[:top_k]]
+            scores = []
+            # Process in batches to avoid memory issues
+            batch_size = 8
+            progress_bar = st.progress(0)
+            try:
+                for start in range(0, len(pairs), batch_size):
+                    batch = pairs[start:start + batch_size]
+                    scores.extend(self.cross_encoder.predict(batch))
+                    progress_bar.progress(min(1.0, (start + batch_size) / len(pairs)))
+            finally:
+                # Remove the progress bar even when prediction raises.
+                progress_bar.empty()
+            ranked = sorted(zip(valid_indices, scores), key=lambda pair: pair[1], reverse=True)
+            return ranked[:top_k]
+        except Exception as e:
+            st.error(f"Error in cross-encoder re-ranking: {str(e)}")
+            return [(idx, 0.0) for idx in top_50_indices[:top_k]]
+    def add_bm25_scores(self, resume_texts, job_description, top_20_results):
+        """Stage 3: Add BM25 scores to top 20 resumes"""
+        try:
+            # Get texts for top 20
+            top_20_texts = [resume_texts[idx] for idx, _ in top_20_results]
+            # Calculate BM25 scores
+            bm25_scores = self.calculate_bm25_scores(top_20_texts, job_description)
+            # Normalize BM25 scores to 0.1-0.2 range
+            if bm25_scores and max(bm25_scores) > 0:
+                max_bm25 = max(bm25_scores)
+                min_bm25 = min(bm25_scores)
+                if max_bm25 > min_bm25:
+                    normalized_bm25 = [
+                        0.1 + 0.1 * (score - min_bm25) / (max_bm25 - min_bm25)
+                        for score in bm25_scores
+                    ]
+                else:
+                    normalized_bm25 = [0.15] * len(bm25_scores)
+            else:
+                normalized_bm25 = [0.15] * len(top_20_results)
+            # Combine with existing results
+            results_with_bm25 = []
+            for i, (idx, cross_score) in enumerate(top_20_results):
+                bm25_score = normalized_bm25[i] if i < len(normalized_bm25) else 0.15
+                results_with_bm25.append((idx, cross_score, bm25_score))
+            return results_with_bm25
+        except Exception as e:
+            st.error(f"Error adding BM25 scores: {str(e)}")
+            return [(idx, cross_score, 0.15) for idx, cross_score in top_20_results]
+    def add_intent_scores(self, resume_texts, job_description, top_20_with_bm25):
+        """Stage 4: Add LLM intent analysis scores"""
+        try:
+            if not self.llm_client:
+                st.warning("LLM client not available. Using default intent scores.")
+                return [(idx, cross_score, bm25_score, 0.1) for idx, cross_score, bm25_score in top_20_with_bm25]
+            results_with_intent = []
+            progress_bar = st.progress(0)
+            for i, (idx, cross_score, bm25_score) in enumerate(top_20_with_bm25):
+                intent_score = self.analyze_intent(resume_texts[idx], job_description)
+                results_with_intent.append((idx, cross_score, bm25_score, intent_score))
+                progress_bar.progress((i + 1) / len(top_20_with_bm25))
+            progress_bar.empty()
+            return results_with_intent
+        except Exception as e:
+            st.error(f"Error adding intent scores: {str(e)}")
+            return [(idx, cross_score, bm25_score, 0.1) for idx, cross_score, bm25_score in top_20_with_bm25]
+    def analyze_intent(self, resume_text, job_description):
+        """Analyze candidate's intent using LLM"""
+        try:
+            # Truncate texts
+            resume_snippet = resume_text[:1500] if len(resume_text) > 1500 else resume_text
+            job_snippet = job_description[:800] if len(job_description) > 800 else job_description
+            prompt = f"""You are given a job description and a candidate's resume.
+Clearly answer: "Is the candidate likely seeking this job? Respond with 'Yes', 'Maybe', or 'No' and give a brief justification."
+Job Description:
+"""
+{job_snippet}
+"""
+Candidate Resume:
+"""
+{resume_snippet}
+"""
+Response format:
+Intent: [Yes/Maybe/No]
+Reason: [Brief justification]"""
+            response = self.llm_client.text_generation(
+                prompt,
+                max_new_tokens=100,
+                temperature=0.3,
+                top_p=0.9,
+                do_sample=True
+            )
+            # Parse response
+            response_lower = response.lower()
+            if 'intent: yes' in response_lower or 'intent:yes' in response_lower:
+                return 0.3
+            elif 'intent: maybe' in response_lower or 'intent:maybe' in response_lower:
+                return 0.1
+            else:
+                return 0.0
+        except Exception as e:
+            st.warning(f"Error analyzing intent: {str(e)}")
+            return 0.1  # Default to "Maybe"
+    def calculate_final_scores(self, results_with_all_scores):
+        """Stage 5: Calculate final combined scores"""
+        try:
+            final_results = []
+            for idx, cross_score, bm25_score, intent_score in results_with_all_scores:
+                # Normalize cross-encoder score to 0-1 range
+                normalized_cross = max(0, min(1, cross_score))
+                # Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)
+                final_score = normalized_cross + bm25_score + intent_score
+                final_results.append({
+                    'index': idx,
+                    'cross_encoder_score': normalized_cross,
+                    'bm25_score': bm25_score,
+                    'intent_score': intent_score,
+                    'final_score': final_score
+                })
+            # Sort by final score
+            final_results.sort(key=lambda x: x['final_score'], reverse=True)
+            return final_results
+        except Exception as e:
+            st.error(f"Error calculating final scores: {str(e)}")
+            return []
     def extract_skills(self, text, job_description):
         """Extract skills from resume based on job description"""
+        if not text:
+            return []
+        # Common tech skills
         common_skills = [
+            "python", "java", "javascript", "react", "angular", "vue", "node.js",
+            "express", "django", "flask", "spring", "sql", "nosql", "html", "css",
+            "aws", "azure", "gcp", "docker", "kubernetes", "jenkins", "git", "github",
+            "agile", "scrum", "jira", "ci/cd", "devops", "microservices", "rest", "api",
+            "machine learning", "deep learning", "data science", "artificial intelligence",
+            "tensorflow", "pytorch", "keras", "scikit-learn", "pandas", "numpy",
+            "matplotlib", "seaborn", "jupyter", "r", "sas", "spss", "tableau", "powerbi",
+            "excel", "mysql", "postgresql", "mongodb", "redis", "elasticsearch",
+            "kafka", "rabbitmq", "spark", "hadoop", "hive", "airflow", "linux", "unix"
         ]
+        # Extract potential skills from job description
         job_words = set(word.lower() for word in word_tokenize(job_description) if len(word) > 2)
         # Find matching skills
         found_skills = []
         text_lower = text.lower()
+        # Check common skills that appear in both resume and job description
         for skill in common_skills:
+            if skill in text_lower and any(skill in job_word for job_word in job_words):
                 found_skills.append(skill)
+        # Check for skills mentioned in job description
         for word in job_words:
+            if len(word) > 3 and word in text_lower and word not in found_skills:
+                # Basic filter to avoid common words
+                if word not in ['with', 'have', 'that', 'this', 'from', 'what', 'when', 'where']:
+                    found_skills.append(word)
+        return list(set(found_skills))[:15]  # Return top 15 unique skills
+    def generate_simple_explanation(self, score, semantic_score, bm25_score, skills):
+        """Generate simple explanation for the match (fallback)"""
         if score > 0.8:
             quality = "excellent"
         elif score > 0.6:
+            quality = "strong"
         elif score > 0.4:
             quality = "moderate"
         else:
             quality = "limited"
+        explanation = f"This candidate shows {quality} alignment with the position (score: {score:.2f}). "
         if semantic_score > bm25_score:
+            explanation += f"The resume demonstrates strong conceptual relevance ({semantic_score:.2f}) suggesting good experience fit. "
         else:
+            explanation += f"The resume has high keyword match ({bm25_score:.2f}) indicating direct skill alignment. "
         if skills:
+            explanation += f"Key matching competencies include: {', '.join(skills[:5])}."
         return explanation
+    def generate_llm_explanation(self, resume_text, job_description, score, skills, max_retries=3):
+        """Generate detailed explanation using Qwen3-14B"""
+        if not self.llm_client:
+            return self.generate_simple_explanation(score, score, score, skills)
+        # Truncate texts to manage token limits
+        resume_snippet = resume_text[:2000] if len(resume_text) > 2000 else resume_text
+        job_snippet = job_description[:1000] if len(job_description) > 1000 else job_description
+        prompt = f"""You are an expert HR analyst. Analyze this individual candidate's resume against the job requirements and write EXACTLY 150 words explaining why this specific candidate is suitable for the position.
+Structure your 150-word analysis as follows:
+1. Experience alignment (40-50 words)
+2. Key strengths and skills match (40-50 words)
+3. Unique value proposition (40-50 words)
+4. Overall recommendation (10-20 words)
+Job Requirements:
+{job_snippet}
+Candidate's Resume:
+{resume_snippet}
+Identified Matching Skills: {', '.join(skills[:10])}
+Compatibility Score: {score:.1%}
+Write a professional, detailed 150-word analysis for THIS INDIVIDUAL CANDIDATE:"""
+        for attempt in range(max_retries):
+            try:
+                response = self.llm_client.text_generation(
+                    prompt,
+                    max_new_tokens=200,
+                    temperature=0.7,
+                    top_p=0.9,
+                    do_sample=True
+                )
+                # Extract the response and ensure it's about 150 words
+                explanation = response.strip()
+                word_count = len(explanation.split())
+                # If response is close to 150 words (130-170), accept it
+                if 130 <= word_count <= 170:
+                    return explanation
+                # If response is too short or too long, try again with adjusted prompt
+                if word_count < 130:
+                    # Response too short, try again
+                    continue
+                elif word_count > 170:
+                    # Response too long, truncate to approximately 150 words
+                    words = explanation.split()
+                    truncated = ' '.join(words[:150])
+                    # Add proper ending if truncated
+                    if not truncated.endswith('.'):
+                        truncated += '.'
+                    return truncated
+                return explanation
+            except Exception as e:
+                if attempt < max_retries - 1:
+                    time.sleep(2)  # Wait before retry
+                    continue
+                else:
+                    # Fallback to simple explanation
+                    return self.generate_simple_explanation(score, score, score, skills)
+        # If all retries failed, use simple explanation
+        return self.generate_simple_explanation(score, score, score, skills)
 def create_download_link(df, filename="resume_screening_results.csv"):
     """Create download link for results"""
 # Main App Interface
 st.title("🎯 AI-Powered Resume Screener")
+st.markdown("*Find the perfect candidates using BAAI/bge-large-en-v1.5 embeddings and Qwen3-14B explanations*")
 st.markdown("---")
 # Initialize screener
+screener = ResumeScreener()
+# Initialize LLM client if enabled
+if use_llm_explanations:
+    if 'hf_token' in locals() and hf_token:
+        if st.session_state.llm_client is None:
+            st.session_state.llm_client = initialize_llm_client(hf_token)
+    else:
+        if st.session_state.llm_client is None:
+            st.session_state.llm_client = initialize_llm_client()
+    screener.set_llm_client(st.session_state.llm_client)
 # Job Description Input
 st.header("📝 Step 1: Enter Job Description")
 # Resume Input Options
 st.header("📄 Step 2: Upload Resumes")
+# Show loaded resumes indicator
+if st.session_state.resume_texts:
+    col1, col2 = st.columns([3, 1])
+    with col1:
+        st.info(f"📚 {len(st.session_state.resume_texts)} resumes loaded and ready for analysis")
+    with col2:
+        if st.button("🗑️ Clear Resumes", type="secondary", help="Clear all loaded resumes to start fresh"):
+            st.session_state.resume_texts = []
+            st.session_state.file_names = []
+            st.session_state.results = []
+            st.session_state.explanations_generated = False
+            st.session_state.current_job_description = ""
+            st.rerun()
 input_method = st.radio(
     "Choose input method:",
     ["📁 Upload Files", "🗂️ Load from CSV Dataset", "🔗 Load from Hugging Face Dataset"]
 )
+# NOTE(review): this listing is a collapsed diff view; parts of the upload and
+# CSV branches (e.g. the rest of the file_uploader call) are not shown here.
 if input_method == "📁 Upload Files":
     uploaded_files = st.file_uploader(
         "Upload resume files",
     if uploaded_files:
         with st.spinner(f"🔄 Processing {len(uploaded_files)} files..."):
+            resume_texts = []
+            file_names = []
             for file in uploaded_files:
+                # Extension after the last dot, lower-cased
                 file_type = file.name.split('.')[-1].lower()
+                # Write the upload to a temporary file so it can be passed by path
                 with tempfile.NamedTemporaryFile(delete=False, suffix=f'.{file_type}') as tmp_file:
                     tmp_file.write(file.getvalue())
                     tmp_path = tmp_file.name
                 text = screener.extract_text_from_file(tmp_path, file_type)
+                # Keep only files that produced non-blank text
                 if text.strip():
                     resume_texts.append(text)
                     file_names.append(file.name)
+                # The temp file was created with delete=False, so remove it explicitly
                 os.unlink(tmp_path)
+            # Store on session state, which the analysis step reads from
+            st.session_state.resume_texts = resume_texts
+            st.session_state.file_names = file_names
         if resume_texts:
             st.success(f"✅ Successfully processed {len(resume_texts)} resumes")
             if st.button("🚀 Process CSV Data"):
                 with st.spinner("🔄 Processing CSV data..."):
+                    resume_texts = []
+                    file_names = []
                     for idx, row in df.iterrows():
                         text = str(row[text_column])
+                        # Skip blank cells and the literal string 'nan'
                         if text and text.strip() and text.lower() != 'nan':
                                 file_names.append(f"Resume_{idx}")
                             else:
                                 file_names.append(str(row[name_column]))
+                    # Store on session state, which the analysis step reads from
+                    st.session_state.resume_texts = resume_texts
+                    st.session_state.file_names = file_names
                 if resume_texts:
                     st.success(f"✅ Successfully loaded {len(resume_texts)} resumes from CSV")
             st.error(f"❌ Error processing CSV: {str(e)}")
 elif input_method == "🔗 Load from Hugging Face Dataset":
+    # NOTE(review): this branch is shown through a collapsed diff view; the
+    # dataset loading code and its try/except wrapper are not visible here.
+    st.markdown("**Popular Resume Datasets:**")
+    st.markdown("- `ahmedheakl/resume-atlas`")
+    st.markdown("- `InferenceFly/Resume-Dataset`")
     col1, col2 = st.columns([2, 1])
     with col1:
             st.success(f"✅ Loaded dataset with {len(dataset)} entries")
             st.write("**Dataset Preview:**")
             preview_df = pd.DataFrame(dataset[:5])
             st.dataframe(preview_df)
             text_column = st.selectbox(
                 "Select column with resume text:",
                 dataset.column_names,
+                # Preselect the 'resume_text' column when the dataset has one
+                index=dataset.column_names.index('resume_text') if 'resume_text' in dataset.column_names else 0
             )
             category_column = None
             if 'category' in dataset.column_names:
+                # Distinct category values offered as filter choices
+                categories = list(set(dataset['category']))
                 category_column = st.selectbox(
                     "Filter by category (optional):",
+                    ["All"] + categories
                 )
+            # NOTE(review): the slider maximum is min(1000, len(dataset)) while
+            # the minimum is 10 and the default is 100 -- confirm this behaves
+            # for datasets with fewer than 100 entries.
             max_samples = st.slider("Maximum samples to load:", 10, min(1000, len(dataset)), 100)
             if st.button("🚀 Process Dataset"):
                 with st.spinner("🔄 Processing dataset..."):
+                    resume_texts = []
+                    file_names = []
                     filtered_dataset = dataset
+                    # Apply the category filter unless "All" (or nothing) is selected
                     if category_column and category_column != "All":
                         filtered_dataset = dataset.filter(lambda x: x['category'] == category_column)
+                    # Take the first max_samples rows of the (filtered) dataset
                     sample_indices = list(range(min(max_samples, len(filtered_dataset))))
                     for idx in sample_indices:
+                        # Skip blank entries and the literal string 'nan'
                         if text and text.strip() and text.lower() != 'nan':
                             resume_texts.append(text)
+                            # Name the resume after its 'id' field when present, else its position
                             if 'id' in item:
                                 file_names.append(f"Resume_{item['id']}")
                             else:
                                 file_names.append(f"Resume_{idx}")
+                    # Store on session state, which the analysis step reads from
+                    st.session_state.resume_texts = resume_texts
+                    st.session_state.file_names = file_names
                 if resume_texts:
                     st.success(f"✅ Successfully loaded {len(resume_texts)} resumes")
             st.error(f"❌ Error loading dataset: {str(e)}")
 # Processing and Results
+st.header("🔍 Step 3: Analyze Resumes")
+# First button: Find top K candidates (fast ranking)
+col1, col2 = st.columns([1, 1])
+with col1:
+    if st.button("🚀 Advanced Pipeline Analysis",
+                 disabled=not (job_description and st.session_state.resume_texts),
+                 type="primary",
+                 help="Run the complete 5-stage advanced pipeline"):
+        if len(st.session_state.resume_texts) == 0:
+            st.error("❌ Please upload resumes first!")
+        elif not job_description.strip():
+            st.error("❌ Please enter a job description!")
+        else:
+            with st.spinner("🚀 Running Advanced Pipeline Analysis..."):
+                try:
+                    # Run the advanced pipeline
+                    pipeline_results = screener.advanced_pipeline_ranking(
+                        st.session_state.resume_texts, job_description
                     )
+                    # Prepare results for display
+                    results = []
+                    for rank, result_data in enumerate(pipeline_results, 1):
+                        idx = result_data['index']
+                        name = st.session_state.file_names[idx]
+                        text = st.session_state.resume_texts[idx]
+                        # Extract skills
+                        skills = screener.extract_skills(text, job_description)
+                        results.append({
+                            'rank': rank,
+                            'name': name,
+                            'final_score': result_data['final_score'],
+                            'cross_encoder_score': result_data['cross_encoder_score'],
+                            'bm25_score': result_data['bm25_score'],
+                            'intent_score': result_data['intent_score'],
+                            'skills': skills,
+                            'text': text,
+                            'text_preview': text[:500] + "..." if len(text) > 500 else text,
+                            'explanation': None  # No detailed explanation yet
+                        })
+                    # Add simple explanations for now
+                    for result in results:
+                        result['explanation'] = screener.generate_simple_explanation(
+                            result['final_score'],
+                            result['cross_encoder_score'],
+                            result['bm25_score'],
+                            result['skills']
+                        )
+                    # Store in session state
+                    st.session_state.results = results
+                    st.session_state.explanations_generated = False
+                    st.session_state.current_job_description = job_description
+                    st.success(f"🚀 Advanced pipeline complete! Found top {len(st.session_state.results)} candidates.")
+                except Exception as e:
+                    st.error(f"❌ Error during analysis: {str(e)}")
+# Second button: Generate AI explanations (slower, optional)
+with col2:
+    # Show this button only if we have results and LLM is enabled
+    show_explanation_button = (
+        st.session_state.results and
+        use_llm_explanations and
+        st.session_state.llm_client and
+        not st.session_state.explanations_generated
+    )
+    if show_explanation_button:
+        if st.button("🤖 Generate AI Explanations",
+                     type="secondary",
+                     help="Generate detailed 150-word explanations using Qwen3-14B (takes longer)"):
+            with st.spinner("🤖 Generating detailed AI explanations..."):
+                try:
+                    explanation_progress = st.progress(0)
+                    explanation_text = st.empty()
+                    for i, result in enumerate(st.session_state.results):
+                        explanation_text.text(f"🤖 Generating AI explanation for candidate {i+1}/{len(st.session_state.results)}...")
+                        llm_explanation = screener.generate_llm_explanation(
+                            result['text'],
+                            st.session_state.current_job_description,
+                            result['final_score'],
+                            result['skills']
+                        )
+                        result['explanation'] = llm_explanation
+                        explanation_progress.progress((i + 1) / len(st.session_state.results))
+                    explanation_progress.empty()
+                    explanation_text.empty()
+                    # Mark explanations as generated
+                    st.session_state.explanations_generated = True
+                    st.success(f"🤖 AI explanations generated for all {len(st.session_state.results)} candidates!")
+                except Exception as e:
+                    st.error(f"❌ Error generating explanations: {str(e)}")
+    elif st.session_state.results and st.session_state.explanations_generated:
+        st.info("✅ AI explanations already generated!")
+    elif st.session_state.results and not use_llm_explanations:
+        st.info("💡 Enable 'Generate AI Explanations' in sidebar to use this feature")
+    elif st.session_state.results and not st.session_state.llm_client:
+        st.warning("⚠️ LLM client not available. Check your Hugging Face token.")
 # Display Results
 if st.session_state.results:
     st.header("🏆 Top Candidates")
+    # Create tabs for different views
+    tab1, tab2, tab3 = st.tabs(["📊 Summary", "📋 Detailed Analysis", "📈 Visualizations"])
+    with tab1:
+        # Create summary dataframe with new scoring system
+        summary_data = []
+        for result in st.session_state.results:
+            # Map intent score to text
+            intent_text = "Yes" if result['intent_score'] == 0.3 else "Maybe" if result['intent_score'] == 0.1 else "No"
+            summary_data.append({
+                "Rank": result['rank'],
+                "Candidate": result['name'],
+                "Final Score": f"{result['final_score']:.2f}",
+                "Cross-Encoder": f"{result['cross_encoder_score']:.2f}",
+                "BM25": f"{result['bm25_score']:.2f}",
+                "Intent": f"{intent_text} ({result['intent_score']:.1f})",
+                "Top Skills": ", ".join(result['skills'][:5])
+            })
+        summary_df = pd.DataFrame(summary_data)
+        # Style the dataframe
+        def color_scores(val):
+            if isinstance(val, str) and any(char.isdigit() for char in val):
+                try:
+                    # Extract numeric value
+                    numeric_val = float(''.join(c for c in val if c.isdigit() or c == '.'))
+                    if 'Final Score' in val or numeric_val >= 1.0:
+                        if numeric_val >= 1.2:
+                            return 'background-color: #d4edda'
+                        elif numeric_val >= 1.0:
+                            return 'background-color: #fff3cd'
+                        else:
+                            return 'background-color: #f8d7da'
+                    else:
+                        if numeric_val >= 0.7:
+                            return 'background-color: #d4edda'
+                        elif numeric_val >= 0.5:
+                            return 'background-color: #fff3cd'
+                        else:
+                            return 'background-color: #f8d7da'
+                except:
+                    pass
+            return ''
+        styled_df = summary_df.style.applymap(color_scores, subset=['Final Score', 'Cross-Encoder', 'BM25'])
+        st.dataframe(styled_df, use_container_width=True)
+        # Download link
+        detailed_data = []
+        for result in st.session_state.results:
+            intent_text = "Yes" if result['intent_score'] == 0.3 else "Maybe" if result['intent_score'] == 0.1 else "No"
+            detailed_data.append({
+                "Rank": result['rank'],
+                "Candidate": result['name'],
+                "Final_Score": result['final_score'],
+                "Cross_Encoder_Score": result['cross_encoder_score'],
+                "BM25_Score": result['bm25_score'],
+                "Intent_Score": result['intent_score'],
+                "Intent_Analysis": intent_text,
+                "Skills": "; ".join(result['skills']),
+                "AI_Explanation": result['explanation'],
+                "Resume_Preview": result['text_preview']
+            })
+        download_df = pd.DataFrame(detailed_data)
+        st.markdown(create_download_link(download_df), unsafe_allow_html=True)
+    with tab2:
+        # Detailed results with new scoring breakdown
+        for result in st.session_state.results:
+            intent_text = "Yes" if result['intent_score'] == 0.3 else "Maybe" if result['intent_score'] == 0.1 else "No"
+            with st.expander(f"#{result['rank']}: {result['name']} (Final Score: {result['final_score']:.2f})"):
+                col1, col2 = st.columns([1, 2])
+                with col1:
+                    st.metric("🏆 Final Score", f"{result['final_score']:.2f}")
+                    st.write("**📊 Score Breakdown:**")
+                    st.metric("🎯 Cross-Encoder", f"{result['cross_encoder_score']:.2f}", help="Semantic relevance (0-1)")
+                    st.metric("🔤 BM25 Keywords", f"{result['bm25_score']:.2f}", help="Keyword matching (0.1-0.2)")
+                    st.metric("🤖 Intent Analysis", f"{intent_text} ({result['intent_score']:.1f})", help="Job seeking likelihood (0-0.3)")
+                    st.write("**🎯 Matching Skills:**")
+                    skills_per_column = 5
+                    skill_cols = st.columns(2)
+                    for idx, skill in enumerate(result['skills'][:10]):
+                        with skill_cols[idx % 2]:
+                            st.write(f"• {skill}")
+                with col2:
+                    st.write("**💡 AI-Generated Match Analysis:**")
+                    st.info(result['explanation'])
+                    st.write("**📄 Resume Preview:**")
+                    st.text_area("", result['text_preview'], height=200, disabled=True, key=f"preview_{result['rank']}")
+    with tab3:
+        # Score visualization
+        if len(st.session_state.results) > 1:
+            # Bar chart
+            st.subheader("Score Comparison")
+            chart_data = pd.DataFrame({
+                'Candidate': [r['name'][:20] + '...' if len(r['name']) > 20 else r['name']
+                             for r in st.session_state.results],
+                'Final Score': [r['final_score'] for r in st.session_state.results],
+                'Cross-Encoder': [r['cross_encoder_score'] for r in st.session_state.results],
+                'BM25': [r['bm25_score'] for r in st.session_state.results],
+                'Intent': [r['intent_score'] for r in st.session_state.results]
+            })
+            st.bar_chart(chart_data.set_index('Candidate'))
+            # Score distribution
+            col1, col2 = st.columns(2)
             with col1:
+                st.subheader("Score Distribution")
+                score_ranges = {
+                    'Excellent (≥1.2)': sum(1 for r in st.session_state.results if r['final_score'] >= 1.2),
+                    'Good (1.0-1.2)': sum(1 for r in st.session_state.results if 1.0 <= r['final_score'] < 1.2),
+                    'Fair (0.8-1.0)': sum(1 for r in st.session_state.results if 0.8 <= r['final_score'] < 1.0),
+                    'Poor (<0.8)': sum(1 for r in st.session_state.results if r['final_score'] < 0.8),
+                }
+                dist_df = pd.DataFrame({
+                    'Range': score_ranges.keys(),
+                    'Count': score_ranges.values()
+                })
+                st.bar_chart(dist_df.set_index('Range'))
             with col2:
+                st.subheader("Average Scores")
+                avg_final = np.mean([r['final_score'] for r in st.session_state.results])
+                avg_cross = np.mean([r['cross_encoder_score'] for r in st.session_state.results])
+                avg_bm25 = np.mean([r['bm25_score'] for r in st.session_state.results])
+                avg_intent = np.mean([r['intent_score'] for r in st.session_state.results])
+                st.metric("Average Final Score", f"{avg_final:.2f}")
+                st.metric("Average Cross-Encoder", f"{avg_cross:.2f}")
+                st.metric("Average BM25", f"{avg_bm25:.2f}")
+                st.metric("Average Intent", f"{avg_intent:.2f}")
 # Memory cleanup
+st.markdown("---")
+st.subheader("🧹 Reset Application")
+col1, col2, col3 = st.columns([1, 1, 3])
+with col1:
+    if st.button("🗑️ Clear Resumes Only", type="secondary", help="Clear only the loaded resumes"):
+        st.session_state.resume_texts = []
+        st.session_state.file_names = []
+        st.session_state.results = []
+        st.session_state.explanations_generated = False
+        st.session_state.current_job_description = ""
+        st.success("✅ Resumes cleared!")
+        st.rerun()
+with col2:
+    if st.button("🧹 Clear Everything", type="primary", help="Clear all data and free memory"):
+        st.session_state.resume_texts = []
+        st.session_state.file_names = []
+        st.session_state.results = []
+        st.session_state.explanations_generated = False
+        st.session_state.current_job_description = ""
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        gc.collect()
+        st.success("✅ Everything cleared!")
+        st.rerun()
 # Footer
 st.markdown("---")
 st.markdown(
     """
     <div style='text-align: center; color: #666;'>
+        🚀 Powered by BAAI/bge-large-en-v1.5 & Qwen3-14B | Built with Streamlit
     </div>
     """,
     unsafe_allow_html=True