root committed on
Commit d57693d · 1 Parent(s): 0bfe6dd
Files changed (1)
  1. app.py +78 -14
app.py CHANGED
@@ -9,7 +9,7 @@ import os
 import tempfile
 import base64
 from rank_bm25 import BM25Okapi
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
 from sentence_transformers import SentenceTransformer
 from nltk.tokenize import word_tokenize, sent_tokenize
 from tqdm import tqdm
@@ -31,10 +31,24 @@ EMBEDDING_MODEL_NAME = "nvidia/NV-Embed-v2"
 print(f"Loading embedding model {EMBEDDING_MODEL_NAME}...")
 
 try:
-    # Load embedding model and tokenizer
+    # Configure 4-bit quantization for better memory efficiency
+    quantization_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_use_double_quant=True
+    )
+
+    # Load embedding model and tokenizer with 4-bit quantization
     global_embedding_tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME, trust_remote_code=True)
-    global_embedding_model = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME, trust_remote_code=True, device_map="auto")
-    print(f"Successfully loaded {EMBEDDING_MODEL_NAME}")
+    global_embedding_model = AutoModel.from_pretrained(
+        EMBEDDING_MODEL_NAME,
+        trust_remote_code=True,
+        device_map="auto",
+        quantization_config=quantization_config,
+        torch_dtype=torch.float16
+    )
+    print(f"Successfully loaded {EMBEDDING_MODEL_NAME} with 4-bit quantization")
 except Exception as e:
     print(f"Error loading embedding model: {str(e)}")
     global_embedding_tokenizer = None
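
A quick way to confirm that the 4-bit load actually shrinks the model (not part of this commit; get_memory_footprint() is the stock transformers helper) would be something along these lines:

```python
# Hypothetical sanity check, not in the commit: report how much memory the
# quantized NV-Embed-v2 weights occupy after loading.
if global_embedding_model is not None:
    footprint_gb = global_embedding_model.get_memory_footprint() / 1024 ** 3
    print(f"{EMBEDDING_MODEL_NAME} memory footprint: {footprint_gb:.2f} GB")
```
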
@@ -179,13 +193,25 @@ class ResumeScreener:
             return np.zeros(768) # Default embedding size as fallback
 
         try:
-            # For HuggingFace models
-            inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
+            # For long texts, split into smaller chunks to avoid OOM
+            max_length = 256 # Reduced from default 512 to save memory
+
+            # Truncate text and tokenize
+            inputs = self.tokenizer(
+                text,
+                return_tensors="pt",
+                truncation=True,
+                max_length=max_length,
+                padding=True
+            )
 
             # Move inputs to same device as model
             device = next(self.model.parameters()).device
             inputs = {k: v.to(device) for k, v in inputs.items()}
 
+            # Free up memory before inference
+            torch.cuda.empty_cache()
+
             with torch.no_grad():
                 outputs = self.model(**inputs)
 
@@ -193,24 +219,33 @@ class ResumeScreener:
             if hasattr(outputs, "last_hidden_state"):
                 # Mean pooling across token dimension
                 embeddings = outputs.last_hidden_state.mean(dim=1).squeeze()
-                embedding_np = embeddings.cpu().detach().numpy()
+                embedding_np = embeddings.detach().cpu().numpy()
 
                 # Set embedding size if not set
                 if self.embedding_size is None:
                     self.embedding_size = embedding_np.shape[0]
 
+                # Clear cache after getting embedding
+                del outputs, embeddings
+                torch.cuda.empty_cache()
+
                 return embedding_np
             else:
                 # For models that return a specific embedding
-                embedding_np = outputs.cpu().detach().numpy()
+                embedding_np = outputs.detach().cpu().numpy()
 
                 # Set embedding size if not set
                 if self.embedding_size is None:
                     self.embedding_size = embedding_np.shape[0]
 
+                # Clear cache after getting embedding
+                del outputs
+                torch.cuda.empty_cache()
+
                 return embedding_np
         except Exception as e:
             st.error(f"Error generating embedding: {str(e)}")
+            torch.cuda.empty_cache() # Try to recover memory
             return np.zeros(768) # Default embedding size as fallback
 
     def create_faiss_index(self, embeddings):
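
Worth noting: the new comment in get_embedding mentions splitting long texts into chunks, but the committed code only truncates at max_length=256, so anything past the first 256 tokens of a resume is ignored. A chunk-and-average variant (purely illustrative, not what this commit does; it reuses the screener's tokenizer and the get_embedding method shown above) might look like:

```python
import numpy as np

def get_embedding_chunked(screener, text, window=256):
    # Illustrative sketch only: embed fixed-size token windows and average them
    # so content beyond the first `window` tokens still contributes.
    token_ids = screener.tokenizer.encode(text, add_special_tokens=False)
    chunk_vectors = []
    for start in range(0, len(token_ids), window):
        chunk_text = screener.tokenizer.decode(token_ids[start:start + window])
        chunk_vectors.append(screener.get_embedding(chunk_text))
    return np.mean(chunk_vectors, axis=0) if chunk_vectors else np.zeros(768)
```
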
@@ -795,13 +830,33 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
     # Get job description embedding
     job_embedding = screener.get_embedding(job_description)
 
-    # Get resume embeddings
+    # Process resumes in batches to avoid OOM
     resume_embeddings = []
+    batch_size = 10 # Process 10 resumes at a time
     progress_bar = st.progress(0)
-    for i, text in enumerate(resume_texts):
-        embedding = screener.get_embedding(text)
-        resume_embeddings.append(embedding)
-        progress_bar.progress((i + 1) / len(resume_texts))
+    status_text = st.empty()
+
+    for i in range(0, len(resume_texts), batch_size):
+        batch = resume_texts[i:i+batch_size]
+        status_text.text(f"Processing resumes {i+1}-{min(i+batch_size, len(resume_texts))} of {len(resume_texts)}...")
+
+        batch_embeddings = []
+        for j, text in enumerate(batch):
+            embedding = screener.get_embedding(text)
+            batch_embeddings.append(embedding)
+            # Update progress after each resume
+            progress = (i + j + 1) / len(resume_texts)
+            progress_bar.progress(progress)
+
+        # Add batch embeddings to the full list
+        resume_embeddings.extend(batch_embeddings)
+
+        # Force garbage collection between batches
+        import gc
+        gc.collect()
+        torch.cuda.empty_cache()
+
+    status_text.text("Calculating similarity scores...")
 
     # Calculate hybrid scores
     hybrid_scores, semantic_scores, bm25_scores = screener.calculate_hybrid_scores(
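
Note that batch_size above only controls how often garbage collection runs; each resume still goes through its own forward pass via screener.get_embedding. A genuinely batched pass (illustrative only, assuming the screener exposes the same tokenizer/model pair used inside get_embedding) would tokenize the whole batch at once:

```python
# Illustrative sketch, not in the commit: embed a list of resume texts in a
# single forward pass. Hugging Face tokenizers accept a list of strings.
device = next(screener.model.parameters()).device
inputs = screener.tokenizer(
    batch,  # list[str] of resume texts
    return_tensors="pt",
    truncation=True,
    max_length=256,
    padding=True,
)
inputs = {k: v.to(device) for k, v in inputs.items()}
with torch.no_grad():
    outputs = screener.model(**inputs)
# Mean-pool per resume -> array of shape (len(batch), hidden_size)
batch_embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
```
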
@@ -819,7 +874,9 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
 
     # Create results with explanations if enabled
     results = []
-    for name, text, score, semantic_score, bm25_score in top_candidates:
+    status_text.text("Generating explanations...")
+
+    for idx, (name, text, score, semantic_score, bm25_score) in enumerate(top_candidates):
         # Extract skills for this resume
         skills = screener.extract_skills(text, job_description)
 
@@ -834,6 +891,10 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
         }
 
         if use_explanation:
+            # Update progress to show explanation generation
+            progress_bar.progress((idx + 1) / len(top_candidates))
+            status_text.text(f"Generating explanation for candidate {idx+1}/{len(top_candidates)}...")
+
             explanation = screener.generate_explanation(
                 text,
                 job_description,
@@ -843,6 +904,9 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
                 skills
             )
             result["explanation"] = explanation
+
+            # Clear cache after each explanation
+            torch.cuda.empty_cache()
         else:
             result["explanation"] = ""
 