root committed on
Commit d57693d · 1 Parent(s): 0bfe6dd
Files changed (1)
  1. app.py +78 -14
app.py CHANGED
@@ -9,7 +9,7 @@ import os
 import tempfile
 import base64
 from rank_bm25 import BM25Okapi
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
 from sentence_transformers import SentenceTransformer
 from nltk.tokenize import word_tokenize, sent_tokenize
 from tqdm import tqdm
@@ -31,10 +31,24 @@ EMBEDDING_MODEL_NAME = "nvidia/NV-Embed-v2"
 print(f"Loading embedding model {EMBEDDING_MODEL_NAME}...")
 
 try:
-    # Load embedding model and tokenizer
+    # Configure 4-bit quantization for better memory efficiency
+    quantization_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_use_double_quant=True
+    )
+
+    # Load embedding model and tokenizer with 4-bit quantization
     global_embedding_tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME, trust_remote_code=True)
-    global_embedding_model = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME, trust_remote_code=True, device_map="auto")
-    print(f"Successfully loaded {EMBEDDING_MODEL_NAME}")
+    global_embedding_model = AutoModel.from_pretrained(
+        EMBEDDING_MODEL_NAME,
+        trust_remote_code=True,
+        device_map="auto",
+        quantization_config=quantization_config,
+        torch_dtype=torch.float16
+    )
+    print(f"Successfully loaded {EMBEDDING_MODEL_NAME} with 4-bit quantization")
 except Exception as e:
     print(f"Error loading embedding model: {str(e)}")
     global_embedding_tokenizer = None
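
A quick way to confirm that the 4-bit load actually shrinks the model (not part of this commit; get_memory_footprint() is the stock transformers helper) would be something along these lines:

```python
# Hypothetical sanity check, not in the commit: report how much memory the
# quantized NV-Embed-v2 weights occupy after loading.
if global_embedding_model is not None:
    footprint_gb = global_embedding_model.get_memory_footprint() / 1024 ** 3
    print(f"{EMBEDDING_MODEL_NAME} memory footprint: {footprint_gb:.2f} GB")
```
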
@@ -179,13 +193,25 @@ class ResumeScreener:
             return np.zeros(768) # Default embedding size as fallback
 
         try:
-            # For HuggingFace models
-            inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
+            # For long texts, split into smaller chunks to avoid OOM
+            max_length = 256 # Reduced from default 512 to save memory
+
+            # Truncate text and tokenize
+            inputs = self.tokenizer(
+                text,
+                return_tensors="pt",
+                truncation=True,
+                max_length=max_length,
+                padding=True
+            )
 
             # Move inputs to same device as model
             device = next(self.model.parameters()).device
             inputs = {k: v.to(device) for k, v in inputs.items()}
 
+            # Free up memory before inference
+            torch.cuda.empty_cache()
+
             with torch.no_grad():
                 outputs = self.model(**inputs)
 
@@ -193,24 +219,33 @@ class ResumeScreener:
             if hasattr(outputs, "last_hidden_state"):
                 # Mean pooling across token dimension
                 embeddings = outputs.last_hidden_state.mean(dim=1).squeeze()
-                embedding_np = embeddings.cpu().detach().numpy()
+                embedding_np = embeddings.detach().cpu().numpy()
 
                 # Set embedding size if not set
                 if self.embedding_size is None:
                     self.embedding_size = embedding_np.shape[0]
 
+                # Clear cache after getting embedding
+                del outputs, embeddings
+                torch.cuda.empty_cache()
+
                 return embedding_np
             else:
                 # For models that return a specific embedding
-                embedding_np = outputs.cpu().detach().numpy()
+                embedding_np = outputs.detach().cpu().numpy()
 
                 # Set embedding size if not set
                 if self.embedding_size is None:
                     self.embedding_size = embedding_np.shape[0]
 
+                # Clear cache after getting embedding
+                del outputs
+                torch.cuda.empty_cache()
+
                 return embedding_np
         except Exception as e:
             st.error(f"Error generating embedding: {str(e)}")
+            torch.cuda.empty_cache() # Try to recover memory
             return np.zeros(768) # Default embedding size as fallback
 
     def create_faiss_index(self, embeddings):
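
Worth noting: the new comment in get_embedding mentions splitting long texts into chunks, but the committed code only truncates at max_length=256, so anything past the first 256 tokens of a resume is ignored. A chunk-and-average variant (purely illustrative, not what this commit does; it reuses the screener's tokenizer and the get_embedding method shown above) might look like:

```python
import numpy as np

def get_embedding_chunked(screener, text, window=256):
    # Illustrative sketch only: embed fixed-size token windows and average them
    # so content beyond the first `window` tokens still contributes.
    token_ids = screener.tokenizer.encode(text, add_special_tokens=False)
    chunk_vectors = []
    for start in range(0, len(token_ids), window):
        chunk_text = screener.tokenizer.decode(token_ids[start:start + window])
        chunk_vectors.append(screener.get_embedding(chunk_text))
    return np.mean(chunk_vectors, axis=0) if chunk_vectors else np.zeros(768)
```
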
@@ -795,13 +830,33 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
     # Get job description embedding
     job_embedding = screener.get_embedding(job_description)
 
-    # Get resume embeddings
+    # Process resumes in batches to avoid OOM
     resume_embeddings = []
+    batch_size = 10 # Process 10 resumes at a time
     progress_bar = st.progress(0)
-    for i, text in enumerate(resume_texts):
-        embedding = screener.get_embedding(text)
-        resume_embeddings.append(embedding)
-        progress_bar.progress((i + 1) / len(resume_texts))
+    status_text = st.empty()
+
+    for i in range(0, len(resume_texts), batch_size):
+        batch = resume_texts[i:i+batch_size]
+        status_text.text(f"Processing resumes {i+1}-{min(i+batch_size, len(resume_texts))} of {len(resume_texts)}...")
+
+        batch_embeddings = []
+        for j, text in enumerate(batch):
+            embedding = screener.get_embedding(text)
+            batch_embeddings.append(embedding)
+            # Update progress after each resume
+            progress = (i + j + 1) / len(resume_texts)
+            progress_bar.progress(progress)
+
+        # Add batch embeddings to the full list
+        resume_embeddings.extend(batch_embeddings)
+
+        # Force garbage collection between batches
+        import gc
+        gc.collect()
+        torch.cuda.empty_cache()
+
+    status_text.text("Calculating similarity scores...")
 
     # Calculate hybrid scores
     hybrid_scores, semantic_scores, bm25_scores = screener.calculate_hybrid_scores(
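
Note that batch_size above only controls how often garbage collection runs; each resume still goes through its own forward pass via screener.get_embedding. A genuinely batched pass (illustrative only, assuming the screener exposes the same tokenizer/model pair used inside get_embedding) would tokenize the whole batch at once:

```python
# Illustrative sketch, not in the commit: embed a list of resume texts in a
# single forward pass. Hugging Face tokenizers accept a list of strings.
device = next(screener.model.parameters()).device
inputs = screener.tokenizer(
    batch,  # list[str] of resume texts
    return_tensors="pt",
    truncation=True,
    max_length=256,
    padding=True,
)
inputs = {k: v.to(device) for k, v in inputs.items()}
with torch.no_grad():
    outputs = screener.model(**inputs)
# Mean-pool per resume -> array of shape (len(batch), hidden_size)
batch_embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
```
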
@@ -819,7 +874,9 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
 
     # Create results with explanations if enabled
     results = []
-    for name, text, score, semantic_score, bm25_score in top_candidates:
+    status_text.text("Generating explanations...")
+
+    for idx, (name, text, score, semantic_score, bm25_score) in enumerate(top_candidates):
         # Extract skills for this resume
         skills = screener.extract_skills(text, job_description)
 
@@ -834,6 +891,10 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
         }
 
         if use_explanation:
+            # Update progress to show explanation generation
+            progress_bar.progress((idx + 1) / len(top_candidates))
+            status_text.text(f"Generating explanation for candidate {idx+1}/{len(top_candidates)}...")
+
             explanation = screener.generate_explanation(
                 text,
                 job_description,
@@ -843,6 +904,9 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
                 skills
             )
             result["explanation"] = explanation
+
+            # Clear cache after each explanation
+            torch.cuda.empty_cache()
         else:
             result["explanation"] = ""
 