Commit d57693d · Parent: 0bfe6dd · committed by root
ss
app.py CHANGED
@@ -9,7 +9,7 @@ import os
 import tempfile
 import base64
 from rank_bm25 import BM25Okapi
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
 from sentence_transformers import SentenceTransformer
 from nltk.tokenize import word_tokenize, sent_tokenize
 from tqdm import tqdm
@@ -31,10 +31,24 @@ EMBEDDING_MODEL_NAME = "nvidia/NV-Embed-v2"
 print(f"Loading embedding model {EMBEDDING_MODEL_NAME}...")

 try:
-    #
+    # Configure 4-bit quantization for better memory efficiency
+    quantization_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_use_double_quant=True
+    )
+
+    # Load embedding model and tokenizer with 4-bit quantization
     global_embedding_tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME, trust_remote_code=True)
-    global_embedding_model = AutoModel.from_pretrained(
-
+    global_embedding_model = AutoModel.from_pretrained(
+        EMBEDDING_MODEL_NAME,
+        trust_remote_code=True,
+        device_map="auto",
+        quantization_config=quantization_config,
+        torch_dtype=torch.float16
+    )
+    print(f"Successfully loaded {EMBEDDING_MODEL_NAME} with 4-bit quantization")
 except Exception as e:
     print(f"Error loading embedding model: {str(e)}")
     global_embedding_tokenizer = None
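Note: the 4-bit path assumes the bitsandbytes backend is installed and a CUDA device is visible; on a CPU-only Space the quantized `from_pretrained` call typically raises and execution falls into the except branch. A minimal sketch of a softer fallback that retries without quantization (hypothetical `load_embedding_model` helper, not part of this commit):

import torch
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig

def load_embedding_model(model_name="nvidia/NV-Embed-v2"):
    # Hypothetical helper: try the 4-bit load first, then fall back to an
    # unquantized load if bitsandbytes or a CUDA device is unavailable.
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    try:
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
        )
        model = AutoModel.from_pretrained(
            model_name,
            trust_remote_code=True,
            device_map="auto",
            quantization_config=quant_config,
        )
    except Exception as exc:
        print(f"4-bit load failed ({exc}); loading without quantization")
        model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
    return tokenizer, model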
@@ -179,13 +193,25 @@ class ResumeScreener:
             return np.zeros(768)  # Default embedding size as fallback

         try:
-            # For
-
+            # For long texts, split into smaller chunks to avoid OOM
+            max_length = 256  # Reduced from default 512 to save memory
+
+            # Truncate text and tokenize
+            inputs = self.tokenizer(
+                text,
+                return_tensors="pt",
+                truncation=True,
+                max_length=max_length,
+                padding=True
+            )

             # Move inputs to same device as model
             device = next(self.model.parameters()).device
             inputs = {k: v.to(device) for k, v in inputs.items()}

+            # Free up memory before inference
+            torch.cuda.empty_cache()
+
             with torch.no_grad():
                 outputs = self.model(**inputs)
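Note: despite the "split into smaller chunks" comment, the new code truncates each text to its first 256 tokens, so anything beyond that limit does not influence the embedding. One option (not in this commit) is to embed fixed-size chunks and average them. A rough sketch, assuming an `embed_fn` such as `screener.get_embedding` that maps a string to a NumPy vector:

import numpy as np

def embed_long_text(text, embed_fn, tokenizer, chunk_tokens=256):
    # Hypothetical helper: tokenize once, slice into windows of chunk_tokens,
    # embed each window, and average the chunk vectors.
    token_ids = tokenizer(text, add_special_tokens=False)["input_ids"]
    chunks = [
        tokenizer.decode(token_ids[i:i + chunk_tokens])
        for i in range(0, len(token_ids), chunk_tokens)
    ] or [text]
    return np.mean([embed_fn(chunk) for chunk in chunks], axis=0)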
@@ -193,24 +219,33 @@ class ResumeScreener:
             if hasattr(outputs, "last_hidden_state"):
                 # Mean pooling across token dimension
                 embeddings = outputs.last_hidden_state.mean(dim=1).squeeze()
-                embedding_np = embeddings.
+                embedding_np = embeddings.detach().cpu().numpy()

                 # Set embedding size if not set
                 if self.embedding_size is None:
                     self.embedding_size = embedding_np.shape[0]

+                # Clear cache after getting embedding
+                del outputs, embeddings
+                torch.cuda.empty_cache()
+
                 return embedding_np
             else:
                 # For models that return a specific embedding
-                embedding_np = outputs.
+                embedding_np = outputs.detach().cpu().numpy()

                 # Set embedding size if not set
                 if self.embedding_size is None:
                     self.embedding_size = embedding_np.shape[0]

+                # Clear cache after getting embedding
+                del outputs
+                torch.cuda.empty_cache()
+
                 return embedding_np
         except Exception as e:
             st.error(f"Error generating embedding: {str(e)}")
+            torch.cuda.empty_cache()  # Try to recover memory
             return np.zeros(768)  # Default embedding size as fallback

     def create_faiss_index(self, embeddings):
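Note: `last_hidden_state.mean(dim=1)` averages every position, including padding, which is harmless for a single unpadded text but skews the result if several texts are padded to a common length. A mask-weighted mean is the usual remedy; a sketch, assuming the tokenizer's attention_mask is passed alongside the hidden states:

import torch

def masked_mean_pool(last_hidden_state, attention_mask):
    # Zero out padded positions, then divide by the number of real tokens.
    mask = attention_mask.unsqueeze(-1).type_as(last_hidden_state)
    summed = (last_hidden_state * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)
    return summed / counts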
@@ -795,13 +830,33 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
     # Get job description embedding
     job_embedding = screener.get_embedding(job_description)

-    #
+    # Process resumes in batches to avoid OOM
     resume_embeddings = []
+    batch_size = 10  # Process 10 resumes at a time
     progress_bar = st.progress(0)
-
-
-
-
+    status_text = st.empty()
+
+    for i in range(0, len(resume_texts), batch_size):
+        batch = resume_texts[i:i+batch_size]
+        status_text.text(f"Processing resumes {i+1}-{min(i+batch_size, len(resume_texts))} of {len(resume_texts)}...")
+
+        batch_embeddings = []
+        for j, text in enumerate(batch):
+            embedding = screener.get_embedding(text)
+            batch_embeddings.append(embedding)
+            # Update progress after each resume
+            progress = (i + j + 1) / len(resume_texts)
+            progress_bar.progress(progress)
+
+        # Add batch embeddings to the full list
+        resume_embeddings.extend(batch_embeddings)
+
+        # Force garbage collection between batches
+        import gc
+        gc.collect()
+        torch.cuda.empty_cache()
+
+    status_text.text("Calculating similarity scores...")

     # Calculate hybrid scores
     hybrid_scores, semantic_scores, bm25_scores = screener.calculate_hybrid_scores(
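Note: the new loop groups resumes into batches of 10, but each resume is still embedded by its own `get_embedding` call, so the batching mainly paces the progress updates and the garbage collection. If throughput matters, the tokenizer and model can encode a whole list in one forward pass; a sketch, assuming the model exposes `last_hidden_state` and reusing the `masked_mean_pool` helper sketched above:

import torch

def embed_batch(texts, tokenizer, model, max_length=256):
    # Hypothetical batched variant of get_embedding: one forward pass per batch.
    inputs = tokenizer(texts, return_tensors="pt", truncation=True,
                       max_length=max_length, padding=True)
    device = next(model.parameters()).device
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    pooled = masked_mean_pool(outputs.last_hidden_state, inputs["attention_mask"])
    return pooled.detach().cpu().numpy()  # shape (len(texts), hidden_size)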
@@ -819,7 +874,9 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex

     # Create results with explanations if enabled
     results = []
-
+    status_text.text("Generating explanations...")
+
+    for idx, (name, text, score, semantic_score, bm25_score) in enumerate(top_candidates):
         # Extract skills for this resume
         skills = screener.extract_skills(text, job_description)
@@ -834,6 +891,10 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
         }

         if use_explanation:
+            # Update progress to show explanation generation
+            progress_bar.progress((idx + 1) / len(top_candidates))
+            status_text.text(f"Generating explanation for candidate {idx+1}/{len(top_candidates)}...")
+
             explanation = screener.generate_explanation(
                 text,
                 job_description,
@@ -843,6 +904,9 @@ if st.button("Find Top Candidates", disabled=not (job_description and resume_tex
                 skills
             )
             result["explanation"] = explanation
+
+            # Clear cache after each explanation
+            torch.cuda.empty_cache()
         else:
             result["explanation"] = ""
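Note: `progress_bar` has already been driven to 100% by the embedding pass and is reset here for the explanation pass, so the bar visibly jumps backwards. Giving each phase its own bar keeps the display monotonic; a minimal Streamlit sketch of that layout (stand-in loops, not the app's actual code):

import streamlit as st

# Hypothetical two-phase layout: one progress bar per phase instead of reusing a single bar.
embedding_bar = st.progress(0)
for i in range(10):   # stand-in for the resume-embedding loop
    embedding_bar.progress((i + 1) / 10)

explanation_bar = st.progress(0)
for i in range(5):    # stand-in for the explanation loop
    explanation_bar.progress((i + 1) / 5)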