root committed
Commit 0bfe6dd · 1 Parent(s): ba2dfe6

ss

Files changed:
- app.py +39 -153
- explanation_generator.py +74 -132
- requirements.txt +1 -1
app.py
CHANGED
@@ -26,6 +26,20 @@ try:
 except LookupError:
     nltk.download('punkt')
 
+# Initialize embedding model at startup
+EMBEDDING_MODEL_NAME = "nvidia/NV-Embed-v2"
+print(f"Loading embedding model {EMBEDDING_MODEL_NAME}...")
+
+try:
+    # Load embedding model and tokenizer
+    global_embedding_tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME, trust_remote_code=True)
+    global_embedding_model = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME, trust_remote_code=True, device_map="auto")
+    print(f"Successfully loaded {EMBEDDING_MODEL_NAME}")
+except Exception as e:
+    print(f"Error loading embedding model: {str(e)}")
+    global_embedding_tokenizer = None
+    global_embedding_model = None
+
 # Set page configuration
 st.set_page_config(
     page_title="Resume Screener & Skill Extractor",
@@ -34,38 +48,6 @@ st.set_page_config(
     initial_sidebar_state="expanded"
 )
 
-# Hugging Face Spaces optimization
-RUNNING_ON_SPACES = os.environ.get('SPACE_ID') is not None
-if RUNNING_ON_SPACES:
-    st.sidebar.info("🚀 Running on Hugging Face Spaces")
-
-    # Set up cache directory structure
-    CACHE_DIR = os.path.join(os.getcwd(), ".cache")
-    HF_HOME = os.path.join(CACHE_DIR, "huggingface")
-    os.environ['TRANSFORMERS_CACHE'] = os.path.join(HF_HOME, "transformers")
-    os.environ['HF_HOME'] = HF_HOME
-    os.environ['HF_DATASETS_CACHE'] = os.path.join(HF_HOME, "datasets")
-
-    # Create cache directories if they don't exist
-    for dir_path in [CACHE_DIR, HF_HOME, os.environ['TRANSFORMERS_CACHE'], os.environ['HF_DATASETS_CACHE']]:
-        if not os.path.exists(dir_path):
-            os.makedirs(dir_path)
-
-    # Use downloaded models if available (avoid downloading on every run)
-    os.environ['TRANSFORMERS_OFFLINE'] = '1'
-
-    # Spaces optimization flags
-    USE_PIPELINE = True
-    OPTIMIZE_MEMORY = True
-
-    # Print setup information
-    print(f"Running on Hugging Face Spaces: {os.environ.get('SPACE_ID')}")
-    print(f"Cache directory: {CACHE_DIR}")
-    print(f"HF Home: {HF_HOME}")
-else:
-    USE_PIPELINE = False
-    OPTIMIZE_MEMORY = False
-
 # Sidebar for model selection and weights
 with st.sidebar:
     st.title("Configuration")
@@ -95,15 +77,9 @@ with st.sidebar:
     use_explanation = st.checkbox("Generate Explanations", value=True)
     use_faiss = st.checkbox("Use FAISS for fast search", value=True)
 
-    # Hugging Face Spaces optimization options
-    if not RUNNING_ON_SPACES:
-        st.subheader("Hugging Face Spaces Optimization")
-        USE_PIPELINE = st.checkbox("Use pipeline API for faster loading", value=USE_PIPELINE)
-        OPTIMIZE_MEMORY = st.checkbox("Optimize memory usage", value=OPTIMIZE_MEMORY)
-
     # Memory optimization options
     st.subheader("Memory Optimization")
-    memory_optimization = st.checkbox("Enable memory optimization (for large datasets)", value=
+    memory_optimization = st.checkbox("Enable memory optimization (for large datasets)", value=False)
     clear_embeddings = st.checkbox("Clear embeddings after processing", value=False)
     gc_collect_interval = st.number_input(
         "Garbage collection interval (files)",
@@ -126,92 +102,31 @@ if 'job_description' not in st.session_state:
 if 'results' not in st.session_state:
     st.session_state.results = []
 if 'embedding_model' not in st.session_state:
-    st.session_state.embedding_model =
+    st.session_state.embedding_model = global_embedding_model
 if 'tokenizer' not in st.session_state:
-    st.session_state.tokenizer =
+    st.session_state.tokenizer = global_embedding_tokenizer
 if 'faiss_index' not in st.session_state:
     st.session_state.faiss_index = None
 if 'explanation_generator' not in st.session_state:
     st.session_state.explanation_generator = None
-if 'screener' not in st.session_state:
-    st.session_state.screener = None
 
 class ResumeScreener:
-    def __init__(self, embedding_model_name="nvidia/NV-Embed-v2", explanation_model_name="Qwen/QwQ-32B"
+    def __init__(self, embedding_model_name="nvidia/NV-Embed-v2", explanation_model_name="Qwen/QwQ-32B"):
         """Initialize the ResumeScreener with the specified embedding model"""
         self.embedding_model_name = embedding_model_name
         self.explanation_model_name = explanation_model_name
-
-        self.
-        self.
+        # Initialize with preloaded models
+        self.model = st.session_state.embedding_model
+        self.tokenizer = st.session_state.tokenizer
         self.faiss_index = None
         self.embedding_size = None
         self.explanation_generator = None
 
-        #
-        if load_immediately:
-            with st.spinner("Loading models at startup..."):
-                self.load_model()
-                if use_explanation:
-                    self.load_explanation_generator()
-
-    def load_model(self):
-        """Load the embedding model from Hugging Face"""
-        if st.session_state.embedding_model is None:
-            with st.spinner(f"Loading model {self.embedding_model_name}..."):
-                try:
-                    # First try to use pipeline for more efficient loading
-                    try:
-                        from transformers import pipeline
-                        self.embedding_pipeline = pipeline(
-                            "feature-extraction",
-                            model=self.embedding_model_name,
-                            trust_remote_code=True,
-                            device_map="auto"
-                        )
-                        print(f"Successfully loaded {self.embedding_model_name} with pipeline API")
-                        self.model = self.embedding_pipeline.model
-                        self.tokenizer = self.embedding_pipeline.tokenizer
-                    except Exception as pipe_e:
-                        print(f"Error loading with pipeline API: {str(pipe_e)}")
-                        print("Falling back to direct model loading...")
-
-                        if "sentence-transformers" in self.embedding_model_name:
-                            self.model = SentenceTransformer(self.embedding_model_name)
-                        else:
-                            self.tokenizer = AutoTokenizer.from_pretrained(self.embedding_model_name, trust_remote_code=True)
-                            self.model = AutoModel.from_pretrained(self.embedding_model_name, trust_remote_code=True)
-
-                    st.session_state.embedding_model = self.model
-                    st.session_state.tokenizer = self.tokenizer
-                    if self.embedding_pipeline:
-                        st.session_state.embedding_pipeline = self.embedding_pipeline
-
-                    # Get embedding size
-                    if "sentence-transformers" in self.embedding_model_name:
-                        self.embedding_size = self.model.get_sentence_embedding_dimension()
-                    else:
-                        # For non-sentence-transformers, we'll determine this after first embedding
-                        pass
-
-                except Exception as e:
-                    st.error(f"Error loading model: {str(e)}")
-                    st.stop()
-        else:
-            self.model = st.session_state.embedding_model
-            self.tokenizer = st.session_state.tokenizer
-            if 'embedding_pipeline' in st.session_state:
-                self.embedding_pipeline = st.session_state.embedding_pipeline
-
-    def load_explanation_generator(self):
-        """Load the explanation generator if needed"""
+        # Initialize explanation generator
         if use_explanation and st.session_state.explanation_generator is None:
-            with st.spinner(
-                st.session_state.explanation_generator = ExplanationGenerator(
-
-                    load_immediately=True
-                )
-            self.explanation_generator = st.session_state.explanation_generator
+            with st.spinner("Initializing explanation generator..."):
+                st.session_state.explanation_generator = ExplanationGenerator(self.explanation_model_name)
+            self.explanation_generator = st.session_state.explanation_generator
         elif use_explanation:
             self.explanation_generator = st.session_state.explanation_generator
 
@@ -259,42 +174,18 @@ class ResumeScreener:
 
     def get_embedding(self, text):
         """Generate text embedding for a given text"""
-
-
-
-            # Pipeline returns list of list of embeddings, we want just one vector
-            embeddings = self.embedding_pipeline(
-                text,
-                padding=True,
-                truncation=True,
-                max_length=512
-            )
-            # Mean pooling across token dimension for BERT-like models
-            embedding_np = np.mean(embeddings[0], axis=0)
-
-            # Set embedding size if not set
-            if self.embedding_size is None:
-                self.embedding_size = embedding_np.shape[0]
-
-            return embedding_np
-        except Exception as e:
-            print(f"Error using embedding pipeline: {str(e)}")
-            print("Falling back to direct embedding method...")
-
-        # Fall back to original method
-        if "sentence-transformers" in self.embedding_model_name:
-            # For sentence-transformers models
-            embedding = self.model.encode([text], convert_to_tensor=True, show_progress_bar=False)[0]
-            embedding_np = embedding.cpu().detach().numpy()
+        if self.model is None:
+            st.error("Embedding model not available. Please check your environment.")
+            return np.zeros(768)  # Default embedding size as fallback
 
-            if self.embedding_size is None:
-                self.embedding_size = embedding_np.shape[0]
-
-            return embedding_np
-        else:
+        try:
             # For HuggingFace models
             inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
+
+            # Move inputs to same device as model
+            device = next(self.model.parameters()).device
+            inputs = {k: v.to(device) for k, v in inputs.items()}
+
             with torch.no_grad():
                 outputs = self.model(**inputs)
 
@@ -318,6 +209,9 @@ class ResumeScreener:
             self.embedding_size = embedding_np.shape[0]
 
             return embedding_np
+        except Exception as e:
+            st.error(f"Error generating embedding: {str(e)}")
+            return np.zeros(768)  # Default embedding size as fallback
 
     def create_faiss_index(self, embeddings):
         """Create a FAISS index for fast similarity search"""
@@ -572,13 +466,8 @@ def get_huggingface_spaces_datasets():
 st.title("Resume Screener & Skill Extractor")
 st.markdown("---")
 
-# Initialize the resume screener
-
-    with st.spinner("Initializing Resume Screener..."):
-        screener = ResumeScreener(embedding_model_name, explanation_model_name, load_immediately=True)
-        st.session_state.screener = screener
-else:
-    screener = st.session_state.screener
+# Initialize the resume screener
+screener = ResumeScreener(embedding_model_name, explanation_model_name)
 
 # Job description input
 st.header("1. Enter Job Description")
@@ -902,9 +791,6 @@ elif upload_option == "Upload from Dataset":
 
 # Process button
 if st.button("Find Top Candidates", disabled=not (job_description and resume_texts)):
-    with st.spinner("Loading embedding model..."):
-        screener.load_model()
-
    with st.spinner("Processing job description and resumes..."):
        # Get job description embedding
        job_embedding = screener.get_embedding(job_description)
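Note: the hunks above keep the "Use FAISS for fast search" option and the create_faiss_index method, but their bodies fall outside this diff. A minimal sketch of what that indexing and search step typically looks like, assuming cosine similarity via L2-normalized inner product (the helper names below are illustrative, not taken from this commit):

import faiss
import numpy as np

def build_faiss_index(embeddings):
    # Exact inner-product index; normalizing first makes inner product equal cosine similarity.
    vectors = np.asarray(embeddings, dtype="float32")
    faiss.normalize_L2(vectors)
    index = faiss.IndexFlatIP(vectors.shape[1])
    index.add(vectors)
    return index

def search_top_k(index, job_embedding, k=5):
    # Returns (resume_position, similarity_score) pairs, best match first.
    query = np.asarray([job_embedding], dtype="float32")
    faiss.normalize_L2(query)
    scores, ids = index.search(query, k)
    return list(zip(ids[0].tolist(), scores[0].tolist()))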
explanation_generator.py
CHANGED
@@ -6,151 +6,93 @@ using the QwQ-32B model from Hugging Face.
 """
 
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import os
 import re
 
+# Load QwQ model at initialization time
+print("Loading Qwen/QwQ-32B model with 4-bit quantization...")
+QWQ_MODEL_NAME = "Qwen/QwQ-32B"
+
+try:
+    # Configure 4-bit quantization for better performance
+    quantization_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_use_double_quant=True
+    )
+
+    # Load QwQ model and tokenizer
+    global_qwq_tokenizer = AutoTokenizer.from_pretrained(QWQ_MODEL_NAME, trust_remote_code=True)
+    global_qwq_model = None
+
+    # Check if we have enough resources to load the model
+    if torch.cuda.is_available():
+        gpu_memory = torch.cuda.get_device_properties(0).total_memory
+        if gpu_memory >= 16 * (1024**3):  # 16 GB (reduced thanks to quantization)
+            global_qwq_model = AutoModelForCausalLM.from_pretrained(
+                QWQ_MODEL_NAME,
+                quantization_config=quantization_config,
+                device_map="auto",
+                trust_remote_code=True,
+                torch_dtype=torch.float16
+            )
+            print("Successfully loaded QwQ-32B with 4-bit quantization")
+        else:
+            print("Not enough GPU memory, using template-based explanations")
+    else:
+        print("CUDA not available, using template-based explanations")
+
+except Exception as e:
+    print(f"Error loading QwQ-32B model: {str(e)}")
+    print("Falling back to template-based explanations.")
+    global_qwq_tokenizer = None
+    global_qwq_model = None
+
 class ExplanationGenerator:
-    def __init__(self, model_name="Qwen/QwQ-32B"
+    def __init__(self, model_name="Qwen/QwQ-32B"):
         """Initialize the explanation generator with the specified model"""
         self.model_name = model_name
-
-        self.
-        self.
-        self.initialized =
+        # Use globally pre-loaded model and tokenizer
+        self.model = global_qwq_model
+        self.tokenizer = global_qwq_tokenizer
+        self.initialized = True
 
-
-
-
-
-    def load_model(self):
-        """Load the model and tokenizer if not already loaded"""
-        if not self.initialized:
-            try:
-
-
-                # Set up 4-bit quantization configuration
-                quantization_config = BitsAndBytesConfig(
-                    load_in_4bit=True,
-                    bnb_4bit_compute_dtype=torch.bfloat16,
-                    bnb_4bit_use_double_quant=True,
-                    bnb_4bit_quant_type="nf4"
-                )
-
-                #
-
-
-
-                    "text-generation",
-                    model=self.model_name,
-                    torch_dtype=torch.bfloat16,
-                    device_map="auto",
-                    trust_remote_code=True,
-                    quantization_config=quantization_config,
-                    model_kwargs={"attn_implementation": "eager"}  # Uses less memory
-                )
-                print(f"Successfully loaded {self.model_name} with pipeline API")
-                # Pipeline includes both model and tokenizer
-                self.tokenizer = self.text_generation_pipeline.tokenizer
-                self.model = self.text_generation_pipeline.model
-                self.initialized = True
-                return
-            except Exception as pipe_e:
-                print(f"Error loading with pipeline API: {str(pipe_e)}")
-                print("Falling back to direct model loading...")
-
-                #
-
-
-
-                )
-
-                #
-
-                self.model = AutoModelForCausalLM.from_pretrained(
-                    self.model_name,
-                    device_map="auto",
-                    trust_remote_code=True,
-                    quantization_config=quantization_config
-                )
-                print(f"Successfully loaded {self.model_name} with 4-bit quantization")
-            except Exception as quant_e:
-                print(f"Error loading with 4-bit quantization: {str(quant_e)}")
-                print("Trying to load model with 8-bit quantization...")
-
-                # Fall back to 8-bit or CPU if 4-bit fails
-                if torch.cuda.is_available():
-                    self.model = AutoModelForCausalLM.from_pretrained(
-                        self.model_name,
-                        device_map="auto",
-                        trust_remote_code=True,
-                        load_in_8bit=True
-                    )
-                    print(f"Successfully loaded {self.model_name} with 8-bit quantization")
-                else:
-                    # Fall back to template-based solution if no GPU
-                    self.model = None
-                    print(f"Warning: Loading {self.model_name} on CPU is not recommended. Using template-based explanations instead.")
-
-
-
-
-
-
-
-
-
-        """Generate explanation for why a resume was ranked highly"""
-        # Check if we need to load the model
-        if not self.initialized:
-            self.load_model()
-
-        # If the model is loaded and available, use it for generating explanations
-        if self.model is not None:
-            try:
-                # Prepare prompt for QwQ-32B
-                prompt = self._create_prompt(resume_text, job_description, score, semantic_score, keyword_score, skills)
-
-                #
-
-                outputs = self.text_generation_pipeline(
-                    prompt,
-                    max_new_tokens=300,
-                    temperature=0.6,
-                    top_p=0.95,
-                    top_k=30,
-                    do_sample=True,
-                    return_full_text=False
-                )
-                response = outputs[0]['generated_text']
-
-                else:
-                    # Create messages for chat format
-                    messages = [
-                        {"role": "user", "content": prompt}
-                    ]
-
-                    # Apply chat template
-                    text = self.tokenizer.apply_chat_template(
-                        messages,
-                        tokenize=False,
-                        add_generation_prompt=True
-                    )
-
-                    # Tokenize
-                    inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
-
-                    # Generate response
-                    output_ids = self.model.generate(
-                        **inputs,
-                        max_new_tokens=300,
-                        temperature=0.6,
-                        top_p=0.95,
-                        top_k=30
-                    )
-
-                    # Decode the response
-                    response = self.tokenizer.decode(output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+    def generate_explanation(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
+        """Generate explanation for why a resume was ranked highly"""
+        # Use the model if it's available
+        if self.model is not None and self.tokenizer is not None:
+            try:
+                # Prepare prompt for QwQ-32B
+                prompt = self._create_prompt(resume_text, job_description, score, semantic_score, keyword_score, skills)
+
+                # Create messages for chat format
+                messages = [
+                    {"role": "user", "content": prompt}
+                ]
+
+                # Apply chat template
+                text = self.tokenizer.apply_chat_template(
+                    messages,
+                    tokenize=False,
+                    add_generation_prompt=True
+                )
+
+                # Tokenize
+                inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
+
+                # Generate response
+                output_ids = self.model.generate(
+                    **inputs,
+                    max_new_tokens=300,
+                    temperature=0.6,
+                    top_p=0.95,
+                    top_k=30
+                )
+
+                # Decode the response
+                response = self.tokenizer.decode(output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
                 # Clean up the response
                 cleaned_response = self._clean_response(response)
@@ -158,7 +100,7 @@ class ExplanationGenerator:
                 return cleaned_response
 
             except Exception as e:
-                print(f"Error generating explanation with
+                print(f"Error generating explanation with QwQ-32B: {str(e)}")
                 # Fall back to template-based explanation
                 return self._generate_template_explanation(score, semantic_score, keyword_score, skills)
         else:
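Note: after this change ExplanationGenerator is constructed with only the model name and either uses the module-level QwQ-32B weights or falls back to _generate_template_explanation. A minimal usage sketch under that assumption (the argument values are illustrative, not taken from this repository):

from explanation_generator import ExplanationGenerator

generator = ExplanationGenerator()  # picks up the globally pre-loaded model/tokenizer, if any
explanation = generator.generate_explanation(
    resume_text="Eight years of Python, PyTorch and information-retrieval work...",
    job_description="Senior ML engineer to build a resume-search service",
    score=0.82,
    semantic_score=0.78,
    keyword_score=0.88,
    skills=["Python", "PyTorch", "FAISS"],
)
print(explanation)  # template-based text if QwQ-32B could not be loaded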
requirements.txt
CHANGED
@@ -17,4 +17,4 @@ tqdm==4.66.1
 huggingface-hub==0.25.0
 einops
 bitsandbytes>=0.41.0
-accelerate>=0.
+accelerate>=0.23.0