"""
Explanation Generator Module
This module handles the generation of explanations for resume rankings
using the QwQ-32B model from Hugging Face.
"""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import os
import re

class ExplanationGenerator:
    def __init__(self, model_name="Qwen/QwQ-32B", load_immediately=True):
        """Initialize the explanation generator with the specified model"""
        self.model_name = model_name
        self.model = None
        self.tokenizer = None
        self.text_generation_pipeline = None
        self.initialized = False

        # Load model immediately if requested
        if load_immediately:
            self.load_model()

    def load_model(self):
        """Load the model and tokenizer if not already loaded"""
        if not self.initialized:
            try:
                print(f"Loading explanation model: {self.model_name}")

                # Set up 4-bit quantization configuration
                quantization_config = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_compute_dtype=torch.bfloat16,
                    bnb_4bit_use_double_quant=True,
                    bnb_4bit_quant_type="nf4"
                )

                # Try using the pipeline API for more efficient loading in Spaces
                try:
                    print("Attempting to load model with pipeline API...")
                    self.text_generation_pipeline = pipeline(
                        "text-generation",
                        model=self.model_name,
                        torch_dtype=torch.bfloat16,
                        device_map="auto",
                        trust_remote_code=True,
                        # Loading options must go through model_kwargs so they reach
                        # AutoModelForCausalLM.from_pretrained rather than the pipeline itself
                        model_kwargs={
                            "quantization_config": quantization_config,
                            "attn_implementation": "eager"  # Uses less memory
                        }
                    )
                    print(f"Successfully loaded {self.model_name} with pipeline API")

                    # The pipeline bundles both the model and the tokenizer
                    self.tokenizer = self.text_generation_pipeline.tokenizer
                    self.model = self.text_generation_pipeline.model
                    self.initialized = True
                    return
                except Exception as pipe_e:
                    print(f"Error loading with pipeline API: {str(pipe_e)}")
                    print("Falling back to direct model loading...")

                # Load tokenizer
                self.tokenizer = AutoTokenizer.from_pretrained(
                    self.model_name,
                    trust_remote_code=True
                )

                # Try to load model with 4-bit quantization
                try:
                    self.model = AutoModelForCausalLM.from_pretrained(
                        self.model_name,
                        device_map="auto",
                        trust_remote_code=True,
                        quantization_config=quantization_config
                    )
                    print(f"Successfully loaded {self.model_name} with 4-bit quantization")
                except Exception as quant_e:
                    print(f"Error loading with 4-bit quantization: {str(quant_e)}")
                    print("Trying to load model with 8-bit quantization...")

                    # Fall back to 8-bit if a GPU is available, otherwise to templates
                    if torch.cuda.is_available():
                        self.model = AutoModelForCausalLM.from_pretrained(
                            self.model_name,
                            device_map="auto",
                            trust_remote_code=True,
                            load_in_8bit=True
                        )
                        print(f"Successfully loaded {self.model_name} with 8-bit quantization")
                    else:
                        # Fall back to template-based explanations if no GPU is available
                        self.model = None
                        print(f"Warning: Loading {self.model_name} on CPU is not recommended. Using template-based explanations instead.")

                self.initialized = True
            except Exception as e:
                print(f"Error loading explanation model: {str(e)}")
                print("Falling back to template-based explanations.")
                self.model = None
                self.initialized = True

    def generate_explanation(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
        """Generate explanation for why a resume was ranked highly"""
        # Load the model lazily if it has not been initialized yet
        if not self.initialized:
            self.load_model()

        # If the model is loaded and available, use it for generating explanations
        if self.model is not None:
            try:
                # Prepare prompt for QwQ-32B
                prompt = self._create_prompt(resume_text, job_description, score, semantic_score, keyword_score, skills)

                # Use pipeline API if available
                if self.text_generation_pipeline is not None:
                    outputs = self.text_generation_pipeline(
                        prompt,
                        max_new_tokens=300,
                        temperature=0.6,
                        top_p=0.95,
                        top_k=30,
                        do_sample=True,
                        return_full_text=False
                    )
                    response = outputs[0]['generated_text']
                else:
                    # Create messages for chat format
                    messages = [
                        {"role": "user", "content": prompt}
                    ]

                    # Apply chat template
                    text = self.tokenizer.apply_chat_template(
                        messages,
                        tokenize=False,
                        add_generation_prompt=True
                    )

                    # Tokenize
                    inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)

                    # Generate response (sampling enabled so temperature/top_p/top_k take effect)
                    output_ids = self.model.generate(
                        **inputs,
                        max_new_tokens=300,
                        temperature=0.6,
                        top_p=0.95,
                        top_k=30,
                        do_sample=True
                    )

                    # Decode only the newly generated tokens
                    response = self.tokenizer.decode(output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

                # Clean up the response
                cleaned_response = self._clean_response(response)
                return cleaned_response
            except Exception as e:
                print(f"Error generating explanation with model: {str(e)}")
                # Fall back to template-based explanation
                return self._generate_template_explanation(score, semantic_score, keyword_score, skills)
        else:
            # Use template-based explanation if model is not available
            return self._generate_template_explanation(score, semantic_score, keyword_score, skills)

    def _create_prompt(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
        """Create a prompt for the explanation generation"""
        # Use only the first 1000 characters of the resume to keep the prompt size manageable
        resume_excerpt = resume_text[:1000] + "..." if len(resume_text) > 1000 else resume_text

        prompt = f"""You are an AI assistant helping a recruiter understand why a candidate's resume was matched with a job posting.

The resume has been assigned the following scores:
- Overall Match Score: {score:.2f} out of 1.0
- Semantic Relevance Score: {semantic_score:.2f} out of 1.0
- Keyword Match Score: {keyword_score:.2f} out of 1.0

The job description is:
```
{job_description}
```

Based on analysis, the resume contains these skills relevant to the job: {', '.join(skills)}

Resume excerpt:
```
{resume_excerpt}
```

Please provide a short explanation (3-5 sentences) of why this resume received these scores and how well it matches the job requirements. Focus on the relationship between the candidate's experience and the job requirements."""
        return prompt

    def _clean_response(self, response):
        """Clean the response from the model"""
        # Remove any thinking or internal processing tokens
        response = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL)

        # Limit to a reasonable length
        if len(response) > 500:
            sentences = response.split('.')
            shortened = '.'.join(sentences[:5]) + '.'
            return shortened
        return response

    def _generate_template_explanation(self, score, semantic_score, keyword_score, skills):
        """Generate a template-based explanation when the model is not available"""
        # Simple template-based explanation
        if score > 0.8:
            quality = "excellent"
        elif score > 0.6:
            quality = "good"
        elif score > 0.4:
            quality = "moderate"
        else:
            quality = "limited"

        explanation = f"This resume shows {quality} alignment with the job requirements, with an overall score of {score:.2f}. "

        if semantic_score > keyword_score:
            explanation += f"The candidate's experience demonstrates strong semantic relevance ({semantic_score:.2f}) to the position, though specific keyword matches ({keyword_score:.2f}) could be improved. "
        else:
            explanation += f"The resume contains many relevant keywords ({keyword_score:.2f}), but could benefit from better contextual alignment ({semantic_score:.2f}) with the job requirements. "

        if skills:
            if len(skills) > 3:
                explanation += f"Key skills identified include {', '.join(skills[:3])}, and {len(skills)-3} others that match the job requirements."
            else:
                explanation += f"Key skills identified include {', '.join(skills)}."
        else:
            explanation += "No specific skills were identified that directly match the requirements."

        return explanation
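

# The sketch below is not part of the original module; it is a minimal, hypothetical usage
# example showing how ExplanationGenerator might be exercised locally. The resume text, job
# description, scores, and skills list are made-up sample values, and load_immediately=False
# plus a manually set `initialized` flag keep the (large) QwQ-32B model from being downloaded,
# so only the template-based fallback path is demonstrated.
if __name__ == "__main__":
    generator = ExplanationGenerator(load_immediately=False)

    sample_resume = (
        "Experienced software engineer with five years of Python development, "
        "REST API design, and cloud deployment on AWS."
    )
    sample_job = (
        "Looking for a backend engineer proficient in Python, API development, "
        "and cloud infrastructure."
    )

    # Leave the model unloaded (self.model is None) so the template-based branch runs.
    generator.initialized = True

    explanation = generator.generate_explanation(
        resume_text=sample_resume,
        job_description=sample_job,
        score=0.72,
        semantic_score=0.78,
        keyword_score=0.65,
        skills=["Python", "REST APIs", "AWS", "Docker"],
    )
    print(explanation)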