"""
Explanation Generator Module
This module handles the generation of explanations for resume rankings
using the QwQ-32B model from Hugging Face.
"""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import os
import re

# Load the QwQ model once at import time so every generator instance shares it
QWQ_MODEL_NAME = "Qwen/QwQ-32B"
print(f"Loading {QWQ_MODEL_NAME} model with 4-bit quantization...")
try:
    # Configure 4-bit quantization for better performance
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )
    # Load the QwQ tokenizer; the model itself is only loaded if resources allow
    global_qwq_tokenizer = AutoTokenizer.from_pretrained(QWQ_MODEL_NAME, trust_remote_code=True)
    global_qwq_model = None
    # Check whether we have enough resources to load the model
    if torch.cuda.is_available():
        gpu_memory = torch.cuda.get_device_properties(0).total_memory
        if gpu_memory >= 16 * (1024**3):  # 16 GB suffices thanks to 4-bit quantization
            global_qwq_model = AutoModelForCausalLM.from_pretrained(
                QWQ_MODEL_NAME,
                quantization_config=quantization_config,
                device_map="auto",
                trust_remote_code=True,
                torch_dtype=torch.float16
            )
            print("Successfully loaded QwQ-32B with 4-bit quantization")
        else:
            print("Not enough GPU memory, using template-based explanations")
    else:
        print("CUDA not available, using template-based explanations")
except Exception as e:
    print(f"Error loading QwQ-32B model: {str(e)}")
    print("Falling back to template-based explanations.")
    global_qwq_tokenizer = None
    global_qwq_model = None


class ExplanationGenerator:
    def __init__(self, model_name="Qwen/QwQ-32B"):
        """Initialize the explanation generator with the specified model."""
        self.model_name = model_name
        # Use the globally pre-loaded model and tokenizer
        self.model = global_qwq_model
        self.tokenizer = global_qwq_tokenizer
        self.initialized = True

    def generate_explanation(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
        """Generate an explanation for why a resume was ranked highly."""
        # Use the model if it's available
        if self.model is not None and self.tokenizer is not None:
            try:
                # Prepare the prompt for QwQ-32B
                prompt = self._create_prompt(resume_text, job_description, score, semantic_score, keyword_score, skills)
                # Wrap the prompt in the chat message format
                messages = [
                    {"role": "user", "content": prompt}
                ]
                # Apply the model's chat template
                text = self.tokenizer.apply_chat_template(
                    messages,
                    tokenize=False,
                    add_generation_prompt=True
                )
                # Tokenize and move the inputs to the model's device
                inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
                # Generate the response; do_sample=True is required for
                # temperature/top_p/top_k to take effect
                output_ids = self.model.generate(
                    **inputs,
                    max_new_tokens=300,
                    do_sample=True,
                    temperature=0.6,
                    top_p=0.95,
                    top_k=30
                )
                # Decode only the newly generated tokens, skipping the prompt
                response = self.tokenizer.decode(output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
                # Clean up the response
                cleaned_response = self._clean_response(response)
                return cleaned_response
            except Exception as e:
                print(f"Error generating explanation with QwQ-32B: {str(e)}")
                # Fall back to the template-based explanation
                return self._generate_template_explanation(score, semantic_score, keyword_score, skills)
        else:
            # Use the template-based explanation if the model is not available
            return self._generate_template_explanation(score, semantic_score, keyword_score, skills)

    def _create_prompt(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
        """Create the prompt for explanation generation."""
        # Use only the first 1,000 characters of the resume to keep the prompt size manageable
        resume_excerpt = (resume_text[:1000] + "...") if len(resume_text) > 1000 else resume_text
        prompt = f"""You are an AI assistant helping a recruiter understand why a candidate's resume was matched with a job posting.

The resume has been assigned the following scores:
- Overall Match Score: {score:.2f} out of 1.0
- Semantic Relevance Score: {semantic_score:.2f} out of 1.0
- Keyword Match Score: {keyword_score:.2f} out of 1.0

The job description is:
```
{job_description}
```

Based on analysis, the resume contains these skills relevant to the job: {', '.join(skills)}

Resume excerpt:
```
{resume_excerpt}
```

Please provide a short explanation (3-5 sentences) of why this resume received these scores and how well it matches the job requirements. Focus on the relationship between the candidate's experience and the job requirements."""
        return prompt

    def _clean_response(self, response):
        """Clean the raw model response."""
        # Remove any <think>...</think> reasoning blocks emitted by QwQ,
        # then strip leftover surrounding whitespace
        response = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL).strip()
        # Limit the explanation to a reasonable length (roughly five sentences)
        if len(response) > 500:
            sentences = response.split('.')
            shortened = '.'.join(sentences[:5]) + '.'
            return shortened
        return response

    def _generate_template_explanation(self, score, semantic_score, keyword_score, skills):
        """Generate a template-based explanation when the model is not available."""
        # Map the overall score to a qualitative label
        if score > 0.8:
            quality = "excellent"
        elif score > 0.6:
            quality = "good"
        elif score > 0.4:
            quality = "moderate"
        else:
            quality = "limited"
        explanation = f"This resume shows {quality} alignment with the job requirements, with an overall score of {score:.2f}. "
        if semantic_score > keyword_score:
            explanation += f"The candidate's experience demonstrates strong semantic relevance ({semantic_score:.2f}) to the position, though specific keyword matches ({keyword_score:.2f}) could be improved. "
        else:
            explanation += f"The resume contains many relevant keywords ({keyword_score:.2f}), but could benefit from better contextual alignment ({semantic_score:.2f}) with the job requirements. "
        if skills:
            if len(skills) > 3:
                explanation += f"Key skills identified include {', '.join(skills[:3])}, and {len(skills) - 3} others that match the job requirements."
            else:
                explanation += f"Key skills identified include {', '.join(skills)}."
        else:
            explanation += "No specific skills were identified that directly match the requirements."
        return explanation
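

if __name__ == "__main__":
    # Minimal usage sketch. The resume text, job description, scores, and
    # skills below are hypothetical sample values for illustration only.
    # On a machine without CUDA (or with under 16 GB of GPU memory) this
    # exercises the template-based fallback rather than QwQ-32B.
    generator = ExplanationGenerator()
    explanation = generator.generate_explanation(
        resume_text="Senior Python developer with five years of NLP experience...",
        job_description="Hiring a machine learning engineer with strong Python and NLP skills.",
        score=0.72,
        semantic_score=0.78,
        keyword_score=0.65,
        skills=["Python", "NLP", "PyTorch", "Docker"],
    )
    print(explanation)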