"""
Explanation Generator Module

This module handles the generation of explanations for resume rankings
using the QwQ-32B model from Hugging Face.
"""

import re

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Attempt to load the QwQ model once at import time
print("Loading Qwen/QwQ-32B model with 4-bit quantization...")
QWQ_MODEL_NAME = "Qwen/QwQ-32B"

try:
    # Configure 4-bit quantization for better performance
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )
    
    # Load QwQ model and tokenizer
    global_qwq_tokenizer = AutoTokenizer.from_pretrained(QWQ_MODEL_NAME, trust_remote_code=True)
    global_qwq_model = None
    
    # Check if we have enough resources to load the model
    if torch.cuda.is_available():
        gpu_memory = torch.cuda.get_device_properties(0).total_memory
        if gpu_memory >= 16 * (1024**3):  # 16 GB (reduced thanks to quantization)
            global_qwq_model = AutoModelForCausalLM.from_pretrained(
                QWQ_MODEL_NAME,
                quantization_config=quantization_config,
                device_map="auto",
                trust_remote_code=True,
                torch_dtype=torch.float16
            )
            print("Successfully loaded QwQ-32B with 4-bit quantization")
        else:
            print("Not enough GPU memory, using template-based explanations")
    else:
        print("CUDA not available, using template-based explanations")
    
except Exception as e:
    print(f"Error loading QwQ-32B model: {str(e)}")
    print("Falling back to template-based explanations.")
    global_qwq_tokenizer = None
    global_qwq_model = None

class ExplanationGenerator:
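    """Generates ranking explanations with QwQ-32B, with a template-based fallback."""
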
    def __init__(self, model_name="Qwen/QwQ-32B"):
        """Initialize the explanation generator with the specified model"""
        self.model_name = model_name
        # Use globally pre-loaded model and tokenizer
        self.model = global_qwq_model
        self.tokenizer = global_qwq_tokenizer
        self.initialized = True
        
    def generate_explanation(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
        """Generate explanation for why a resume was ranked highly"""
        # Use the model if it's available
        if self.model is not None and self.tokenizer is not None:
            try:
                # Prepare prompt for QwQ-32B
                prompt = self._create_prompt(resume_text, job_description, score, semantic_score, keyword_score, skills)
                
                # Create messages for chat format
                messages = [
                    {"role": "user", "content": prompt}
                ]
                
                # Apply chat template
                text = self.tokenizer.apply_chat_template(
                    messages,
                    tokenize=False,
                    add_generation_prompt=True
                )
                
                # Tokenize
                inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
                
                # Generate response (sampling parameters take effect only with
                # do_sample=True; otherwise transformers decodes greedily)
                output_ids = self.model.generate(
                    **inputs,
                    max_new_tokens=300,
                    do_sample=True,
                    temperature=0.6,
                    top_p=0.95,
                    top_k=30
                )
                
                # Decode the response
                response = self.tokenizer.decode(output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
                
                # Clean up the response
                cleaned_response = self._clean_response(response)
                
                return cleaned_response
                
            except Exception as e:
                print(f"Error generating explanation with QwQ-32B: {str(e)}")
                # Fall back to template-based explanation
                return self._generate_template_explanation(score, semantic_score, keyword_score, skills)
        else:
            # Use template-based explanation if model is not available
            return self._generate_template_explanation(score, semantic_score, keyword_score, skills)
    
    def _create_prompt(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
        """Create a prompt for the explanation generation"""
        # Use only the first 1000 characters of the resume to keep prompt size manageable
        resume_excerpt = resume_text[:1000] + "..." if len(resume_text) > 1000 else resume_text
        
        prompt = f"""You are an AI assistant helping a recruiter understand why a candidate's resume was matched with a job posting.

The resume has been assigned the following scores:
- Overall Match Score: {score:.2f} out of 1.0
- Semantic Relevance Score: {semantic_score:.2f} out of 1.0
- Keyword Match Score: {keyword_score:.2f} out of 1.0

The job description is:
```
{job_description}
```

Based on analysis, the resume contains these skills relevant to the job: {', '.join(skills)}

Resume excerpt:
```
{resume_excerpt}
```

Please provide a short explanation (3-5 sentences) of why this resume received these scores and how well it matches the job requirements. Focus on the relationship between the candidate's experience and the job requirements."""

        return prompt
    
    def _clean_response(self, response):
        """Clean the response from the model"""
        # Remove any thinking or internal processing tokens
        response = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL).strip()

        # Limit to a reasonable length (roughly five sentences)
        if len(response) > 500:
            sentences = response.split('.')
            shortened = '.'.join(sentences[:5]) + '.'
            return shortened

        return response
    
    def _generate_template_explanation(self, score, semantic_score, keyword_score, skills):
        """Generate a template-based explanation when the model is not available"""
        # Simple template-based explanation
        if score > 0.8:
            quality = "excellent"
        elif score > 0.6:
            quality = "good"
        elif score > 0.4:
            quality = "moderate"
        else:
            quality = "limited"
            
        explanation = f"This resume shows {quality} alignment with the job requirements, with an overall score of {score:.2f}. "
        
        if semantic_score > keyword_score:
            explanation += f"The candidate's experience demonstrates strong semantic relevance ({semantic_score:.2f}) to the position, though specific keyword matches ({keyword_score:.2f}) could be improved. "
        else:
            explanation += f"The resume contains many relevant keywords ({keyword_score:.2f}), but could benefit from better contextual alignment ({semantic_score:.2f}) with the job requirements. "
        
        if skills:
            if len(skills) > 3:
                explanation += f"Key skills identified include {', '.join(skills[:3])}, and {len(skills)-3} others that match the job requirements."
            else:
                explanation += f"Key skills identified include {', '.join(skills)}."
        else:
            explanation += "No specific skills were identified that directly match the requirements."
            
        return explanation
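

# Minimal usage sketch (assumption: the sample resume text, job description,
# scores, and skills below are illustrative placeholders; in the real pipeline
# they would come from the upstream ranking step).
if __name__ == "__main__":
    generator = ExplanationGenerator()
    explanation = generator.generate_explanation(
        resume_text="Senior Python developer with five years of NLP experience...",
        job_description="Hiring a machine learning engineer to build NLP pipelines.",
        score=0.78,
        semantic_score=0.82,
        keyword_score=0.71,
        skills=["Python", "PyTorch", "NLP"],
    )
    print(explanation)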