"""
Explanation Generator Module

This module handles the generation of explanations for resume rankings
using the QwQ-32B model from Hugging Face.
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import re

class ExplanationGenerator:
    def __init__(self, model_name="Qwen/QwQ-32B", load_immediately=True):
        """Initialize the explanation generator with the specified model"""
        self.model_name = model_name
        self.model = None
        self.tokenizer = None
        self.text_generation_pipeline = None
        self.initialized = False
        
        # Load model immediately if requested
        if load_immediately:
            self.load_model()
        
    def load_model(self):
        """Load the model and tokenizer if not already loaded"""
        if not self.initialized:
            try:
                print(f"Loading explanation model: {self.model_name}")
                
                # Set up 4-bit quantization configuration
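                # NF4 4-bit weights with double quantization cut memory use to roughly a
                # quarter of fp16, which is what makes loading a 32B model practical here;
                # bfloat16 is used for compute on the de-quantized weights.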
                quantization_config = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_compute_dtype=torch.bfloat16,
                    bnb_4bit_use_double_quant=True,
                    bnb_4bit_quant_type="nf4"
                )
                
                # Try using pipeline API for more efficient loading in Spaces
                try:
                    print("Attempting to load model with pipeline API...")
                    self.text_generation_pipeline = pipeline(
                        "text-generation",
                        model=self.model_name,
                        torch_dtype=torch.bfloat16,
                        device_map="auto",
                        trust_remote_code=True,
                        # Model-loading options must go through model_kwargs so they reach
                        # AutoModelForCausalLM.from_pretrained()
                        model_kwargs={
                            "quantization_config": quantization_config,
                            "attn_implementation": "eager"  # uses less memory
                        }
                    )
                    print(f"Successfully loaded {self.model_name} with pipeline API")
                    # Pipeline includes both model and tokenizer
                    self.tokenizer = self.text_generation_pipeline.tokenizer
                    self.model = self.text_generation_pipeline.model
                    self.initialized = True
                    return
                except Exception as pipe_e:
                    print(f"Error loading with pipeline API: {str(pipe_e)}")
                    print("Falling back to direct model loading...")
                
                # Load tokenizer
                self.tokenizer = AutoTokenizer.from_pretrained(
                    self.model_name, 
                    trust_remote_code=True
                )
                
                # Try to load model with 4-bit quantization
                try:
                    self.model = AutoModelForCausalLM.from_pretrained(
                        self.model_name,
                        device_map="auto",
                        trust_remote_code=True,
                        quantization_config=quantization_config
                    )
                    print(f"Successfully loaded {self.model_name} with 4-bit quantization")
                except Exception as quant_e:
                    print(f"Error loading with 4-bit quantization: {str(quant_e)}")
                    print("Trying to load model with 8-bit quantization...")
                    
                    # Fall back to 8-bit or CPU if 4-bit fails
                    if torch.cuda.is_available():
                        self.model = AutoModelForCausalLM.from_pretrained(
                            self.model_name,
                            device_map="auto",
                            trust_remote_code=True,
                            quantization_config=BitsAndBytesConfig(load_in_8bit=True)
                        )
                        print(f"Successfully loaded {self.model_name} with 8-bit quantization")
                    else:
                        # Fall back to template-based solution if no GPU
                        self.model = None
                        print(f"Warning: Loading {self.model_name} on CPU is not recommended. Using template-based explanations instead.")
                
                self.initialized = True
            except Exception as e:
                print(f"Error loading explanation model: {str(e)}")
                print("Falling back to template-based explanations.")
                self.model = None
                self.initialized = True
    
    def generate_explanation(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
        """Generate explanation for why a resume was ranked highly"""
        # Check if we need to load the model
        if not self.initialized:
            self.load_model()
        
        # If the model is loaded and available, use it for generating explanations
        if self.model is not None:
            try:
                # Prepare prompt for QwQ-32B
                prompt = self._create_prompt(resume_text, job_description, score, semantic_score, keyword_score, skills)
                
                # Use pipeline API if available
                if self.text_generation_pipeline is not None:
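                    # Sampling settings below (temperature 0.6, top_p 0.95, moderate top_k)
                    # are in the range the QwQ-32B model card suggests for reasoning output;
                    # adjust them if a different model is configured.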
                    outputs = self.text_generation_pipeline(
                        prompt,
                        max_new_tokens=300,
                        temperature=0.6,
                        top_p=0.95,
                        top_k=30,
                        do_sample=True,
                        return_full_text=False
                    )
                    response = outputs[0]['generated_text']
                    
                else:
                    # Create messages for chat format
                    messages = [
                        {"role": "user", "content": prompt}
                    ]
                    
                    # Apply chat template
                    text = self.tokenizer.apply_chat_template(
                        messages,
                        tokenize=False,
                        add_generation_prompt=True
                    )
                    
                    # Tokenize
                    inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
                    
                    # Generate response
                    output_ids = self.model.generate(
                        **inputs,
                        max_new_tokens=300,
                        do_sample=True,
                        temperature=0.6,
                        top_p=0.95,
                        top_k=30
                    )
                    
                    # Decode the response
                    response = self.tokenizer.decode(output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
                
                # Clean up the response
                cleaned_response = self._clean_response(response)
                
                return cleaned_response
                
            except Exception as e:
                print(f"Error generating explanation with model: {str(e)}")
                # Fall back to template-based explanation
                return self._generate_template_explanation(score, semantic_score, keyword_score, skills)
        else:
            # Use template-based explanation if model is not available
            return self._generate_template_explanation(score, semantic_score, keyword_score, skills)
    
    def _create_prompt(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
        """Create a prompt for the explanation generation"""
        # Use only the first 1000 characters of the resume to keep prompt size manageable
        resume_excerpt = (resume_text[:1000] + "...") if len(resume_text) > 1000 else resume_text
        
        prompt = f"""You are an AI assistant helping a recruiter understand why a candidate's resume was matched with a job posting.

The resume has been assigned the following scores:
- Overall Match Score: {score:.2f} out of 1.0
- Semantic Relevance Score: {semantic_score:.2f} out of 1.0
- Keyword Match Score: {keyword_score:.2f} out of 1.0

The job description is:
```
{job_description}
```

Based on analysis, the resume contains these skills relevant to the job: {', '.join(skills)}

Resume excerpt:
```
{resume_excerpt}
```

Please provide a short explanation (3-5 sentences) of why this resume received these scores and how well it matches the job requirements. Focus on the relationship between the candidate's experience and the job requirements."""

        return prompt
    
    def _clean_response(self, response):
        """Clean the response from the model"""
        # Remove any thinking/internal reasoning tokens (QwQ may emit <think>...</think> blocks)
        response = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL).strip()
        
        # Limit to a reasonable length
        if len(response) > 500:
            sentences = response.split('.')
            shortened = '.'.join(sentences[:5]) + '.'
            return shortened
        
        return response
    
    def _generate_template_explanation(self, score, semantic_score, keyword_score, skills):
        """Generate a template-based explanation when the model is not available"""
        # Simple template-based explanation
        if score > 0.8:
            quality = "excellent"
        elif score > 0.6:
            quality = "good"
        elif score > 0.4:
            quality = "moderate"
        else:
            quality = "limited"
            
        explanation = f"This resume shows {quality} alignment with the job requirements, with an overall score of {score:.2f}. "
        
        if semantic_score > keyword_score:
            explanation += f"The candidate's experience demonstrates strong semantic relevance ({semantic_score:.2f}) to the position, though specific keyword matches ({keyword_score:.2f}) could be improved. "
        else:
            explanation += f"The resume contains many relevant keywords ({keyword_score:.2f}), but could benefit from better contextual alignment ({semantic_score:.2f}) with the job requirements. "
        
        if skills:
            if len(skills) > 3:
                explanation += f"Key skills identified include {', '.join(skills[:3])}, and {len(skills)-3} others that match the job requirements."
            else:
                explanation += f"Key skills identified include {', '.join(skills)}."
        else:
            explanation += "No specific skills were identified that directly match the requirements."
            
        return explanation
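

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original module):
# it shows how the class above might be wired into a ranking pipeline.
# All literal values below are placeholders; the real resume text, job
# description, scores, and skill list come from the rest of the application.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Defer model loading; generate_explanation() will load QwQ-32B on demand
    # and fall back to template-based explanations if loading fails.
    generator = ExplanationGenerator(load_immediately=False)

    explanation = generator.generate_explanation(
        resume_text="Senior Python developer with six years of NLP experience...",   # placeholder
        job_description="Machine learning engineer role requiring Python and NLP.",  # placeholder
        score=0.78,             # overall match score (placeholder)
        semantic_score=0.82,    # semantic relevance score (placeholder)
        keyword_score=0.71,     # keyword match score (placeholder)
        skills=["Python", "NLP", "PyTorch"],  # skills extracted elsewhere (placeholder)
    )
    print(explanation)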