"""
Explanation Generator Module

This module handles the generation of explanations for resume rankings
using the QwQ-32B model from Hugging Face.
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import os
import re

# Load QwQ model at initialization time
print("Loading Qwen/QwQ-32B model with 4-bit quantization...")
QWQ_MODEL_NAME = "Qwen/QwQ-32B"

try:
    # Configure 4-bit quantization for better performance
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )
    
    # Load QwQ model and tokenizer
    global_qwq_tokenizer = AutoTokenizer.from_pretrained(QWQ_MODEL_NAME, trust_remote_code=True)
    global_qwq_model = None
    
    # Check if we have enough resources to load the model
    if torch.cuda.is_available():
        gpu_memory = torch.cuda.get_device_properties(0).total_memory
        if gpu_memory >= 16 * (1024**3):  # 16 GB (reduced thanks to quantization)
            global_qwq_model = AutoModelForCausalLM.from_pretrained(
                QWQ_MODEL_NAME,
                quantization_config=quantization_config,
                device_map="auto",
                trust_remote_code=True,
                torch_dtype=torch.float16
            )
            print("Successfully loaded QwQ-32B with 4-bit quantization")
        else:
            print("Not enough GPU memory, using template-based explanations")
    else:
        print("CUDA not available, using template-based explanations")
    
except Exception as e:
    print(f"Error loading QwQ-32B model: {str(e)}")
    print("Falling back to template-based explanations.")
    global_qwq_tokenizer = None
    global_qwq_model = None

class ExplanationGenerator:
    def __init__(self, model_name="Qwen/QwQ-32B"):
        """Initialize the explanation generator with the specified model"""
        self.model_name = model_name
        # Use globally pre-loaded model and tokenizer
        self.model = global_qwq_model
        self.tokenizer = global_qwq_tokenizer
        self.initialized = True
        
    def generate_explanation(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
        """Generate explanation for why a resume was ranked highly"""
        # Use the model if it's available
        if self.model is not None and self.tokenizer is not None:
            try:
                # Prepare prompt for QwQ-32B
                prompt = self._create_prompt(resume_text, job_description, score, semantic_score, keyword_score, skills)
                
                # Create messages for chat format
                messages = [
                    {"role": "user", "content": prompt}
                ]
                
                # Apply chat template
                text = self.tokenizer.apply_chat_template(
                    messages,
                    tokenize=False,
                    add_generation_prompt=True
                )
                
                # Tokenize
                inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
                
                # Generate response
                output_ids = self.model.generate(
                    **inputs,
                    max_new_tokens=300,
                    temperature=0.6,
                    top_p=0.95,
                    top_k=30
                )
                
                # Decode the response
                response = self.tokenizer.decode(output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
                
                # Clean up the response
                cleaned_response = self._clean_response(response)
                
                return cleaned_response
                
            except Exception as e:
                print(f"Error generating explanation with QwQ-32B: {str(e)}")
                # Fall back to template-based explanation
                return self._generate_template_explanation(score, semantic_score, keyword_score, skills)
        else:
            # Use template-based explanation if model is not available
            return self._generate_template_explanation(score, semantic_score, keyword_score, skills)
    
    def _create_prompt(self, resume_text, job_description, score, semantic_score, keyword_score, skills):
        """Create a prompt for the explanation generation"""
        # Use only the first 1000 characters of the resume to keep prompt size manageable
        resume_excerpt = resume_text[:1000] + "..." if len(resume_text) > 1000 else resume_text
        
        prompt = f"""You are an AI assistant helping a recruiter understand why a candidate's resume was matched with a job posting.

The resume has been assigned the following scores:
- Overall Match Score: {score:.2f} out of 1.0
- Semantic Relevance Score: {semantic_score:.2f} out of 1.0
- Keyword Match Score: {keyword_score:.2f} out of 1.0

The job description is:
```
{job_description}
```

Based on analysis, the resume contains these skills relevant to the job: {', '.join(skills)}

Resume excerpt:
```
{resume_excerpt}
```

Please provide a short explanation (3-5 sentences) of why this resume received these scores and how well it matches the job requirements. Focus on the relationship between the candidate's experience and the job requirements."""

        return prompt
    
    def _clean_response(self, response):
        """Clean the response from the model"""
        # Remove any thinking or internal processing tokens
        response = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL)
        
        # Limit to a reasonable length
        if len(response) > 500:
            sentences = response.split('.')
            shortened = '.'.join(sentences[:5]) + '.'
            return shortened
        
        return response
    
    def _generate_template_explanation(self, score, semantic_score, keyword_score, skills):
        """Generate a template-based explanation when the model is not available"""
        # Simple template-based explanation
        if score > 0.8:
            quality = "excellent"
        elif score > 0.6:
            quality = "good"
        elif score > 0.4:
            quality = "moderate"
        else:
            quality = "limited"
            
        explanation = f"This resume shows {quality} alignment with the job requirements, with an overall score of {score:.2f}. "
        
        if semantic_score > keyword_score:
            explanation += f"The candidate's experience demonstrates strong semantic relevance ({semantic_score:.2f}) to the position, though specific keyword matches ({keyword_score:.2f}) could be improved. "
        else:
            explanation += f"The resume contains many relevant keywords ({keyword_score:.2f}), but could benefit from better contextual alignment ({semantic_score:.2f}) with the job requirements. "
        
        if skills:
            if len(skills) > 3:
                explanation += f"Key skills identified include {', '.join(skills[:3])}, and {len(skills)-3} others that match the job requirements."
            else:
                explanation += f"Key skills identified include {', '.join(skills)}."
        else:
            explanation += "No specific skills were identified that directly match the requirements."
            
        return explanation