from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


class CodeAssistant:
    def __init__(self):
        self.model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )

    def generate_response(self, query):
        # Instruct-tuned models expect the chat template, not raw text.
        messages = [{"role": "user", "content": query}]
        prompt = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=2048,  # max_length would count the prompt against the budget
            temperature=0.7,
            top_p=0.95,
            do_sample=True,
        )
        # Decode only the newly generated tokens, not the echoed prompt.
        generated = outputs[0][inputs["input_ids"].shape[1]:]
        return self.tokenizer.decode(generated, skip_special_tokens=True)
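

# A minimal usage sketch (assumes enough GPU memory to host the 32B checkpoint
# and that it has been downloaded; the query string here is purely illustrative).
if __name__ == "__main__":
    assistant = CodeAssistant()
    print(assistant.generate_response("Write a Python function that reverses a string."))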