arya-ai-model committed
Commit ad9f174 · 1 Parent(s): 8374669

updated model.py

Files changed (1)
model.py +17 -5
model.py CHANGED
@@ -13,16 +13,28 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
 
 # Ensure the tokenizer has a pad token set
 if tokenizer.pad_token is None:
-    tokenizer.pad_token = tokenizer.eos_token
+    tokenizer.pad_token = tokenizer.eos_token  # Set pad_token to eos_token
 
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     token=HF_TOKEN,
-    torch_dtype=torch.float32,  # Change to float32 for CPU compatibility
+    torch_dtype=torch.float32,  # Use float32 for CPU
     trust_remote_code=True
-).to(device)  # Explicitly move to CPU
+).to(device)  # Move model explicitly to CPU
 
 def generate_code(prompt: str, max_tokens: int = 256):
-    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)
-    output = model.generate(**inputs, max_new_tokens=max_tokens, pad_token_id=tokenizer.pad_token_id)
+    inputs = tokenizer(
+        prompt,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,  # Allow truncation
+        max_length=1024  # Set a maximum length explicitly
+    ).to(device)
+
+    output = model.generate(
+        **inputs,
+        max_new_tokens=max_tokens,
+        pad_token_id=tokenizer.pad_token_id
+    )
+
     return tokenizer.decode(output[0], skip_special_tokens=True)
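
For context, here is a minimal sketch of what model.py might look like after this commit. The hunk starts at line 13, so the file's imports and the MODEL_NAME, HF_TOKEN, and device definitions are not part of the diff; the versions below are assumptions reconstructed from the hunk's context line and the diff's own comments, with placeholder values.

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# The next three definitions are assumptions, not shown in the diff:
# the context line uses MODEL_NAME and HF_TOKEN, and the comments say
# the model is moved explicitly to CPU.
MODEL_NAME = "your-org/your-model"      # hypothetical placeholder model id
HF_TOKEN = os.environ.get("HF_TOKEN")   # assumed to be read from the environment
device = torch.device("cpu")            # "Move model explicitly to CPU"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)

# Ensure the tokenizer has a pad token set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Set pad_token to eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN,
    torch_dtype=torch.float32,  # Use float32 for CPU
    trust_remote_code=True
).to(device)  # Move model explicitly to CPU

def generate_code(prompt: str, max_tokens: int = 256):
    # Tokenize with padding and truncation; max_length=1024 caps the prompt length
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=1024
    ).to(device)

    output = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        pad_token_id=tokenizer.pad_token_id
    )

    # output[0] contains the prompt tokens followed by the completion,
    # so the returned string includes the original prompt
    return tokenizer.decode(output[0], skip_special_tokens=True)

Called as, say, generate_code("def fib(n):"), the function returns the prompt plus up to 256 newly generated tokens. Passing pad_token_id explicitly to generate() also avoids the warning transformers emits when a model has no pad token configured.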