oleksandrfluxon
committed on
Commit
•
377986e
1
Parent(s):
76a36d6
Update handler.py
Browse files - handler.py +2 -2
handler.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import torch
|
2 |
|
3 |
from typing import Any, Dict
|
4 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
5 |
|
6 |
|
7 |
class EndpointHandler:
|
@@ -9,7 +9,7 @@ class EndpointHandler:
|
|
9 |
# load model and tokenizer from path
|
10 |
self.tokenizer = AutoTokenizer.from_pretrained(path) # AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
|
11 |
|
12 |
-
config =
|
13 |
config.attn_config['attn_impl'] = 'triton'
|
14 |
config.init_device = 'cuda:0' # For fast initialization directly on GPU!
|
15 |
config.max_seq_len = 4096 # (input + output) tokens can now be up to 4096
|
|
|
1 |
import torch
|
2 |
|
3 |
from typing import Any, Dict
|
4 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
|
5 |
|
6 |
|
7 |
class EndpointHandler:
|
|
|
9 |
# load model and tokenizer from path
|
10 |
self.tokenizer = AutoTokenizer.from_pretrained(path) # AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
|
11 |
|
12 |
+
config = AutoConfig.from_pretrained(name, trust_remote_code=True)
|
13 |
config.attn_config['attn_impl'] = 'triton'
|
14 |
config.init_device = 'cuda:0' # For fast initialization directly on GPU!
|
15 |
config.max_seq_len = 4096 # (input + output) tokens can now be up to 4096
|