oleksandrfluxon
committed on
Commit
•
c5d461f
1
Parent(s):
9b2ab34
Update handler.py
Browse files
- handler.py +1 -1
handler.py
CHANGED
@@ -9,7 +9,7 @@ class EndpointHandler:
|
|
9 |
# load model and tokenizer from path
|
10 |
self.tokenizer = AutoTokenizer.from_pretrained(path) # AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
|
11 |
|
12 |
-
config = AutoConfig.from_pretrained(
|
13 |
config.attn_config['attn_impl'] = 'triton'
|
14 |
config.init_device = 'cuda:0' # For fast initialization directly on GPU!
|
15 |
config.max_seq_len = 4096 # (input + output) tokens can now be up to 4096
|
|
|
9 |
# load model and tokenizer from path
|
10 |
self.tokenizer = AutoTokenizer.from_pretrained(path) # AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
|
11 |
|
12 |
+
config = AutoConfig.from_pretrained(path, trust_remote_code=True)
|
13 |
config.attn_config['attn_impl'] = 'triton'
|
14 |
config.init_device = 'cuda:0' # For fast initialization directly on GPU!
|
15 |
config.max_seq_len = 4096 # (input + output) tokens can now be up to 4096
|