Commit
·
5ee8ed9
1
Parent(s):
e234444
Update handler.py
Browse files
Commit message: increasing token limit
- handler.py +2 -1
handler.py
CHANGED
@@ -10,7 +10,8 @@ dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.
|
|
10 |
class EndpointHandler:
|
11 |
def __init__(self, path=""):
|
12 |
# load the model
|
13 |
-
tokenizer = AutoTokenizer.from_pretrained(path)
|
|
|
14 |
model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", torch_dtype=dtype, trust_remote_code=True)
|
15 |
model.to('cuda:0')
|
16 |
|
|
|
10 |
class EndpointHandler:
|
11 |
def __init__(self, path=""):
|
12 |
# load the model
|
13 |
+
tokenizer = AutoTokenizer.from_pretrained(path)
|
14 |
+
tokenizer.max_seq_len = 8192
|
15 |
model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", torch_dtype=dtype, trust_remote_code=True)
|
16 |
model.to('cuda:0')
|
17 |
|