Tonic committed
Commit b2497fe · 1 Parent(s): b630981

Update app.py

Files changed (1)
  1. app.py +13 -4
app.py CHANGED
@@ -14,19 +14,28 @@ You can also use YI-200 by cloning this space. Simply click here: <a style="disp
 Join us : TeamTonic is always making cool demos! Join our active builder's community on Discord: [Discord](https://discord.gg/nXx5wbX9) On Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On Github: [Polytonic](https://github.com/tonic-ai) & contribute to [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
 """
 
+os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:126'
+
 MAX_MAX_NEW_TOKENS = 160000
 DEFAULT_MAX_NEW_TOKENS = 20000
 MAX_INPUT_TOKEN_LENGTH = 160000
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
+model_name = "01-ai/Yi-6B-200K"
 
-os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:56'
+tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
+model = transformers.AutoModelForCausalLM.from_pretrained(model_name,
+                                                          device_map="auto",
+                                                          torch_dtype=torch.bfloat16,
+                                                          load_in_4bit=True  # For efficient inference, if supported by the GPU card
+                                                          )
 
 # Load the model and tokenizer using transformers
-model = AutoModelForCausalLM.from_pretrained("01-ai/Yi-6B-200K", device_map="auto", torch_dtype="auto", trust_remote_code=True)
-tokenizer = YiTokenizer(vocab_file="./tokenizer.model")
+# model = AutoModelForCausalLM.from_pretrained("01-ai/Yi-6B-200K", trust_remote_code=True)
+# tokenizer = YiTokenizer(vocab_file="./tokenizer.model")
 # model = BetterTransformer.transform(model)
 
-def run(message, chat_history, max_new_tokens=20000, temperature=1.0, top_p=0.9, top_k=0.9):
+def run(message, chat_history, max_new_tokens=20000, temperature=1.5, top_p=0.9, top_k=900):
     prompt = get_prompt(message, chat_history)
 
     # Encode the prompt to tensor
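
For reference, the 4-bit loading path this commit moves to can also be spelled out with an explicit BitsAndBytesConfig instead of the load_in_4bit=True shortcut, and the commit's bump of max_split_size_mb from 56 to 126 tunes a PyTorch CUDA caching-allocator knob that bounds which cached blocks may be split, commonly adjusted to work around fragmentation OOMs. The sketch below is illustrative and not part of the commit: it assumes transformers, accelerate, and bitsandbytes are installed and a CUDA GPU is available, reuses the repo id from the diff, and folds in the new run() sampling defaults (temperature=1.5, top_p=0.9, top_k=900); the prompt string is a placeholder.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "01-ai/Yi-6B-200K"

# Explicit form of the diff's load_in_4bit=True shortcut: quantize weights to
# 4-bit with bitsandbytes and run compute in bfloat16.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # let accelerate place weights on the available devices
    quantization_config=quant_config,
)

# Sample with the new defaults the commit gives run():
# temperature=1.5, top_p=0.9, top_k=900.
inputs = tokenizer("Hello, Yi!", return_tensors="pt").to(model.device)
output_ids = model.generate(
    inputs.input_ids,
    max_new_tokens=64,
    do_sample=True,
    temperature=1.5,
    top_p=0.9,
    top_k=900,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Note that with device_map="auto" the placement is handled by accelerate, so the diff's separate device variable is not needed in this sketch.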