Spaces:

TeamTonic
/

Tonics-Yi-200K

Paused

Tonic committed on Nov 20, 2023

Commit

b2497fe

1 Parent(s): b630981

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,19 +14,28 @@ You can also use YI-200 by cloning this space. Simply click here: <a style="disp
 Join us : TeamTonic is always making cool demos! Join our active builder's community on Discord: [Discord](https://discord.gg/nXx5wbX9) On Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On Github: [Polytonic](https://github.com/tonic-ai) & contribute to [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
 """
 MAX_MAX_NEW_TOKENS = 160000
 DEFAULT_MAX_NEW_TOKENS = 20000
 MAX_INPUT_TOKEN_LENGTH = 160000
-os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:56'
 # Load the model and tokenizer using transformers
-model = AutoModelForCausalLM.from_pretrained("01-ai/Yi-6B-200K", device_map="auto", torch_dtype="auto", trust_remote_code=True)
-tokenizer = YiTokenizer(vocab_file="./tokenizer.model")
 # model = BetterTransformer.transform(model)
-def run(message, chat_history, max_new_tokens=20000, temperature=1.0, top_p=0.9, top_k=0.9):
     prompt = get_prompt(message, chat_history)
     # Encode the prompt to tensor

 Join us : TeamTonic is always making cool demos! Join our active builder's community on Discord: [Discord](https://discord.gg/nXx5wbX9) On Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On Github: [Polytonic](https://github.com/tonic-ai) & contribute to [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
 """
+os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:126'
 MAX_MAX_NEW_TOKENS = 160000
 DEFAULT_MAX_NEW_TOKENS = 20000
 MAX_INPUT_TOKEN_LENGTH = 160000
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model_name = "01-ai/Yi-6B-200K"
+tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
+model = transformers.AutoModelForCausalLM.from_pretrained(model_name,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+    load_in_4bit=True                          # For efficient inference, if supported by the GPU card
+)
 # Load the model and tokenizer using transformers
+# model = AutoModelForCausalLM.from_pretrained("01-ai/Yi-6B-200K", trust_remote_code=True)
+# tokenizer = YiTokenizer(vocab_file="./tokenizer.model")
 # model = BetterTransformer.transform(model)
+def run(message, chat_history, max_new_tokens=20000, temperature=1.5, top_p=0.9, top_k=900):
     prompt = get_prompt(message, chat_history)
     # Encode the prompt to tensor