MS-YUN committed on
Commit
be2dcb9
·
1 Parent(s): 1a9352f

Add application file6

Browse files
Files changed (1) hide show
  1. app.py +19 -7
app.py CHANGED
@@ -1,13 +1,25 @@
1
- from transformers import AutoModelForCausalLM, AutoTokenizer
 
2
 
3
- model_name_or_path = "TheBloke/Llama-2-7b-Chat-GPTQ"
4
- model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
5
- device_map="auto",
6
- trust_remote_code=False,
7
- revision="main")
8
 
9
- tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  def predict(message, chatbot, temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):
13
 
 
1
+ import torch
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
3
 
4
+ device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
 
5
 
6
+ model_name ="NousResearch/Llama-2-7b-chat-hf"
7
 
8
+ bnb_config = BitsAndBytesConfig(
9
+ load_in_4bit=True,
10
+ bnb_4bit_quant_type="nf4",
11
+ bnb_4bit_compute_dtype=torch.bfloat16,
12
+ )
13
+
14
+ model = AutoModelForCausalLM.from_pretrained(
15
+ model_name,
16
+ torch_dtype=torch.bfloat16,
17
+ quantization_config=bnb_config)
18
+ model.config.use_cache = False
19
+
20
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
21
+ tokenizer.pad_token = tokenizer.eos_token
22
+ tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training
23
 
24
  def predict(message, chatbot, temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):
25