MS-YUN committed · be2dcb9
Parent: 1a9352f

Add application file6
app.py CHANGED
@@ -1,13 +1,25 @@
-
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

-
-model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
-                                             device_map="auto",
-                                             trust_remote_code=False,
-                                             revision="main")
+device = "cuda" if torch.cuda.is_available() else "cpu"

-
+model_name ="NousResearch/Llama-2-7b-chat-hf"

+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16,
+)
+
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.bfloat16,
+    quantization_config=bnb_config)
+model.config.use_cache = False
+
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+tokenizer.pad_token = tokenizer.eos_token
+tokenizer.padding_side = "right"  # Fix weird overflow issue with fp16 training

 def predict(message, chatbot, temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):

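For context on how this newly added setup might be driven, below is a minimal sketch of a chat handler built on the `model` and `tokenizer` configured in this commit. The function name `generate_reply`, the token streaming via `TextIteratorStreamer`, and the Llama-2 `[INST]` prompt format are assumptions for illustration; the Space's actual `predict` body is not shown in this diff.

```python
# Hypothetical sketch, not the Space's predict implementation.
# Assumes `model` and `tokenizer` are the 4-bit objects set up above.
from threading import Thread
from transformers import TextIteratorStreamer

def generate_reply(message, history, temperature=0.9, max_new_tokens=256,
                   top_p=0.6, repetition_penalty=1.0):
    # Assemble a Llama-2-chat style prompt from the conversation so far
    # (the prompt format is an assumption, not taken from this commit).
    prompt = ""
    for user_turn, bot_turn in history:
        prompt += f"<s>[INST] {user_turn} [/INST] {bot_turn} </s>"
    prompt += f"<s>[INST] {message} [/INST]"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Stream tokens so a chat UI can render the reply incrementally.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        max_new_tokens=max_new_tokens,
    )
    Thread(target=model.generate, kwargs=generation_kwargs).start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
```

Loading the 7B checkpoint as 4-bit NF4 with bfloat16 compute is what lets it fit on the small GPUs typically available to a Space, at a modest quality cost compared with full precision.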