Update app.py
app.py CHANGED

@@ -10,23 +10,23 @@ For more information on `huggingface_hub` Inference API support, please check th
 """
 #client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 from llama_rope_scaled_monkey_patch import replace_llama_rope_with_scaled_rope
-
+replace_llama_rope_with_scaled_rope()
 base_model = "Neko-Institute-of-Science/LLaMA-65B-HF"
 lora_weights = "adapter_config.json"
 model = transformers.AutoModelForCausalLM.from_pretrained(
-
-
-
-
-
+    base_model,
+    torch_dtype=torch.float16,
+    cache_dir=cache_dir,
+    device_map="auto",
+)
 
 model = PeftModel.from_pretrained(
-
-
-
-
-
-
+    model,
+    lora_weights,
+    device_map="auto",
+    cache_dir=cache_dir,
+    torch_dtype=torch.float16,
+)
 tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast=False,cache_dir=cache_dir)
 tokenizer.pad_token = tokenizer.unk_token
 model.eval()
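Taken together, the added lines apply the scaled-RoPE monkey patch before the model is built, load the fp16 LLaMA-65B base model sharded across available devices, and then layer the LoRA adapter on top with PeftModel. Below is a minimal usage sketch for the resulting model/tokenizer pair, assuming torch is imported and cache_dir is defined above line 10 of app.py (outside this hunk); the prompt and generation settings are illustrative, not part of the commit. One caveat: PeftModel.from_pretrained normally expects the adapter directory or Hub repo id, so pointing lora_weights at adapter_config.json itself may need to become the adapter's parent directory instead.

import torch  # assumed to be imported above the hunk in app.py

# Hypothetical prompt; the real app presumably builds prompts elsewhere.
prompt = "### Instruction:\nSummarize this Space in one sentence.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# model.eval() was already called in the diff; also disable gradients for inference.
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64)

print(tokenizer.decode(output_ids[0], skip_special_tokens=True))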