Update app.py
app.py CHANGED
@@ -73,21 +73,20 @@ def loadModel():
     if model is None:
         from llama_rope_scaled_monkey_patch import replace_llama_rope_with_scaled_rope
         replace_llama_rope_with_scaled_rope()
-
+        model = transformers.AutoModelForCausalLM.from_pretrained(
             base_model,
             torch_dtype=torch.float16,
             cache_dir=cache_dir,
             device_map="auto",
         )
         model = PeftModel.from_pretrained(
-
+            model,
             lora_weights,
             device_map="auto",
             cache_dir='',
             torch_dtype=torch.float16,
             is_trainable=False,
         )
-        model.eval()
         tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast=False,cache_dir=cache_dir)
         tokenizer.pad_token = tokenizer.unk_token
         model = model.to("cuda")
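For context, the commit restores two lines that had been dropped from the model-loading path: the transformers.AutoModelForCausalLM.from_pretrained( call that opens the argument list at line 76, and the base model that PeftModel.from_pretrained expects as its first positional argument at line 83; it also drops the stray model.eval() call. Below is a minimal, self-contained sketch of the resulting loading path. The values given for base_model, lora_weights, and cache_dir are placeholders for illustration (app.py defines its own), and the project-specific rope-scaling monkey patch is omitted so the snippet runs on its own:

import torch
import transformers
from peft import PeftModel
from transformers import AutoTokenizer

# Placeholders -- app.py defines its own values for these.
base_model = "huggyllama/llama-7b"
lora_weights = "path/to/lora-adapter"
cache_dir = None

# Load the fp16 base model; device_map="auto" lets accelerate place the weights.
model = transformers.AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    cache_dir=cache_dir,
    device_map="auto",
)

# Wrap the base model with the LoRA adapter; the base model is the required
# first positional argument (the line this commit restores).
model = PeftModel.from_pretrained(
    model,
    lora_weights,
    device_map="auto",
    torch_dtype=torch.float16,
    is_trainable=False,
)

tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=False, cache_dir=cache_dir)
tokenizer.pad_token = tokenizer.unk_token  # LLaMA defines no pad token; reuse <unk>

Note that with device_map="auto" the weights are already dispatched to the available devices, so the trailing model = model.to("cuda") kept in app.py is likely redundant.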