Update app.py
app.py CHANGED
@@ -46,7 +46,7 @@ PROMPT_DICT = {
 }
 from llama_rope_scaled_monkey_patch import replace_llama_rope_with_scaled_rope
 replace_llama_rope_with_scaled_rope()
-model = transformers.AutoModelForCausalLM.from_pretrained(
+base_model = transformers.AutoModelForCausalLM.from_pretrained(
     base_model,
     torch_dtype=torch.float16,
     cache_dir=cache_dir,
@@ -54,12 +54,15 @@ model = transformers.AutoModelForCausalLM.from_pretrained(
 )
 
 model = PeftModel.from_pretrained(
-    model,
+    base_model,
     lora_weights,
     device_map="auto",
     cache_dir=cache_dir,
     torch_dtype=torch.float16,
+    assign=True
 )
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
 tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast=False,cache_dir=cache_dir)
 tokenizer.pad_token = tokenizer.unk_token
 def generate_prompt(instruction, input=None):
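Put together, the patched section of app.py reads roughly as in the sketch below. This is assembled from the diff above, not the full file: `base_model` (initially a model id), `lora_weights`, and `cache_dir` are assumed to be defined earlier in app.py, their placeholder values here are hypothetical, and the imports are inferred from the calls shown.

```python
import torch
import transformers
from transformers import AutoTokenizer
from peft import PeftModel

from llama_rope_scaled_monkey_patch import replace_llama_rope_with_scaled_rope

# Assumed to be defined earlier in app.py; these values are placeholders,
# not taken from the diff.
base_model = "<base-model-id>"      # Hugging Face model id or local path
lora_weights = "<lora-adapter-id>"  # PEFT adapter id or local path
cache_dir = "<cache-dir>"

# Patch LLaMA's rotary embeddings with the scaled variant before loading.
replace_llama_rope_with_scaled_rope()

# The commit renames the loaded model to `base_model` so it can be passed to
# PeftModel explicitly below (this rebinds the name that previously held the
# model-id string).
base_model = transformers.AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    cache_dir=cache_dir,
)

# Wrap the base model with the LoRA adapter weights.
model = PeftModel.from_pretrained(
    base_model,
    lora_weights,
    device_map="auto",
    cache_dir=cache_dir,
    torch_dtype=torch.float16,
    assign=True,  # extra loading kwarg added by this commit
)

# Newly added: move the wrapped model to GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Note: after the rebinding above, `base_model` holds the loaded model object
# rather than the original id string, so that object is what this call receives
# in this revision.
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=False, cache_dir=cache_dir)
tokenizer.pad_token = tokenizer.unk_token
```

The net effect of the commit is to name the loaded base model explicitly, hand it to `PeftModel.from_pretrained` as the first argument, and move the adapter-wrapped model onto the GPU when one is available.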