Update app.py
app.py CHANGED
@@ -52,10 +52,6 @@ def generate_prompt(instruction, input=None):
     else:
         return PROMPT_DICT["prompt_no_input"].format(instruction=instruction)
 
-
-def getIds(inputs):
-    return inputs["input_ids"].cuda()
-
 def generator(input_ids, generation_config, max_new_tokens):
     # Without streaming
     with torch.no_grad():
@@ -79,17 +75,16 @@ def loadModel():
         cache_dir=cache_dir,
         device_map="auto",
     )
-    model = PeftModel.from_pretrained(
-        model,
-        lora_weights,
-        device_map="auto",
-        cache_dir='',
-        torch_dtype=torch.float16,
-        is_trainable=False,
-    )
+    #model = PeftModel.from_pretrained(
+    #    model,
+    #    lora_weights,
+    #    device_map="auto",
+    #    cache_dir='',
+    #    torch_dtype=torch.float16,
+    #    is_trainable=False,
+    #    )
     tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast=False,cache_dir=cache_dir)
     tokenizer.pad_token = tokenizer.unk_token
-    model = model.to("cuda")
     return model
 
 model, tokenizer = loadModel()
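For context, the net effect of this commit on loadModel can be read from the sketch below. It is a hedged reconstruction, not the author's exact code: the loader call truncated above the second hunk is assumed to be AutoModelForCausalLM.from_pretrained, and base_model, lora_weights, and cache_dir are hypothetical placeholders, since the diff does not show how app.py defines them.

# Hedged reconstruction of loadModel() as it stands after this commit.
# Assumptions (not shown in the diff): the truncated loader call is
# AutoModelForCausalLM.from_pretrained, and the three module-level values
# below are placeholders for whatever app.py actually defines.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel  # only needed if the adapter step is re-enabled

base_model = "some/base-model-id"    # hypothetical placeholder
lora_weights = "some/lora-adapter"   # hypothetical placeholder
cache_dir = None                     # hypothetical placeholder

def loadModel():
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        cache_dir=cache_dir,
        device_map="auto",  # accelerate places the weights across devices
    )
    # The commit comments out this step, so the base model is served without
    # the LoRA adapter; uncommenting it restores the fine-tuned weights.
    # model = PeftModel.from_pretrained(
    #     model,
    #     lora_weights,
    #     device_map="auto",
    #     torch_dtype=torch.float16,
    #     is_trainable=False,  # inference only; adapter weights stay frozen
    # )
    tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=False, cache_dir=cache_dir)
    tokenizer.pad_token = tokenizer.unk_token
    return model, tokenizer

model, tokenizer = loadModel()

Two details worth noting. Dropping model.to("cuda") is consistent with device_map="auto": accelerate has already placed the weights, so the explicit move is redundant and, for partially offloaded models, can raise an error. Also, the diff's loadModel returns only model while the call site unpacks model, tokenizer = loadModel(); the sketch returns both, which appears to be the intent.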