Update app.py
app.py CHANGED
@@ -3,7 +3,7 @@ from huggingface_hub import InferenceClient
 import transformers
 from transformers import AutoTokenizer,GenerationConfig
 import torch
-from peft import PeftModel
+from peft import PeftModel
 
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
@@ -12,23 +12,24 @@ For more information on `huggingface_hub` Inference API support, please check th
 from llama_rope_scaled_monkey_patch import replace_llama_rope_with_scaled_rope
 replace_llama_rope_with_scaled_rope()
 base_model = "Neko-Institute-of-Science/LLaMA-65B-HF"
-lora_weights =
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+lora_weights = "./"
+#lora_weights = LoraConfig(
+# auto_mapping=None,
+# base_model_name_or_path="Neko-Institute-of-Science/LLaMA-65B-HF",
+# bias=None,
+# fan_in_fan_out=False,
+# inference_mode=True,
+# init_lora_weights=True,
+# layers_pattern=None,
+# layers_to_transform=None,
+# lora_alpha=16,
+# lora_dropout=0.05,
+# modules_to_save=None,
+# peft_type="LORA",
+# revision=None,
+# target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
+# task_type="CAUSAL_LM",
+#)
 
 cache_dir = "/data"
 model = transformers.AutoModelForCausalLM.from_pretrained(
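The diff cuts off at the AutoModelForCausalLM.from_pretrained( call, so the part of app.py that actually consumes lora_weights is not shown. As a minimal sketch (not taken from this commit), the usual pattern is to load the base model and then wrap it with PeftModel.from_pretrained; the dtype and device_map kwargs below are assumptions, and lora_weights = "./" assumes the adapter files sit next to app.py.

import torch
import transformers
from peft import PeftModel

base_model = "Neko-Institute-of-Science/LLaMA-65B-HF"
lora_weights = "./"   # assumed: adapter_config.json and adapter weights in the Space root
cache_dir = "/data"

# Load the 65B base model (kwargs are assumptions; the diff does not show them).
model = transformers.AutoModelForCausalLM.from_pretrained(
    base_model,
    cache_dir=cache_dir,
    torch_dtype=torch.float16,  # assumed
    device_map="auto",          # assumed
)

# Attach the LoRA adapter found at lora_weights on top of the base model.
model = PeftModel.from_pretrained(model, lora_weights)
model.eval()

Because PeftModel.from_pretrained reads the adapter's saved configuration (adapter_config.json) from the lora_weights path, the explicit LoraConfig block added in this commit can remain commented out.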