Update app.py
app.py CHANGED
@@ -3,7 +3,7 @@ from huggingface_hub import InferenceClient
 import transformers
 from transformers import AutoTokenizer,GenerationConfig
 import torch
-from peft import PeftModel
+from peft import PeftModel, LoraConfig
 
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
@@ -12,7 +12,24 @@ For more information on `huggingface_hub` Inference API support, please check th
 from llama_rope_scaled_monkey_patch import replace_llama_rope_with_scaled_rope
 replace_llama_rope_with_scaled_rope()
 base_model = "Neko-Institute-of-Science/LLaMA-65B-HF"
-lora_weights =
+lora_weights = LoraConfig(
+    auto_mapping=None,
+    base_model_name_or_path="Neko-Institute-of-Science/LLaMA-65B-HF",
+    bias="none",
+    fan_in_fan_out=False,
+    inference_mode=True,
+    init_lora_weights=True,
+    layers_pattern=None,
+    layers_to_transform=None,
+    lora_alpha=16,
+    lora_dropout=0.05,
+    modules_to_save=None,
+    peft_type="LORA",
+    revision=None,
+    target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
+    task_type="CAUSAL_LM",
+)
+
 cache_dir = "/data"
 model = transformers.AutoModelForCausalLM.from_pretrained(
     base_model,