mamkkl committed
Commit 26fb70e (verified) · 1 Parent(s): ebf7c56

Update app.py

Files changed (1): app.py +12 -12
app.py CHANGED
@@ -10,23 +10,23 @@ For more information on `huggingface_hub` Inference API support, please check th
 """
 #client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 from llama_rope_scaled_monkey_patch import replace_llama_rope_with_scaled_rope
-replace_llama_rope_with_scaled_rope()
+replace_llama_rope_with_scaled_rope()
 base_model = "Neko-Institute-of-Science/LLaMA-65B-HF"
 lora_weights = "adapter_config.json"
 model = transformers.AutoModelForCausalLM.from_pretrained(
-    base_model,
-    torch_dtype=torch.float16,
-    cache_dir=cache_dir,
-    device_map="auto",
-)
+    base_model,
+    torch_dtype=torch.float16,
+    cache_dir=cache_dir,
+    device_map="auto",
+)
 
 model = PeftModel.from_pretrained(
-    model,
-    lora_weights,
-    device_map="auto",
-    cache_dir=cache_dir,
-    torch_dtype=torch.float16,
-)
+    model,
+    lora_weights,
+    device_map="auto",
+    cache_dir=cache_dir,
+    torch_dtype=torch.float16,
+)
 tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast=False,cache_dir=cache_dir)
 tokenizer.pad_token = tokenizer.unk_token
 model.eval()
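
For reference, below is a minimal, self-contained sketch of the loading sequence this commit touches: patch LLaMA's RoPE for longer contexts, load the fp16 base model across available devices, wrap it with a LoRA adapter via PEFT, and set up the tokenizer. The imports, the `cache_dir` value, and the adapter path are assumptions not shown in the diff; in particular, `peft.PeftModel.from_pretrained` expects an adapter directory (or Hub repo id) that contains `adapter_config.json`, not the JSON file itself, so this sketch points `lora_weights` at a hypothetical `./lora_adapter` directory.

import torch
import transformers
from transformers import AutoTokenizer
from peft import PeftModel

# Local module from this Space's repo: monkey-patches LLaMA's rotary
# position embeddings with a scaled variant. Must run before the model
# is instantiated so the patched classes take effect.
from llama_rope_scaled_monkey_patch import replace_llama_rope_with_scaled_rope

replace_llama_rope_with_scaled_rope()

cache_dir = "./cache"  # assumption: defined elsewhere in app.py, value not shown
base_model = "Neko-Institute-of-Science/LLaMA-65B-HF"
lora_weights = "./lora_adapter"  # assumption: directory holding adapter_config.json
                                 # and the adapter weights, not the JSON file itself

# Load the 65B base model in fp16, sharded across available devices.
model = transformers.AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    cache_dir=cache_dir,
    device_map="auto",
)

# Wrap the base model with the LoRA adapter (kwargs mirror the diff).
model = PeftModel.from_pretrained(
    model,
    lora_weights,
    device_map="auto",
    cache_dir=cache_dir,
    torch_dtype=torch.float16,
)

# LLaMA ships without a pad token; reuse <unk> so padding works.
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=False, cache_dir=cache_dir)
tokenizer.pad_token = tokenizer.unk_token
model.eval()  # inference mode: disables dropout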