wmpscc committed
Commit c8d71ce · 1 Parent(s): 7c04cb8

Update app.py

Files changed (1): app.py (+3, -3)
app.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
 import argparse
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from transformers import LlamaForCausalLM, LlamaForTokenizer
+# from transformers import LlamaForCausalLM, LlamaForTokenizer
 
 from utils import load_hyperparam, load_model
 from models.tokenize import Tokenizer
@@ -41,7 +41,7 @@ def init_args():
     args = load_hyperparam(args)
 
     # args.tokenizer = Tokenizer(model_path=args.spm_model_path)
-    args.tokenizer = LlamaForTokenizer.from_pretrained("Linly-AI/Chinese-LLaMA-2-7B-hf", trust_remote_code=True)
+    args.tokenizer = AutoTokenizer.from_pretrained("Linly-AI/Chinese-LLaMA-2-7B-hf", trust_remote_code=True)
     args.vocab_size = args.tokenizer.sp_model.vocab_size()
 
 
@@ -57,7 +57,7 @@ def init_model():
 
     # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     # model.to(device)
-    model = LlamaForCausalLM.from_pretrained("Linly-AI/Chinese-LLaMA-2-7B-hf", device_map="auto", torch_dtype=torch.float16, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained("Linly-AI/Chinese-LLaMA-2-7B-hf", device_map="auto", torch_dtype=torch.float16, trust_remote_code=True)
     print(model)
     print(torch.cuda.max_memory_allocated() / 1024 ** 3)
     lm_generation = LmGeneration(model, args.tokenizer)
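
Net effect of the three hunks: app.py now resolves both the tokenizer and the model through the transformers Auto classes, instead of importing LlamaForCausalLM directly alongside a LlamaForTokenizer class that does not exist in transformers (the actual class names are LlamaTokenizer and LlamaTokenizerFast, so the old import line would itself raise ImportError). Below is a minimal standalone sketch of the resulting load path, assuming the Hugging Face Hub is reachable; the use_fast=False argument and the generation call at the end are illustrative assumptions, not part of this commit.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "Linly-AI/Chinese-LLaMA-2-7B-hf"

# use_fast=False is an assumption: only the slow (sentencepiece-backed)
# LlamaTokenizer exposes the .sp_model attribute that app.py reads
# vocab_size() from; a fast tokenizer has no .sp_model.
tokenizer = AutoTokenizer.from_pretrained(repo, use_fast=False, trust_remote_code=True)
vocab_size = tokenizer.sp_model.vocab_size()

# device_map="auto" lets accelerate place the fp16 weights across the
# available GPUs (or CPU), matching the arguments in the commit.
model = AutoModelForCausalLM.from_pretrained(
    repo,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Illustrative smoke test (hypothetical prompt, not in the commit).
inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))

One caveat worth flagging: if AutoTokenizer returns the fast tokenizer for this repo, the unchanged args.tokenizer.sp_model.vocab_size() line would raise AttributeError; tokenizer.vocab_size is the class-agnostic alternative.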