wmpscc committed
Commit c8d71ce · 1 Parent(s): 7c04cb8

Update app.py

Files changed (1): app.py (+3, -3)
app.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
 import argparse
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from transformers import LlamaForCausalLM, LlamaForTokenizer
+# from transformers import LlamaForCausalLM, LlamaForTokenizer
 
 from utils import load_hyperparam, load_model
 from models.tokenize import Tokenizer
@@ -41,7 +41,7 @@ def init_args():
     args = load_hyperparam(args)
 
     # args.tokenizer = Tokenizer(model_path=args.spm_model_path)
-    args.tokenizer = LlamaForTokenizer.from_pretrained("Linly-AI/Chinese-LLaMA-2-7B-hf", trust_remote_code=True)
+    args.tokenizer = AutoTokenizer.from_pretrained("Linly-AI/Chinese-LLaMA-2-7B-hf", trust_remote_code=True)
     args.vocab_size = args.tokenizer.sp_model.vocab_size()
 
 
@@ -57,7 +57,7 @@ def init_model():
 
     # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     # model.to(device)
-    model = LlamaForCausalLM.from_pretrained("Linly-AI/Chinese-LLaMA-2-7B-hf", device_map="auto", torch_dtype=torch.float16, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained("Linly-AI/Chinese-LLaMA-2-7B-hf", device_map="auto", torch_dtype=torch.float16, trust_remote_code=True)
     print(model)
     print(torch.cuda.max_memory_allocated() / 1024 ** 3)
     lm_generation = LmGeneration(model, args.tokenizer)
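
Net effect of the three hunks: app.py now resolves both the tokenizer and the model through the transformers Auto classes, instead of importing LlamaForCausalLM directly alongside a LlamaForTokenizer class that does not exist in transformers (the actual class names are LlamaTokenizer and LlamaTokenizerFast, so the old import line would itself raise ImportError). Below is a minimal standalone sketch of the resulting load path, assuming the Hugging Face Hub is reachable; the use_fast=False argument and the generation call at the end are illustrative assumptions, not part of this commit.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "Linly-AI/Chinese-LLaMA-2-7B-hf"

# use_fast=False is an assumption: only the slow (sentencepiece-backed)
# LlamaTokenizer exposes the .sp_model attribute that app.py reads
# vocab_size() from; a fast tokenizer has no .sp_model.
tokenizer = AutoTokenizer.from_pretrained(repo, use_fast=False, trust_remote_code=True)
vocab_size = tokenizer.sp_model.vocab_size()

# device_map="auto" lets accelerate place the fp16 weights across the
# available GPUs (or CPU), matching the arguments in the commit.
model = AutoModelForCausalLM.from_pretrained(
    repo,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Illustrative smoke test (hypothetical prompt, not in the commit).
inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))

One caveat worth flagging: if AutoTokenizer returns the fast tokenizer for this repo, the unchanged args.tokenizer.sp_model.vocab_size() line would raise AttributeError; tokenizer.vocab_size is the class-agnostic alternative.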