xiaowang7777 committed on
Commit
0c0f97c
·
1 Parent(s): 51ac4d5
Files changed (1) hide show
  1. app.py +14 -9
app.py CHANGED
@@ -5,21 +5,26 @@ from models.modeling_moss import MossForCausalLM
5
  from models.tokenization_moss import MossTokenizer
6
  from models.configuration_moss import MossConfig
7
  from accelerate import init_empty_weights, load_checkpoint_and_dispatch
 
8
 
9
  # nstruct_pipeline_3b = pipeline(model="fnlp/moss-moon-003-sft-int4", torch_dtype=torch.float, trust_remote_code=True,
10
  # device_map="auto")
11
  model_path = "fnlp/moss-moon-003-sft-int8"
12
 
13
- config = MossConfig.from_pretrained(model_path)
14
- tokenizer = MossTokenizer.from_pretrained(model_path)
 
 
 
 
 
 
 
 
15
 
16
- with init_empty_weights():
17
- raw_model = MossForCausalLM._from_config(config, torch_dtype=torch.float)
18
- raw_model.tie_weights()
19
- model = load_checkpoint_and_dispatch(
20
- raw_model, checkpoint=model_path, device_map="balanced_low_0", no_split_module_classes=["MossBlock"], dtype=torch.float,
21
- offload_folder="offload_folder"
22
- )
23
 
24
 
25
  def generate(query, temperature, top_p, top_k, max_new_tokens):
 
5
  from models.tokenization_moss import MossTokenizer
6
  from models.configuration_moss import MossConfig
7
  from accelerate import init_empty_weights, load_checkpoint_and_dispatch
8
+ from transformers import AutoTokenizer, AutoModelForCausalLM
9
 
10
  # nstruct_pipeline_3b = pipeline(model="fnlp/moss-moon-003-sft-int4", torch_dtype=torch.float, trust_remote_code=True,
11
  # device_map="auto")
12
  model_path = "fnlp/moss-moon-003-sft-int8"
13
 
14
+ # config = MossConfig.from_pretrained(model_path)
15
+ # tokenizer = MossTokenizer.from_pretrained(model_path)
16
+ #
17
+ # with init_empty_weights():
18
+ # raw_model = MossForCausalLM._from_config(config, torch_dtype=torch.float)
19
+ # raw_model.tie_weights()
20
+ # model = load_checkpoint_and_dispatch(
21
+ # raw_model, checkpoint=model_path, device_map="balanced_low_0", no_split_module_classes=["MossBlock"], dtype=torch.float,
22
+ # offload_folder="offload_folder"
23
+ # )
24
 
25
+ tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
26
+ model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True).float()
27
+ model = model.eval()
 
 
 
 
28
 
29
 
30
  def generate(query, temperature, top_p, top_k, max_new_tokens):