SpiketheCowboy committed
Commit 99660e9 · 1 Parent(s): a7e983f

Upload folder using huggingface_hub

Files changed (2)
  1. app.py +97 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,97 @@
+ '''
+ simple demo adapted from [gradio](https://gradio.app/creating-a-chatbot/).
+ '''
+
+ import gradio as gr
+ import random
+ import time
+ import transformers
+ import os
+ import json
+ import torch
+ from tqdm import tqdm  # progress bar used in apply_delta below
+ from transformers import LlamaTokenizer, LlamaForCausalLM
+
+
+ def apply_delta(base_model_path, target_model_path, delta_path):
+     print(f"Loading the delta weights from {delta_path}")
+     delta_tokenizer = LlamaTokenizer.from_pretrained(delta_path, use_fast=False)
+     delta = LlamaForCausalLM.from_pretrained(
+         delta_path, low_cpu_mem_usage=True, torch_dtype=torch.float16
+     )
+
+     print(f"Loading the base model from {base_model_path}")
+     base_tokenizer = LlamaTokenizer.from_pretrained(base_model_path, use_fast=False)
+     base = LlamaForCausalLM.from_pretrained(
+         base_model_path, low_cpu_mem_usage=True
+     )
+
+     # following the alpaca training recipe, newly initialized special tokens were added
+     DEFAULT_PAD_TOKEN = "[PAD]"
+     DEFAULT_EOS_TOKEN = "</s>"
+     DEFAULT_BOS_TOKEN = "<s>"
+     DEFAULT_UNK_TOKEN = "<unk>"
+     special_tokens_dict = {
+         "pad_token": DEFAULT_PAD_TOKEN,
+         "eos_token": DEFAULT_EOS_TOKEN,
+         "bos_token": DEFAULT_BOS_TOKEN,
+         "unk_token": DEFAULT_UNK_TOKEN,
+     }
+     num_new_tokens = base_tokenizer.add_special_tokens(special_tokens_dict)
+     base.resize_token_embeddings(len(base_tokenizer))
+     input_embeddings = base.get_input_embeddings().weight.data
+     output_embeddings = base.get_output_embeddings().weight.data
+
+     input_embeddings[-num_new_tokens:] = 0  # zero-init the rows for the new tokens
+     output_embeddings[-num_new_tokens:] = 0  # so the delta fully determines them
+
+     print("Applying the delta")
+     target_weights = {}
+     for name, param in tqdm(base.state_dict().items(), desc="Applying delta"):
+         assert name in delta.state_dict()
+         param.data += delta.state_dict()[name]  # target = base + delta
+         target_weights[name] = param.data
+
+     print(f"Saving the target model to {target_model_path}")
+     base.load_state_dict(target_weights)
+     base.save_pretrained(target_model_path)
+     delta_tokenizer.save_pretrained(target_model_path)
+
+
+ base_weights = 'decapoda-research/llama-7b-hf'
+ target_weights = 'expertllama'  # local path
+ delta_weights = 'OFA-Sys/expertllama-7b-delta'
+ apply_delta(base_weights, target_weights, delta_weights)
+
+ tokenizer = transformers.LlamaTokenizer.from_pretrained(target_weights)
+ model = transformers.LlamaForCausalLM.from_pretrained(target_weights, torch_dtype=torch.float16, low_cpu_mem_usage=True)
+ # model.cuda()
+
+ with gr.Blocks() as demo:
+     chatbot = gr.Chatbot()
+     msg = gr.Textbox()
+     clear = gr.Button("Clear")
+
+     def respond(message, chat_history):
+
+         # prompt wrapper, only single-turn is allowed for now
+         prompt = f"### Human:\n{message}\n\n### Assistant:\n"
+
+         batch = tokenizer(
+             prompt,
+             return_tensors="pt",
+             add_special_tokens=False
+         )
+         batch = {k: v.to(model.device) for k, v in batch.items()}  # match the model's device
+         generated = model.generate(batch["input_ids"], max_length=1024, do_sample=True, temperature=0.8)
+         bot_message = tokenizer.decode(generated[0][:-1]).split("### Assistant:\n", 1)[1]  # drop EOS, keep the reply
+
+         chat_history.append((message, bot_message))
+         time.sleep(1)
+
+         return "", chat_history
+
+     msg.submit(respond, [msg, chatbot], [msg, chatbot])
+     clear.click(lambda: None, None, chatbot, queue=False)
+
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ torch
+ transformers
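
Note on the delta format: apply_delta() in app.py reconstructs the ExpertLLaMA weights by adding OFA-Sys/expertllama-7b-delta on top of the LLaMA base. For context, here is a minimal sketch of the inverse step, i.e. how such a delta checkpoint could be produced in the first place. This sketch is not part of the commit; make_delta and its arguments are hypothetical names, and it assumes the base model has already been extended with the same special tokens so that all tensor shapes match.

from transformers import LlamaForCausalLM

def make_delta(base_model_path, target_model_path, delta_path):
    # load both checkpoints on CPU (hypothetical helper, not from this repo)
    base = LlamaForCausalLM.from_pretrained(base_model_path, low_cpu_mem_usage=True)
    target = LlamaForCausalLM.from_pretrained(target_model_path, low_cpu_mem_usage=True)
    base_state = base.state_dict()
    for name, param in target.state_dict().items():
        # delta = target - base, so base + delta recovers the target,
        # which is exactly what apply_delta() computes
        param.data -= base_state[name]
    target.save_pretrained(delta_path)  # the saved tensors are the deltas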