Upload 4 files
Browse files- README.md +57 -0
- rwkv6_7b_v2.1_triples.pth +3 -0
- triples.py +47 -0
- triplesdemo.png +0 -0
README.md
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This is a state for rwkv6_7b_v2.1 that generates triples given entities and their relations
|
2 |
+
|
3 |
+
* The input is solely the context that you want this model to analyze
|
4 |
+
* The output is the domain, the expert role in this domain, and the specific tasks that this expert can do, in JSONL format.
|
5 |
+
|
6 |
+
# Please refer to the following demo as test code:
|
7 |
+
```python
|
8 |
+
from rwkv.model import RWKV
|
9 |
+
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
10 |
+
import torch
|
11 |
+
|
12 |
+
# download models: https://huggingface.co/BlinkDL
|
13 |
+
# Machine-specific locations: point these at your local copies of the base
# model checkpoint and of the tuned state file.
MODEL_FILE = '/home/rwkv/Peter/model/base/RWKV-x060-World-7B-v2.1-20240507-ctx4096.pth'
STATES_FILE = '/home/rwkv/Peter/model/state/triples/3/rwkv-0.pth'
# Tokenizer name handed to PIPELINE (the one used for rwkv "world" models).
VOCAB_NAME = "rwkv_vocab_v20230424"
DEVICE = 'cuda'

INSTRUCTION = '根据input中的input和entity_types,帮助用户找到文本中每种entity_types的实体,标明实体类型并且简单描述。然后给找到实体之间的关系,并且描述这段关系以及对关系强度打分。 避免使用诸如“其他”或“未知”的通用实体类型。 非常重要的是:不要生成冗余或重叠的实体类型和关系。用JSON格式输出。'
INPUT_TEXT = '{\"input\": \"有个空空道人访道求仙,从大荒山无稽崖青埂峰下经过,忽见一大块石上字迹分明,编述历历,《石头记》是也。空空道人将《石头记》抄录下来,改名为《情僧录》。至吴玉峰题曰《红楼梦》。东鲁孔梅溪则题曰《风月宝鉴》。后因曹雪芹于悼红轩中披阅十载,增删五次,纂成目录,分出章回,则题曰《金陵十二钗》。姑苏乡宦甄士隐梦见一僧一道携无缘补天之石(通灵宝玉)下凡历练,又讲绛珠仙子为报神瑛侍者浇灌之恩追随神瑛侍者下世为人,以泪报恩。梦醒后,抱女儿英莲去看“过会”[2]。甄士隐结交并接济了寄居于隔壁葫芦庙内的胡州人氏贾化(号雨村)。某日,贾雨村造访甄士隐,无意中遇见甄家丫鬟娇杏,以为娇杏对其有意。中秋时节,甄士隐于家中宴请贾雨村,得知贾雨村的抱负后,赠银送衣以作贾雨村上京赴考之盘缠,第二天,贾雨村不辞而别便上路赴考。第二年元宵佳节当晚,甄家仆人霍启在看社火花灯时,不慎丢失了甄士隐唯一的女儿英莲。三月十五日,葫芦庙失火祸及甄家,落魄的甄士隐带家人寄居于如州岳丈封肃家中,后遇一僧一道,悟出《好了歌》真谛,随僧道而去。\"}, {\"entity_types\": ["文学与神话", "历史背景", "影响分析", "改编过程", "角色贡献", "写作技巧评估"]}'


def build_initial_state(states, n_layer, n_embd, device=DEVICE, dtype=torch.float16):
    """Build the flat initial-state list passed to ``pipeline.generate``.

    For every layer ``i`` three tensors are appended, in this order:
    a zero vector of length ``n_embd``, the tuned tensor stored under
    ``blocks.{i}.att.time_state`` (cast to ``dtype`` and with its last two
    axes swapped), and another zero vector of length ``n_embd``.

    Args:
        states: mapping loaded from the state checkpoint.
        n_layer: number of layers to build entries for.
        n_embd: length of the zero vectors.
        device: device every returned tensor is placed on.
        dtype: dtype of every returned tensor.

    Returns:
        A list with ``3 * n_layer`` tensors.

    Raises:
        KeyError: if a layer's ``time_state`` entry is missing from ``states``.
    """
    states_value = []
    for i in range(n_layer):
        key = f'blocks.{i}.att.time_state'
        try:
            value = states[key]
        except KeyError:
            raise KeyError(
                f'{key!r} not found in the state file; '
                f'does it match a model with {n_layer} layers?'
            ) from None
        prev_x = torch.zeros(n_embd, device=device, dtype=dtype)
        # The checkpoint stores the per-head matrices transposed relative to
        # what is passed to the model here, hence the swap of axes 1 and 2.
        prev_states = value.clone().detach().to(device=device, dtype=dtype).transpose(1, 2)
        prev_ffn = torch.zeros(n_embd, device=device, dtype=dtype)
        states_value.extend((prev_x, prev_states, prev_ffn))
    return states_value


def my_print(s):
    """Write a generated fragment to stdout immediately, without a newline."""
    print(s, end='', flush=True)


def main():
    """Load the base model and the tuned state, then stream one generation."""
    model = RWKV(model=MODEL_FILE, strategy='cuda fp16')
    print(model.args)
    pipeline = PIPELINE(model, VOCAB_NAME)

    # Load on CPU first so a state saved on a different CUDA device index still
    # loads; build_initial_state moves the tensors to DEVICE afterwards.
    # NOTE(security): torch.load unpickles the file. Only load state files you
    # trust, or pass weights_only=True if your torch version supports it.
    states = torch.load(STATES_FILE, map_location='cpu')
    states_value = build_initial_state(states, model.args.n_layer, model.args.n_embd)

    ctx = f'Instruction: {INSTRUCTION}\nInput: {INPUT_TEXT}\n\nResponse:'
    print(ctx)

    args = PIPELINE_ARGS(
        temperature=0.8,
        top_p=0.1,
        top_k=0,  # top_k = 0 then ignore
        alpha_frequency=0.25,
        alpha_presence=0.25,
        alpha_decay=0.996,  # gradually decay the penalty
        token_ban=[0],  # ban the generation of some tokens
        token_stop=[0, 1],  # stop generation whenever you see any token here
        chunk_len=256,  # split input into chunks to save VRAM (shorter -> slower)
    )

    pipeline.generate(ctx, token_count=2000, args=args, callback=my_print, state=states_value)
    print('\n')


if __name__ == '__main__':
    main()
|
55 |
+
```
|
56 |
+
# The final printed input and output:
|
57 |
+
![](triples/triplesdemo.png)
|
rwkv6_7b_v2.1_triples.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06a5b6c40217766cde95b917dc7c3179b349ac9252678b33251ecca3fb44d674
|
3 |
+
size 16781463
|
triples.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from rwkv.model import RWKV
|
2 |
+
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
3 |
+
import torch
|
4 |
+
|
5 |
+
# download models: https://huggingface.co/BlinkDL
|
6 |
+
# Machine-specific locations: point these at your local copies of the base
# model checkpoint and of the tuned state file.
MODEL_FILE = '/home/rwkv/Peter/model/base/RWKV-x060-World-7B-v2.1-20240507-ctx4096.pth'
STATES_FILE = '/home/rwkv/Peter/model/state/triples/3/rwkv-0.pth'
# Tokenizer name handed to PIPELINE (the one used for rwkv "world" models).
VOCAB_NAME = "rwkv_vocab_v20230424"
DEVICE = 'cuda'

INSTRUCTION = '根据input中的input和entity_types,帮助用户找到文本中每种entity_types的实体,标明实体类型并且简单描述。然后给找到实体之间的关系,并且描述这段关系以及对关系强度打分。 避免使用诸如“其他”或“未知”的通用实体类型。 非常重要的是:不要生成冗余或重叠的实体类型和关系。用JSON格式输出。'
INPUT_TEXT = '{\"input\": \"有个空空道人访道求仙,从大荒山无稽崖青埂峰下经过,忽见一大块石上字迹分明,编述历历,《石头记》是也。空空道人将《石头记》抄录下来,改名为《情僧录》。至吴玉峰题曰《红楼梦》。东鲁孔梅溪则题曰《风月宝鉴》。后因曹雪芹于悼红轩中披阅十载,增删五次,纂成目录,分出章回,则题曰《金陵十二钗》。姑苏乡宦甄士隐梦见一僧一道携无缘补天之石(通灵宝玉)下凡历练,又讲绛珠仙子为报神瑛侍者浇灌之恩追随神瑛侍者下世为人,以泪报恩。梦醒后,抱女儿英莲去看“过会”[2]。甄士隐结交并接济了寄居于隔壁葫芦庙内的胡州人氏贾化(号雨村)。某日,贾雨村造访甄士隐,无意中遇见甄家丫鬟娇杏,以为娇杏对其有意。中秋时节,甄士隐于家中宴请贾雨村,得知贾雨村的抱负后,赠银送衣以作贾雨村上京赴考之盘缠,第二天,贾雨村不辞而别便上路赴考。第二年元宵佳节当晚,甄家仆人霍启在看社火花灯时,不慎丢失了甄士隐唯一的女儿英莲。三月十五日,葫芦庙失火祸及甄家,落魄的甄士隐带家人寄居于如州岳丈封肃家中,后遇一僧一道,悟出《好了歌》真谛,随僧道而去。\"}, {\"entity_types\": ["文学与神话", "历史背景", "影响分析", "改编过程", "角色贡献", "写作技巧评估"]}'


def build_initial_state(states, n_layer, n_embd, device=DEVICE, dtype=torch.float16):
    """Build the flat initial-state list passed to ``pipeline.generate``.

    For every layer ``i`` three tensors are appended, in this order:
    a zero vector of length ``n_embd``, the tuned tensor stored under
    ``blocks.{i}.att.time_state`` (cast to ``dtype`` and with its last two
    axes swapped), and another zero vector of length ``n_embd``.

    Args:
        states: mapping loaded from the state checkpoint.
        n_layer: number of layers to build entries for.
        n_embd: length of the zero vectors.
        device: device every returned tensor is placed on.
        dtype: dtype of every returned tensor.

    Returns:
        A list with ``3 * n_layer`` tensors.

    Raises:
        KeyError: if a layer's ``time_state`` entry is missing from ``states``.
    """
    states_value = []
    for i in range(n_layer):
        key = f'blocks.{i}.att.time_state'
        try:
            value = states[key]
        except KeyError:
            raise KeyError(
                f'{key!r} not found in the state file; '
                f'does it match a model with {n_layer} layers?'
            ) from None
        prev_x = torch.zeros(n_embd, device=device, dtype=dtype)
        # The checkpoint stores the per-head matrices transposed relative to
        # what is passed to the model here, hence the swap of axes 1 and 2.
        prev_states = value.clone().detach().to(device=device, dtype=dtype).transpose(1, 2)
        prev_ffn = torch.zeros(n_embd, device=device, dtype=dtype)
        states_value.extend((prev_x, prev_states, prev_ffn))
    return states_value


def my_print(s):
    """Write a generated fragment to stdout immediately, without a newline."""
    print(s, end='', flush=True)


def main():
    """Load the base model and the tuned state, then stream one generation."""
    model = RWKV(model=MODEL_FILE, strategy='cuda fp16')
    print(model.args)
    pipeline = PIPELINE(model, VOCAB_NAME)

    # Load on CPU first so a state saved on a different CUDA device index still
    # loads; build_initial_state moves the tensors to DEVICE afterwards.
    # NOTE(security): torch.load unpickles the file. Only load state files you
    # trust, or pass weights_only=True if your torch version supports it.
    states = torch.load(STATES_FILE, map_location='cpu')
    states_value = build_initial_state(states, model.args.n_layer, model.args.n_embd)

    ctx = f'Instruction: {INSTRUCTION}\nInput: {INPUT_TEXT}\n\nResponse:'
    print(ctx)

    args = PIPELINE_ARGS(
        temperature=0.8,
        top_p=0.1,
        top_k=0,  # top_k = 0 then ignore
        alpha_frequency=0.25,
        alpha_presence=0.25,
        alpha_decay=0.996,  # gradually decay the penalty
        token_ban=[0],  # ban the generation of some tokens
        token_stop=[0, 1],  # stop generation whenever you see any token here
        chunk_len=256,  # split input into chunks to save VRAM (shorter -> slower)
    )

    pipeline.generate(ctx, token_count=2000, args=args, callback=my_print, state=states_value)
    print('\n')


if __name__ == '__main__':
    main()
|
triplesdemo.png
ADDED