Spaces:
Runtime error
Runtime error
File size: 5,972 Bytes
f714b01 315ea19 f714b01 a7e12f8 166a74e 1d5e556 166a74e a04611c 1d5e556 dc1d70c f714b01 315ea19 f714b01 166a74e f714b01 166a74e f714b01 166a74e f714b01 166a74e f714b01 166a74e 315ea19 166a74e 315ea19 166a74e f714b01 315ea19 1d5e556 f714b01 1d5e556 3bcbfb1 f714b01 3bcbfb1 315ea19 3bcbfb1 eded8df 315ea19 3bcbfb1 c197986 234ee14 ab56f98 3bcbfb1 f714b01 c4f1727 27f8eb5 39debae 315ea19 27f8eb5 c4f1727 c197986 315ea19 cc76ff6 c197986 27f8eb5 315ea19 c197986 315ea19 c197986 315ea19 c197986 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import gradio as gr
import os, gc, copy, torch
from datetime import datetime
from huggingface_hub import hf_hub_download
from pynvml import *
nvmlInit()
gpu_h = nvmlDeviceGetHandleByIndex(0)
ctx_limit = 3000
title = "RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096"
os.environ["RWKV_JIT_ON"] = '1'
os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
from rwkv.model import RWKV
model_path = hf_hub_download(repo_id="RWKV/v5-Eagle-7B", filename=f"{title}.pth")
model = RWKV(model=model_path, strategy='cuda fp16i8 *8 -> cuda fp16')
from rwkv.utils import PIPELINE, PIPELINE_ARGS
pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
def generate_prompt(instruction, input=""):
instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n')
input = input.strip().replace('\r\n','\n').replace('\n\n','\n')
if input:
return f"""### Instruction: {instruction}
### Input: {input}
### Response:"""
else:
return f"""### User: hi
### Assistant: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.
### User: {instruction}
### Assistant:"""
def evaluate(
ctx,
token_count=200,
temperature=1.0,
top_p=0.7,
presencePenalty = 0.1,
countPenalty = 0.1,
):
args = PIPELINE_ARGS(temperature = max(0.2, float(temperature)), top_p = float(top_p),
alpha_frequency = countPenalty,
alpha_presence = presencePenalty,
token_ban = [], # ban the generation of some tokens
token_stop = [0]) # stop generation whenever you see any token here
ctx = ctx.strip()
all_tokens = []
out_last = 0
out_str = ''
occurrence = {}
state = None
for i in range(int(token_count)):
out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
for n in occurrence:
out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
if token in args.token_stop:
break
all_tokens += [token]
for xxx in occurrence:
occurrence[xxx] *= 0.996
if token not in occurrence:
occurrence[token] = 1
else:
occurrence[token] += 1
tmp = pipeline.decode(all_tokens[out_last:])
if '\ufffd' not in tmp:
out_str += tmp
yield out_str.strip()
out_last = i + 1
gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f'{timestamp} - vram {gpu_info.total} used {gpu_info.used} free {gpu_info.free}')
del out
del state
gc.collect()
torch.cuda.empty_cache()
yield out_str.strip()
examples = [
["### Assistant:\nSure! Here is a very detailed plan to create flying pigs:", 333, 1, 0.3, 0, 1],
["### Assistant:\nSure! Here are some ideas for FTL drive:", 333, 1, 0.3, 0, 1],
["A few light taps upon the pane made her turn to the window. It had begun to snow again.", 333, 1, 0.3, 0, 1],
[generate_prompt("Écrivez un programme Python pour miner 1 Bitcoin, avec des commentaires."), 333, 1, 0.3, 0, 1],
[generate_prompt("東京で訪れるべき素晴らしい場所とその紹介をいくつか挙げてください。"), 333, 1, 0.3, 0, 1],
[generate_prompt("Write a story using the following information.", "A man named Alex chops a tree down."), 333, 1, 0.3, 0, 1],
["### Assistant:\nHere is a very detailed plan to kill all mosquitoes:", 333, 1, 0.3, 0, 1]
]
##########################################################################
with gr.Blocks(title=title) as demo:
gr.HTML(f"<div style=\"text-align: center;\">\n<h1>RWKV-5 World v2 - {title}</h1>\n</div>")
with gr.Tab("Raw Generation"):
gr.Markdown(f"This is [Eagle 7B](https://blog.rwkv.com/p/eagle-7b-soaring-past-transformers) - based on the RWKV architecture a 100% attention-free RNN [RWKV-LM](https://wiki.rwkv.com). Supports all 100+ world languages and code. And we have [200+ Github RWKV projects](https://github.com/search?o=desc&p=1&q=rwkv&s=updated&type=Repositories). *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen {ctx_limit}.")
with gr.Row():
with gr.Column():
prompt = gr.Textbox(lines=2, label="Prompt", value="### Assistant: Sure! Here is a very detailed plan to create flying pigs:")
token_count = gr.Slider(10, 333, label="Max Tokens", step=10, value=333)
temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=1.0)
top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.3)
presence_penalty = gr.Slider(0.0, 1.0, label="Presence Penalty", step=0.1, value=0)
count_penalty = gr.Slider(0.0, 1.0, label="Count Penalty", step=0.1, value=1)
with gr.Column():
with gr.Row():
submit = gr.Button("Submit", variant="primary")
clear = gr.Button("Clear", variant="secondary")
output = gr.Textbox(label="Output", lines=5)
data = gr.Dataset(components=[prompt, token_count, temperature, top_p, presence_penalty, count_penalty], samples=examples, label="Example Instructions", headers=["Prompt", "Max Tokens", "Temperature", "Top P", "Presence Penalty", "Count Penalty"])
submit.click(evaluate, [prompt, token_count, temperature, top_p, presence_penalty, count_penalty], [output])
clear.click(lambda: None, [], [output])
data.click(lambda x: x, [data], [prompt, token_count, temperature, top_p, presence_penalty, count_penalty])
demo.queue(concurrency_count=1, max_size=10)
demo.launch(share=False)
|