Spaces:
Running
on
T4
Running
on
T4
Update app.py
Browse files
app.py
CHANGED
@@ -12,7 +12,6 @@ from io import BytesIO
|
|
12 |
import torch
|
13 |
import torch.nn.functional as F
|
14 |
from datetime import datetime
|
15 |
-
from transformers import CLIPImageProcessor
|
16 |
from huggingface_hub import hf_hub_download
|
17 |
from pynvml import *
|
18 |
nvmlInit()
|
@@ -20,14 +19,13 @@ gpu_h = nvmlDeviceGetHandleByIndex(0)
|
|
20 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
21 |
|
22 |
ctx_limit = 4000
|
23 |
-
gen_limit =
|
24 |
|
25 |
########################## text rwkv ################################################################
|
26 |
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
27 |
|
28 |
-
title_v6 = "
|
29 |
-
model_path_v6 = hf_hub_download(repo_id="BlinkDL/
|
30 |
-
# model_path_v6 = f'/mnt/e/RWKV-Runner/models/{title_v6}' # conda activate torch2; cd /mnt/program/git-public/RWKV-Gradio-1; python app.py
|
31 |
model_v6 = RWKV(model=model_path_v6.replace('.pth',''), strategy='cuda fp16')
|
32 |
pipeline_v6 = PIPELINE(model_v6, "rwkv_vocab_v20230424")
|
33 |
|
@@ -108,7 +106,7 @@ def evaluate(
|
|
108 |
yield out_str.strip()
|
109 |
|
110 |
examples = [
|
111 |
-
[
|
112 |
[generate_prompt("Please give the pros and cons of hodl versus active trading."), gen_limit, 1, 0.3, 0.5, 0.5],
|
113 |
["Assistant: How can we craft an engaging story featuring vampires on Mars? Let's think step by step and provide an expert response:", gen_limit, 1, 0.3, 0.5, 0.5],
|
114 |
["Assistant: How can we persuade Elon Musk to follow you on Twitter? Let's think step by step and provide an expert response:", gen_limit, 1, 0.3, 0.5, 0.5],
|
@@ -129,10 +127,10 @@ with gr.Blocks(title=title_v6) as demo:
|
|
129 |
gr.HTML(f"<div style=\"text-align: center;\">\n<h1>{title_v6}</h1>\n</div>")
|
130 |
|
131 |
with gr.Tab("=== Base Model (Raw Generation) ==="):
|
132 |
-
gr.Markdown(f
|
133 |
with gr.Row():
|
134 |
with gr.Column():
|
135 |
-
prompt = gr.Textbox(lines=6, label="Prompt", value=
|
136 |
token_count = gr.Slider(10, gen_limit, label="Max Tokens", step=10, value=gen_limit)
|
137 |
temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=1.0)
|
138 |
top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.3)
|
@@ -149,4 +147,4 @@ with gr.Blocks(title=title_v6) as demo:
|
|
149 |
data.click(lambda x: x, [data], [prompt, token_count, temperature, top_p, presence_penalty, count_penalty])
|
150 |
|
151 |
demo.queue(concurrency_count=1, max_size=10)
|
152 |
-
demo.launch(share=False)
|
|
|
12 |
import torch
|
13 |
import torch.nn.functional as F
|
14 |
from datetime import datetime
|
|
|
15 |
from huggingface_hub import hf_hub_download
|
16 |
from pynvml import *
|
17 |
nvmlInit()
|
|
|
19 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
20 |
|
21 |
ctx_limit = 4000
|
22 |
+
gen_limit = 1000
|
23 |
|
24 |
########################## text rwkv ################################################################
|
25 |
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
26 |
|
27 |
+
title_v6 = "rwkv7-g1-2.9b-20250519-ctx4096"
|
28 |
+
model_path_v6 = hf_hub_download(repo_id="BlinkDL/rwkv7-g1", filename=f"{title_v6}.pth")
|
|
|
29 |
model_v6 = RWKV(model=model_path_v6.replace('.pth',''), strategy='cuda fp16')
|
30 |
pipeline_v6 = PIPELINE(model_v6, "rwkv_vocab_v20230424")
|
31 |
|
|
|
106 |
yield out_str.strip()
|
107 |
|
108 |
examples = [
|
109 |
+
["User: simulate SpaceX mars landing using python\n\nAssistant: <think", gen_limit, 1, 0.3, 0.5, 0.5],
|
110 |
[generate_prompt("Please give the pros and cons of hodl versus active trading."), gen_limit, 1, 0.3, 0.5, 0.5],
|
111 |
["Assistant: How can we craft an engaging story featuring vampires on Mars? Let's think step by step and provide an expert response:", gen_limit, 1, 0.3, 0.5, 0.5],
|
112 |
["Assistant: How can we persuade Elon Musk to follow you on Twitter? Let's think step by step and provide an expert response:", gen_limit, 1, 0.3, 0.5, 0.5],
|
|
|
127 |
gr.HTML(f"<div style=\"text-align: center;\">\n<h1>{title_v6}</h1>\n</div>")
|
128 |
|
129 |
with gr.Tab("=== Base Model (Raw Generation) ==="):
|
130 |
+
gr.Markdown(f'This is [RWKV7 G1](https://huggingface.co/BlinkDL/rwkv7-g1) 2.9B reasoning base LM - an attention-free pure RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM). Supports 100+ world languages and code. Check [400+ Github RWKV projects](https://github.com/search?o=desc&p=1&q=rwkv&s=updated&type=Repositories). *** Can try examples (bottom of page) *** (can edit them). Demo limited to ctxlen {ctx_limit}.')
|
131 |
with gr.Row():
|
132 |
with gr.Column():
|
133 |
+
prompt = gr.Textbox(lines=6, label="Prompt", value="User: simulate SpaceX mars landing using python\n\nAssistant: <think")
|
134 |
token_count = gr.Slider(10, gen_limit, label="Max Tokens", step=10, value=gen_limit)
|
135 |
temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=1.0)
|
136 |
top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.3)
|
|
|
147 |
data.click(lambda x: x, [data], [prompt, token_count, temperature, top_p, presence_penalty, count_penalty])
|
148 |
|
149 |
demo.queue(concurrency_count=1, max_size=10)
|
150 |
+
demo.launch(share=False)
|