from __future__ import annotations
import spaces
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import whoami
import os

# Best-effort cleanup of a stale ZeroGPU offload cache; os.removedirs raises
# OSError when the directory is missing or not empty, so ignore failures.
try:
    os.removedirs('/data-nvme/zerogpu-offload/')
except OSError:
    pass
# System prompt for the Skywork-o1 thinking model
system_prompt = """You are Skywork-o1, a thinking model developed by Skywork AI, specializing in solving complex problems involving mathematics, coding, and logical reasoning through deep thought. When faced with a user's request, you first engage in a lengthy and in-depth thinking process to explore possible solutions to the problem. After completing your thoughts, you then provide a detailed explanation of the solution process in your response."""
# Load the model and tokenizer
model_name = "Skywork/Skywork-o1-Open-Llama-3.1-8B"
model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype="auto",
device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
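
# Note: torch_dtype="auto" loads the weights in the dtype stored in the checkpoint
# config, and device_map="auto" requires the `accelerate` package to choose device
# placement automatically (on ZeroGPU the GPU is only attached inside @spaces.GPU calls).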
# Generate a reply for the incoming message; @spaces.GPU requests a ZeroGPU slot
# for the duration of the call
@spaces.GPU
def respond(
message,
history: list[tuple[str, str]],
system_message,
max_tokens,
temperature,
top_p,
):
    # Rebuild the conversation from the chat history
conversation = [{"role": "system", "content": system_message}]
for user_msg, assistant_msg in history:
if user_msg:
conversation.append({"role": "user", "content": user_msg})
if assistant_msg:
conversation.append({"role": "assistant", "content": assistant_msg})
conversation.append({"role": "user", "content": message})
    # Build model inputs via the chat template
input_ids = tokenizer.apply_chat_template(
conversation,
tokenize=True,
add_generation_prompt=True,
return_tensors="pt"
).to(model.device)
    # Generate the response
generation = model.generate(
input_ids=input_ids,
max_new_tokens=max_tokens,
do_sample=True,
temperature=temperature,
top_p=top_p,
        pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,  # Llama checkpoints ship without a pad token
)
    # Decode only the newly generated tokens
completion = tokenizer.decode(
generation[0][len(input_ids[0]):],
skip_special_tokens=True,
clean_up_tokenization_spaces=True
)
return completion
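
from threading import Thread
from transformers import TextIteratorStreamer

# Optional streaming variant: gr.ChatInterface also accepts a generator function,
# so this could be passed as fn= instead of `respond`. A minimal sketch built on
# transformers' TextIteratorStreamer; it is illustrative and not wired in below.
@spaces.GPU
def respond_stream(message, history, system_message, max_tokens, temperature, top_p):
    conversation = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            conversation.append({"role": "user", "content": user_msg})
        if assistant_msg:
            conversation.append({"role": "assistant", "content": assistant_msg})
    conversation.append({"role": "user", "content": message})
    input_ids = tokenizer.apply_chat_template(
        conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    # The streamer yields decoded text chunks while generate() runs in a thread.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(
        target=model.generate,
        kwargs=dict(
            input_ids=input_ids,
            streamer=streamer,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
        ),
    ).start()
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial  # Gradio replaces the in-progress message with each yielded value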
# Build the Gradio chat UI; it is rendered inside the Blocks app defined below
# (in the original, this assignment to `demo` was silently overwritten by the
# `gr.Blocks()` context further down, so the chat UI never appeared)
chat_interface = gr.ChatInterface(
fn=respond,
additional_inputs=[
gr.Textbox(value=system_prompt, label="System message"),
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(
minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
),
],
)
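
# Hugging Face OAuth demo: when a callback declares a gr.OAuthProfile or
# gr.OAuthToken parameter, Gradio injects the logged-in user's profile/token,
# or None when nobody is signed in (requires `hf_oauth: true` in the Space metadata).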
def hello(profile: gr.OAuthProfile | None) -> str:
if profile is None:
return "I don't know you."
return f"Hello {profile.name}"
def list_organizations(oauth_token: gr.OAuthToken | None) -> str:
if oauth_token is None:
return "Please deploy this on Spaces and log in to list organizations."
org_names = [org["name"] for org in whoami(oauth_token.token)["orgs"]]
return f"You belong to {', '.join(org_names)}."
# Assemble the final app: login button and account info on top, the chat below
with gr.Blocks() as demo:
    gr.LoginButton()
    m1 = gr.Markdown()
    m2 = gr.Markdown()
    chat_interface.render()
    demo.load(hello, inputs=None, outputs=m1)
    demo.load(list_organizations, inputs=None, outputs=m2)
if __name__ == "__main__":
demo.launch()