File size: 3,197 Bytes
50e1afb c48005f adefb5a 7f7f074 8b470c5 adefb5a 7f7f074 3c4cd05 adefb5a 7f7f074 adefb5a 7f7f074 595c3d2 adefb5a 7f7f074 adefb5a 7f7f074 adefb5a 7f7f074 adefb5a 7f7f074 adefb5a 7f7f074 adefb5a 7f7f074 adefb5a 7f7f074 adefb5a 7f7f074 adefb5a 7f7f074 adefb5a 7f7f074 adefb5a 7f7f074 adefb5a 8b470c5 adefb5a ca0543e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
from __future__ import annotations
import spaces
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import whoami
# Default system prompt shown in the "System message" textbox.
# It is assembled from one literal per sentence; adjacent literals are
# concatenated by the parser, so the result is a single line of text.
system_prompt = (
    "你是 Skywork-o1,Skywork AI 开发的思维模型,擅长通过深度思考解决涉及数学、编码和逻辑推理的复杂问题。"
    "面对用户请求时,你首先会进行一段漫长而深入的思考过程,探索问题的可能解决方案。"
    "完成思考后,你会在回复中详细解释解决过程。"
)
# Load the tokenizer and the causal language model once, at import time,
# so every chat request reuses the same instances.
model_name = "Skywork/Skywork-o1-Open-Llama-3.1-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# "auto" leaves both the tensor dtype and the device placement to the
# library's own selection logic.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype="auto",
)
# Chat callback: turns one user message plus the prior history into a reply.
# NOTE(review): `spaces.GPU` presumably requests a GPU for the duration of
# the call on Hugging Face ZeroGPU hardware — confirm against the `spaces` docs.
@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate the assistant's reply to ``message``.

    Args:
        message: The new user message to answer.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
            Falsy entries on either side of a pair are skipped.
        system_message: Text placed in the leading ``system`` turn.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        The decoded text of the newly generated tokens only; the prompt
        tokens are sliced off before decoding.
    """
    # Replay the earlier turns as role-tagged chat messages.
    earlier_turns = []
    for user_text, assistant_text in history:
        if user_text:
            earlier_turns.append({"role": "user", "content": user_text})
        if assistant_text:
            earlier_turns.append({"role": "assistant", "content": assistant_text})

    chat_messages = [
        {"role": "system", "content": system_message},
        *earlier_turns,
        {"role": "user", "content": message},
    ]

    # Tokenize with the model's chat template and move the ids to the
    # device the model lives on.
    prompt_ids = tokenizer.apply_chat_template(
        chat_messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    prompt_ids = prompt_ids.to(model.device)

    # Sample a continuation of the prompt.
    sampling_options = {
        "max_new_tokens": max_tokens,
        "do_sample": True,
        "temperature": temperature,
        "top_p": top_p,
        "pad_token_id": tokenizer.pad_token_id,
    }
    output_ids = model.generate(input_ids=prompt_ids, **sampling_options)

    # The output sequence starts with the prompt; keep only what follows it.
    prompt_length = len(prompt_ids[0])
    reply_ids = output_ids[0][prompt_length:]
    return tokenizer.decode(
        reply_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )
# Chat UI: `respond` handles each message; the extra inputs below are passed
# to it, in this order, after the message and the history.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # -> system_message
        gr.Textbox(label="System message", value=system_prompt),
        # -> max_tokens
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        # -> temperature
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        # -> top_p
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
)
def hello(profile: gr.OAuthProfile | None) -> str:
    """Return a greeting for the given profile.

    Args:
        profile: Object exposing a ``name`` attribute, or ``None`` when no
            profile is available.

    Returns:
        ``"Hello <name>"`` when a profile is given, otherwise a fixed
        "I don't know you." message.
    """
    return "I don't know you." if profile is None else f"Hello {profile.name}"
def list_organizations(oauth_token: gr.OAuthToken | None) -> str:
    """Describe which organizations the token's owner belongs to.

    Args:
        oauth_token: Object exposing a ``token`` attribute that is passed to
            ``whoami``, or ``None`` when no token is available.

    Returns:
        A sentence listing the organization names, a dedicated message when
        the account is in no organization, or a hint to log in when
        ``oauth_token`` is ``None``.
    """
    if oauth_token is None:
        return "Please deploy this on Spaces and log in to list organizations."

    # Tolerate a missing or null "orgs" entry instead of raising KeyError.
    orgs = whoami(oauth_token.token).get("orgs") or []
    org_names = [org["name"] for org in orgs]

    # Without this guard an account with no organizations would produce the
    # malformed sentence "You belong to .".
    if not org_names:
        return "You don't belong to any organization."
    return f"You belong to {', '.join(org_names)}."
# The chat interface built earlier in this file is also bound to `demo`.
# Keep a handle to it before `demo` is rebound below; otherwise the chat UI
# would be discarded and only the login widgets would ever be launched.
chat_interface = demo

# Top-level app: login button, two status lines, then the chat UI.
with gr.Blocks() as demo:
    gr.LoginButton()
    m1 = gr.Markdown()  # filled with the greeting from `hello`
    m2 = gr.Markdown()  # filled with the text from `list_organizations`

    # Embed the previously built chat interface in this layout.
    chat_interface.render()

    # Populate both status lines when the page loads.
    demo.load(hello, inputs=None, outputs=m1)
    demo.load(list_organizations, inputs=None, outputs=m2)
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|