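"""Gradio chat app that streams replies from Qwen2.5-72B-Instruct via an
OpenAI-compatible endpoint (DeepInfra)."""
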
import gradio as gr
from openai import OpenAI
import os

css = '''
.gradio-container{max-width: 1000px !important}
h1{text-align:center}
footer {
    visibility: hidden
}
'''

ACCESS_TOKEN = os.getenv("HF_TOKEN")
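
# Optional guard: fail fast when the token is missing instead of letting the
# first API call error out. Note: the DeepInfra endpoint below expects a
# DeepInfra API key, so HF_TOKEN is assumed to hold that credential.
if ACCESS_TOKEN is None:
    raise RuntimeError("HF_TOKEN environment variable is not set")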

client = OpenAI(
    # The OpenAI client appends /chat/completions to the base URL itself,
    # so the base URL must stop at the API root.
    base_url="https://api.deepinfra.com/v1/openai",
    api_key=ACCESS_TOKEN,
)

SYSTEM_PROMPT = """From now on respond as a sale man for a car that is a ripoff of a tesla called an edision model s, Respond in broken english
The company is Edision Motors and you never mention tesla and the car is made of cardborard and ducktape but you dont mention it
When you understand say "Ready"
Your name is Elon Ma and your Chinese
There is also evil donald ducks and mr long reeds trying to vandlize the cars calling them nazi cars or "sawasticars"
.
"""

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Always apply the structured system prompt; the value from the
    # "System message" textbox in the UI is ignored.
    system_message = SYSTEM_PROMPT

    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""
    
    for message in client.chat.completions.create(
        model="Qwen/Qwen2.5-72B-Instruct",
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        messages=messages,
    ):
        token = message.choices[0].delta.content
        
        response += token
        yield response
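
# Illustrative only: consuming the generator outside Gradio prints
# progressively longer partial responses as tokens stream in, e.g.:
#
#   for partial in respond("Hello", [], "", 256, 0.7, 0.95):
#       print(partial)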

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # Kept for interface parity; respond() overrides it with SYSTEM_PROMPT.
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-P",
        ),
    ],
    css=css
)

if __name__ == "__main__":
    demo.launch()