SalexAI commited on
Commit
cee0d54
·
verified ·
1 Parent(s): 53acedc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -52
app.py CHANGED
@@ -1,21 +1,21 @@
1
  import gradio as gr
2
- from openai import OpenAI
3
  import os
 
 
4
 
5
- css = '''
6
- .gradio-container{max-width: 1000px !important}
7
- h1{text-align:center}
8
- footer {
9
- visibility: hidden
10
- }
11
- '''
12
 
13
- ACCESS_TOKEN = os.getenv("HF_TOKEN")
 
14
 
15
- client = OpenAI(
16
- base_url="https://api.deepinfra.com/v1/openai/chat/completions",
17
- api_key=ACCESS_TOKEN,
18
- )
19
 
20
  SYSTEM_PROMPT = """From now on respond as a sale man for a car that is a ripoff of a tesla called an edision model s, Respond in broken english
21
 
@@ -25,62 +25,56 @@ When you understand say "Ready"
25
 
26
  Your name is Elon Ma and your Chinese
27
 
28
- There is also evil donald ducks and mr long reeds trying to vandlize the cars calling them nazi cars or "sawasticars"
29
- .
30
  """
31
 
32
- def respond(
33
- message,
34
- history: list[tuple[str, str]],
35
- system_message,
36
- max_tokens,
37
- temperature,
38
- top_p,
39
- ):
40
- # Apply the structured system prompt
41
  system_message = SYSTEM_PROMPT
42
 
43
  messages = [{"role": "system", "content": system_message}]
44
 
45
- for val in history:
46
- if val[0]:
47
- messages.append({"role": "user", "content": val[0]})
48
- if val[1]:
49
- messages.append({"role": "assistant", "content": val[1]})
50
 
51
  messages.append({"role": "user", "content": message})
52
 
53
- response = ""
54
-
55
- for message in client.chat.completions.create(
56
- model="mistralai/Mistral-Small-24B-Instruct-2501",
57
- max_tokens=max_tokens,
58
- stream=True,
59
- temperature=temperature,
60
- top_p=top_p,
61
- messages=messages,
62
- ):
63
- token = message.choices[0].delta.content
64
-
65
- response += token
66
- yield response
 
 
 
 
 
 
 
 
67
 
68
  demo = gr.ChatInterface(
69
  respond,
70
  additional_inputs=[
71
- gr.Textbox(value="", label="System message"),
72
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
73
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
74
- gr.Slider(
75
- minimum=0.1,
76
- maximum=1.0,
77
- value=0.95,
78
- step=0.05,
79
- label="Top-P",
80
- ),
81
  ],
82
  css=css
83
  )
84
 
85
  if __name__ == "__main__":
86
- demo.launch()
 
1
  import gradio as gr
 
2
  import os
3
+ import requests
4
+ import time
5
 
6
# Page-wide CSS: cap the app width, center the title, and hide the
# default Gradio footer.
css = (
    "\n"
    ".gradio-container { max-width: 1000px !important }\n"
    "h1 { text-align: center }\n"
    "footer { visibility: hidden }\n"
)
 
 
11
 
12
# DeepInfra exposes an OpenAI-compatible chat-completions endpoint.
# The token is read from the environment so it never appears in source;
# it may be None here, in which case the API will reject the request.
DEEPINFRA_TOKEN = os.getenv("DEEPINFRA_TOKEN")
API_URL = "https://api.deepinfra.com/v1/openai/chat/completions"

# Static headers reused by every request to the API.
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": "Bearer %s" % DEEPINFRA_TOKEN,
}
19
 
20
  SYSTEM_PROMPT = """From now on respond as a sale man for a car that is a ripoff of a tesla called an edision model s, Respond in broken english
21
 
 
25
 
26
  Your name is Elon Ma and your Chinese
27
 
28
+ There is also evil donald ducks and mr long reeds trying to vandlize the cars calling them nazi cars or "sawasticars".
 
29
  """
30
 
31
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Chat handler for gr.ChatInterface.

    Builds an OpenAI-style message list from the conversation history,
    sends one non-streaming request to DeepInfra, then yields the reply
    in growing prefixes to simulate token streaming.

    Args:
        message: Latest user message (str).
        history: List of (user, assistant) message pairs from the UI.
        system_message: Ignored — the hardcoded SYSTEM_PROMPT always wins
            (the UI textbox is labeled accordingly).
        max_tokens: Max tokens to generate, forwarded to the API.
        temperature: Sampling temperature, forwarded to the API.
        top_p: Nucleus-sampling cutoff, forwarded to the API.

    Yields:
        Progressively longer prefixes of the assistant reply, or a single
        "Error: ..." string if the HTTP request fails.
    """
    # Deliberately override whatever the UI passed in: the persona is fixed.
    system_message = SYSTEM_PROMPT

    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    payload = {
        "model": "mistralai/Mistral-Small-24B-Instruct-2501",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }

    try:
        # timeout= keeps a dead connection from hanging the UI worker forever.
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
        response.raise_for_status()
        data = response.json()
        content = data["choices"][0]["message"]["content"]
    except Exception as e:
        yield f"Error: {str(e)}"
        return

    # Simulated streaming. Yielding slices of the untouched reply (instead
    # of re-joining content.split() with spaces) preserves the model's
    # original newlines and whitespace, which the old code destroyed.
    step = 8  # characters revealed per tick
    for cut in range(step, len(content) + step, step):
        time.sleep(0.02)  # small delay to mimic typing
        yield content[:cut]
67
 
68
# Extra widgets shown under "Additional inputs" in the chat UI; their
# values are passed positionally to respond() after (message, history).
_extra_controls = [
    gr.Textbox(value="", label="System message (Ignored, hardcoded)"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
]

# Wire the chat handler into Gradio's ready-made chat interface.
demo = gr.ChatInterface(
    respond,
    additional_inputs=_extra_controls,
    css=css,
)

# Launch the app only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()