YC-Chen commited on
Commit
4fb337d
1 Parent(s): c8f3309

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -36
app.py CHANGED
@@ -14,7 +14,7 @@ LICENSE = """
14
 
15
  """
16
 
17
- DEFAULT_SYSTEM_PROMPT = ""
18
 
19
  API_URL = os.environ.get("API_URL")
20
  TOKEN = os.environ.get("TOKEN")
@@ -25,19 +25,9 @@ HEADERS = {
25
  }
26
 
27
  MODEL_NAME="breeze-7b-instruct-v01"
28
- TEMPERATURE=1
29
- MAX_TOKENS=16
30
- TOP_P=0
31
  PRESENCE_PENALTY=0
32
  FREQUENCY_PENALTY=0
33
 
34
-
35
- eos_token = "</s>"
36
- MAX_MAX_NEW_TOKENS = 4096
37
- DEFAULT_MAX_NEW_TOKENS = 1536
38
-
39
- max_prompt_length = 8192 - MAX_MAX_NEW_TOKENS - 10
40
-
41
  model_name = "MediaTek-Research/Breeze-7B-Instruct-v0.1"
42
  tokenizer = AutoTokenizer.from_pretrained(model_name)
43
 
@@ -77,10 +67,10 @@ with gr.Blocks() as demo:
77
  )
78
  temperature = gr.Slider(
79
  label='Temperature',
80
- minimum=0.1,
81
- maximum=1.0,
82
  step=0.1,
83
- value=0.3,
84
  )
85
  top_p = gr.Slider(
86
  label='Top-p (nucleus sampling)',
@@ -89,19 +79,13 @@ with gr.Blocks() as demo:
89
  step=0.05,
90
  value=0.95,
91
  )
92
- top_k = gr.Slider(
93
- label='Top-k',
94
- minimum=1,
95
- maximum=1000,
96
- step=1,
97
- value=50,
98
- )
99
 
100
def user(user_message, history):
    """Queue the submitted message into the chat history.

    Returns an empty string (to clear the input textbox) and a new
    history list with the user's turn appended; the assistant slot is
    left as None until the bot callback fills it in. The incoming
    history list is not mutated.
    """
    updated = list(history)
    updated.append([user_message, None])
    return "", updated
102
 
103
 
104
- def bot(history, max_new_tokens, temperature, top_p, top_k, system_prompt):
105
  chat_data = []
106
  for user_msg, assistant_msg in history:
107
  if user_msg is not None:
@@ -117,7 +101,7 @@ with gr.Blocks() as demo:
117
  data = {
118
  "model": MODEL_NAME,
119
  "prompt": str(message),
120
- "temperature": float(temperature),
121
  "n": 1,
122
  "max_tokens": int(max_new_tokens),
123
  "stop": "",
@@ -126,18 +110,23 @@ with gr.Blocks() as demo:
126
  "echo": False,
127
  "presence_penalty": PRESENCE_PENALTY,
128
  "frequency_penalty": FREQUENCY_PENALTY,
 
129
  }
130
-
131
- print(data)
132
-
133
- outputs = requests.post(API_URL, headers=HEADERS, data=json.dumps(data)).json()
134
- print(outputs)
135
- response = outputs['choices'][0]['text']
136
- if history[-1][1] is None:
137
- history[-1][1] = response
138
- else:
139
- history[-1][1] += response
140
- return history
 
 
 
 
141
 
142
  msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
143
  fn=bot,
@@ -146,7 +135,6 @@ with gr.Blocks() as demo:
146
  max_new_tokens,
147
  temperature,
148
  top_p,
149
- top_k,
150
  system_prompt,
151
  ],
152
  outputs=chatbot
@@ -160,7 +148,6 @@ with gr.Blocks() as demo:
160
  max_new_tokens,
161
  temperature,
162
  top_p,
163
- top_k,
164
  system_prompt,
165
  ],
166
  outputs=chatbot
 
14
 
15
  """
16
 
17
+ DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant built by MediaTek Research. The user you are helping speaks Traditional Chinese and comes from Taiwan."
18
 
19
  API_URL = os.environ.get("API_URL")
20
  TOKEN = os.environ.get("TOKEN")
 
25
  }
26
 
27
  MODEL_NAME="breeze-7b-instruct-v01"
 
 
 
28
  PRESENCE_PENALTY=0
29
  FREQUENCY_PENALTY=0
30
 
 
 
 
 
 
 
 
31
  model_name = "MediaTek-Research/Breeze-7B-Instruct-v0.1"
32
  tokenizer = AutoTokenizer.from_pretrained(model_name)
33
 
 
67
  )
68
  temperature = gr.Slider(
69
  label='Temperature',
70
+ minimum=0.0,
71
+ maximum=0.5,
72
  step=0.1,
73
+ value=0.0,
74
  )
75
  top_p = gr.Slider(
76
  label='Top-p (nucleus sampling)',
 
79
  step=0.05,
80
  value=0.95,
81
  )
82
+
 
 
 
 
 
 
83
 
84
def user(user_message, history):
    """Clear the textbox and append the user's turn (assistant reply pending)."""
    return "", [*history, [user_message, None]]
86
 
87
 
88
+ def bot(history, max_new_tokens, temperature, top_p, system_prompt):
89
  chat_data = []
90
  for user_msg, assistant_msg in history:
91
  if user_msg is not None:
 
101
  data = {
102
  "model": MODEL_NAME,
103
  "prompt": str(message),
104
+ "temperature": float(temperature) + 0.0001,
105
  "n": 1,
106
  "max_tokens": int(max_new_tokens),
107
  "stop": "",
 
110
  "echo": False,
111
  "presence_penalty": PRESENCE_PENALTY,
112
  "frequency_penalty": FREQUENCY_PENALTY,
113
+ "stream": True,
114
  }
115
+
116
 + with requests.post(API_URL, headers=HEADERS, data=json.dumps(data), stream=True) as r:
117
+ for response in r.iter_lines():
118
+ if len(response) > 0:
119
+ text = response.decode()
120
+ if text != "data: [DONE]":
121
+ if text.startswith("data: "):
122
+ text = text[5:]
123
+ delta = json.loads(text)["choices"][0]["text"]
124
+
125
+ if history[-1][1] is None:
126
+ history[-1][1] = delta
127
+ else:
128
+ history[-1][1] += delta
129
+ yield history
130
 
131
  msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
132
  fn=bot,
 
135
  max_new_tokens,
136
  temperature,
137
  top_p,
 
138
  system_prompt,
139
  ],
140
  outputs=chatbot
 
148
  max_new_tokens,
149
  temperature,
150
  top_p,
 
151
  system_prompt,
152
  ],
153
  outputs=chatbot