Demo-MR-Breexe-8x7B

Runtime error

App Files Files Community

YC-Chen committed on Jan 15

Commit

4fb337d

•

1 Parent(s): c8f3309

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -36

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ LICENSE = """
 """
-DEFAULT_SYSTEM_PROMPT = ""
 API_URL = os.environ.get("API_URL")
 TOKEN = os.environ.get("TOKEN")
@@ -25,19 +25,9 @@ HEADERS = {
 }
 MODEL_NAME="breeze-7b-instruct-v01"
-TEMPERATURE=1
-MAX_TOKENS=16
-TOP_P=0
 PRESENCE_PENALTY=0
 FREQUENCY_PENALTY=0
-eos_token = "</s>"
-MAX_MAX_NEW_TOKENS = 4096
-DEFAULT_MAX_NEW_TOKENS = 1536
-max_prompt_length = 8192 - MAX_MAX_NEW_TOKENS - 10
 model_name = "MediaTek-Research/Breeze-7B-Instruct-v0.1"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -77,10 +67,10 @@ with gr.Blocks() as demo:
         )
         temperature = gr.Slider(
             label='Temperature',
-            minimum=0.1,
-            maximum=1.0,
             step=0.1,
-            value=0.3,
         )
         top_p = gr.Slider(
             label='Top-p (nucleus sampling)',
@@ -89,19 +79,13 @@ with gr.Blocks() as demo:
             step=0.05,
             value=0.95,
         )
-        top_k = gr.Slider(
-            label='Top-k',
-            minimum=1,
-            maximum=1000,
-            step=1,
-            value=50,
-        )
     def user(user_message, history):
         return "", history + [[user_message, None]]
-    def bot(history, max_new_tokens, temperature, top_p, top_k, system_prompt):
         chat_data = []
         for user_msg, assistant_msg in history:
             if user_msg is not None:
@@ -117,7 +101,7 @@ with gr.Blocks() as demo:
         data = {
             "model": MODEL_NAME,
             "prompt": str(message),
-            "temperature": float(temperature),
             "n": 1,
             "max_tokens": int(max_new_tokens),
             "stop": "",
@@ -126,18 +110,23 @@ with gr.Blocks() as demo:
             "echo": False,
             "presence_penalty": PRESENCE_PENALTY,
             "frequency_penalty": FREQUENCY_PENALTY,
         }
-        print(data)
-        outputs = requests.post(API_URL, headers=HEADERS, data=json.dumps(data)).json()
-        print(outputs)
-        response = outputs['choices'][0]['text']
-        if history[-1][1] is None:
-            history[-1][1] = response
-        else:
-            history[-1][1] += response
-        return history
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
         fn=bot,
@@ -146,7 +135,6 @@ with gr.Blocks() as demo:
             max_new_tokens,
             temperature,
             top_p,
-            top_k,
             system_prompt,
         ],
         outputs=chatbot
@@ -160,7 +148,6 @@ with gr.Blocks() as demo:
             max_new_tokens,
             temperature,
             top_p,
-            top_k,
             system_prompt,
         ],
         outputs=chatbot

 """
+DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant built by MediaTek Research. The user you are helping speaks Traditional Chinese and comes from Taiwan."
 API_URL = os.environ.get("API_URL")
 TOKEN = os.environ.get("TOKEN")
 }
 MODEL_NAME="breeze-7b-instruct-v01"
 PRESENCE_PENALTY=0
 FREQUENCY_PENALTY=0
 model_name = "MediaTek-Research/Breeze-7B-Instruct-v0.1"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
         )
         temperature = gr.Slider(
             label='Temperature',
+            minimum=0.0,
+            maximum=0.5,
             step=0.1,
+            value=0.0,
         )
         top_p = gr.Slider(
             label='Top-p (nucleus sampling)',
             step=0.05,
             value=0.95,
         )
     def user(user_message, history):
         return "", history + [[user_message, None]]
+    def bot(history, max_new_tokens, temperature, top_p, system_prompt):
         chat_data = []
         for user_msg, assistant_msg in history:
             if user_msg is not None:
         data = {
             "model": MODEL_NAME,
             "prompt": str(message),
+            "temperature": float(temperature) + 0.0001,
             "n": 1,
             "max_tokens": int(max_new_tokens),
             "stop": "",
             "echo": False,
             "presence_penalty": PRESENCE_PENALTY,
             "frequency_penalty": FREQUENCY_PENALTY,
+            "stream": True,
         }
+        with requests.post(url, headers=HEADERS, data=json.dumps(data), stream=True) as r:
+            for response in r.iter_lines():
+                if len(response) > 0:
+                    text = response.decode()
+                    if text != "data: [DONE]":
+                        if text.startswith("data: "):
+                            text = text[5:]
+                        delta = json.loads(text)["choices"][0]["text"]
+                        if history[-1][1] is None:
+                            history[-1][1] = delta
+                        else:
+                            history[-1][1] += delta
+                        yield history
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
         fn=bot,
             max_new_tokens,
             temperature,
             top_p,
             system_prompt,
         ],
         outputs=chatbot
             max_new_tokens,
             temperature,
             top_p,
             system_prompt,
         ],
         outputs=chatbot