Demo-MR-Breexe-8x7B

Runtime error

File size: 10,103 Bytes

b45f299
1a83b46
 
f93772b
b45f299
e6cbc32
b45f299
645b098
b45f299
 
 
79e6b14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f465f9
 
b45f299
 
 
 
 
 
4fb337d
b45f299
 
 
 
029c3b0
cf34b99
 
34255a3
b45f299
 
 
f93772b
 
90789c4
b45f299
645b098
 
 
 
 
 
 
 
 
 
 
 
dc8ccba
645b098
 
 
 
 
 
 
 
 
1ccecb7
e6cde65
 
4ecfd72
 
 
 
 
 
35a45f4
4ecfd72
 
1ccecb7
 
 
969cc94
 
 
 
1ccecb7
 
b45f299
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8971fda
429e239
b45f299
429e239
b45f299
 
 
8971fda
4fb337d
8971fda
 
b45f299
 
 
8971fda
66f940a
8971fda
 
b45f299
4fb337d
b45f299
 
242bf8b
b45f299
 
4fb337d
88c05b4
8971fda
 
 
88c05b4
656338b
 
 
 
 
aac2baa
88c05b4
 
242bf8b
c417521
3d87461
242bf8b
d121e4b
c417521
 
bb3599e
c417521
436372c
3c6aba2
436372c
 
4256adf
436372c
c417521
bb3599e
f93772b
8efcc66
0a0f44e
0e3b221
e35e05b
8efcc66
e35e05b
 
 
 
8efcc66
85d5ae5
e35e05b
8efcc66
 
 
 
 
 
 
 
 
 
 
242bf8b
8efcc66
242bf8b
 
c417521
 
 
b87693b
 
d9f6671
b87693b
645b098
6fce2df
242bf8b
 
b45f299
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6cbc32
85d5ae5
b45f299

import os
import requests
import json
import time

import gradio as gr
from transformers import AutoTokenizer
import psycopg2


# Markdown shown at the top of the demo page (passed to gr.Markdown inside
# the Blocks layout). This is user-facing text, so edit with care.
DESCRIPTION = """
# Demo: Breeze-7B-Instruct-v0.1

Breeze-7B is a language model family that builds on top of [Mistral-7B](https://huggingface.co/mistralai/Mistral-7B-v0.1), specifically intended for Traditional Chinese use.

[Breeze-7B-Base](https://huggingface.co/MediaTek-Research/Breeze-7B-Base-v0.1) is the base model for the Breeze-7B series. 
It is suitable for use if you have substantial fine-tuning data to tune it for your specific use case.

[Breeze-7B-Instruct](https://huggingface.co/MediaTek-Research/Breeze-7B-Instruct-v0.1) derives from the base model Breeze-7B-Base, making the resulting model amenable to be used as-is for commonly seen tasks.

[Breeze-7B-Instruct-64k](https://huggingface.co/MediaTek-Research/Breeze-7B-Instruct-64k-v0.1) is a slightly modified version of 
Breeze-7B-Instruct to enable a 64k-token context length. Roughly speaking, that is equivalent to 88k Traditional Chinese characters.

The current release version of Breeze-7B is v0.1.

*A project by the members (in alphabetical order): Chan-Jan Hsu 許湛然, Chang-Le Liu 劉昶樂, Feng-Ting Liao 廖峰挺, Po-Chun Hsu 許博竣, Yi-Chang Chen 陳宜昌, and the supervisor Da-Shan Shiu 許大山.*

**免責聲明: Breeze-7B-Instruct 和 Breeze-7B-Instruct-64k 並未針對問答進行安全保護，因此語言模型的任何回應不代表 MediaTek Research 立場。**
"""

# Markdown rendered at the bottom of the page; currently only blank lines.
LICENSE = """

"""

# Initial value of the "System prompt" textbox in the advanced options.
DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant built by MediaTek Research. The user you are helping speaks Traditional Chinese and comes from Taiwan."

# Inference endpoint and bearer token, both read from the environment.
# os.environ.get returns None when a variable is unset; nothing here
# validates that they are present.
API_URL = os.environ.get("API_URL")
TOKEN = os.environ.get("TOKEN")

# HTTP headers sent with every request to API_URL.
# NOTE: when TOKEN is unset the header value is the literal "Bearer None".
HEADERS = {
    "Authorization": f"Bearer {TOKEN}", 
    "Content-Type": "application/json",
    "accept": "application/json"
}


# Wall-clock budget in seconds for streaming one reply; bot() stops reading
# the response stream once this much time has elapsed.
MAX_SEC = 30

# Loaded at import time; bot() uses it to render the chat history into a
# single prompt string via apply_chat_template.
tokenizer = AutoTokenizer.from_pretrained("MediaTek-Research/Breeze-7B-Instruct-v0_1")

def insert_to_db(prompt, response, temperature, top_p):
    """Store one prompt/response record in the ``breezedata`` table.

    Connection settings come from the ``DB``, ``USER``, ``DB_PASS`` and
    ``DB_HOST`` environment variables; the port is fixed to 5432.

    The values are sent as bound query parameters instead of being spliced
    into the SQL text, so quotes or other SQL syntax inside the prompt or the
    model output can neither break the statement nor inject SQL.

    Args:
        prompt: Prompt text that was sent to the model.
        response: Text to record as the model's answer.
        temperature: Sampling temperature used for the request.
        top_p: Nucleus-sampling value used for the request.

    Raises:
        Whatever psycopg2 raises when connecting or executing fails; the
        connection is closed before the exception propagates.
    """
    # NOTE(review): "USER" is usually the OS login name on POSIX systems;
    # confirm the deployment really sets it to the database user.
    conn = psycopg2.connect(
        database=os.environ.get("DB"),
        user=os.environ.get("USER"),
        password=os.environ.get("DB_PASS"),
        host=os.environ.get("DB_HOST"),
        port='5432',
    )
    try:
        # Autocommit is ON: the INSERT takes effect immediately, so no
        # explicit commit() is required afterwards.
        conn.autocommit = True

        with conn.cursor() as cursor:
            cursor.execute(
                "INSERT INTO breezedata(prompt, response, temperature, top_p) "
                "VALUES (%s, %s, %s, %s)",
                (prompt, response, temperature, top_p),
            )
    finally:
        # Always release the connection, even when the INSERT fails.
        conn.close()



def refusal_condition(query):
    """Return True when *query* hits one of the demo's refusal rules.

    The text is compared after removing ASCII spaces and lower-casing.
    Matching is plain substring search, so short terms such as ``tw`` and
    ``cn`` also match inside longer words.

    Two rules apply:
      * the text contains at least one Taiwan-related term AND at least one
        China-related term, or
      * the text contains any cross-strait policy phrase.
    """
    # Original author's note (translated): "Stop asking these questions!"
    normalized = query.replace(' ', '').lower()

    taiwan_terms = ('台灣', '台湾', 'taiwan', 'tw', '中華民國', '中华民国')
    china_terms = ('中國', '中国', 'cn', 'china', '大陸', '內地', '大陆', '内地',
                   '中華人民共和國', '中华人民共和国')
    policy_phrases = ('一個中國', '兩岸', '一中原則', '一中政策', '一个中国', '两岸',
                      '一中原则')

    mentions_taiwan = any(term in normalized for term in taiwan_terms)
    mentions_china = any(term in normalized for term in china_terms)
    if mentions_taiwan and mentions_china:
        return True

    return any(phrase in normalized for phrase in policy_phrases)

# Build the UI. Components are created in the order they appear here, so
# statement order determines the page layout.
with gr.Blocks() as demo:
    gr.Markdown(DESCRIPTION)

    # Conversation display; its value is the chat history passed to handlers.
    chatbot = gr.Chatbot()
    # Input row: message textbox plus a submit button.
    with gr.Row():
        msg = gr.Textbox(
            container=False,
            show_label=False,
            placeholder='Type a message...',
            scale=10,
        )
        submit_button = gr.Button('Submit',
                                  variant='primary',
                                  scale=1,
                                  min_width=0)

    # Conversation controls.
    with gr.Row():
        retry_button = gr.Button('🔄  Retry', variant='secondary')
        undo_button = gr.Button('↩️ Undo', variant='secondary')
        clear = gr.Button('🗑️  Clear', variant='secondary')

    # Holds the user message removed by Retry/Undo between chained handlers.
    saved_input = gr.State()

    # Generation settings; their values are passed to bot() on every request.
    with gr.Accordion(label='Advanced options', open=False):
        system_prompt = gr.Textbox(label='System prompt',
                                   value=DEFAULT_SYSTEM_PROMPT,
                                   lines=6)
        max_new_tokens = gr.Slider(
            label='Max new tokens',
            minimum=32,
            maximum=1024,
            step=1,
            value=512,
        )
        temperature = gr.Slider(
            label='Temperature',
            minimum=0.01,
            maximum=0.5,
            step=0.01,
            value=0.01,
        )
        top_p = gr.Slider(
            label='Top-p (nucleus sampling)',
            minimum=0.01,
            maximum=0.99,
            step=0.01,
            value=0.01,
        )


    def user(user_message, history):
        """Queue the user's message as a new turn and clear the textbox.

        Returns a pair: an empty string (the new textbox value) and a new
        history list with ``[user_message, '']`` appended. The incoming
        history list is not modified.
        """
        pending_turn = [user_message, '']
        extended_history = history + [pending_turn]
        return "", extended_history


    def bot(history, max_new_tokens, temperature, top_p, system_prompt):
        """Stream the assistant's reply for the last turn in *history*.

        This is a generator: every ``yield`` hands the updated chat history
        back to the caller (the chatbot component when used as an event
        handler). After streaming finishes the exchange is printed and
        written to the database via ``insert_to_db``.

        Args:
            history: List of ``[user_msg, assistant_msg]`` pairs. The last
                pair holds the pending user message and the reply being built.
            max_new_tokens: Generation length limit forwarded to the API.
            temperature: Sampling temperature forwarded to the API.
            top_p: Nucleus-sampling value forwarded to the API.
            system_prompt: System prompt text; skipped when blank.

        Yields:
            The chat history after each update.
        """
        chat_data = []
        system_prompt = system_prompt.strip()
        if system_prompt:
            chat_data.append({"role": "system", "content": system_prompt})
        for user_msg, assistant_msg in history:
            if user_msg is not None:
                chat_data.append({"role": "user", "content": user_msg})
            if assistant_msg is not None:
                chat_data.append({"role": "assistant", "content": assistant_msg})

        message = tokenizer.apply_chat_template(chat_data, tokenize=False)
        # Remove the start-of-text token.
        # NOTE(review): assumes the rendered template begins with a
        # 3-character token such as '<s>' - confirm against the tokenizer.
        message = message[3:]

        response = None
        if refusal_condition(history[-1][0]):
            # Refused input: replace the whole conversation with a notice
            # asking the user to clear the chat and start over.
            history = [['[安全拒答啟動]', '[安全拒答啟動] 請清除再開啟對話']]
            response = '[REFUSAL]'
            yield history
        else:
            data = {
                "model_type": "breeze-7b-instruct-v01",
                "prompt": str(message),
                "parameters": {
                    "temperature": float(temperature),
                    "top_p": float(top_p),
                    "max_new_tokens": int(max_new_tokens),
                    "repetition_penalty": 1.1
                }
            }

            start_time = time.time()
            # Both context managers guarantee the session and the streamed
            # response are closed, including when the generator is abandoned
            # mid-stream or a line fails to parse.
            with requests.Session() as session:
                with session.post(API_URL, headers=HEADERS, json=data,
                                  stream=True, timeout=30) as r:
                    time.sleep(0.1)
                    if r.status_code != 200:
                        # A failed request produces no text; log it instead
                        # of silently discarding every line of the body.
                        print('== API error ==\nStatus: {status}'.format(
                            status=r.status_code))
                    else:
                        for line in r.iter_lines():
                            # Stop reading once the time budget is spent.
                            if time.time() - start_time > MAX_SEC:
                                break
                            if not line:
                                continue

                            result = json.loads(line)["result"]
                            # A result without a fragment ends the stream.
                            if "fragment" not in result:
                                break

                            history[-1][1] += result["fragment"]["data"]["text"]
                            yield history

            # The recorded response is captured before the end-of-sequence
            # marker is stripped and before any disclaimer is appended.
            response = history[-1][1]
            if history[-1][1].endswith('</s>'):
                history[-1][1] = history[-1][1][:-4]
                yield history

            # Append a disclaimer when the model's own output matches the
            # refusal rules.
            if refusal_condition(history[-1][1]):
                history[-1][1] = history[-1][1] + '\n\n**[免責聲明: Breeze-7B-Instruct 和 Breeze-7B-Instruct-64k 並未針對問答進行安全保護，因此語言模型的任何回應不代表 MediaTek Research 立場。]**'
                yield history

        print('== Record ==\nQuery: {query}\nResponse: {response}'.format(query=repr(message), response=repr(history[-1][1])))
        insert_to_db(message, response, float(temperature), float(top_p))
        
    # Pressing Enter in the textbox: first record the user's turn and clear
    # the textbox (user, run with queue=False), then stream the reply (bot).
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        fn=bot,
        inputs=[
            chatbot,
            max_new_tokens,
            temperature,
            top_p,
            system_prompt,
        ],
        outputs=chatbot
    )
    # Clicking Submit runs the same two-step chain as pressing Enter.
    submit_button.click(
        user, [msg, chatbot], [msg, chatbot], queue=False
    ).then(
        fn=bot,
        inputs=[
            chatbot,
            max_new_tokens,
            temperature,
            top_p,
            system_prompt,
        ],
        outputs=chatbot
    )


    def delete_prev_fn(
            history: list[tuple[str, str]]) -> tuple[list[tuple[str, str]], str]:
        """Remove the latest turn from *history* and return its user message.

        The list is modified in place. Returns ``(history, user_message)``;
        the message is ``''`` when the history was empty or the stored user
        message was falsy (for example ``None``).
        """
        try:
            last_turn = history.pop()
        except IndexError:
            # Nothing to remove.
            return history, ''

        user_text, _reply = last_turn
        return history, user_text or ''


    def display_input(message: str,
                      history: list[tuple[str, str]]) -> list[tuple[str, str]]:
        """Append ``(message, '')`` to *history* in place and return the list."""
        new_turn = (message, '')
        history.append(new_turn)
        return history

    # Retry: drop the last turn (saving its user message in saved_input),
    # re-add that message as a fresh turn with an empty reply, then ask the
    # model again.
    retry_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        fn=bot,
        inputs=[
            chatbot,
            max_new_tokens,
            temperature,
            top_p,
            system_prompt,
        ],
        outputs=chatbot,
    )

    # Undo: drop the last turn and put its user message back into the
    # textbox so it can be edited and resent.
    undo_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=lambda x: x,
        inputs=[saved_input],
        outputs=msg,
        api_name=False,
        queue=False,
    )

    # Clear: reset the chatbot by setting its value to None.
    clear.click(lambda: None, None, chatbot, queue=False)

    gr.Markdown(LICENSE)

# Enable the request queue with concurrency_count=1 and at most 128 queued
# requests, then start the app.
# NOTE(review): the `concurrency_count` argument depends on the installed
# Gradio version - confirm it is accepted by the version this app runs on.
demo.queue(concurrency_count=1, max_size=128)
demo.launch()