Spaces:

TeamTonic
/

TonicsYI-6B-200k

Paused

File size: 8,343 Bytes

9ff18cc
2e98411
9ff18cc
d43a99c
 
ab4ecf4
 
9ff18cc
8f7d4c9
cf9bb0c
b70a398
9ff18cc
f8c306d
 
9ff18cc
 
 
 
 
 
 
 
 
 
 
6ea968f
91aaa3e
 
9ff18cc
 
 
 
 
 
f8c306d
 
9ff18cc
 
 
 
 
f8c306d
9ff18cc
f8c306d
9ff18cc
 
cf9bb0c
 
 
 
 
9ff18cc
cf9bb0c
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8c306d
c7eff8d
9ff18cc
 
 
f8c306d
9ff18cc
 
 
 
 
 
 
 
 
f8c306d
9ff18cc
 
 
 
f8c306d
9ff18cc
 
 
 
 
 
 
 
 
 
49774f4
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8c306d
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8c306d
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
 
85b4edc
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8c306d
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
 
85b4edc
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf9bb0c

from transformers import AutoModelForCausalLM, AutoTokenizer
from tokenization_yi import YiTokenizer
import torch
import os
import gradio as gr
import sentencepiece


# Instantiate the causal language model through transformers, then build the
# Yi tokenizer from the vocabulary file shipped next to this script.
model = AutoModelForCausalLM.from_pretrained(
    "01-ai/Yi-34B-200K",
    device_map="auto",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = YiTokenizer(vocab_file="./tokenizer.model")

def run(message, chat_history, max_new_tokens=100000, temperature=3.5, top_p=0.9, top_k=800):
    """Generate one sampled reply to ``message`` given the earlier conversation.

    Args:
        message: The latest user message.
        chat_history: Earlier turns; passed to ``get_prompt`` together with
            ``message`` to build the prompt text.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature; lower values are more deterministic.
        top_p: Nucleus-sampling threshold.
        top_k: Number of highest-probability tokens kept for sampling.

    Returns:
        The decoded continuation as a string, without the prompt tokens and
        without special tokens.
    """
    prompt = get_prompt(message, chat_history)

    # Encode the prompt and move it onto the model's device. The model is
    # loaded with device_map="auto", so it may not live on the CPU where the
    # tokenizer creates the tensor.
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
    prompt_length = input_ids.shape[-1]

    # UI sliders can hand over floats; generate() needs real ints for the
    # token budget and top-k.
    response_ids = model.generate(
        input_ids,
        max_new_tokens=int(max_new_tokens),  # Budget for generated tokens only.
        temperature=float(temperature),      # Controls randomness.
        top_p=float(top_p),                  # Nucleus sampling.
        top_k=int(top_k),                    # Limits the candidate tokens.
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,                      # Enable sampling-based generation.
    )

    # Drop the echoed prompt tokens and decode only the newly generated part.
    response = tokenizer.decode(response_ids[0, prompt_length:], skip_special_tokens=True)
    return response

def get_prompt(message, chat_history):
    """Flatten the conversation so far and the new message into one prompt string.

    Each past turn is rendered as ``" {user_input} {response} "`` in
    chronological order (user text first, then the reply), and the new
    ``message`` is appended last.

    Args:
        message: The new user message. It is stripped only when at least one
            earlier turn exists.
        chat_history: Iterable of ``(user_input, response)`` pairs. The first
            user input is kept verbatim, later ones are stripped. A missing
            (``None``) response is treated as an empty string.

    Returns:
        The concatenated prompt text.
    """
    texts = []
    for index, (user_input, response) in enumerate(chat_history):
        # Only the very first user turn keeps its surrounding whitespace.
        if index > 0:
            user_input = user_input.strip()
        reply = (response or '').strip()
        # User text precedes the reply so the prompt reads in conversation order.
        texts.append(f" {user_input} {reply} ")

    # Strip the new message only when it follows earlier turns.
    if texts:
        message = message.strip()
    texts.append(f"{message}")
    return ''.join(texts)

# Markdown rendered at the top of the page via gr.Markdown(DESCRIPTION).
# The heading no longer ends in a stray double quote and the badge link no
# longer carries an unmatched closing </h3> tag.
DESCRIPTION = """
# 👋🏻Welcome to 🙋🏻‍♂️Tonic's🧑🏻‍🚀YI-200K🚀
You can use this Space to test out the current model [Tonic/YI](https://huggingface.co/01-ai/Yi-34B)
You can also use 🧑🏻‍🚀YI-200K🚀 by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/YiTonic?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community on 👻Discord: [Discord](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
"""

# Length limits used by the UI and the request checks in this file:
#   MAX_MAX_NEW_TOKENS     - upper end of the "Max New Tokens" slider and the
#                            hard cap enforced in generate().
#   DEFAULT_MAX_NEW_TOKENS - initial value of the "Max New Tokens" slider.
#   MAX_INPUT_TOKEN_LENGTH - threshold compared in check_input_token_length().
MAX_MAX_NEW_TOKENS = 200_000
DEFAULT_MAX_NEW_TOKENS = 100_000
MAX_INPUT_TOKEN_LENGTH = 100_000

def clear_and_save_textbox(message):
    """Return ``('', message)``: an empty textbox value plus the message to keep."""
    cleared_text = ''
    return cleared_text, message

def display_input(message, history=None):
    """Append ``message`` to ``history`` as a pending turn with an empty reply.

    Args:
        message: The user message to show in the chat.
        history: List of ``(user_input, response)`` turns. When omitted or
            ``None`` a fresh list is used, so calls never share state through
            a default argument.

    Returns:
        The history list with ``(message, '')`` appended. A list passed in by
        the caller is extended in place and returned.
    """
    if history is None:
        history = []
    history.append((message, ''))
    return history

def delete_prev_fn(history=None):
    """Remove the most recent turn from ``history`` and return its user message.

    Args:
        history: List of ``(user_input, response)`` turns. When omitted or
            ``None`` a fresh empty list is used, so the returned list is never
            a shared default object.

    Returns:
        A ``(history, message)`` tuple: the history without its last turn, and
        the removed turn's user message (``''`` when there was nothing to
        remove or the message was empty/``None``).
    """
    if history is None:
        history = []
    try:
        message, _ = history.pop()
    except IndexError:
        # Nothing to undo: keep the empty history and report no message.
        message = ''
    return history, message or ''

def generate(message, history_with_input, max_new_tokens, temperature, top_p, top_k):
    """Yield updated chat histories containing the model's reply to ``message``.

    Args:
        message: The user message being answered.
        history_with_input: Chat history whose last entry is the pending turn
            for ``message``; that entry is dropped before calling ``run``.
        max_new_tokens: Requested generation budget; must not exceed
            ``MAX_MAX_NEW_TOKENS``.
        temperature: Sampling temperature forwarded to ``run``.
        top_p: Nucleus-sampling threshold forwarded to ``run``.
        top_k: Top-k value forwarded to ``run``.

    Yields:
        ``history + [(message, response)]`` for every response produced. At
        least one value is always yielded; if nothing was produced the reply
        is ``''``.

    Raises:
        ValueError: If ``max_new_tokens`` is larger than ``MAX_MAX_NEW_TOKENS``.
    """
    if int(max_new_tokens) > MAX_MAX_NEW_TOKENS:
        raise ValueError(
            f"max_new_tokens ({max_new_tokens}) exceeds the allowed maximum ({MAX_MAX_NEW_TOKENS})"
        )

    history = history_with_input[:-1]
    result = run(message, history, max_new_tokens, temperature, top_p, top_k)

    # run() hands back the complete reply as a single string. Calling next()
    # on a str raises TypeError, so wrap it; an iterator of partial responses
    # is still accepted unchanged.
    responses = [result] if isinstance(result, str) else result

    produced = False
    for response in responses:
        produced = True
        yield history + [(message, response)]
    if not produced:
        # Keep the chat consistent even when nothing was generated.
        yield history + [(message, '')]

def process_example(message):
    """Run ``generate`` on ``message`` with an empty history and fixed settings.

    The generator is drained completely and only its last yielded history is
    kept.

    Returns:
        ``('', final_history)``: an empty string followed by the last value
        yielded by ``generate``.
    """
    updates = generate(message, [], 1024, 2.5, 0.95, 900)
    # Exhaust the generator; the loop variable keeps the final update.
    for final_history in updates:
        continue
    return '', final_history

def check_input_token_length(message, chat_history):
    """Reject a request whose prompt is longer than ``MAX_INPUT_TOKEN_LENGTH`` tokens.

    The prompt is built with ``get_prompt`` and measured with the module-level
    ``tokenizer``, so the figure compared against the limit is a token count
    rather than a mix of characters and turn counts.

    Args:
        message: The new user message (``None`` is treated as empty).
        chat_history: Chat turns as ``(user_input, response)`` pairs
            (``None`` is treated as empty).

    Raises:
        gr.Error: If the tokenized prompt exceeds ``MAX_INPUT_TOKEN_LENGTH``.
    """
    message = message or ''
    history = list(chat_history or [])

    # The UI appends the pending turn (message, '') before this check runs;
    # drop a trailing turn without a reply so the message is not counted twice.
    if history and not history[-1][1]:
        history.pop()

    prompt = get_prompt(message, history)
    input_token_length = len(tokenizer.encode(prompt))
    if input_token_length > MAX_INPUT_TOKEN_LENGTH:
        raise gr.Error(f"The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.")

# Build the chat UI: description, chat window with input row, action buttons,
# advanced sampling options, and the event chains that connect them.
with gr.Blocks(theme='ParityError/Anime') as demo:
    gr.Markdown(DESCRIPTION)


    
    # Chat window plus the message input row.
    with gr.Group():
        chatbot = gr.Chatbot(label='TonicYi-30B-200K')
        with gr.Row():
            textbox = gr.Textbox(
                container=False,
                show_label=False,
                placeholder='Hi, Yi',
                scale=10
            )
            submit_button = gr.Button('Submit', variant='primary', scale=1, min_width=0)

    # Conversation-level actions.
    with gr.Row():
        retry_button = gr.Button('Retry', variant='secondary')
        undo_button = gr.Button('Undo', variant='secondary')
        clear_button = gr.Button('Clear', variant='secondary')

    # Holds the most recently submitted message between chained event steps.
    saved_input = gr.State()

    # Sampling controls forwarded to generate().
    with gr.Accordion(label='Advanced options', open=False):
#       system_prompt = gr.Textbox(label='System prompt', value=DEFAULT_SYSTEM_PROMPT, lines=5, interactive=False)
        max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
        temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=0.1)
        top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
        top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=10)

    # Pressing Enter in the textbox:
    #   1. clear the textbox and stash the message in saved_input,
    #   2. show the message in the chat as a pending turn,
    #   3. validate the input length,
    #   4. generate the reply (chained with .success, i.e. only if step 3 did not raise).
    textbox.submit(
        fn=clear_and_save_textbox,
        inputs=textbox,
        outputs=[textbox, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        fn=check_input_token_length,
        inputs=[saved_input, chatbot],
        api_name=False,
        queue=False,
    ).success(
        fn=generate,
        inputs=[
            saved_input,
            chatbot,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
        ],
        outputs=chatbot,
        api_name="Generate",
    )

    # Clicking Submit runs the same four-step chain as pressing Enter; only the
    # exposed API name of the final step differs.
    # NOTE(review): button_event_preprocess is not referenced again in the
    # visible part of this file.
    button_event_preprocess = submit_button.click(
        fn=clear_and_save_textbox,
        inputs=textbox,
        outputs=[textbox, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        fn=check_input_token_length,
        inputs=[saved_input, chatbot],
        api_name=False,
        queue=False,
    ).success(
        fn=generate,
        inputs=[
            saved_input,
            chatbot,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
        ],
        outputs=chatbot,
        api_name="Cgenerate",
    )

    # Retry: remove the last turn, put its message back as a pending turn, and
    # generate again. This chain has no input-length check step.
    retry_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        fn=generate,
        inputs=[
            saved_input,
            chatbot,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
        ],
        outputs=chatbot,
        api_name=False,
    )

    # Undo: remove the last turn and copy its message back into the textbox.
    undo_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=lambda x: x,
        inputs=[saved_input],
        outputs=textbox,
        api_name=False,
        queue=False,
    )

    # Clear: reset the chat history and the saved message.
    clear_button.click(
        fn=lambda: ([], ''),
        outputs=[chatbot, saved_input],
        queue=False,
        api_name=False,
    )

# Turn on the request queue (max_size=32), then start the app with the API
# page enabled.
demo.queue(max_size=32)
demo.launch(show_api=True)