import os
from threading import Thread
from typing import Iterator

import gradio as gr
import torch
from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the SipánGPT model and tokenizer from the Hugging Face Hub
model_id = "ussipan/SipanGPT-0.2-Llama-3.2-1B-GGUF"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
model.eval()


# Main Gradio inference function
def generate(
    message: str,
    chat_history: list[dict],
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[tuple[str, list[dict]]]:
    # Strip Gradio-specific metadata before handing the history to the chat template.
    conversation = [{k: v for k, v in d.items() if k != "metadata"} for d in chat_history]
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep only the most recent tokens so the prompt fits the context window.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Se recortó la entrada de la conversación porque era más larga que {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    # Run generation in a background thread and stream tokens as they arrive.
    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        num_beams=1,
        repetition_penalty=repetition_penalty,
    )
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    conversation.append({"role": "assistant", "content": ""})
    outputs = []
    for text in streamer:
        outputs.append(text)
        bot_response = "".join(outputs)
        conversation[-1]["content"] = bot_response
        # Yield a cleared textbox plus the updated history on each new chunk.
        yield "", conversation


# Implementing Gradio 5 features and building a ChatInterface UI yourself
PLACEHOLDER = """

# SipánGPT 0.2 Llama 3.2

Forked from @ysharma

Este modelo es experimental, puede generar alucinaciones o respuestas incorrectas.

Entrenado con un dataset de 5.4k conversaciones.

Ver el dataset aquí

""" def handle_retry(history, retry_data: gr.RetryData): new_history = history[:retry_data.index] previous_prompt = history[retry_data.index]['content'] yield from generate(previous_prompt, chat_history = new_history, max_new_tokens = 1024, temperature = 0.6, top_p = 0.9, top_k = 50, repetition_penalty = 1.2) def handle_like(data: gr.LikeData): if data.liked: print("Votaste positivamente esta respuesta: ", data.value) else: print("Votaste negativamente esta respuesta: ", data.value) def handle_undo(history, undo_data: gr.UndoData): chatbot = history[:undo_data.index] prompt = history[undo_data.index]['content'] return chatbot, prompt def chat_examples_fill(data: gr.SelectData): yield from generate(data.value['text'], chat_history = [], max_new_tokens = 1024, temperature = 0.6, top_p = 0.9, top_k = 50, repetition_penalty = 1.2) class SipanGPTTheme(Base): def __init__( self, *, primary_hue: colors.Color | str = colors.Color( name="custom_green", c50="#f0fde4", c100="#e1fbc8", c200="#c3f789", c300="#a5f34a", c400="#7dfa00", # primary color c500="#5ef000", c600="#4cc700", c700="#39a000", c800="#2b7900", c900="#1d5200", c950="#102e00", ), secondary_hue: colors.Color | str = colors.Color( name="custom_secondary_green", c50="#edfce0", c100="#dbf9c1", c200="#b7f583", c300="#93f145", c400="#5fed00", # secondary color c500="#4ed400", c600="#3fad00", c700="#308700", c800="#236100", c900="#153b00", c950="#0a1f00", ), neutral_hue: colors.Color | str = colors.gray, spacing_size: sizes.Size | str = sizes.spacing_md, radius_size: sizes.Size | str = sizes.radius_md, text_size: sizes.Size | str = sizes.text_md, font: fonts.Font | str | list[fonts.Font | str] = [ fonts.GoogleFont("Exo 2"), "ui-sans-serif", "system-ui", "sans-serif", ], font_mono: fonts.Font | str | list[fonts.Font | str] = [ fonts.GoogleFont("Fraunces"), "ui-monospace", "monospace", ], ): super().__init__( primary_hue=primary_hue, secondary_hue=secondary_hue, neutral_hue=neutral_hue, spacing_size=spacing_size, radius_size=radius_size, text_size=text_size, font=font, font_mono=font_mono, ) self.set( body_background_fill=colors.Color(hex="#333333"), body_background_fill_dark=colors.Color(hex="#333333"), body_text_color=colors.Color(hex="#ffffff"), body_text_color_dark=colors.Color(hex="#ffffff"), color_accent_soft="*secondary_200", button_primary_background_fill="*primary_400", button_primary_background_fill_hover="*primary_500", button_primary_text_color=colors.Color(hex="#333333"), button_primary_text_color_dark=colors.Color(hex="#333333"), block_title_text_color="*primary_400", block_title_text_color_dark="*primary_400", input_background_fill=colors.Color(hex="#444444"), input_background_fill_dark=colors.Color(hex="#444444"), input_border_color=colors.Color(hex="#555555"), input_border_color_dark=colors.Color(hex="#555555"), input_placeholder_color=colors.Color(hex="#888888"), input_placeholder_color_dark=colors.Color(hex="#888888"), ) # Uso del tema theme = SipanGPTTheme() with gr.Blocks(theme=theme, fill_height=True) as demo: with gr.Column(elem_id="container", scale=1): chatbot = gr.Chatbot( label="SipánGPT 0.2 Llama 3.2", show_label=False, type="messages", scale=1, suggestions = [ {"text": "Háblame del reglamento de estudiantes de la universidad"}, {"text": "Qué becas ofrece la universidad"}, ], placeholder = PLACEHOLDER, ) msg = gr.Textbox(submit_btn=True, show_label=False) with gr.Accordion('Additional inputs', open=False): max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, 
value=DEFAULT_MAX_NEW_TOKENS, ) temperature = gr.Slider(label="Temperature",minimum=0.1, maximum=4.0, step=0.1, value=0.6,) top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9, ) top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50, ) repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2, ) msg.submit(generate, [msg, chatbot, max_new_tokens, temperature, top_p, top_k, repetition_penalty], [msg, chatbot]) chatbot.retry(handle_retry, chatbot, [msg, chatbot]) chatbot.like(handle_like, None, None) chatbot.undo(handle_undo, chatbot, [chatbot, msg]) chatbot.suggestion_select(chat_examples_fill, None, [msg, chatbot] ) demo.launch()
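# A possible tweak (an assumption, not part of the original Space): Gradio queues
# events by default, and the number of generate() calls served concurrently can be
# tuned before launching, which matters when several users stream at once, e.g.:
#
#     demo.queue(default_concurrency_limit=4).launch()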