Spaces:

ussipan
/

sipangpt-test

Runtime error

App Files Files Community

sipangpt-test / app.py

jhangmez

Se cambio modelo nuevo

fd2259a 12 days ago

raw

history blame

10.2 kB

	import os
	from threading import Thread
	from typing import Iterator

	import gradio as gr
	from gradio.themes.base import Base
	from gradio.themes.utils import colors, sizes, fonts
	import time
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

	# Markdown blurb describing the base model. It is not referenced anywhere in
	# the visible part of this file. The heading and the link both name the 1B
	# variant (the link used to point at the 3B checkpoint).
	DESCRIPTION = """\
	# Llama 3.2 1B Instruct
	Llama 3.2 1B is Meta's latest iteration of open LLMs.
	This is a demo of [`meta-llama/Llama-3.2-1B-Instruct`](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct), fine-tuned for instruction following.
	For more details, please check [our post](https://huggingface.co/blog/llama32).
	"""

	# Upper bound and initial value of the "Max new tokens" slider.
	MAX_MAX_NEW_TOKENS = 2048
	DEFAULT_MAX_NEW_TOKENS = 1024
	# Prompts longer than this many tokens are cut down to their last
	# MAX_INPUT_TOKEN_LENGTH tokens in generate(); the limit can be overridden
	# through the MAX_INPUT_TOKEN_LENGTH environment variable.
	MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

	# Preferred device. Nothing in the visible code reads this variable; actual
	# placement is delegated to device_map="auto" below.
	device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

	model_id = "ussipan/SipanGPT-0.3-Llama-3.2-1B-GGUF"

	# NOTE(review): the repository name suggests it hosts GGUF checkpoints.
	# transformers only reads a GGUF checkpoint when `gguf_file` names the file
	# inside the repo, so loading without it is a likely cause of start-up
	# failures -- confirm against the repo contents. The file name cannot be
	# known from here, so it is taken from the optional GGUF_FILE environment
	# variable; when unset, loading behaves exactly as before.
	gguf_file = os.getenv("GGUF_FILE")
	_load_kwargs = {"gguf_file": gguf_file} if gguf_file else {}

	tokenizer = AutoTokenizer.from_pretrained(model_id, **_load_kwargs)
	model = AutoModelForCausalLM.from_pretrained(
	    model_id,
	    device_map="auto",
	    torch_dtype=torch.bfloat16,
	    **_load_kwargs,
	)
	# Switch to evaluation mode; the app only runs inference.
	model.eval()

	# Main Gradio inference function
	def generate(
	    message: str,
	    chat_history: list[dict],
	    max_new_tokens: int = 1024,
	    temperature: float = 0.6,
	    top_p: float = 0.9,
	    top_k: int = 50,
	    repetition_penalty: float = 1.2,
	) -> Iterator[tuple[str, list[dict]]]:
	    """Stream the model's reply to ``message`` given the previous turns.

	    Args:
	        message: Text of the new user turn.
	        chat_history: Earlier turns as message dicts (items are read with
	            ``.items()``); any ``'metadata'`` key is dropped before the
	            chat template is applied.
	        max_new_tokens: Forwarded to ``model.generate``.
	        temperature: Forwarded to ``model.generate``.
	        top_p: Forwarded to ``model.generate``.
	        top_k: Forwarded to ``model.generate``.
	        repetition_penalty: Forwarded to ``model.generate``.

	    Yields:
	        ``("", conversation)`` after every streamed chunk. ``conversation``
	        is the history plus the new user turn and the assistant turn whose
	        ``content`` holds the text generated so far. The event wiring sends
	        the empty string to the textbox and the list to the chatbot.
	    """
	    # Copy every turn without its 'metadata' entry so the caller's dicts are
	    # left untouched.
	    conversation = [
	        {key: value for key, value in turn.items() if key != 'metadata'}
	        for turn in chat_history
	    ]
	    conversation.append({"role": "user", "content": message})

	    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
	    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
	        # Keep only the most recent tokens and tell the user about it.
	        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
	        gr.Warning(f"Se recortó la entrada de la conversación porque era más larga que {MAX_INPUT_TOKEN_LENGTH} tokens.")
	    input_ids = input_ids.to(model.device)

	    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
	    generate_kwargs = {
	        "input_ids": input_ids,
	        "streamer": streamer,
	        "max_new_tokens": max_new_tokens,
	        "do_sample": True,
	        "top_p": top_p,
	        "top_k": top_k,
	        "temperature": temperature,
	        "num_beams": 1,
	        "repetition_penalty": repetition_penalty,
	    }
	    # Generation runs in a worker thread so this generator can consume the
	    # streamer while tokens are still being produced.
	    worker = Thread(target=model.generate, kwargs=generate_kwargs)
	    worker.start()

	    conversation.append({"role": "assistant", "content": ""})
	    bot_response = ""
	    for text in streamer:
	        bot_response += text
	        conversation[-1]['content'] = bot_response
	        yield "", conversation

	    # The streamer is exhausted, so generation is done; reap the worker.
	    worker.join()


	# Implementing Gradio 5 features and building a ChatInterface UI yourself.
	# PLACEHOLDER is the HTML handed to gr.Chatbot(placeholder=...) further down:
	# a logo image, the model title, a link to the Space this one was forked
	# from, two short notices, and a link to the training dataset.
	PLACEHOLDER = """<div style="padding: 20px; text-align: center; display: flex; flex-direction: column; align-items: center;">
	<img src="https://corladlalibertad.org.pe/wp-content/uploads/2024/01/USS.jpg" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; margin-bottom: 10px;">
	<h1 style="font-size: 28px; margin: 0;">SipánGPT 0.3 Llama 3.2</h1>
	<p style="font-size: 8px; margin: 5px 0 0; opacity: 0.65;">
	<a href="https://huggingface.co/spaces/ysharma/Llama3-2_with_Gradio-5" target="_blank" style="color: inherit; text-decoration: none;">Forked from @ysharma</a>
	</p>
	<p style="font-size: 12px; margin: 5px 0 0; opacity: 0.9;">Este modelo es experimental, puede generar alucinaciones o respuestas incorrectas.</p>
	<p style="font-size: 12px; margin: 5px 0 0; opacity: 0.9;">Entrenado con un dataset de 50k conversaciones.</p>
	<p style="font-size: 12px; margin: 5px 0 0; opacity: 0.9;">
	<a href="https://huggingface.co/datasets/ussipan/sipangpt" target="_blank" style="color: inherit; text-decoration: none;">Ver el dataset aquí</a>
	</p>
	</div>"""

	def handle_retry(history, retry_data: gr.RetryData):
	    """Regenerate the answer for the message at ``retry_data.index``.

	    The history is cut just before that index, the ``'content'`` of the
	    entry at the index is re-sent as the prompt, and whatever ``generate``
	    yields is passed through. Sampling settings are fixed here rather than
	    read from the UI controls.
	    """
	    retry_at = retry_data.index
	    earlier_turns = history[:retry_at]
	    prompt_to_resend = history[retry_at]['content']
	    yield from generate(
	        prompt_to_resend,
	        chat_history=earlier_turns,
	        max_new_tokens=1024,
	        temperature=0.6,
	        top_p=0.9,
	        top_k=50,
	        repetition_penalty=1.2,
	    )

	def handle_like(data: gr.LikeData):
	    """Print whether the response in ``data.value`` was up- or down-voted."""
	    prefix = (
	        "Votaste positivamente esta respuesta: "
	        if data.liked
	        else "Votaste negativamente esta respuesta: "
	    )
	    print(prefix, data.value)

	def handle_undo(history, undo_data: gr.UndoData):
	    """Drop the turn at ``undo_data.index`` and everything after it.

	    Returns:
	        A pair ``(trimmed_history, prompt)``: the history before the index,
	        and the ``'content'`` of the entry at the index so it can be put
	        back into the textbox.
	    """
	    cut = undo_data.index
	    return history[:cut], history[cut]['content']

	def chat_examples_fill(data: gr.SelectData):
	    """Start a fresh conversation from the selected suggestion.

	    The suggestion's ``'text'`` is sent to ``generate`` with an empty history
	    and fixed sampling settings; every update it yields is passed through.
	    """
	    suggestion_text = data.value['text']
	    yield from generate(
	        suggestion_text,
	        chat_history=[],
	        max_new_tokens=1024,
	        temperature=0.6,
	        top_p=0.9,
	        top_k=50,
	        repetition_penalty=1.2,
	    )

	class SipanGPTTheme(Base):
	    """Green-accented Gradio theme used by this app.

	    All constructor arguments are keyword-only and are forwarded unchanged
	    to ``Base.__init__``; afterwards a fixed set of light- and dark-mode
	    variables is applied with ``self.set``.
	    """

	    def __init__(
	        self,
	        *,
	        primary_hue: colors.Color | str = colors.Color(
	            name="custom_green",
	            c50="#f0fde4",
	            c100="#e1fbc8",
	            c200="#c3f789",
	            c300="#a5f34a",
	            c400="#7dfa00",  # primary color
	            c500="#5ef000",
	            c600="#4cc700",
	            c700="#39a000",
	            c800="#2b7900",
	            c900="#1d5200",
	            c950="#102e00",
	        ),
	        secondary_hue: colors.Color | str = colors.Color(
	            name="custom_secondary_green",
	            c50="#edfce0",
	            c100="#dbf9c1",
	            c200="#b7f583",
	            c300="#93f145",
	            c400="#5fed00",  # secondary color
	            c500="#4ed400",
	            c600="#3fad00",
	            c700="#308700",
	            c800="#236100",
	            c900="#153b00",
	            c950="#0a1f00",
	        ),
	        neutral_hue: colors.Color | str = colors.gray,
	        spacing_size: sizes.Size | str = sizes.spacing_md,
	        radius_size: sizes.Size | str = sizes.radius_md,
	        text_size: sizes.Size | str = sizes.text_md,
	        font: fonts.Font | str | list[fonts.Font | str] = [
	            fonts.GoogleFont("Exo 2"),
	            "ui-sans-serif",
	            "system-ui",
	            "sans-serif",
	        ],
	        font_mono: fonts.Font | str | list[fonts.Font | str] = [
	            fonts.GoogleFont("Fraunces"),
	            "ui-monospace",
	            "monospace",
	        ],
	    ):
	        """Build the theme from the given hues, sizes and fonts."""
	        super().__init__(
	            primary_hue=primary_hue,
	            secondary_hue=secondary_hue,
	            neutral_hue=neutral_hue,
	            spacing_size=spacing_size,
	            radius_size=radius_size,
	            text_size=text_size,
	            font=font,
	            font_mono=font_mono,
	        )
	        self.set(
	            # Light mode settings
	            body_background_fill="*neutral_50",
	            body_text_color="*neutral_900",
	            color_accent_soft="*secondary_200",
	            button_primary_background_fill="*primary_600",
	            button_primary_background_fill_hover="*primary_500",
	            button_primary_text_color="*neutral_50",
	            block_title_text_color="*primary_600",
	            input_background_fill="*neutral_200",
	            input_border_color="*neutral_300",
	            input_placeholder_color="*neutral_500",
	            block_background_fill="*neutral_100",
	            block_label_background_fill="*primary_100",
	            block_label_text_color="*neutral_800",
	            checkbox_background_color="*neutral_200",
	            checkbox_border_color="*primary_500",
	            loader_color="*primary_500",
	            slider_color="*primary_500",

	            # Dark mode settings
	            body_background_fill_dark="*neutral_900",
	            body_text_color_dark="*neutral_50",
	            color_accent_soft_dark="*secondary_800",
	            button_primary_background_fill_dark="*primary_700",
	            button_primary_background_fill_hover_dark="*primary_600",
	            button_primary_text_color_dark="*neutral_950",
	            block_title_text_color_dark="*primary_400",
	            input_background_fill_dark="*neutral_800",
	            input_border_color_dark="*neutral_700",
	            input_placeholder_color_dark="*neutral_400",
	            # Palettes are built from shades 50, 100, ..., 900, 950 (see the
	            # Color(...) arguments above), so a "*neutral_850" reference
	            # would not resolve; the nearest defined shade is used instead.
	            block_background_fill_dark="*neutral_800",
	            block_label_background_fill_dark="*primary_900",
	            block_label_text_color_dark="*neutral_200",
	            checkbox_background_color_dark="*neutral_800",
	            checkbox_border_color_dark="*primary_600",
	            loader_color_dark="*primary_400",
	            slider_color_dark="*primary_600",
	        )

	# Instantiate the custom theme
	theme = SipanGPTTheme()

	# NOTE(review): the nesting below was reconstructed from the blank-line
	# breaks in the source; confirm that the textbox and the accordion are
	# meant to sit outside the chatbot's Column.
	with gr.Blocks(theme=theme, fill_height=True) as demo:
	    # Chat area
	    with gr.Column(elem_id="container", scale=1):
	        chatbot = gr.Chatbot(
	            label="SipánGPT 0.3 Llama 3.2",
	            show_label=False,
	            type="messages",
	            scale=1,
	            suggestions=[
	                {"text": "Háblame del reglamento de estudiantes de la universidad"},
	                {"text": "Qué becas ofrece la universidad"},
	                {"text": "Hablame sobre el temario del examen de admisión para pregrado"},
	                {"text": "Cuando se fundó la universidad?"},
	            ],
	            placeholder=PLACEHOLDER,
	        )

	    # Prompt box and collapsible sampling controls
	    msg = gr.Textbox(submit_btn=True, show_label=False)
	    with gr.Accordion('Additional inputs', open=False):
	        max_new_tokens = gr.Slider(
	            label="Max new tokens",
	            minimum=1,
	            maximum=MAX_MAX_NEW_TOKENS,
	            step=1,
	            value=DEFAULT_MAX_NEW_TOKENS,
	        )
	        temperature = gr.Slider(
	            label="Temperature",
	            minimum=0.1,
	            maximum=4.0,
	            step=0.1,
	            value=0.6,
	        )
	        top_p = gr.Slider(
	            label="Top-p (nucleus sampling)",
	            minimum=0.05,
	            maximum=1.0,
	            step=0.05,
	            value=0.9,
	        )
	        top_k = gr.Slider(
	            label="Top-k",
	            minimum=1,
	            maximum=1000,
	            step=1,
	            value=50,
	        )
	        repetition_penalty = gr.Slider(
	            label="Repetition penalty",
	            minimum=1.0,
	            maximum=2.0,
	            step=0.05,
	            value=1.2,
	        )

	    # Event wiring: each handler's outputs update the textbox and/or chatbot.
	    msg.submit(
	        fn=generate,
	        inputs=[msg, chatbot, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
	        outputs=[msg, chatbot],
	    )
	    chatbot.retry(fn=handle_retry, inputs=chatbot, outputs=[msg, chatbot])
	    chatbot.like(fn=handle_like, inputs=None, outputs=None)
	    chatbot.undo(fn=handle_undo, inputs=chatbot, outputs=[chatbot, msg])
	    chatbot.suggestion_select(fn=chat_examples_fill, inputs=None, outputs=[msg, chatbot])


	demo.launch()