Spaces:

Locutusque
/

Locutusque-Models

Running on Zero

App Files Files Community

Locutusque-Models / app.py

Locutusque

Update app.py

87a4475 verified 3 months ago

raw

history blame

3.38 kB

	import spaces
	import gradio as gr
	from transformers import pipeline, AutoTokenizer, TextIteratorStreamer
	import torch
	from threading import Thread
	import os

	@spaces.GPU()
	def load_model(model_name):
	return pipeline("text-generation", model=model_name, device_map="cuda", torch_dtype=torch.bfloat16, trust_remote_code=True, token=os.environ["token"], use_fast=True)
	@spaces.GPU(duration=45)
	def generate(
	message,
	history,
	model_name,
	system,
	temperature=0.4,
	top_p=0.95,
	min_p=0.1,
	top_k=50,
	max_new_tokens=256,
	):
	try:
	pipe = load_model(model_name)
	tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=os.environ["token"])
	tokenizer.eos_token = "<\|im_end\|>"
	print(tokenizer)
	pipe.tokenizer = tokenizer

	prompt = f"<\|im_start\|>system\n{system}<\|im_end\|>\n"
	for (user_turn, assistant_turn) in history:
	prompt += f"<\|im_start\|>user\n{user_turn}<\|im_end\|>\n<\|im_start\|>assistant\n{assistant_turn}<\|im_end\|>\n"
	prompt += f"<\|im_start\|>user\n{message}<\|im_end\|>\n<\|im_start\|>assistant\n"

	streamer = TextIteratorStreamer(pipe.tokenizer, timeout=240.0, skip_prompt=True, skip_special_tokens=True)
	generation_kwargs = dict(
	text_inputs=prompt,
	streamer=streamer,
	max_new_tokens=max_new_tokens,
	do_sample=True,
	top_p=top_p,
	min_p=min_p,
	top_k=top_k,
	temperature=temperature,
	num_beams=1,
	repetition_penalty=1.1
	)

	t = Thread(target=pipe.__call__, kwargs=generation_kwargs)
	t.start()

	outputs = []
	for chunk in streamer:
	outputs.append(chunk)
	yield "".join(outputs)
	except StopAsyncIteration:
	print("Stream stopped unexpectedly.")
	yield "".join(outputs)
	except Exception as e:
	print(f"An error occurred: {e}")
	yield "An error occurred during generation."

	model_choices = ["Locutusque/Apollo-2.0-Llama-3.1-8B", "Locutusque/Apollo-2.0-Qwen-2.5-7B", "Locutusque/Hercules-6.0-Llama-3.1-8B", "Locutusque/Llama-3-NeuralHercules-5.0-8B", "Locutusque/Hercules-5.0-Index-1.9B", "Locutusque/Llama-3-Hercules-5.0-8B"]
	# What at the best options?
	g = gr.ChatInterface(
	fn=generate,
	additional_inputs=[
	gr.components.Dropdown(choices=model_choices, label="Model", value=model_choices[0], interactive=True),
	gr.components.Textbox(lines=2, label="System Prompt", value="You are a sentient, super intelligent AI developed by a man named Locutusque."),
	gr.components.Slider(minimum=0, maximum=2, value=0.8, label="Temperature"),
	gr.components.Slider(minimum=0, maximum=1, value=0.95, label="Top p"),
	gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Min P"),
	gr.components.Slider(minimum=0, maximum=100, step=1, value=15, label="Top k"),
	gr.components.Slider(minimum=1, maximum=8192, step=1, value=1024, label="Max tokens"),
	],
	title="Locutusque's Language Models",
	description="Try out Locutusque's language models here! Credit goes to Mediocreatmybest for this space. You may also find some experimental preview models that have not been made public here.",
	)
	if __name__ == "__main__":
	g.launch()