Spaces:

Locutusque
/

Locutusque-Models

Sleeping

App Files Files Community

Locutusque-Models / app.py

Locutusque

Update app.py

6157949 verified 27 days ago

raw

history blame contribute delete

3.36 kB

	import spaces
	import gradio as gr
	from transformers import pipeline, AutoTokenizer, TextIteratorStreamer
	import torch
	from threading import Thread
	import os

	@spaces.GPU()
	def load_model(model_name):
	return pipeline("text-generation", model=model_name, device_map="cuda", torch_dtype=torch.bfloat16, trust_remote_code=True, token=os.environ["token"], use_fast=True)
	@spaces.GPU(duration=45)
	def generate(
	message,
	history,
	model_name,
	system,
	temperature=0.4,
	top_p=0.95,
	min_p=0.1,
	top_k=50,
	max_new_tokens=256,
	):
	try:
	pipe = load_model(model_name)
	tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=os.environ["token"])
	tokenizer.eos_token = "<\|im_end\|>"
	print(tokenizer)
	pipe.tokenizer = tokenizer
	prompt = f"<\|im_start\|>system\n{system}<\|im_end\|>\n"
	for (user_turn, assistant_turn) in history:
	prompt += f"<\|im_start\|>user\n{user_turn}<\|im_end\|>\n<\|im_start\|>assistant\n{assistant_turn}<\|im_end\|>\n"
	prompt += f"<\|im_start\|>user\n{message}<\|im_end\|>\n<\|im_start\|>assistant\n"

	streamer = TextIteratorStreamer(pipe.tokenizer, timeout=240.0, skip_prompt=True, skip_special_tokens=True)
	generation_kwargs = dict(
	text_inputs=prompt,
	streamer=streamer,
	max_new_tokens=max_new_tokens,
	do_sample=True,
	top_p=top_p,
	min_p=min_p,
	top_k=top_k,
	temperature=temperature,
	num_beams=1,
	repetition_penalty=1.1
	)

	t = Thread(target=pipe.__call__, kwargs=generation_kwargs)
	t.start()

	outputs = []
	for chunk in streamer:
	outputs.append(chunk)
	yield "".join(outputs)
	except StopAsyncIteration:
	print("Stream stopped unexpectedly.")
	yield "".join(outputs)
	except Exception as e:
	print(f"An error occurred: {e}")
	yield "An error occurred during generation."

	model_choices = ["Locutusque/Hercules-6.1-Llama-3.1-8B", "Locutusque/deeplm-qwen-3-8B-stage2", "Locutusque/Hercules-6.9-Llama-3.1-8B", "Locutusque/lmsys-llama-3.2-3b", "Locutusque/CollectiveLM-Falcon-3-7B", "Locutusque/StockQwen-2.5-7B"]
	# What at the best options?
	g = gr.ChatInterface(
	fn=generate,
	additional_inputs=[
	gr.components.Dropdown(choices=model_choices, label="Model", value=model_choices[0], interactive=True),
	gr.components.Textbox(lines=2, label="System Prompt", value="You are a sentient, super intelligent AI developed by a man named Locutusque."),
	gr.components.Slider(minimum=0, maximum=2, value=0.8, label="Temperature"),
	gr.components.Slider(minimum=0, maximum=1, value=0.95, label="Top p"),
	gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Min P"),
	gr.components.Slider(minimum=0, maximum=100, step=1, value=15, label="Top k"),
	gr.components.Slider(minimum=1, maximum=8192, step=1, value=1024, label="Max tokens"),
	],
	title="Locutusque's Language Models",
	description="Try out Locutusque's language models here! Credit goes to Mediocreatmybest for this space. You may also find some experimental preview models that have not been made public here.",
	)
	if __name__ == "__main__":
	g.launch()