import spaces
import gradio as gr
from transformers import pipeline, AutoTokenizer, TextIteratorStreamer
import torch
from threading import Thread
import os
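
# Streaming text-generation demo for Locutusque's models on a ZeroGPU
# ("Running on Zero") Space: each @spaces.GPU-decorated function is
# allocated a GPU only for the duration of the call.
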
@spaces.GPU
def load_model(model_name):
    # Build a text-generation pipeline on the GPU in bfloat16. The "token"
    # Space secret grants access to gated/private checkpoints.
    return pipeline(
        "text-generation",
        model=model_name,
        device_map="cuda",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        token=os.environ["token"],
        use_fast=True,
    )

@spaces.GPU()
def generate(
    model_name,
    user_input,
    temperature=0.4,
    top_p=0.95,
    min_p=0.1,
    top_k=50,
    max_new_tokens=256,
):
    pipe = load_model(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=os.environ["token"])
    # Attach the tokenizer explicitly; otherwise the pipeline's default
    # tokenizer breaks generation for some of these models.
    pipe.tokenizer = tokenizer
    # M4-ai/tau-1.8B is a base model, so it gets the raw prompt; the other
    # models are chat-tuned and expect the ChatML format.
    if model_name == "M4-ai/tau-1.8B":
        prompt = user_input
    else:
        prompt = f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"
    streamer = TextIteratorStreamer(pipe.tokenizer, timeout=240.0, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(text_inputs=prompt, streamer=streamer, max_new_tokens=max_new_tokens, do_sample=True,
                             top_p=top_p, min_p=min_p, top_k=top_k, temperature=temperature, num_beams=1,
                             repetition_penalty=1.1)
    # Run generation on a background thread so partial output can be
    # streamed back to the UI as tokens arrive.
    t = Thread(target=pipe, kwargs=generation_kwargs)
    t.start()
    outputs = []
    for chunk in streamer:
        outputs.append(chunk)
        yield "".join(outputs)
model_choices = [
    "Locutusque/Apollo-2.0-Nemo-2407-12B",
    "Locutusque/Apollo-2.0-Llama-3.1-8B",
    "Locutusque/Llama-3-NeuralHermes-Pro-8B",
    "Locutusque/Hercules-5.0-Qwen2-7B",
    "Locutusque/Llama-3-NeuralHercules-5.0-8B",
    "Locutusque/Hercules-5.0-Index-1.9B",
    "Locutusque/Llama-3-Hercules-5.0-8B",
]
# What are the best options?

g = gr.Interface(
    fn=generate,
    inputs=[
        gr.components.Dropdown(choices=model_choices, label="Model", value=model_choices[0], interactive=True),
        gr.components.Textbox(lines=2, label="Prompt", value="Write me a Python program that calculates the factorial of a given number."),
        gr.components.Slider(minimum=0, maximum=1, value=0.8, label="Temperature"),
        gr.components.Slider(minimum=0, maximum=1, value=0.95, label="Top p"),
        gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Min p"),
        gr.components.Slider(minimum=0, maximum=100, step=1, value=15, label="Top k"),
        gr.components.Slider(minimum=1, maximum=2048, step=1, value=1024, label="Max tokens"),
    ],
    outputs=[gr.Textbox(lines=10, label="Output")],
    title="Locutusque's Language Models",
    description="Try out Locutusque's language models here! Credit goes to Mediocreatmybest for this Space. You may also find some experimental preview models here that have not yet been made public.",
    concurrency_limit=1,
)
g.launch(max_threads=4)