Spaces:

Locutusque
/

Locutusque-Models

Sleeping

File size: 2,255 Bytes

3f2900f
b87f04a
3f2900f
3d0fb66
2d26215
3f2900f
3d0fb66
b87f04a
9fb578d
3d0fb66
b87f04a
 
 
 
 
 
 
 
 
 
 
f80709d
 
 
 
b87f04a
 
 
 
7c8916c
b87f04a
 
 
3f2900f
b87f04a
760514e
b87f04a
 
 
 
3f2900f
b87f04a
80f2b5c
f80709d
b87f04a
3f2900f
 
b87f04a

import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch
import spaces
import os

@spaces.GPU
def load_model(model_name):
    """Build a CUDA text-generation pipeline for ``model_name``.

    Args:
        model_name: Hugging Face Hub repository id of the model to load.

    Returns:
        The object returned by ``transformers.pipeline("text-generation", ...)``,
        created with ``device="cuda"`` and ``torch_dtype=torch.bfloat16``.

    Note:
        ``trust_remote_code=True`` allows the model repository to run its own
        Python code while loading, so only trusted repositories should be
        passed in.
    """
    # The Hub access token comes from the "token" environment variable.
    # Using .get() passes None (the parameter's default) when the variable is
    # unset, instead of raising KeyError before any loading is attempted.
    return pipeline(
        "text-generation",
        model=model_name,
        device="cuda",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        token=os.environ.get("token"),
    )
@spaces.GPU
def generate(
    model_name,
    user_input,
    temperature=0.4,
    top_p=0.95,
    top_k=50,
    max_new_tokens=256,
):
    """Generate text for ``user_input`` with the selected model.

    Args:
        model_name: Hub repository id; forwarded to ``load_model``.
        user_input: Text typed by the user.
        temperature: Sampling temperature. A value of 0 (or below) switches
            to greedy decoding instead of sampling.
        top_p: Nucleus-sampling threshold, used only when sampling.
        top_k: Top-k cutoff, used only when sampling.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The ``"generated_text"`` field of the first result returned by the
        pipeline.
    """
    pipe = load_model(model_name)

    # "Locutusque/TinyMistral-248M-v3" is prompted with the raw text; every
    # other model gets the text wrapped in a ChatML-style user turn followed
    # by an opened assistant turn.
    if model_name == "Locutusque/TinyMistral-248M-v3":
        prompt = user_input
    else:
        prompt = f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"

    generation_kwargs = {
        # UI sliders may hand over whole numbers as float (or int where a
        # float is expected), so normalise the numeric types explicitly.
        "max_new_tokens": int(max_new_tokens),
        "repetition_penalty": 1.10,
    }
    if temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=float(temperature),
            top_k=int(top_k),
            top_p=float(top_p),
        )
    else:
        # Sampling requires a strictly positive temperature; treat 0 as a
        # request for deterministic (greedy) decoding rather than failing.
        generation_kwargs["do_sample"] = False

    outputs = pipe(prompt, **generation_kwargs)
    return outputs[0]["generated_text"]

# Hub repository ids offered in the "Model" dropdown; the first entry is the
# pre-selected default.
model_choices = [
    "Locutusque/TinyMistral-248M-v3",
    "Locutusque/UltraQwen-7B",
    "Locutusque/UltraQwen-1_8B",
    "Locutusque/TinyMistral-248M-v2.5-Instruct",
    "M4-ai/TinyMistral-6x248M-Instruct",
    "Locutusque/Hercules-1.0-Mistral-7B",
    "Locutusque/Hercules-2.0-Mistral-7B",
    "Locutusque/Hercules-2.0-Qwen1.5-0.5B",
]

# Input widgets, listed in the same order as the parameters of `generate`.
# TODO: what are the best default options?
input_components = [
    gr.components.Dropdown(
        choices=model_choices,
        label="Model",
        value=model_choices[0],
        interactive=True,
    ),
    gr.components.Textbox(
        lines=2,
        label="Prompt",
        value="Write me a Python program that calculates the factorial of a given number.",
    ),
    gr.components.Slider(minimum=0, maximum=1, value=0.4, label="Temperature"),
    gr.components.Slider(minimum=0, maximum=1, value=0.95, label="Top p"),
    gr.components.Slider(minimum=0, maximum=100, step=1, value=50, label="Top k"),
    gr.components.Slider(minimum=1, maximum=1024, step=1, value=256, label="Max tokens"),
]

# Single text box that shows the string returned by `generate`.
output_components = [gr.Textbox(lines=10, label="Output")]

g = gr.Interface(
    fn=generate,
    inputs=input_components,
    outputs=output_components,
    title="Locutusque's Language Models",
    description="Try out Locutusque's language models here! Credit goes to Mediocreatmybest for this space. You may also find some experimental preview models that have not been made public here.",
    concurrency_limit=1,
)

# Start the web app.
g.launch(max_threads=2)