File size: 2,255 Bytes
3f2900f
b87f04a
3f2900f
3d0fb66
2d26215
3f2900f
3d0fb66
b87f04a
9fb578d
3d0fb66
b87f04a
 
 
 
 
 
 
 
 
 
 
f80709d
 
 
 
b87f04a
 
 
 
7c8916c
b87f04a
 
 
3f2900f
b87f04a
760514e
b87f04a
 
 
 
3f2900f
b87f04a
80f2b5c
f80709d
b87f04a
3f2900f
 
b87f04a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch
import spaces
import os

@spaces.GPU
def load_model(model_name):
    """Build a text-generation pipeline for ``model_name`` on the GPU.

    Args:
        model_name: Hugging Face Hub repository id of the model to load.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"text-generation"`` task, requested on ``"cuda"`` in bfloat16.
    """
    # The Hub access token comes from the ``token`` environment variable.
    # Use ``.get`` so a missing (or empty) variable means "no explicit token"
    # rather than a KeyError that would also block loading public models.
    hub_token = os.environ.get("token") or None
    return pipeline(
        "text-generation",
        model=model_name,
        device="cuda",
        torch_dtype=torch.bfloat16,
        # NOTE(review): this allows code shipped inside the model repository
        # to run; keep the model list restricted to trusted repositories.
        trust_remote_code=True,
        token=hub_token,
    )
@spaces.GPU
def generate(
    model_name,
    user_input,
    temperature=0.4,
    top_p=0.95,
    top_k=50,
    max_new_tokens=256,
):
    """Generate text from ``user_input`` with the selected model.

    Args:
        model_name: Hub repository id of the model; passed to ``load_model``.
        user_input: The user's prompt text.
        temperature: Sampling temperature. A value of 0 (or below) switches
            to greedy decoding instead of sampling.
        top_p: Nucleus-sampling threshold (used only when sampling).
        top_k: Top-k sampling cutoff (used only when sampling).
        max_new_tokens: Maximum number of tokens to generate.

    Returns:
        The ``"generated_text"`` field of the first pipeline output.
    """
    pipe = load_model(model_name)

    # TinyMistral-248M-v3 receives the raw prompt; every other model gets the
    # prompt wrapped in ChatML-style user/assistant markers.
    if model_name == "Locutusque/TinyMistral-248M-v3":
        prompt = user_input
    else:
        prompt = f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"

    # Coerce the numeric inputs defensively: UI sliders / API callers may hand
    # over ints where floats are expected and vice versa.
    generation_kwargs = {
        "max_new_tokens": int(max_new_tokens),
        "repetition_penalty": 1.10,
    }
    if temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=float(temperature),
            top_k=int(top_k),
            top_p=float(top_p),
        )
    else:
        # Sampling with a non-positive temperature is rejected by the
        # generation code, so fall back to deterministic (greedy) decoding.
        generation_kwargs["do_sample"] = False

    outputs = pipe(prompt, **generation_kwargs)
    return outputs[0]["generated_text"]

# Hub repository ids offered in the "Model" dropdown; the first is the default.
model_choices = [
    "Locutusque/TinyMistral-248M-v3",
    "Locutusque/UltraQwen-7B",
    "Locutusque/UltraQwen-1_8B",
    "Locutusque/TinyMistral-248M-v2.5-Instruct",
    "M4-ai/TinyMistral-6x248M-Instruct",
    "Locutusque/Hercules-1.0-Mistral-7B",
    "Locutusque/Hercules-2.0-Mistral-7B",
    "Locutusque/Hercules-2.0-Qwen1.5-0.5B",
]

# What are the best options?
# The input components are listed in the same order as generate()'s
# parameters: model, prompt, temperature, top_p, top_k, max_new_tokens.
g = gr.Interface(
    fn=generate,
    inputs=[
        gr.components.Dropdown(
            choices=model_choices,
            label="Model",
            value=model_choices[0],
            interactive=True,
        ),
        gr.components.Textbox(
            lines=2,
            label="Prompt",
            value="Write me a Python program that calculates the factorial of a given number.",
        ),
        gr.components.Slider(
            minimum=0,
            maximum=1,
            value=0.4,
            label="Temperature",
        ),
        gr.components.Slider(
            minimum=0,
            maximum=1,
            value=0.95,
            label="Top p",
        ),
        gr.components.Slider(
            minimum=0,
            maximum=100,
            step=1,
            value=50,
            label="Top k",
        ),
        gr.components.Slider(
            minimum=1,
            maximum=1024,
            step=1,
            value=256,
            label="Max tokens",
        ),
    ],
    outputs=[gr.Textbox(lines=10, label="Output")],
    title="Locutusque's Language Models",
    description="Try out Locutusque's language models here! Credit goes to Mediocreatmybest for this space. You may also find some experimental preview models that have not been made public here.",
    concurrency_limit=1,
)

# Start the web UI.
g.launch(max_threads=2)