Spaces:

wop
/

Kosmox

Paused

File size: 2,326 Bytes

bd5df33
c18814e
e44990b
bd5df33
c18814e
e44990b
c18814e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd5df33
e44990b
 
 
c18814e
e44990b
 
c18814e
e44990b
c18814e
 
bd5df33
c18814e
 
 
 
 
e44990b
 
 
 
c18814e
e44990b
 
 
 
 
 
c18814e
e44990b
 
 
bd5df33
 
 
 
 
 
e44990b
bd5df33
 
 
e44990b
bd5df33
e44990b

import gradio as gr
from transformers import AutoModelForCausalLM
import torch

# Load the causal language model once, at import time; `respond` below reads
# this module-level `model` for every chat request.
# NOTE(review): the repository name suggests GGUF weights. Transformers usually
# needs a `gguf_file=` argument to load a GGUF checkpoint — confirm this call
# succeeds as written.
model_name = "wop/kosmox-gguf"
model = AutoModelForCausalLM.from_pretrained(model_name)

# Define the chat template function
def format_chat(messages, add_generation_prompt):
    """Flatten a list of chat messages into a single prompt string.

    Args:
        messages: Iterable of dicts, each with a ``'from'`` key (speaker role)
            and a ``'value'`` key (message text).
        add_generation_prompt: When truthy, one extra space is appended after
            the last message.

    Returns:
        ``"<BOS>"`` followed by every rendered message. ``human`` and ``gpt``
        messages are rendered as the text wrapped in single spaces; any other
        role is rendered with a leading ``<|role|>`` tag.
    """
    def _render(turn):
        # Only roles other than human/gpt carry an explicit role tag.
        speaker = turn['from']
        if speaker in ('human', 'gpt'):
            lead = ' '
        else:
            lead = '<|' + speaker + '|> '
        return lead + turn['value'] + ' '

    pieces = ["<BOS>"]
    pieces.extend(_render(turn) for turn in messages)
    if add_generation_prompt:
        pieces.append(' ')
    return ''.join(pieces)

# Function to generate responses
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate the model's reply to ``message`` for the Gradio chat UI.

    Args:
        message: The newest user message.
        history: Earlier turns as ``(user_msg, bot_msg)`` pairs; falsy entries
            in a pair are skipped. ``None`` or empty means no prior turns.
        system_message: Text placed first in the prompt under the ``system``
            role.
        max_tokens: Maximum number of tokens to generate in addition to the
            prompt.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Yields:
        A single string: the decoded continuation with surrounding whitespace
        stripped.
    """
    # Rebuild the conversation: system prompt, prior turns, then the new message.
    messages = [{"from": "system", "value": system_message}]
    for user_msg, bot_msg in history or []:
        if user_msg:
            messages.append({"from": "human", "value": user_msg})
        if bot_msg:
            messages.append({"from": "gpt", "value": bot_msg})
    messages.append({"from": "human", "value": message})

    # NOTE(review): chat templates normally enable the generation prompt when
    # asking for a reply; left as False to keep the existing prompt text —
    # confirm which is intended.
    chat_input = format_chat(messages, add_generation_prompt=False)

    # Placeholder tokenization: one token id per character code point.
    # NOTE(review): code points are not checked against the model's vocabulary
    # size; a real tokenizer should replace this.
    inputs = torch.tensor([ord(c) for c in chat_input]).unsqueeze(0)
    prompt_length = inputs.shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs,
            # The UI labels this value "Max new tokens", so it must bound only
            # the continuation; `max_length` would also count the prompt.
            max_new_tokens=int(max_tokens),
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
        )

    # For a causal LM, generate() returns the prompt followed by the
    # continuation; drop the prompt so the reply does not echo the input.
    new_tokens = outputs[0][prompt_length:].tolist()

    # Placeholder decoding: keep only ids that map to a single-byte character.
    response = ''.join(chr(t) for t in new_tokens if t < 256)
    yield response.strip()

# Build the chat UI. `respond` handles each turn; the extra widgets listed here
# supply its arguments after (message, history), in this order.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(label="System message", value="You are a friendly Chatbot."),
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
    ],
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()