Spaces:

Braszczynski
/

ID2223Lab2

Runtime error

File size: 1,879 Bytes

037a015
b42ac71
ce0d45f
5ce5e24
ce0d45f
1ccc577
 
 
 
ce0d45f
1ccc577
 
 
 
 
 
ce0d45f
037a015
 
 
 
 
 
 
 
 
b42ac71
 
 
 
 
 
 
 
 
 
 
 
 
037a015
 
b42ac71
 
037a015
b42ac71
 
 
 
037a015
b42ac71
037a015
 
 
b42ac71
037a015
 
b42ac71
037a015

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import AutoModel, AutoTokenizer
from adapters import AutoAdapterModel


# Base checkpoint: both the tokenizer and the model weights are loaded from it.
# NOTE(review): the adapter repo id below mentions "Llama-3.2-3B" while this
# base is a Llama-3.1-8B checkpoint -- confirm the adapter was actually trained
# on this base; a mismatch would be a likely cause of a load-time failure.
model_name = "unsloth/Meta-Llama-3.1-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the base model with adapter support.
model = AutoAdapterModel.from_pretrained(model_name)

# By default `load_adapter` only loads the adapter weights without activating
# them; `set_active=True` makes the adapter take part in generation.
model.load_adapter(
    "Braszczynski/Llama-3.2-3B-Instruct-bnb-4bit-460steps",
    set_active=True,
)



def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate the assistant's reply for one chat turn.

    Args:
        message: The latest user message.
        history: Earlier turns as ``(user_message, assistant_reply)`` pairs.
        system_message: Text placed at the very start of the prompt.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        The decoded text of the newly generated tokens only, with special
        tokens removed and surrounding whitespace stripped.
    """
    # Flatten the system message and chat history into one plain-text prompt.
    prompt_parts = [f"{system_message}\n"]
    prompt_parts.extend(
        f"User: {user_msg}\nAssistant: {bot_reply}\n"
        for user_msg, bot_reply in history
    )
    prompt_parts.append(f"User: {message}\nAssistant:")
    chat_history = "".join(prompt_parts)

    # Place the inputs on the model's own device instead of assuming CUDA, so
    # the tensors always match wherever the model was loaded.
    inputs = tokenizer(chat_history, return_tensors="pt", truncation=True).to(
        model.device
    )
    prompt_length = inputs["input_ids"].shape[-1]

    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=int(max_tokens),
        # Without sampling enabled, temperature and top_p are ignored.
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Drop the prompt by token count rather than by character count: the
    # decoded prompt is not guaranteed to be byte-identical to the input text.
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

# Extra controls rendered alongside the chat box. Their values are handed to
# `respond` after (message, history), so the order here has to follow that
# function's remaining parameters: system message, max tokens, temperature,
# top-p.
_generation_controls = [
    gr.Textbox(label="System message", value="You are a friendly assistant."),
    gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        minimum=0.1,
        maximum=1.0,
        step=0.05,
        value=0.95,
    ),
]

# Chat UI wired to the `respond` callback.
demo = gr.ChatInterface(fn=respond, additional_inputs=_generation_controls)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()