"""Minimal Gradio chat UI around the wop/kosmox-gguf model.

Loads the model/tokenizer once at import time, renders conversations through
the tokenizer's Jinja2 chat template, and exposes a simple Blocks interface.
"""

import gradio as gr
# AutoModel has no .generate(); a causal-LM head is required for generation.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "wop/kosmox-gguf"
# NOTE(review): the repo name suggests GGUF weights; plain from_pretrained may
# need a gguf_file=... argument (or a safetensors checkpoint) — verify.
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# This is a Jinja2 template ({% ... %} blocks); str.format cannot render it.
# Install it on the tokenizer so apply_chat_template does the rendering.
tokenizer.chat_template = (
    "{{ bos_token }}{% for message in messages %}"
    "{% if message['from'] == 'human' %}{{' ' + message['value'] + ' '}}"
    "{% elif message['from'] == 'gpt' %}{{' ' + message['value'] + ' '}}"
    "{% else %}{{'<|' + message['from'] + '|> ' + message['value'] + ' '}}"
    "{% endif %}{% endfor %}"
    "{% if add_generation_prompt %}{{ ' ' }}{% endif %}"
)


def chat(messages):
    """Generate one model reply for a conversation.

    Args:
        messages: list of dicts with ``from`` ('human'/'gpt'/other) and
            ``value`` keys, matching the chat template above.

    Returns:
        The newly generated reply text (special tokens stripped).
    """
    # Render the Jinja2 template properly instead of str.format().
    context = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(context, return_tensors="pt")
    outputs = model.generate(**inputs)
    # generate() echoes the prompt; decode only the newly produced tokens.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


# Gradio interface: a chatbot pane, a textbox, and a send button.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(
                placeholder="Type your message here...",
                label="Your message",
            )
            send_button = gr.Button("Send")
        with gr.Column():
            chat_output = gr.Textbox(
                label="Chatbot response",
                interactive=False,
            )

    def respond(message, history):
        """Handle one send: update Chatbot history and show the last reply.

        ``history`` is the Chatbot's list of (user, bot) pairs; it must be
        converted to the {'from', 'value'} schema the chat template expects.
        """
        history = history or []
        messages = []
        for user_msg, bot_msg in history:
            messages.append({"from": "human", "value": user_msg})
            if bot_msg:
                messages.append({"from": "gpt", "value": bot_msg})
        messages.append({"from": "human", "value": message})
        response = chat(messages)
        # Chatbot expects (user, bot) pairs, not raw message dicts.
        history.append((message, response))
        return history, response

    send_button.click(respond, [user_input, chatbot], [chatbot, chat_output])

if __name__ == "__main__":
    demo.launch()