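# Minimal Gradio chat demo: meta-llama/Llama-3.2-3B-Instruct served through
# the transformers text-generation pipeline.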
import torch
from transformers import pipeline
import gradio as gr
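
# Note: Llama 3.2 is a gated model on the Hugging Face Hub, so you may need
# to authenticate first (e.g. `huggingface-cli login`) to download the weights.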
model_id = "meta-llama/Llama-3.2-3B-Instruct"
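
# device_map="auto" (via accelerate) places the model on the best available
# device, GPU if one is visible, otherwise CPU; bfloat16 halves memory use.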
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)


def generate_response(prompt):
    """
    Generate a response from the model based on the given prompt.

    Args:
        prompt (str): The input message from the user.

    Returns:
        str: The generated response from the model.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant!"},
        {"role": "user", "content": prompt},
    ]
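    # Passing a list of role/content dicts lets the pipeline apply the
    # model's chat template automatically before generation.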
    outputs = pipe(
        messages,
        max_new_tokens=256,
    )
    # The pipeline returns the whole conversation; the last message is the
    # assistant's reply, and its text lives under the "content" key.
    return outputs[0]["generated_text"][-1]["content"]


def gradio_interface():
    """
    Define the Gradio interface for the app.
    """
    iface = gr.Interface(
        fn=generate_response,
        # The gr.inputs namespace was removed in newer Gradio versions;
        # components now live at the top level (gr.Textbox).
        inputs=gr.Textbox(lines=2, placeholder="Enter your message here..."),
outputs="text", |
|
title="Llama-3.2-3B-Instruct Chatbot", |
|
description="Chat with the Llama-3.2-3B-Instruct model. Enter your message and get a response!", |
|
) |
|
return iface |
|
|
|
|
|
if __name__ == "__main__":
    iface = gradio_interface()
    # launch() serves the app locally (http://127.0.0.1:7860 by default);
    # pass share=True to also get a temporary public link.
    iface.launch()