import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


# Identifier of the fine-tuned checkpoint passed to `from_pretrained`.
model_name = "anasmkh/customized_llama3.1_8b"

# Load the tokenizer and the weights once, at import time, so every chat
# request reuses the same objects. Weights are requested in half precision and
# device placement is left to the library (`device_map="auto"`).
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Shared text-generation pipeline used by the chat handler.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=64,
    # `temperature` and `min_p` are sampling parameters; request sampling
    # explicitly instead of relying on the checkpoint's generation config.
    do_sample=True,
    temperature=1.5,
    min_p=0.1,
)


def chat(message, history):
    """Generate the assistant's reply and return the updated conversation.

    Args:
        message: Text the user just submitted.
        history: Prior conversation as a list of ``{"role", "content"}``
            dicts, or ``None``/empty when the conversation is new.

    Returns:
        A new list containing the prior turns, the user's message and the
        assistant's reply, each as a ``{"role", "content"}`` dict.
    """
    # Work on a copy so the caller's list is never mutated.
    history = list(history or [])
    history.append({"role": "user", "content": message})

    generated = generator(history)[-1]["generated_text"]
    # For chat-style input the pipeline returns the whole conversation (a list
    # of message dicts) with the new assistant turn last; for plain-string
    # input it returns text. Reduce either shape to the reply text so the
    # assistant entry always carries a string.
    if isinstance(generated, str):
        response = generated
    else:
        response = generated[-1]["content"]

    history.append({"role": "assistant", "content": response})
    return history


# Minimal chat UI: a conversation view, an input box and a reset button.
# NOTE(review): `chat` exchanges history as role/content dicts, so the Chatbot
# must be using the "messages" data format -- confirm against the installed
# Gradio version.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    message = gr.Textbox()
    clear = gr.ClearButton([message, chatbot])

    # Run the model when the user submits, then empty the textbox so the sent
    # message does not linger in the input field.
    message.submit(chat, [message, chatbot], chatbot).then(
        lambda: "", None, message
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()