import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_name = "anasmkh/customized_llama3.1_8b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# device_map="auto" shards the model across available devices (requires the
# `accelerate` package); float16 halves memory use versus float32.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Sampling knobs (temperature, min_p) only take effect with do_sample=True;
# min_p filters out low-probability tokens, which keeps output coherent at
# this relatively high temperature. (min_p needs a recent transformers release.)
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=64,
    do_sample=True,
    temperature=1.5,
    min_p=0.1,
)

def generate_response(prompt):
    messages = [
        {"role": "user", "content": prompt},
    ]
    # With chat-style input the pipeline returns the whole conversation as a
    # list of messages, so the assistant's reply is the last entry.
    response = generator(messages)[0]["generated_text"]
    return response[-1]["content"].strip()
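
# Optional smoke test before starting the UI (hypothetical prompt text;
# assumes the model weights have already been downloaded):
# print(generate_response("What can you help me with?"))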

demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=5, label="Enter your prompt"),
    outputs=gr.Textbox(label="Model Response"),
)

demo.launch()
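
# launch() serves locally (default http://127.0.0.1:7860); passing
# share=True would also create a temporary public Gradio link.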