import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_name = "anasmkh/customized_llama3.1_8b"

# Load the tokenizer and the model in half precision, letting Accelerate
# place the layers across the available devices.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=64,
    temperature=1.5,
    min_p=0.1,
)

def generate_response(prompt):
    # The pipeline accepts chat-style messages and applies the model's chat template.
    messages = [
        {"role": "user", "content": prompt},
    ]
    output = generator(messages)[0]["generated_text"]
    # With chat input, `generated_text` is the full conversation (a list of
    # message dicts); the assistant's reply is the last entry.
    return output[-1]["content"].strip()

demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=5, label="Enter your prompt"),
    outputs=gr.Textbox(label="Model Response"),
)

demo.launch()
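
# Optional sanity check (illustrative only, not part of the original app): call the
# handler directly before serving the UI. The example prompt below is an assumption;
# uncomment to run a single generation, which requires the model to be loaded first.
# print(generate_response("Summarize what this assistant can do in one sentence."))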