import gradio as gr

# Load model directly from the Hugging Face Hub.
# Note: this checkpoint is gated, so you need to have accepted Meta's license
# and be authenticated (e.g. via `huggingface-cli login`).
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")


def eval_text(text):
    # Encode the input text
    input_ids = tokenizer.encode(text, return_tensors="pt")

    # Generate text; sampling must be enabled for top_k to take effect
    out = model.generate(
        input_ids,
        min_length=100,
        max_length=100,
        do_sample=True,
        top_k=10,
        no_repeat_ngram_size=5,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode the generated output
    generated_text = tokenizer.decode(out[0], skip_special_tokens=True)
    print(generated_text)
    return f"Result: {generated_text}"


demo = gr.Interface(fn=eval_text, inputs="text", outputs="text", title="Llama2")
demo.launch(share=True)