Spaces: Runtime error
import os
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

# Authenticate with the Hub (Llama 2 is a gated model)
load_dotenv()
API_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
login(API_TOKEN)

# Load the model in 4-bit and build a text-generation pipeline
model_id = "meta-llama/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

generate_text_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    temperature=0.1,
    max_new_tokens=512,
    repetition_penalty=1.1,  # without this the output begins repeating
)

# Gradio app: text in, generated text out
def get_results(text):
    res = generate_text_pipeline(text)
    return res[0]["generated_text"]

iface = gr.Interface(fn=get_results, inputs="text", outputs="text")
iface.launch()
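If the runtime error comes from the 4-bit load (for example, the Space hardware has no GPU or the bitsandbytes package is missing from requirements.txt), a minimal sketch of the same loading step using an explicit quantization config, assuming a GPU Space with bitsandbytes installed:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "meta-llama/Llama-2-7b-chat-hf"

# 4-bit quantization config; requires bitsandbytes and a CUDA GPU
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

The rest of the app (pipeline, get_results, and the Gradio interface) stays the same; only the model-loading call changes. The exact cause still depends on what the Space logs show under "Runtime error".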