import gradio as gr
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer
import transformers
import torch
import warnings

warnings.filterwarnings('ignore')

# Note: GGML checkpoints such as TheBloke/Llama-2-7B-Chat-GGML cannot be loaded by
# transformers; they need a GGML backend such as CTransformers, e.g.:
# llm = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGML", model_file="llama-2-7b-chat.ggmlv3.q3_K_S.bin")
# A transformers text-generation pipeline needs the HF-format chat checkpoint instead
# (gated repo: requires accepting Meta's Llama 2 license on the Hub).
model_id = "meta-llama/Llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(model_id)
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
    max_length=500,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)
llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': 0})

def greet(prompt):
    return llm(prompt)

iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
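
# Alternative sketch (kept commented out so it does not run alongside the app above):
# serve the quantized GGML file referenced earlier through CTransformers, which runs on
# CPU and matches the commented-out loader line at the top of this file. Assumes the
# ctransformers package is installed in the Space (pip install ctransformers).
#
# import gradio as gr
# from langchain.llms import CTransformers
#
# llm = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGML",
#                     model_file="llama-2-7b-chat.ggmlv3.q3_K_S.bin")
#
# def greet(prompt):
#     return llm(prompt)
#
# gr.Interface(fn=greet, inputs="text", outputs="text").launch()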