from huggingface_hub import InferenceClient, login
from transformers import AutoTokenizer
from langchain.chat_models import ChatOpenAI
from langchain.evaluation import load_evaluator
import os
import gradio as gr
from pprint import pprint as print  # shadow the builtin so dicts print readably
# access token with permission to access the model (PRO subscription required
# for the hosted 70B endpoint)
# HUGGINGFACEHUB_API_TOKEN = os.getenv("HF_ACCESS_READ")
OAI_API_KEY = os.getenv("OPENAI_API_KEY")  # ChatOpenAI also reads OPENAI_API_KEY from the environment
login(token=os.environ["HF_ACCESS_READ"])
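# login() stores the token in huggingface_hub's client state, so both the
# gated AutoTokenizer download and the InferenceClient calls below are
# authenticated with it.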
# tokenizer for generating the prompt
print("Tokenizer")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-chat-hf")

# inference client for the hosted model endpoint
print("Inf.Client")
client = InferenceClient("https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf")
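# A quick smoke test of the endpoint, kept as a comment so it does not run at
# import time (the exact prompt string is an assumption based on Llama-2's
# chat format):
#   client.text_generation("<s>[INST] Hello [/INST]", max_new_tokens=20)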
# generate function: build the Llama-2 chat prompt, query the model,
# then score the answer with the evaluator defined below
def generate(text, history):
    payload = tokenizer.apply_chat_template([{"role": "user", "content": text}], tokenize=False)
    res = client.text_generation(
        payload,
        do_sample=True,
        return_full_text=False,
        max_new_tokens=2048,
        top_p=0.9,
        temperature=0.6,
    )
    # for evaluation: criteria are fixed when the evaluator is loaded (see the
    # Evaluation section below), and there is no reference answer here, so the
    # plain criteria evaluator is used without requires_reference
    eval_result = eli5_evaluator.evaluate_strings(prediction=res.strip(), input=text)
    print("eval_result:............ ")
    print(eval_result)
    return res.strip()
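# For reference: for a single user turn, apply_chat_template above produces a
# prompt in Llama-2's instruction format, roughly "<s>[INST] {text} [/INST]",
# and return_full_text=False makes text_generation return only the new tokens.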
########################################
# Evaluation
########################################
evaluation_llm = ChatOpenAI(model="gpt-4")

# custom ELI5 criterion; criteria must be passed to load_evaluator,
# they cannot be changed per evaluate_strings call
custom_criterion = {"eli5": "Is the output explained in a way that a 5 year old would understand it?"}
eli5_evaluator = load_evaluator("criteria", criteria=custom_criterion, llm=evaluation_llm)

# alternative evaluator for the built-in "conciseness" criterion
conciseness_evaluator = load_evaluator("criteria", criteria="conciseness", llm=evaluation_llm)
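# evaluate_strings on a criteria evaluator returns a dict with the judge's
# reasoning, a "Y"/"N" verdict and a binary score, roughly:
#   {"reasoning": "...", "value": "Y", "score": 1}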
################################################
# GUI
################################################
# description at the top of the GUI
################################################
chatbot_stream = gr.Chatbot()
chat_interface_stream = gr.ChatInterface(
    fn=generate,
    title="ChatGPT vom LI",
    theme="soft",
    chatbot=chatbot_stream,
    retry_btn="🔄 Wiederholen",
    undo_btn="↩️ Letztes löschen",
    clear_btn="🗑️ Verlauf löschen",
    submit_btn="Abschicken",
)
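# ChatInterface is itself a gr.Blocks app; calling .render() inside another
# Blocks context (as below) embeds it there instead of running it standalone.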
with gr.Blocks() as demo:
    with gr.Tab("Chatbot"):
        # chatbot_stream.like(vote, None, None)
        chat_interface_stream.render()

demo.queue().launch()