Spaces:

alexkueck
/

TestInferenceAPI

Sleeping

App Files Files Community

TestInferenceAPI / app.py

alexkueck

Update app.py

c1b11f0 over 1 year ago

raw

history blame

1.98 kB

	import os

	import gradio as gr
	from huggingface_hub import InferenceClient, login
	from langchain.chat_models import ChatOpenAI
	from transformers import AutoTokenizer

	# Model served through the hosted inference endpoint; the same id is used
	# to load the matching tokenizer (and with it the chat prompt template).
	_MODEL_ID = "meta-llama/Llama-2-70b-chat-hf"
	_INFERENCE_URL = f"https://api-inference.huggingface.co/models/{_MODEL_ID}"

	# Credentials are read from the environment. The Hugging Face token needs
	# permission to access the model and a PRO subscription.
	HUGGINGFACEHUB_API_TOKEN = os.environ.get("HF_ACCESS_READ")
	OAI_API_KEY = os.environ.get("OPENAI_API_KEY")
	login(token=HUGGINGFACEHUB_API_TOKEN)

	# Tokenizer used to turn chat messages into the prompt string.
	tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)

	# Client for the hosted text-generation endpoint.
	client = InferenceClient(_INFERENCE_URL)

	# generate function
	def generate(text, history=None):
	    """Return the model's reply to a single user message.

	    The message is wrapped as a one-turn chat, rendered to a prompt string
	    with the module-level ``tokenizer``'s chat template, and sent to the
	    module-level ``client`` for sampled text generation.

	    Args:
	        text: The user's message.
	        history: Previous turns as passed by ``gr.ChatInterface``, which
	            calls its ``fn`` with ``(message, history)``. Accepted so the
	            function can be used as that callback; it is not included in
	            the prompt, so every request is answered without context.

	    Returns:
	        The generated text with leading and trailing whitespace removed.
	    """
	    messages = [{"role": "user", "content": text}]
	    # tokenize=False makes the template return the prompt as a string.
	    payload = tokenizer.apply_chat_template(messages, tokenize=False)
	    res = client.text_generation(
	        payload,
	        do_sample=True,
	        return_full_text=False,  # only the completion, not the prompt
	        max_new_tokens=2048,
	        top_p=0.9,
	        temperature=0.6,
	    )
	    return res.strip()

	# test client
	# Disabled smoke test for generate(). NOTE(review): generate() samples
	# (do_sample=True), so an exact string comparison is presumably not
	# reliable, which may be why this is commented out.
	#assert generate("What is 2+2?") == "The answer to 2+2 is 4."

	# create evaluator
	# Disabled guard that would stop start-up when no OpenAI key is configured.
	#assert OAI_API_KEY is not None, "Please set OPENAI_API_KEY environment variable"

	# GPT-4 chat model intended as the evaluator.
	# NOTE(review): no API key is passed explicitly; presumably ChatOpenAI picks
	# up OPENAI_API_KEY from the environment itself. Confirm.
	evaluation_llm = ChatOpenAI(model="gpt-4")

	################################################
	#GUI
	###############################################
	# Description at the top of the GUI
	################################################



	# Chat history widget handed to the chat interface below.
	chatbot_stream = gr.Chatbot()

	# Chat UI that forwards each user message to generate(). The title and the
	# button labels are user-facing German strings and must stay as they are.
	chat_interface_stream = gr.ChatInterface(
	    fn=generate,
	    title="ChatGPT vom LI",
	    theme="soft",
	    chatbot=chatbot_stream,
	    retry_btn="🔄 Wiederholen",
	    undo_btn="↩️ Letztes löschen",
	    clear_btn="🗑️ Verlauf löschen",
	    submit_btn="Abschicken",
	)

	# The chat interface is created above, outside of any Blocks context, so it
	# has to be rendered explicitly to show up inside the tab. Theme and page
	# title are repeated on the outer Blocks because the outer page, not the
	# embedded interface, is what gets launched.
	with gr.Blocks(theme="soft", title="ChatGPT vom LI") as demo:
	    with gr.Tab("Chatbot"):
	        # chatbot_stream.like(vote, None, None)
	        chat_interface_stream.render()

	# Launch the assembled page (previously the bare interface was launched from
	# inside the unfinished Blocks context and `demo` was never used).
	demo.queue().launch()