Llama-3-70B / app.py
Kvikontent's picture
Update app.py
3fbf8a9 verified
raw
history blame contribute delete
504 Bytes
import gradio as gr
from huggingface_hub import InferenceClient
import spaces
import os
# Inference API client for the chat model.
# NOTE: `token` must be the raw API key — InferenceClient adds the
# "Bearer " prefix itself, so wrapping the key in f"Bearer {...}" (as the
# original did) produced an invalid "Bearer Bearer <key>" auth header.
# Model aligned with the one `respond` requests (Meta-Llama-3-70B-Instruct).
client = InferenceClient(
    "meta-llama/Meta-Llama-3-70B-Instruct",
    token=os.environ.get("API_KEY"),
)
# Kept for compatibility with code that references a module-level history list.
# The original also issued a chat_completion() call here at import time with an
# empty message list — a pointless (and likely failing) network request that
# could crash the Space before the UI ever launched; it has been removed.
messages = []
@spaces.GPU()
def respond(prompt, history=None):
    """Chat callback for gr.ChatInterface: return the model's reply to *prompt*.

    Args:
        prompt: The user's latest message.
        history: Prior turns supplied by ChatInterface — assumed to be
            (user, assistant) tuple pairs (Gradio's default tuples format;
            confirm against the installed Gradio version). Defaults to None
            so the function can also be called directly with just a prompt.

    Returns:
        The assistant's reply text.
    """
    # Build the OpenAI-style message list the Inference API expects.
    # The original ignored `prompt` entirely and sent the module-level
    # empty `messages` list, so every call asked about nothing.
    convo = []
    for user_msg, bot_msg in history or []:
        convo.append({"role": "user", "content": user_msg})
        if bot_msg:
            convo.append({"role": "assistant", "content": bot_msg})
    convo.append({"role": "user", "content": prompt})
    response = client.chat_completion(
        model="meta-llama/Meta-Llama-3-70B-Instruct",
        messages=convo,
        max_tokens=500,
    )
    # chat_completion returns a ChatCompletionOutput; the reply text lives
    # at choices[0].message.content — `response.content` (the original's
    # return) does not exist on that object.
    return response.choices[0].message.content
gr.ChatInterface(respond).launch()