import gradio as gr from huggingface_hub import InferenceClient import spaces import os client = InferenceClient("meta-llama/Llama-2-70b-chat-hf", token=f"Bearer {os.environ.get('API_KEY')}") messages=[] client.chat_completion(messages, max_tokens=1024) @spaces.GPU() def respond(prompt): response = client.chat_completion( model="meta-llama/Meta-Llama-3-70B-Instruct", messages=messages, max_tokens=500, ) return response.content gr.ChatInterface(respond).launch()