import os

import gradio as gr
import requests

# Hugging Face serverless Inference API, using the OpenAI-compatible
# chat-completions route (matches the "messages" payload below).
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1/chat/completions"
)
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}


def generate_response(user_input):
    """Send the user's message to the model and return its reply."""
    payload = {
        "model": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
        "messages": [{"role": "user", "content": user_input}],
        "max_tokens": 16384,
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    response.raise_for_status()
    # Chat-completions responses carry the reply under choices[0].message.content.
    return response.json()["choices"][0]["message"]["content"]


demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Your message"),
    outputs=gr.Textbox(label="AI Response"),
    title="AI Chat Interface",
    description="Chat with Llama 3.1 Nemotron",
)

demo.launch()