"""Minimal Gradio chat UI backed by the Hugging Face Inference API.

The app exposes a single text box; each submitted message is sent as a
one-turn chat completion request and the model's reply is shown in the
output text box.
"""

import asyncio
import functools
import os

import gradio as gr
from openai import OpenAI

# NOTE(review): API_URL and headers are not used by any code visible in this
# file; they are kept unchanged in case other modules import them.
API_URL = "https://api-inference.huggingface.co/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}

# NOTE(review): the model requested here is the plain Llama 3.1 70B Instruct
# model, while API_URL and the UI description both mention "Nemotron".
# Confirm which model is intended and align the three places.
MODEL_ID = "meta-llama/Llama-3.1-70B-Instruct"


@functools.lru_cache(maxsize=1)
def _get_client():
    """Return the shared OpenAI-compatible client, creating it on first use.

    Raises:
        gr.Error: if the HUGGINGFACE_API_KEY environment variable is not set.
    """
    api_key = os.getenv("HUGGINGFACE_API_KEY")
    if not api_key:
        # Fail loudly instead of passing api_key=None: the OpenAI client would
        # otherwise fall back to OPENAI_API_KEY and could send an unrelated
        # credential to the Hugging Face endpoint.
        raise gr.Error("HUGGINGFACE_API_KEY environment variable is not set.")
    return OpenAI(
        base_url="https://api-inference.huggingface.co/v1/",
        api_key=api_key,
    )


async def generate_response(user_input):
    """Send *user_input* to the chat model and return the reply text.

    Args:
        user_input: The message typed by the user.

    Returns:
        The content of the first completion choice, or an empty string when
        the input is blank or the model returned no text.

    Raises:
        gr.Error: if the API key is not configured.
    """
    # Nothing useful to ask the model; skip the network round trip.
    if not user_input or not user_input.strip():
        return ""

    client = _get_client()
    request = functools.partial(
        client.chat.completions.create,
        model=MODEL_ID,
        messages=[{"role": "user", "content": user_input}],
        max_tokens=500,
        stream=False,
    )

    # The OpenAI client used here is synchronous; run it in the default
    # executor so this coroutine does not block the event loop while waiting
    # for the HTTP response.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, request)

    # message.content may be None; the output Textbox expects a string.
    return response.choices[0].message.content or ""


demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Your message"),
    outputs=gr.Textbox(label="AI Response"),
    title="AI Chat Interface",
    description="Chat with Llama 3.1 Nemotron",
)

demo.launch()