"""Minimal Gradio chat front-end backed by a Hugging Face-hosted Llama model.

The script builds a single-textbox ``gr.Interface`` whose handler forwards the
user's message to the chat-completions endpoint at
``https://api-inference.huggingface.co/v1/`` through the ``openai`` client, and
launches the UI when the module is executed.
"""

import asyncio
import functools
import os

import gradio as gr
from openai import OpenAI

# Model requested from the inference endpoint for every chat completion.
MODEL_ID = 'meta-llama/Llama-3.1-70B-Instruct'

# Upper bound on generated tokens sent with each request.
MAX_OUTPUT_TOKENS = 16384


@functools.lru_cache(maxsize=1)
def _get_client():
    """Return the shared ``OpenAI`` client, creating it on first use.

    The client is built once and reused so that each request does not set up
    (and abandon) a fresh HTTP client. The API key is read from the
    ``HUGGINGFACE_API_KEY`` environment variable at creation time; if the
    variable is unset, ``None`` is passed and the ``openai`` library decides
    how to proceed (NOTE(review): presumably it falls back to its own default
    environment variable or raises -- confirm against the installed version).
    """
    return OpenAI(
        base_url="https://api-inference.huggingface.co/v1/",
        api_key=os.getenv('HUGGINGFACE_API_KEY'),
    )


async def generate_response(user_input):
    """Send ``user_input`` as a single user message and return the reply text.

    Args:
        user_input: The message typed into the UI textbox.

    Returns:
        The ``content`` of the first choice in the chat-completion response.

    Raises:
        Whatever the ``openai`` client raises for configuration, network or
        API errors; nothing is caught here.
    """
    messages = [
        {"role": "user", "content": user_input},
    ]

    # NOTE(review): both ``max_tokens`` and ``max_completion_tokens`` are sent,
    # exactly as in the original request. They look redundant; confirm which
    # one the endpoint honours before dropping either.
    request = functools.partial(
        _get_client().chat.completions.create,
        model=MODEL_ID,
        messages=messages,
        max_tokens=MAX_OUTPUT_TOKENS,
        max_completion_tokens=MAX_OUTPUT_TOKENS,
    )

    # The client call is synchronous. Running it directly inside this
    # coroutine would block the event loop for the whole duration of the HTTP
    # request, so it is handed to the default thread-pool executor instead.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, request)

    return response.choices[0].message.content


# NOTE(review): the description mentions "Nemotron" while MODEL_ID points at
# meta-llama/Llama-3.1-70B-Instruct -- confirm which one is intended.
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Your message"),
    outputs=gr.Textbox(label="AI Response"),
    title="AI Chat Interface",
    description="Chat with Llama 3.1 Nemotron",
)

demo.launch()