"""Gradio demo: a multi-model chatbot that keeps conversation context.

Three hosted models are reachable through ``huggingface_hub.InferenceClient``
(one client per inference provider). The conversation is kept in a
``gr.State`` list of chat messages and re-sent on every request.
"""
import logging
import os
from typing import NamedTuple, Optional

import gradio as gr
from huggingface_hub import InferenceClient

logger = logging.getLogger(__name__)

# Setup clients for each provider (HF_TOKEN must be set in the environment;
# a missing token raises KeyError at import time).
llama_client = InferenceClient(provider="sambanova", api_key=os.environ["HF_TOKEN"])
minimax_client = InferenceClient(provider="novita", api_key=os.environ["HF_TOKEN"])
mistral_client = InferenceClient(provider="together", api_key=os.environ["HF_TOKEN"])


class _ModelSpec(NamedTuple):
    """How to reach one selectable model."""

    client: InferenceClient
    model_id: str
    # True when the user turn is sent as a list of content parts
    # (text + optional image_url) instead of a plain string.
    supports_images: bool


# Single source of truth for the dropdown labels and the request routing.
# Insertion order is the order shown in the UI.
_MODELS = {
    "LLaMA 4 (SambaNova)": _ModelSpec(
        llama_client, "meta-llama/Llama-4-Maverick-17B-128E-Instruct", True
    ),
    "MiniMax M1 (Novita)": _ModelSpec(
        minimax_client, "MiniMaxAI/MiniMax-M1-80k", False
    ),
    "Mistral Mixtral-8x7B (Together)": _ModelSpec(
        mistral_client, "mistralai/Mixtral-8x7B-Instruct-v0.1", False
    ),
}


def _build_user_content(spec, prompt, image_url):
    """Return the ``content`` value of the user message for *spec*'s model."""
    if not spec.supports_images:
        return prompt
    parts = [{"type": "text", "text": prompt}]
    url = (image_url or "").strip()
    if url:
        parts.append({"type": "image_url", "image_url": {"url": url}})
    return parts


# Context-aware response function
def chat_with_model(
    model_choice: str,
    prompt: Optional[str],
    image_url: Optional[str],
    chat_history: Optional[list],
) -> tuple:
    """Send *prompt* plus the prior conversation to the selected model.

    Args:
        model_choice: One of the labels in ``_MODELS`` (the dropdown value).
        prompt: The user's text. Empty or whitespace-only text is rejected
            without calling any provider.
        image_url: Optional image URL; only attached for models whose spec
            has ``supports_images`` set.
        chat_history: Prior messages as ``{"role": ..., "content": ...}``
            dicts, or ``None`` for a fresh conversation. Never mutated.

    Returns:
        ``(text, history)``. On success ``text`` is the assistant reply and
        ``history`` is a new list extended with the user and assistant
        turns. On any failure ``text`` is a human-readable message and
        ``history`` has the same contents as the input, so a failed request
        never leaves a dangling user turn behind.
    """
    # Work on a copy so the caller's list (the gr.State value) is only
    # replaced by a fully completed exchange.
    history = list(chat_history) if chat_history else []

    if not prompt or not prompt.strip():
        return "Please enter a text prompt.", history

    spec = _MODELS.get(model_choice)
    if spec is None:
        return "Unsupported model selected.", history

    # NOTE(review): history recorded while chatting with the image-capable
    # model may contain list-style content (including image_url parts) and is
    # forwarded unchanged if the user then switches to a text-only model —
    # confirm the providers accept that, or flatten it here.
    user_turn = {
        "role": "user",
        "content": _build_user_content(spec, prompt, image_url),
    }
    messages = history + [user_turn]

    try:
        response = spec.client.chat.completions.create(
            model=spec.model_id,
            messages=messages,
        )
        bot_msg = response.choices[0].message.content
    except Exception as e:  # UI boundary: report to the user instead of crashing
        logger.exception("Chat completion failed for %s", model_choice)
        return f"Error: {e}", history

    messages.append({"role": "assistant", "content": bot_msg})
    return bot_msg, messages


def _reset_conversation():
    """Return the reset notice and an empty history for the UI outputs."""
    return "Conversation reset. You can start a new one.", []


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Multi-Model Context-Aware Chatbot")
    gr.Markdown("Supports LLaMA 4 (with optional image), MiniMax, and Mistral. Conversation memory is preserved.")

    model_dropdown = gr.Dropdown(
        choices=list(_MODELS),
        value="LLaMA 4 (SambaNova)",
        label="Select Model",
    )
    prompt_input = gr.Textbox(label="Text Prompt", placeholder="Ask something...", lines=2)
    image_url_input = gr.Textbox(label="Optional Image URL (for LLaMA only)", placeholder="https://example.com/image.jpg")

    submit_btn = gr.Button("Generate Response")
    reset_btn = gr.Button("🔄 Reset Conversation")
    output_box = gr.Textbox(label="Response", lines=8)

    # Per-session conversation history (list of chat message dicts).
    state = gr.State([])

    submit_btn.click(
        fn=chat_with_model,
        inputs=[model_dropdown, prompt_input, image_url_input, state],
        outputs=[output_box, state],
    )
    reset_btn.click(
        fn=_reset_conversation,
        inputs=[],
        outputs=[output_box, state],
    )

if __name__ == "__main__":
    demo.launch()