import os import base64 import gradio as gr import requests import json from io import BytesIO from PIL import Image import time # Get API key from environment variable for security OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "") # Model information free_models = [ ("Google: Gemini Pro 2.0 Experimental (free)", "google/gemini-2.0-pro-exp-02-05:free", 0, 0, 2000000), ("Google: Gemini 2.0 Flash Thinking Experimental 01-21 (free)", "google/gemini-2.0-flash-thinking-exp:free", 0, 0, 1048576), ("Google: Gemini Flash 2.0 Experimental (free)", "google/gemini-2.0-flash-exp:free", 0, 0, 1048576), ("Google: Gemini Pro 2.5 Experimental (free)", "google/gemini-2.5-pro-exp-03-25:free", 0, 0, 1000000), ("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 0, 0, 1000000), ("DeepSeek: DeepSeek R1 Zero (free)", "deepseek/deepseek-r1-zero:free", 0, 0, 163840), ("DeepSeek: R1 (free)", "deepseek/deepseek-r1:free", 0, 0, 163840), ("DeepSeek: DeepSeek V3 Base (free)", "deepseek/deepseek-v3-base:free", 0, 0, 131072), ("DeepSeek: DeepSeek V3 0324 (free)", "deepseek/deepseek-chat-v3-0324:free", 0, 0, 131072), ("Google: Gemma 3 4B (free)", "google/gemma-3-4b-it:free", 0, 0, 131072), ("Google: Gemma 3 12B (free)", "google/gemma-3-12b-it:free", 0, 0, 131072), ("Nous: DeepHermes 3 Llama 3 8B Preview (free)", "nousresearch/deephermes-3-llama-3-8b-preview:free", 0, 0, 131072), ("Qwen: Qwen2.5 VL 72B Instruct (free)", "qwen/qwen2.5-vl-72b-instruct:free", 0, 0, 131072), ("DeepSeek: DeepSeek V3 (free)", "deepseek/deepseek-chat:free", 0, 0, 131072), ("NVIDIA: Llama 3.1 Nemotron 70B Instruct (free)", "nvidia/llama-3.1-nemotron-70b-instruct:free", 0, 0, 131072), ("Meta: Llama 3.2 1B Instruct (free)", "meta-llama/llama-3.2-1b-instruct:free", 0, 0, 131072), ("Meta: Llama 3.2 11B Vision Instruct (free)", "meta-llama/llama-3.2-11b-vision-instruct:free", 0, 0, 131072), ("Meta: Llama 3.1 8B Instruct (free)", "meta-llama/llama-3.1-8b-instruct:free", 0, 0, 131072), ("Mistral: Mistral Nemo (free)", "mistralai/mistral-nemo:free", 0, 0, 128000), ("Mistral: Mistral Small 3.1 24B (free)", "mistralai/mistral-small-3.1-24b-instruct:free", 0, 0, 96000), ("Google: Gemma 3 27B (free)", "google/gemma-3-27b-it:free", 0, 0, 96000), ("Qwen: Qwen2.5 VL 3B Instruct (free)", "qwen/qwen2.5-vl-3b-instruct:free", 0, 0, 64000), ("DeepSeek: R1 Distill Qwen 14B (free)", "deepseek/deepseek-r1-distill-qwen-14b:free", 0, 0, 64000), ("Qwen: Qwen2.5-VL 7B Instruct (free)", "qwen/qwen-2.5-vl-7b-instruct:free", 0, 0, 64000), ("Google: LearnLM 1.5 Pro Experimental (free)", "google/learnlm-1.5-pro-experimental:free", 0, 0, 40960), ("Qwen: QwQ 32B (free)", "qwen/qwq-32b:free", 0, 0, 40000), ("Google: Gemini 2.0 Flash Thinking Experimental (free)", "google/gemini-2.0-flash-thinking-exp-1219:free", 0, 0, 40000), ("Bytedance: UI-TARS 72B (free)", "bytedance-research/ui-tars-72b:free", 0, 0, 32768), ("Qwerky 72b (free)", "featherless/qwerky-72b:free", 0, 0, 32768), ("OlympicCoder 7B (free)", "open-r1/olympiccoder-7b:free", 0, 0, 32768), ("OlympicCoder 32B (free)", "open-r1/olympiccoder-32b:free", 0, 0, 32768), ("Google: Gemma 3 1B (free)", "google/gemma-3-1b-it:free", 0, 0, 32768), ("Reka: Flash 3 (free)", "rekaai/reka-flash-3:free", 0, 0, 32768), ("Dolphin3.0 R1 Mistral 24B (free)", "cognitivecomputations/dolphin3.0-r1-mistral-24b:free", 0, 0, 32768), ("Dolphin3.0 Mistral 24B (free)", "cognitivecomputations/dolphin3.0-mistral-24b:free", 0, 0, 32768), ("Mistral: Mistral Small 3 (free)", "mistralai/mistral-small-24b-instruct-2501:free", 0, 0, 32768), ("Qwen2.5 Coder 32B Instruct (free)", "qwen/qwen-2.5-coder-32b-instruct:free", 0, 0, 32768), ("Qwen2.5 72B Instruct (free)", "qwen/qwen-2.5-72b-instruct:free", 0, 0, 32768), ("Meta: Llama 3.2 3B Instruct (free)", "meta-llama/llama-3.2-3b-instruct:free", 0, 0, 20000), ("Qwen: QwQ 32B Preview (free)", "qwen/qwq-32b-preview:free", 0, 0, 16384), ("DeepSeek: R1 Distill Qwen 32B (free)", "deepseek/deepseek-r1-distill-qwen-32b:free", 0, 0, 16000), ("Qwen: Qwen2.5 VL 32B Instruct (free)", "qwen/qwen2.5-vl-32b-instruct:free", 0, 0, 8192), ("Moonshot AI: Moonlight 16B A3B Instruct (free)", "moonshotai/moonlight-16b-a3b-instruct:free", 0, 0, 8192), ("DeepSeek: R1 Distill Llama 70B (free)", "deepseek/deepseek-r1-distill-llama-70b:free", 0, 0, 8192), ("Qwen 2 7B Instruct (free)", "qwen/qwen-2-7b-instruct:free", 0, 0, 8192), ("Google: Gemma 2 9B (free)", "google/gemma-2-9b-it:free", 0, 0, 8192), ("Mistral: Mistral 7B Instruct (free)", "mistralai/mistral-7b-instruct:free", 0, 0, 8192), ("Microsoft: Phi-3 Mini 128K Instruct (free)", "microsoft/phi-3-mini-128k-instruct:free", 0, 0, 8192), ("Microsoft: Phi-3 Medium 128K Instruct (free)", "microsoft/phi-3-medium-128k-instruct:free", 0, 0, 8192), ("Meta: Llama 3 8B Instruct (free)", "meta-llama/llama-3-8b-instruct:free", 0, 0, 8192), ("OpenChat 3.5 7B (free)", "openchat/openchat-7b:free", 0, 0, 8192), ("Meta: Llama 3.3 70B Instruct (free)", "meta-llama/llama-3.3-70b-instruct:free", 0, 0, 8000), ("AllenAI: Molmo 7B D (free)", "allenai/molmo-7b-d:free", 0, 0, 4096), ("Rogue Rose 103B v0.2 (free)", "sophosympatheia/rogue-rose-103b-v0.2:free", 0, 0, 4096), ("Toppy M 7B (free)", "undi95/toppy-m-7b:free", 0, 0, 4096), ("Hugging Face: Zephyr 7B (free)", "huggingfaceh4/zephyr-7b-beta:free", 0, 0, 4096), ("MythoMax 13B (free)", "gryphe/mythomax-l2-13b:free", 0, 0, 4096), ] # Filter for vision models vision_model_ids = [ "meta-llama/llama-3.2-11b-vision-instruct:free", "qwen/qwen2.5-vl-72b-instruct:free", "qwen/qwen2.5-vl-3b-instruct:free", "qwen/qwen2.5-vl-32b-instruct:free", "qwen/qwen-2.5-vl-7b-instruct:free", "google/gemini-2.0-pro-exp-02-05:free", "google/gemini-2.5-pro-exp-03-25:free" ] # Format model names to include context size def format_model_name(name, context_size): if context_size >= 1000000: context_str = f"{context_size/1000000:.1f}M tokens" else: context_str = f"{context_size/1000:.0f}K tokens" return f"{name} ({context_str})" # Prefilter vision models vision_models = [(format_model_name(name, context_size), model_id, context_size) for name, model_id, _, _, context_size in free_models if model_id in vision_model_ids] text_models = [(format_model_name(name, context_size), model_id, context_size) for name, model_id, _, _, context_size in free_models] def encode_image(image): """Convert PIL Image to base64 string""" buffered = BytesIO() image.save(buffered, format="JPEG") return base64.b64encode(buffered.getvalue()).decode("utf-8") def encode_file(file_path): """Convert text file to string""" try: with open(file_path, 'r', encoding='utf-8') as file: return file.read() except Exception as e: return f"Error reading file: {str(e)}" def process_message_stream(message, chat_history, model_name, uploaded_image=None, uploaded_file=None, temperature=0.7, top_p=1.0, max_tokens=None, stream=True): """Process message and stream the model response""" # Extract model_id from the display name model_id = model_name.split(' ')[1] if len(model_name.split(' ')) > 1 else model_name # Check if API key is set if not OPENROUTER_API_KEY: yield "Please set your OpenRouter API key in the environment variables.", chat_history return # Setup headers and URL headers = { "Content-Type": "application/json", "Authorization": f"Bearer {OPENROUTER_API_KEY}", "HTTP-Referer": "https://huggingface.co/spaces", # Replace with your actual space URL in production } url = "https://openrouter.ai/api/v1/chat/completions" # Build message content messages = [] # Add chat history for item in chat_history: if isinstance(item, tuple): # Old format compatibility human_msg, ai_msg = item messages.append({"role": "user", "content": human_msg}) messages.append({"role": "assistant", "content": ai_msg}) else: # New message format messages.append(item) # Add current message with any attachments if uploaded_image: # Image processing for vision models base64_image = encode_image(uploaded_image) content = [ {"type": "text", "text": message} ] # Add text from file if provided if uploaded_file: file_content = encode_file(uploaded_file) content[0]["text"] = f"{message}\n\nFile content:\n```\n{file_content}\n```" # Add image content.append({ "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{base64_image}" } }) messages.append({"role": "user", "content": content}) else: if uploaded_file: file_content = encode_file(uploaded_file) content = f"{message}\n\nFile content:\n```\n{file_content}\n```" messages.append({"role": "user", "content": content}) else: messages.append({"role": "user", "content": message}) # Get context length for the model context_length = next((context for _, model_id, context in text_models if model_id == model_id), 4096) # Calculate default max tokens if not specified if not max_tokens: # Use 25% of context length as a reasonable default max_tokens = min(4000, int(context_length * 0.25)) # Build request data data = { "model": model_id, "messages": messages, "stream": stream, "temperature": temperature, "top_p": top_p, "max_tokens": max_tokens } try: # Create a new message pair in the chat history user_msg = {"role": "user", "content": message} ai_msg = {"role": "assistant", "content": ""} chat_history.append(user_msg) chat_history.append(ai_msg) full_response = "" if stream: # Make streaming API call with requests.post(url, headers=headers, json=data, stream=True) as response: response.raise_for_status() buffer = "" for chunk in response.iter_content(chunk_size=1024, decode_unicode=False): if chunk: buffer += chunk.decode('utf-8') while True: line_end = buffer.find('\n') if line_end == -1: break line = buffer[:line_end].strip() buffer = buffer[line_end + 1:] if line.startswith('data: '): data = line[6:] if data == '[DONE]': break try: data_obj = json.loads(data) delta_content = data_obj["choices"][0]["delta"].get("content", "") if delta_content: full_response += delta_content # Update the last assistant message chat_history[-1]["content"] = full_response yield chat_history except json.JSONDecodeError: pass else: # Non-streaming API call response = requests.post(url, headers=headers, json=data) response.raise_for_status() result = response.json() full_response = result.get("choices", [{}])[0].get("message", {}).get("content", "No response") chat_history[-1]["content"] = full_response yield chat_history return chat_history except Exception as e: error_msg = f"Error: {str(e)}" chat_history[-1]["content"] = error_msg yield chat_history # Create a nice CSS theme css = """ .gradio-container { font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; } .chat-message { padding: 15px; border-radius: 10px; margin-bottom: 10px; } .user-message { background-color: #f0f4f8; } .assistant-message { background-color: #e9f5ff; } #chat-container { height: 600px; overflow-y: auto; } #chat-input { min-height: 120px; border-radius: 8px; padding: 10px; } #model-select-container { border-radius: 8px; padding: 15px; background-color: #f8fafc; } .app-header { text-align: center; margin-bottom: 20px; } .app-header h1 { font-weight: 700; color: #2C3E50; margin-bottom: 5px; } .app-header p { color: #7F8C8D; margin-top: 0; } .parameter-container { background-color: #f8fafc; padding: 10px; border-radius: 8px; margin-top: 10px; } .file-upload-container { margin-top: 10px; } """ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo: gr.HTML("""
Chat with free OpenRouter AI models - supports text, images, and files