import os
import base64
import gradio as gr
import requests
import json
from io import BytesIO
from PIL import Image
import time

# Get API key from environment variable for security
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Model information: (name, model_id, <unused>, <unused>, context_size)
free_models = [
    ("Google: Gemini Pro 2.0 Experimental (free)", "google/gemini-2.0-pro-exp-02-05:free", 0, 0, 2000000),
    ("Google: Gemini 2.0 Flash Thinking Experimental 01-21 (free)", "google/gemini-2.0-flash-thinking-exp:free", 0, 0, 1048576),
    ("Google: Gemini Flash 2.0 Experimental (free)", "google/gemini-2.0-flash-exp:free", 0, 0, 1048576),
    ("Google: Gemini Pro 2.5 Experimental (free)", "google/gemini-2.5-pro-exp-03-25:free", 0, 0, 1000000),
    ("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 0, 0, 1000000),
    ("DeepSeek: DeepSeek R1 Zero (free)", "deepseek/deepseek-r1-zero:free", 0, 0, 163840),
    ("DeepSeek: R1 (free)", "deepseek/deepseek-r1:free", 0, 0, 163840),
    ("DeepSeek: DeepSeek V3 Base (free)", "deepseek/deepseek-v3-base:free", 0, 0, 131072),
    ("DeepSeek: DeepSeek V3 0324 (free)", "deepseek/deepseek-chat-v3-0324:free", 0, 0, 131072),
    ("Google: Gemma 3 4B (free)", "google/gemma-3-4b-it:free", 0, 0, 131072),
    ("Google: Gemma 3 12B (free)", "google/gemma-3-12b-it:free", 0, 0, 131072),
    ("Nous: DeepHermes 3 Llama 3 8B Preview (free)", "nousresearch/deephermes-3-llama-3-8b-preview:free", 0, 0, 131072),
    ("Qwen: Qwen2.5 VL 72B Instruct (free)", "qwen/qwen2.5-vl-72b-instruct:free", 0, 0, 131072),
    ("DeepSeek: DeepSeek V3 (free)", "deepseek/deepseek-chat:free", 0, 0, 131072),
    ("NVIDIA: Llama 3.1 Nemotron 70B Instruct (free)", "nvidia/llama-3.1-nemotron-70b-instruct:free", 0, 0, 131072),
    ("Meta: Llama 3.2 1B Instruct (free)", "meta-llama/llama-3.2-1b-instruct:free", 0, 0, 131072),
    ("Meta: Llama 3.2 11B Vision Instruct (free)", "meta-llama/llama-3.2-11b-vision-instruct:free", 0, 0, 131072),
    ("Meta: Llama 3.1 8B Instruct (free)", "meta-llama/llama-3.1-8b-instruct:free", 0, 0, 131072),
    ("Mistral: Mistral Nemo (free)", "mistralai/mistral-nemo:free", 0, 0, 128000),
    ("Mistral: Mistral Small 3.1 24B (free)", "mistralai/mistral-small-3.1-24b-instruct:free", 0, 0, 96000),
    ("Google: Gemma 3 27B (free)", "google/gemma-3-27b-it:free", 0, 0, 96000),
    ("Qwen: Qwen2.5 VL 3B Instruct (free)", "qwen/qwen2.5-vl-3b-instruct:free", 0, 0, 64000),
    ("DeepSeek: R1 Distill Qwen 14B (free)", "deepseek/deepseek-r1-distill-qwen-14b:free", 0, 0, 64000),
    ("Qwen: Qwen2.5-VL 7B Instruct (free)", "qwen/qwen-2.5-vl-7b-instruct:free", 0, 0, 64000),
    ("Google: LearnLM 1.5 Pro Experimental (free)", "google/learnlm-1.5-pro-experimental:free", 0, 0, 40960),
    ("Qwen: QwQ 32B (free)", "qwen/qwq-32b:free", 0, 0, 40000),
    ("Google: Gemini 2.0 Flash Thinking Experimental (free)", "google/gemini-2.0-flash-thinking-exp-1219:free", 0, 0, 40000),
    ("Bytedance: UI-TARS 72B (free)", "bytedance-research/ui-tars-72b:free", 0, 0, 32768),
    ("Qwerky 72b (free)", "featherless/qwerky-72b:free", 0, 0, 32768),
    ("OlympicCoder 7B (free)", "open-r1/olympiccoder-7b:free", 0, 0, 32768),
    ("OlympicCoder 32B (free)", "open-r1/olympiccoder-32b:free", 0, 0, 32768),
    ("Google: Gemma 3 1B (free)", "google/gemma-3-1b-it:free", 0, 0, 32768),
    ("Reka: Flash 3 (free)", "rekaai/reka-flash-3:free", 0, 0, 32768),
    ("Dolphin3.0 R1 Mistral 24B (free)", "cognitivecomputations/dolphin3.0-r1-mistral-24b:free", 0, 0, 32768),
    ("Dolphin3.0 Mistral 24B (free)", "cognitivecomputations/dolphin3.0-mistral-24b:free", 0, 0, 32768),
    ("Mistral: Mistral Small 3 (free)", "mistralai/mistral-small-24b-instruct-2501:free", 0, 0, 32768),
    ("Qwen2.5 Coder 32B Instruct (free)", "qwen/qwen-2.5-coder-32b-instruct:free", 0, 0, 32768),
    ("Qwen2.5 72B Instruct (free)", "qwen/qwen-2.5-72b-instruct:free", 0, 0, 32768),
    ("Meta: Llama 3.2 3B Instruct (free)", "meta-llama/llama-3.2-3b-instruct:free", 0, 0, 20000),
    ("Qwen: QwQ 32B Preview (free)", "qwen/qwq-32b-preview:free", 0, 0, 16384),
    ("DeepSeek: R1 Distill Qwen 32B (free)", "deepseek/deepseek-r1-distill-qwen-32b:free", 0, 0, 16000),
    ("Qwen: Qwen2.5 VL 32B Instruct (free)", "qwen/qwen2.5-vl-32b-instruct:free", 0, 0, 8192),
    ("Moonshot AI: Moonlight 16B A3B Instruct (free)", "moonshotai/moonlight-16b-a3b-instruct:free", 0, 0, 8192),
    ("DeepSeek: R1 Distill Llama 70B (free)", "deepseek/deepseek-r1-distill-llama-70b:free", 0, 0, 8192),
    ("Qwen 2 7B Instruct (free)", "qwen/qwen-2-7b-instruct:free", 0, 0, 8192),
    ("Google: Gemma 2 9B (free)", "google/gemma-2-9b-it:free", 0, 0, 8192),
    ("Mistral: Mistral 7B Instruct (free)", "mistralai/mistral-7b-instruct:free", 0, 0, 8192),
    ("Microsoft: Phi-3 Mini 128K Instruct (free)", "microsoft/phi-3-mini-128k-instruct:free", 0, 0, 8192),
    ("Microsoft: Phi-3 Medium 128K Instruct (free)", "microsoft/phi-3-medium-128k-instruct:free", 0, 0, 8192),
    ("Meta: Llama 3 8B Instruct (free)", "meta-llama/llama-3-8b-instruct:free", 0, 0, 8192),
    ("OpenChat 3.5 7B (free)", "openchat/openchat-7b:free", 0, 0, 8192),
    ("Meta: Llama 3.3 70B Instruct (free)", "meta-llama/llama-3.3-70b-instruct:free", 0, 0, 8000),
    ("AllenAI: Molmo 7B D (free)", "allenai/molmo-7b-d:free", 0, 0, 4096),
    ("Rogue Rose 103B v0.2 (free)", "sophosympatheia/rogue-rose-103b-v0.2:free", 0, 0, 4096),
    ("Toppy M 7B (free)", "undi95/toppy-m-7b:free", 0, 0, 4096),
    ("Hugging Face: Zephyr 7B (free)", "huggingfaceh4/zephyr-7b-beta:free", 0, 0, 4096),
    ("MythoMax 13B (free)", "gryphe/mythomax-l2-13b:free", 0, 0, 4096),
]

# Filter for vision models
vision_model_ids = [
    "meta-llama/llama-3.2-11b-vision-instruct:free",
    "qwen/qwen2.5-vl-72b-instruct:free",
    "qwen/qwen2.5-vl-3b-instruct:free",
    "qwen/qwen2.5-vl-32b-instruct:free",
    "qwen/qwen-2.5-vl-7b-instruct:free",
    "google/gemini-2.0-pro-exp-02-05:free",
    "google/gemini-2.5-pro-exp-03-25:free",
]


# Format model names to include context size
def format_model_name(name, context_size):
    if context_size >= 1000000:
        context_str = f"{context_size/1000000:.1f}M tokens"
    else:
        context_str = f"{context_size/1000:.0f}K tokens"
    return f"{name} ({context_str})"


# Prefilter vision models; each entry is (display name, model_id, context_size)
vision_models = [
    (format_model_name(name, context_size), model_id, context_size)
    for name, model_id, _, _, context_size in free_models
    if model_id in vision_model_ids
]
text_models = [
    (format_model_name(name, context_size), model_id, context_size)
    for name, model_id, _, _, context_size in free_models
]


def encode_image(image):
    """Convert PIL Image to base64 string"""
    buffered = BytesIO()
    # JPEG has no alpha channel, so normalize the mode before saving
    if image.mode != "RGB":
        image = image.convert("RGB")
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def encode_file(file_path):
    """Convert text file to string"""
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        return f"Error reading file: {str(e)}"


def process_message_stream(message, chat_history, model_name, uploaded_image=None, uploaded_file=None,
                           temperature=0.7, top_p=1.0, max_tokens=None, stream=True):
    """Process a message and stream the model response into the chat history."""
    # Map the dropdown display name back to its OpenRouter model id;
    # fall back to treating the input as a raw model id.
    model_id = next(
        (mid for name, mid, _ in text_models + vision_models if name == model_name),
        model_name,
    )
    # Check if API key is set
    if not OPENROUTER_API_KEY:
        chat_history.append({"role": "user", "content": message})
        chat_history.append({
            "role": "assistant",
            "content": "Please set your OpenRouter API key in the environment variables.",
        })
        yield chat_history
        return

    # Setup headers and URL
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": "https://huggingface.co/spaces",  # Replace with your actual space URL in production
    }
    url = "https://openrouter.ai/api/v1/chat/completions"

    # Build message content
    messages = []

    # Add chat history
    for item in chat_history:
        if isinstance(item, tuple):
            # Old format compatibility
            human_msg, ai_msg = item
            messages.append({"role": "user", "content": human_msg})
            messages.append({"role": "assistant", "content": ai_msg})
        else:
            # New message format
            messages.append(item)

    # Add current message with any attachments
    if uploaded_image:
        # Image processing for vision models
        base64_image = encode_image(uploaded_image)
        content = [
            {"type": "text", "text": message}
        ]

        # Add text from file if provided
        if uploaded_file:
            file_content = encode_file(uploaded_file)
            content[0]["text"] = f"{message}\n\nFile content:\n```\n{file_content}\n```"

        # Add image
        content.append({
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{base64_image}"
            }
        })
        messages.append({"role": "user", "content": content})
    else:
        if uploaded_file:
            file_content = encode_file(uploaded_file)
            content = f"{message}\n\nFile content:\n```\n{file_content}\n```"
            messages.append({"role": "user", "content": content})
        else:
            messages.append({"role": "user", "content": message})

    # Get context length for the selected model (default to 4096 if unknown)
    context_length = next(
        (context for _, mid, context in text_models if mid == model_id),
        4096,
    )

    # Calculate default max tokens if not specified
    if not max_tokens:
        # Use 25% of context length as a reasonable default, capped at 4000
        max_tokens = min(4000, int(context_length * 0.25))

    # Build request data
    data = {
        "model": model_id,
        "messages": messages,
        "stream": stream,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens
    }

    try:
        # Create a new message pair in the chat history
        user_msg = {"role": "user", "content": message}
        ai_msg = {"role": "assistant", "content": ""}
        chat_history.append(user_msg)
        chat_history.append(ai_msg)

        full_response = ""

        if stream:
            # Make streaming API call and parse server-sent events line by line
            with requests.post(url, headers=headers, json=data, stream=True) as response:
                response.raise_for_status()
                buffer = ""
                for chunk in response.iter_content(chunk_size=1024, decode_unicode=False):
                    if chunk:
                        buffer += chunk.decode('utf-8')
                        while True:
                            line_end = buffer.find('\n')
                            if line_end == -1:
                                break

                            line = buffer[:line_end].strip()
                            buffer = buffer[line_end + 1:]

                            if line.startswith('data: '):
                                payload = line[6:]
                                if payload == '[DONE]':
                                    break
                                try:
                                    data_obj = json.loads(payload)
                                    delta_content = data_obj["choices"][0]["delta"].get("content", "")
                                    if delta_content:
                                        full_response += delta_content
                                        # Update the last assistant message
                                        chat_history[-1]["content"] = full_response
                                        yield chat_history
                                except json.JSONDecodeError:
                                    pass
        else:
            # Non-streaming API call
            response = requests.post(url, headers=headers, json=data)
            response.raise_for_status()
            result = response.json()
            full_response = result.get("choices", [{}])[0].get("message", {}).get("content", "No response")
            chat_history[-1]["content"] = full_response
            yield chat_history

        return chat_history
    except Exception as e:
        error_msg = f"Error: {str(e)}"
        chat_history[-1]["content"] = error_msg
        yield chat_history


# Create a nice CSS theme
css = """
.gradio-container {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
}
.chat-message {
    padding: 15px;
    border-radius: 10px;
    margin-bottom: 10px;
}
.user-message {
    background-color: #f0f4f8;
}
.assistant-message {
    background-color: #e9f5ff;
}
#chat-container {
    height: 600px;
    overflow-y: auto;
}
#chat-input {
    min-height: 120px;
    border-radius: 8px;
    padding: 10px;
}
#model-select-container {
    border-radius: 8px;
    padding: 15px;
    background-color: #f8fafc;
}
.app-header {
    text-align: center;
    margin-bottom: 20px;
}
.app-header h1 {
    font-weight: 700;
    color: #2C3E50;
    margin-bottom: 5px;
}
.app-header p {
    color: #7F8C8D;
    margin-top: 0;
}
.parameter-container {
    background-color: #f8fafc;
    padding: 10px;
    border-radius: 8px;
    margin-top: 10px;
}
.file-upload-container {
    margin-top: 10px;
}
"""

with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    gr.HTML("""
        <div class="app-header">
            <h1>🔆 CrispChat</h1>
            <p>Chat with free OpenRouter AI models - supports text, images, and files</p>
        </div>
    """)

    with gr.Row():
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(
                height=600,
                show_copy_button=True,
                show_share_button=False,
                elem_id="chatbot",
                layout="bubble",
                avatar_images=("👤", "🤖"),
                bubble_full_width=False,
                type="messages"  # Use new message format
            )

            with gr.Row():
                with gr.Column(scale=10):
                    user_message = gr.Textbox(
                        placeholder="Type your message here...",
                        show_label=False,
                        elem_id="chat-input",
                        lines=3
                    )
                    with gr.Row():
                        image_upload = gr.Image(
                            type="pil",
                            label="Image (optional)",
                            show_label=True,
                            scale=1
                        )
                        file_upload = gr.File(
                            label="Text File (optional)",
                            file_types=[".txt", ".md", ".py", ".js", ".html", ".css", ".json"],
                            scale=1
                        )
                submit_btn = gr.Button("Send", scale=1, variant="primary")

        with gr.Column(scale=2):
            with gr.Accordion("Model Selection", open=True):
                using_vision = gr.Checkbox(label="Using image", value=False)
                model_selector = gr.Dropdown(
                    choices=[name for name, _, _ in text_models],
                    value=text_models[0][0],
                    label="Select Model",
                    elem_id="model-selector"
                )
                context_info = gr.Markdown(value=f"Context: {text_models[0][2]:,} tokens")

            with gr.Accordion("Parameters", open=False):
                with gr.Group():
                    temperature = gr.Slider(
                        minimum=0.0, maximum=2.0, value=0.7, step=0.1,
                        label="Temperature",
                        info="Higher = more creative, Lower = more deterministic"
                    )
                    top_p = gr.Slider(
                        minimum=0.1, maximum=1.0, value=1.0, step=0.1,
                        label="Top P",
                        info="Controls token diversity"
                    )
                    max_tokens = gr.Slider(
                        minimum=100, maximum=8000, value=1000, step=100,
                        label="Max Tokens",
                        info="Maximum length of the response"
                    )
                    use_streaming = gr.Checkbox(
                        label="Stream Response",
                        value=True,
                        info="Show response as it's generated"
                    )

            with gr.Accordion("Tips", open=False):
                gr.Markdown("""
                * Select a vision-capable model for images
                * Upload text files to include their content
                * Check model context window sizes
                * Adjust temperature for creativity level
                * Top P controls diversity of responses
                """)

    # Define events
    def update_model_selector(use_vision):
        if use_vision:
            return (
                gr.Dropdown(choices=[name for name, _, _ in vision_models], value=vision_models[0][0]),
                f"Context: {vision_models[0][2]:,} tokens"
            )
        else:
            return (
                gr.Dropdown(choices=[name for name, _, _ in text_models], value=text_models[0][0]),
                f"Context: {text_models[0][2]:,} tokens"
            )

    def update_context_info(model_name):
        # Extract context size from the selected display name
        for name, _, context_size in text_models:
            if name == model_name:
                return f"Context: {context_size:,} tokens"
        for name, _, context_size in vision_models:
            if name == model_name:
                return f"Context: {context_size:,} tokens"
        return "Context size unknown"

    using_vision.change(
        fn=update_model_selector,
        inputs=using_vision,
        outputs=[model_selector, context_info]
    )

    model_selector.change(
        fn=update_context_info,
        inputs=model_selector,
        outputs=context_info
    )

    # Submit function: a generator so the chatbot updates as tokens stream in
    def on_submit(message, history, model, image, file, temp, top_p_val, max_tok, stream):
        if not message and not image and not file:
            yield "", history
            return
        for updated_history in process_message_stream(
            message, history, model, image,
            file.name if file else None,
            temperature=temp, top_p=top_p_val,
            max_tokens=max_tok, stream=stream
        ):
            yield "", updated_history

    # Set up submission events
    submit_btn.click(
        on_submit,
        inputs=[
            user_message, chatbot, model_selector, image_upload,
            file_upload, temperature, top_p, max_tokens, use_streaming
        ],
        outputs=[user_message, chatbot]
    )

    user_message.submit(
        on_submit,
        inputs=[
            user_message, chatbot, model_selector, image_upload,
            file_upload, temperature, top_p, max_tokens, use_streaming
        ],
        outputs=[user_message, chatbot]
    )

# Define FastAPI endpoint
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional

app = FastAPI()


class GenerateRequest(BaseModel):
    message: str
    model: Optional[str] = None
    image_data: Optional[str] = None


@app.post("/api/generate")
async def api_generate(request: GenerateRequest):
    """API endpoint for generating responses"""
    try:
        message = request.message
        model_name = request.model
        image_data = request.image_data

        # Process image if provided
        image = None
        if image_data:
            try:
                # Decode base64 image
                image_bytes = base64.b64decode(image_data)
                image = Image.open(BytesIO(image_bytes))
            except Exception as e:
                return JSONResponse(
                    status_code=400,
                    content={"error": f"Image processing error: {str(e)}"}
                )

        # Generate response
        try:
            # Setup headers and URL
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "HTTP-Referer": "https://huggingface.co/spaces",
            }
            url = "https://openrouter.ai/api/v1/chat/completions"

            # Get model_id from model_name; fall back to the first text model
            model_id = None
            if model_name:
                for _, mid, _ in text_models + vision_models:
                    if model_name in mid or model_name == mid:
                        model_id = mid
                        break
            if not model_id:
                model_id = text_models[0][1]

            # Build messages
            messages = []

            if image:
                # Image processing for vision models
                base64_image = encode_image(image)
                content = [
                    {"type": "text", "text": message},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
                messages.append({"role": "user", "content": content})
            else:
                messages.append({"role": "user", "content": message})

            # Build request data
            data = {
                "model": model_id,
                "messages": messages,
                "temperature": 0.7
            }

            # Make API call
            response = requests.post(url, headers=headers, json=data)
            response.raise_for_status()

            # Parse response
            result = response.json()
            reply = result.get("choices", [{}])[0].get("message", {}).get("content", "No response")

            return {"response": reply}
        except Exception as e:
            return JSONResponse(
                status_code=500,
                content={"error": f"Error generating response: {str(e)}"}
            )
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={"error": f"Server error: {str(e)}"}
        )


# Add CORS middleware to allow cross-origin requests
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount Gradio app onto the FastAPI app
app = gr.mount_gradio_app(app, demo, path="/")

# Start the app
if __name__ == "__main__":
    # Use 'uvicorn' directly in HF Spaces
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
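
# --- Example client call (illustrative sketch only, kept as a comment so it never runs) ---
# A minimal way to exercise the /api/generate endpoint above. The base URL and the
# model id are placeholders, not part of this app; substitute your deployment URL and
# any id from free_models.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/api/generate",
#       json={
#           "message": "Summarize the benefits of streaming responses.",
#           "model": "meta-llama/llama-3.1-8b-instruct:free",
#       },
#       timeout=120,
#   )
#   print(resp.json()["response"])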