import os import gradio as gr import requests import json import base64 import logging import io from typing import List, Dict, Any, Union, Tuple, Optional # Configure logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) # Gracefully import libraries with fallbacks try: from PIL import Image except ImportError: logger.warning("PIL not installed. Image processing will be limited.") Image = None try: import PyPDF2 except ImportError: logger.warning("PyPDF2 not installed. PDF processing will be limited.") PyPDF2 = None try: import markdown except ImportError: logger.warning("Markdown not installed. Markdown processing will be limited.") markdown = None # API key OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "") # Log API key status (masked for security) if OPENROUTER_API_KEY: masked_key = OPENROUTER_API_KEY[:4] + "..." + OPENROUTER_API_KEY[-4:] if len(OPENROUTER_API_KEY) > 8 else "***" logger.info(f"Using API key: {masked_key}") else: logger.warning("No API key provided!") # Keep the existing model lists MODELS = [ # 1M+ Context Models {"category": "1M+ Context", "models": [ ("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000), ("Google: Gemini 2.0 Flash Thinking Experimental 01-21", "google/gemini-2.0-flash-thinking-exp:free", 1048576), ("Google: Gemini Flash 2.0 Experimental", "google/gemini-2.0-flash-exp:free", 1048576), ("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000), ("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 1000000), ]}, # 100K-1M Context Models {"category": "100K+ Context", "models": [ ("DeepSeek: DeepSeek R1 Zero", "deepseek/deepseek-r1-zero:free", 163840), ("DeepSeek: R1", "deepseek/deepseek-r1:free", 163840), ("DeepSeek: DeepSeek V3 Base", "deepseek/deepseek-v3-base:free", 131072), ("DeepSeek: DeepSeek V3 0324", "deepseek/deepseek-chat-v3-0324:free", 131072), ("Google: Gemma 3 4B", "google/gemma-3-4b-it:free", 131072), ("Google: Gemma 3 12B", "google/gemma-3-12b-it:free", 131072), ("Nous: DeepHermes 3 Llama 3 8B Preview", "nousresearch/deephermes-3-llama-3-8b-preview:free", 131072), ("Qwen: Qwen2.5 VL 72B Instruct", "qwen/qwen2.5-vl-72b-instruct:free", 131072), ("DeepSeek: DeepSeek V3", "deepseek/deepseek-chat:free", 131072), ("NVIDIA: Llama 3.1 Nemotron 70B Instruct", "nvidia/llama-3.1-nemotron-70b-instruct:free", 131072), ("Meta: Llama 3.2 1B Instruct", "meta-llama/llama-3.2-1b-instruct:free", 131072), ("Meta: Llama 3.2 11B Vision Instruct", "meta-llama/llama-3.2-11b-vision-instruct:free", 131072), ("Meta: Llama 3.1 8B Instruct", "meta-llama/llama-3.1-8b-instruct:free", 131072), ("Mistral: Mistral Nemo", "mistralai/mistral-nemo:free", 128000), ]}, # 64K-100K Context Models {"category": "64K-100K Context", "models": [ ("Mistral: Mistral Small 3.1 24B", "mistralai/mistral-small-3.1-24b-instruct:free", 96000), ("Google: Gemma 3 27B", "google/gemma-3-27b-it:free", 96000), ("Qwen: Qwen2.5 VL 3B Instruct", "qwen/qwen2.5-vl-3b-instruct:free", 64000), ("DeepSeek: R1 Distill Qwen 14B", "deepseek/deepseek-r1-distill-qwen-14b:free", 64000), ("Qwen: Qwen2.5-VL 7B Instruct", "qwen/qwen-2.5-vl-7b-instruct:free", 64000), ]}, # 32K-64K Context Models {"category": "32K-64K Context", "models": [ ("Google: LearnLM 1.5 Pro Experimental", "google/learnlm-1.5-pro-experimental:free", 40960), ("Qwen: QwQ 32B", "qwen/qwq-32b:free", 40000), ("Google: Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp-1219:free", 40000), ("Bytedance: UI-TARS 72B", "bytedance-research/ui-tars-72b:free", 32768), ("Qwerky 72b", "featherless/qwerky-72b:free", 32768), ("OlympicCoder 7B", "open-r1/olympiccoder-7b:free", 32768), ("OlympicCoder 32B", "open-r1/olympiccoder-32b:free", 32768), ("Google: Gemma 3 1B", "google/gemma-3-1b-it:free", 32768), ("Reka: Flash 3", "rekaai/reka-flash-3:free", 32768), ("Dolphin3.0 R1 Mistral 24B", "cognitivecomputations/dolphin3.0-r1-mistral-24b:free", 32768), ("Dolphin3.0 Mistral 24B", "cognitivecomputations/dolphin3.0-mistral-24b:free", 32768), ("Mistral: Mistral Small 3", "mistralai/mistral-small-24b-instruct-2501:free", 32768), ("Qwen2.5 Coder 32B Instruct", "qwen/qwen-2.5-coder-32b-instruct:free", 32768), ("Qwen2.5 72B Instruct", "qwen/qwen-2.5-72b-instruct:free", 32768), ]}, # 8K-32K Context Models {"category": "8K-32K Context", "models": [ ("Meta: Llama 3.2 3B Instruct", "meta-llama/llama-3.2-3b-instruct:free", 20000), ("Qwen: QwQ 32B Preview", "qwen/qwq-32b-preview:free", 16384), ("DeepSeek: R1 Distill Qwen 32B", "deepseek/deepseek-r1-distill-qwen-32b:free", 16000), ("Qwen: Qwen2.5 VL 32B Instruct", "qwen/qwen2.5-vl-32b-instruct:free", 8192), ("Moonshot AI: Moonlight 16B A3B Instruct", "moonshotai/moonlight-16b-a3b-instruct:free", 8192), ("DeepSeek: R1 Distill Llama 70B", "deepseek/deepseek-r1-distill-llama-70b:free", 8192), ("Qwen 2 7B Instruct", "qwen/qwen-2-7b-instruct:free", 8192), ("Google: Gemma 2 9B", "google/gemma-2-9b-it:free", 8192), ("Mistral: Mistral 7B Instruct", "mistralai/mistral-7b-instruct:free", 8192), ("Microsoft: Phi-3 Mini 128K Instruct", "microsoft/phi-3-mini-128k-instruct:free", 8192), ("Microsoft: Phi-3 Medium 128K Instruct", "microsoft/phi-3-medium-128k-instruct:free", 8192), ("Meta: Llama 3 8B Instruct", "meta-llama/llama-3-8b-instruct:free", 8192), ("OpenChat 3.5 7B", "openchat/openchat-7b:free", 8192), ("Meta: Llama 3.3 70B Instruct", "meta-llama/llama-3.3-70b-instruct:free", 8000), ]}, # <8K Context Models {"category": "4K Context", "models": [ ("AllenAI: Molmo 7B D", "allenai/molmo-7b-d:free", 4096), ("Rogue Rose 103B v0.2", "sophosympatheia/rogue-rose-103b-v0.2:free", 4096), ("Toppy M 7B", "undi95/toppy-m-7b:free", 4096), ("Hugging Face: Zephyr 7B", "huggingfaceh4/zephyr-7b-beta:free", 4096), ("MythoMax 13B", "gryphe/mythomax-l2-13b:free", 4096), ]}, # Vision-capable Models {"category": "Vision Models", "models": [ ("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000), ("Google: Gemini 2.0 Flash Thinking Experimental 01-21", "google/gemini-2.0-flash-thinking-exp:free", 1048576), ("Google: Gemini Flash 2.0 Experimental", "google/gemini-2.0-flash-exp:free", 1048576), ("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000), ("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 1000000), ("Google: Gemma 3 4B", "google/gemma-3-4b-it:free", 131072), ("Google: Gemma 3 12B", "google/gemma-3-12b-it:free", 131072), ("Qwen: Qwen2.5 VL 72B Instruct", "qwen/qwen2.5-vl-72b-instruct:free", 131072), ("Meta: Llama 3.2 11B Vision Instruct", "meta-llama/llama-3.2-11b-vision-instruct:free", 131072), ("Mistral: Mistral Small 3.1 24B", "mistralai/mistral-small-3.1-24b-instruct:free", 96000), ("Google: Gemma 3 27B", "google/gemma-3-27b-it:free", 96000), ("Qwen: Qwen2.5 VL 3B Instruct", "qwen/qwen2.5-vl-3b-instruct:free", 64000), ("Qwen: Qwen2.5-VL 7B Instruct", "qwen/qwen-2.5-vl-7b-instruct:free", 64000), ("Google: LearnLM 1.5 Pro Experimental", "google/learnlm-1.5-pro-experimental:free", 40960), ("Google: Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp-1219:free", 40000), ("Bytedance: UI-TARS 72B", "bytedance-research/ui-tars-72b:free", 32768), ("Google: Gemma 3 1B", "google/gemma-3-1b-it:free", 32768), ("Qwen: Qwen2.5 VL 32B Instruct", "qwen/qwen2.5-vl-32b-instruct:free", 8192), ("AllenAI: Molmo 7B D", "allenai/molmo-7b-d:free", 4096), ]}, ] # Flatten model list for easy searching ALL_MODELS = [] for category in MODELS: for model in category["models"]: if model not in ALL_MODELS: # Avoid duplicates ALL_MODELS.append(model) # Helper functions moved to the top to avoid undefined references def filter_models(search_term): """Filter models based on search term""" if not search_term: return [model[0] for model in ALL_MODELS], ALL_MODELS[0][0] filtered_models = [model[0] for model in ALL_MODELS if search_term.lower() in model[0].lower()] if filtered_models: return filtered_models, filtered_models[0] else: return [model[0] for model in ALL_MODELS], ALL_MODELS[0][0] def update_context_display(model_name): """Update context size display for the selected model""" for model in ALL_MODELS: if model[0] == model_name: _, _, context_size = model context_formatted = f"{context_size:,}" return f"{context_formatted} tokens" return "Unknown" def update_model_info(model_name): """Generate HTML info display for the selected model""" for model in ALL_MODELS: if model[0] == model_name: name, model_id, context_size = model # Check if this is a vision model is_vision_model = False for cat in MODELS: if cat["category"] == "Vision Models": if any(m[0] == model_name for m in cat["models"]): is_vision_model = True break vision_badge = 'Vision' if is_vision_model else '' return f"""
Model ID: {model_id}
Context Size: {context_size:,} tokens
Provider: {model_id.split('/')[0]}
{f'Features: Supports image understanding
' if is_vision_model else ''}Model information not available
" def update_category_models_ui(category): """Completely regenerate the models dropdown based on selected category""" for cat in MODELS: if cat["category"] == category: model_names = [model[0] for model in cat["models"]] if model_names: # Return a completely new dropdown component return gr.Dropdown( choices=model_names, value=model_names[0], label="Models in Category", allow_custom_value=True ) # Return empty dropdown if no models found return gr.Dropdown( choices=[], value=None, label="Models in Category", allow_custom_value=True ) def encode_image_to_base64(image_path): """Encode an image file to base64 string""" try: if isinstance(image_path, str): # File path as string with open(image_path, "rb") as image_file: encoded_string = base64.b64encode(image_file.read()).decode('utf-8') file_extension = image_path.split('.')[-1].lower() mime_type = f"image/{file_extension}" if file_extension in ["jpg", "jpeg"]: mime_type = "image/jpeg" elif file_extension == "png": mime_type = "image/png" elif file_extension == "webp": mime_type = "image/webp" return f"data:{mime_type};base64,{encoded_string}" elif hasattr(image_path, 'name'): # Handle Gradio file objects directly with open(image_path.name, "rb") as image_file: encoded_string = base64.b64encode(image_file.read()).decode('utf-8') file_extension = image_path.name.split('.')[-1].lower() mime_type = f"image/{file_extension}" if file_extension in ["jpg", "jpeg"]: mime_type = "image/jpeg" elif file_extension == "png": mime_type = "image/png" elif file_extension == "webp": mime_type = "image/webp" return f"data:{mime_type};base64,{encoded_string}" else: # Handle file object or other types logger.error(f"Unsupported image type: {type(image_path)}") return None except Exception as e: logger.error(f"Error encoding image: {str(e)}") return None def extract_text_from_file(file_path): """Extract text from various file types""" try: file_extension = file_path.split('.')[-1].lower() if file_extension == 'pdf': if PyPDF2 is not None: text = "" with open(file_path, 'rb') as file: pdf_reader = PyPDF2.PdfReader(file) for page_num in range(len(pdf_reader.pages)): page = pdf_reader.pages[page_num] text += page.extract_text() + "\n\n" return text else: return "PDF processing is not available (PyPDF2 not installed)" elif file_extension == 'md': with open(file_path, 'r', encoding='utf-8') as file: return file.read() elif file_extension == 'txt': with open(file_path, 'r', encoding='utf-8') as file: return file.read() else: return f"Unsupported file type: {file_extension}" except Exception as e: logger.error(f"Error extracting text from file: {str(e)}") return f"Error processing file: {str(e)}" def prepare_message_with_media(text, images=None, documents=None): """Prepare a message with text, images, and document content""" # If no media, return text only if not images and not documents: return text # Start with text content if documents and len(documents) > 0: # If there are documents, append their content to the text document_texts = [] for doc in documents: if doc is None: continue # Make sure to handle file objects properly doc_path = doc.name if hasattr(doc, 'name') else doc doc_text = extract_text_from_file(doc_path) if doc_text: document_texts.append(doc_text) # Add document content to text if document_texts: if not text: text = "Please analyze these documents:" else: text = f"{text}\n\nDocument content:\n\n" text += "\n\n".join(document_texts) # If no images, return text only if not images: return text # If we have images, create a multimodal content array content = [{"type": "text", "text": text}] # Add images if any if images: # Check if images is a list of image paths or file objects if isinstance(images, list): for img in images: if img is None: continue encoded_image = encode_image_to_base64(img) if encoded_image: content.append({ "type": "image_url", "image_url": {"url": encoded_image} }) else: # For single image or Gallery component logger.warning(f"Images is not a list: {type(images)}") # Try to handle as single image encoded_image = encode_image_to_base64(images) if encoded_image: content.append({ "type": "image_url", "image_url": {"url": encoded_image} }) return content def format_to_message_dict(history): """Convert history to proper message format""" messages = [] for pair in history: if len(pair) == 2: human, ai = pair if human: messages.append({"role": "user", "content": human}) if ai: messages.append({"role": "assistant", "content": ai}) return messages def process_uploaded_images(files): """Process uploaded image files""" file_paths = [] for file in files: if hasattr(file, 'name'): file_paths.append(file.name) return file_paths def get_model_info(model_choice): """Get model ID and context size from model name""" for name, model_id_value, ctx_size in ALL_MODELS: if name == model_choice: return model_id_value, ctx_size return None, 0 def get_models_for_category(category): """Get model list for a specific category""" for cat in MODELS: if cat["category"] == category: return [model[0] for model in cat["models"]] return [] def call_openrouter_api(payload): """Make a call to OpenRouter API with error handling""" try: response = requests.post( "https://openrouter.ai/api/v1/chat/completions", headers={ "Content-Type": "application/json", "Authorization": f"Bearer {OPENROUTER_API_KEY}", "HTTP-Referer": "https://huggingface.co/spaces/cstr/CrispChat" }, json=payload, timeout=180 # Longer timeout for document processing ) return response except requests.RequestException as e: logger.error(f"API request error: {str(e)}") raise e def extract_ai_response(result): """Extract AI response from OpenRouter API result""" try: if "choices" in result and len(result["choices"]) > 0: if "message" in result["choices"][0]: # Handle reasoning field if available message = result["choices"][0]["message"] if message.get("reasoning") and not message.get("content"): # Extract response from reasoning if there's no content reasoning = message.get("reasoning") # If reasoning contains the actual response, find it lines = reasoning.strip().split('\n') for line in lines: if line and not line.startswith('I should') and not line.startswith('Let me'): return line.strip() # If no clear response found, return the first non-empty line for line in lines: if line.strip(): return line.strip() return message.get("content", "") elif "delta" in result["choices"][0]: return result["choices"][0]["delta"].get("content", "") logger.error(f"Unexpected response structure: {result}") return "Error: Could not extract response from API result" except Exception as e: logger.error(f"Error extracting AI response: {str(e)}") return f"Error: {str(e)}" # streaming code: def streaming_handler(response, chatbot, message_idx): try: # First add the user message if needed if len(chatbot) == message_idx: chatbot.append({"role": "user", "content": message}) chatbot.append({"role": "assistant", "content": ""}) for line in response.iter_lines(): if not line: continue line = line.decode('utf-8') if not line.startswith('data: '): continue data = line[6:] if data.strip() == '[DONE]': break try: chunk = json.loads(data) if "choices" in chunk and len(chunk["choices"]) > 0: delta = chunk["choices"][0].get("delta", {}) if "content" in delta and delta["content"]: # Update the last message content chatbot[-1]["content"] += delta["content"] yield chatbot except json.JSONDecodeError: logger.error(f"Failed to parse JSON from chunk: {data}") except Exception as e: logger.error(f"Error in streaming handler: {str(e)}") # Add error message to the current response if len(chatbot) > message_idx: chatbot[-1]["content"] += f"\n\nError during streaming: {str(e)}" yield chatbot def ask_ai(message, history, model_choice, temperature, max_tokens, top_p, frequency_penalty, presence_penalty, repetition_penalty, top_k, min_p, seed, top_a, stream_output, response_format, images, documents, reasoning_effort, system_message, transforms): """Redesigned AI query function with proper error handling for Gradio 4.44.1""" # Validate input if not message.strip() and not images and not documents: return history # Get model information model_id, context_size = get_model_info(model_choice) if not model_id: logger.error(f"Model not found: {model_choice}") history.append((message, f"Error: Model '{model_choice}' not found")) return history # Copy history to new list to avoid modifying the original chat_history = list(history) # Create messages from chat history messages = format_to_message_dict(chat_history) # Add system message if provided if system_message and system_message.strip(): # Remove any existing system message messages = [msg for msg in messages if msg.get("role") != "system"] # Add new system message at the beginning messages.insert(0, {"role": "system", "content": system_message.strip()}) # Prepare message with images and documents if any content = prepare_message_with_media(message, images, documents) # Add current message messages.append({"role": "user", "content": content}) # Build the payload with all parameters payload = { "model": model_id, "messages": messages, "temperature": temperature, "max_tokens": max_tokens, "top_p": top_p, "frequency_penalty": frequency_penalty, "presence_penalty": presence_penalty, "stream": stream_output } # Add optional parameters if set if repetition_penalty != 1.0: payload["repetition_penalty"] = repetition_penalty if top_k > 0: payload["top_k"] = top_k if min_p > 0: payload["min_p"] = min_p if seed > 0: payload["seed"] = seed if top_a > 0: payload["top_a"] = top_a # Add response format if JSON is requested if response_format == "json_object": payload["response_format"] = {"type": "json_object"} # Add reasoning if selected if reasoning_effort != "none": payload["reasoning"] = { "effort": reasoning_effort } # Add transforms if selected if transforms: payload["transforms"] = transforms # Log the request logger.info(f"Sending request to model: {model_id}") logger.info(f"Request payload: {json.dumps(payload, default=str)}") try: # Call OpenRouter API response = call_openrouter_api(payload) logger.info(f"Response status: {response.status_code}") # Handle streaming response if stream_output and response.status_code == 200: # Add empty response slot to history chat_history.append([message, ""]) # Set up generator for streaming updates def streaming_generator(): for updated_history in streaming_handler(response, chat_history, len(chat_history) - 1): yield updated_history return streaming_generator() # Handle normal response elif response.status_code == 200: result = response.json() logger.info(f"Response content: {result}") # Extract AI response ai_response = extract_ai_response(result) # Log token usage if available if "usage" in result: logger.info(f"Token usage: {result['usage']}") # Add response to history chat_history.append({"role": "user", "content": message}) chat_history.append({"role": "assistant", "content": ai_response}) return chat_history # Handle error response else: error_message = f"Error: Status code {response.status_code}" try: response_data = response.json() error_message += f"\n\nDetails: {json.dumps(response_data, indent=2)}" except: error_message += f"\n\nResponse: {response.text}" logger.error(error_message) chat_history.append([message, error_message]) return chat_history except Exception as e: error_message = f"Error: {str(e)}" logger.error(f"Exception during API call: {error_message}") chat_history.append([message, error_message]) return chat_history def clear_chat(): """Reset all inputs""" return [], "", [], [], 0.7, 1000, 0.8, 0.0, 0.0, 1.0, 40, 0.1, 0, 0.0, False, "default", "none", "", [] def create_app(): """Create the Gradio application with improved UI and response handling""" with gr.Blocks( title="CrispChat - AI Assistant", css=""" .context-size { font-size: 0.9em; color: #666; margin-left: 10px; } footer { display: none !important; } .model-selection-row { display: flex; align-items: center; } .parameter-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 10px; } .vision-badge { background-color: #4CAF50; color: white; padding: 3px 6px; border-radius: 3px; font-size: 0.8em; margin-left: 5px; } """ ) as demo: gr.Markdown(""" # CrispChat AI Assistant Chat with various AI models from OpenRouter with support for images and documents. """) with gr.Row(): with gr.Column(scale=2): # Chatbot interface - properly configured for Gradio 4.44.1 chatbot = gr.Chatbot( height=500, show_copy_button=True, show_label=False, avatar_images=(None, "https://upload.wikimedia.org/wikipedia/commons/0/04/ChatGPT_logo.svg"), type="messages", # Explicitly set the type to messages elem_id="chat-window" # Add elem_id for debugging ) # Debug output for development debug_output = gr.JSON( label="Debug Output (Hidden in Production)", visible=False ) with gr.Row(): message = gr.Textbox( placeholder="Type your message here...", label="Message", lines=2, elem_id="message-input", # Add elem_id for debugging scale=4 ) with gr.Row(): with gr.Column(scale=3): submit_btn = gr.Button("Send", variant="primary", elem_id="send-btn") with gr.Column(scale=1): clear_btn = gr.Button("Clear Chat", variant="secondary") with gr.Row(): # Image upload with gr.Accordion("Upload Images (for vision models)", open=False): images = gr.File( label="Uploaded Images", file_types=["image"], file_count="multiple" ) image_upload_btn = gr.UploadButton( label="Upload Images", file_types=["image"], file_count="multiple" ) # Document upload with gr.Accordion("Upload Documents (PDF, MD, TXT)", open=False): documents = gr.File( label="Uploaded Documents", file_types=[".pdf", ".md", ".txt"], file_count="multiple" ) with gr.Column(scale=1): with gr.Group(): gr.Markdown("### Model Selection") with gr.Row(elem_classes="model-selection-row"): model_search = gr.Textbox( placeholder="Search models...", label="", show_label=False ) with gr.Row(elem_classes="model-selection-row"): model_choice = gr.Dropdown( [model[0] for model in ALL_MODELS], value=ALL_MODELS[0][0], label="Model", elem_id="model-choice", elem_classes="model-choice", allow_custom_value=True ) context_display = gr.Textbox( value=update_context_display(ALL_MODELS[0][0]), label="Context", interactive=False, elem_classes="context-size" ) # Model category selection with gr.Accordion("Browse by Category", open=False): model_categories = gr.Dropdown( [category["category"] for category in MODELS], label="Categories", value=MODELS[0]["category"] ) # Create a container for the category models dropdown with gr.Column(visible=True, elem_id="category-models-container") as category_models_container: # Create a hidden text component to store model choices as JSON category_model_choices = gr.Text(visible=False) # Create the dropdown with no initial choices category_models = gr.Dropdown( [], label="Models in Category", value=None, elem_classes="category-models", allow_custom_value=True ) with gr.Accordion("Generation Parameters", open=False): with gr.Group(elem_classes="parameter-grid"): temperature = gr.Slider( minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature" ) max_tokens = gr.Slider( minimum=100, maximum=4000, value=1000, step=100, label="Max Tokens" ) top_p = gr.Slider( minimum=0.1, maximum=1.0, value=0.8, step=0.1, label="Top P" ) frequency_penalty = gr.Slider( minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty" ) presence_penalty = gr.Slider( minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Presence Penalty" ) reasoning_effort = gr.Radio( ["none", "low", "medium", "high"], value="none", label="Reasoning Effort" ) with gr.Accordion("Advanced Options", open=False): with gr.Row(): with gr.Column(): repetition_penalty = gr.Slider( minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Repetition Penalty" ) top_k = gr.Slider( minimum=1, maximum=100, value=40, step=1, label="Top K" ) min_p = gr.Slider( minimum=0.0, maximum=1.0, value=0.1, step=0.05, label="Min P" ) with gr.Column(): seed = gr.Number( value=0, label="Seed (0 for random)", precision=0 ) top_a = gr.Slider( minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Top A" ) stream_output = gr.Checkbox( label="Stream Output", value=False ) with gr.Row(): response_format = gr.Radio( ["default", "json_object"], value="default", label="Response Format" ) gr.Markdown(""" * **json_object**: Forces the model to respond with valid JSON only. * Only available on certain models - check model support on OpenRouter. """) # Custom instructing options with gr.Accordion("Custom Instructions", open=False): system_message = gr.Textbox( placeholder="Enter a system message to guide the model's behavior...", label="System Message", lines=3 ) transforms = gr.CheckboxGroup( ["prompt_optimize", "prompt_distill", "prompt_compress"], label="Prompt Transforms (OpenRouter specific)" ) gr.Markdown(""" * **prompt_optimize**: Improve prompt for better responses. * **prompt_distill**: Compress prompt to use fewer tokens without changing meaning. * **prompt_compress**: Aggressively compress prompt to fit larger contexts. """) # Add a model information section with gr.Accordion("About Selected Model", open=False): model_info_display = gr.HTML( value=update_model_info(ALL_MODELS[0][0]) ) # Add usage instructions with gr.Accordion("Usage Instructions", open=False): gr.Markdown(""" ## Basic Usage 1. Type your message in the input box 2. Select a model from the dropdown 3. Click "Send" or press Enter ## Working with Files - **Images**: Upload images to use with vision-capable models - **Documents**: Upload PDF, Markdown, or text files to analyze their content ## Advanced Parameters - **Temperature**: Controls randomness (higher = more creative, lower = more deterministic) - **Max Tokens**: Maximum length of the response - **Top P**: Nucleus sampling threshold (higher = consider more tokens) - **Reasoning Effort**: Some models can show their reasoning process ## Tips - For code generation, use models like Qwen Coder - For visual tasks, choose vision-capable models - For long context, check the context window size next to the model name """) # Add a footer with version info footer_md = gr.Markdown(""" --- ### CrispChat v1.1 Built with ❤️ using Gradio 4.44.1 and OpenRouter API | Context sizes shown next to model names """) # Define a test function for debugging def test_chatbot(test_message): """Simple test function to verify chatbot updates work""" logger.info(f"Test function called with: {test_message}") return [[test_message, "This is a test response to verify the chatbot is working"]] # Connect model search to dropdown filter model_search.change( fn=filter_models, inputs=model_search, outputs=[model_choice, model_choice] ) # Update context display when model changes model_choice.change( fn=update_context_display, inputs=model_choice, outputs=context_display ) # Update model info when model changes model_choice.change( fn=update_model_info, inputs=model_choice, outputs=model_info_display ) # Update model list when category changes model_categories.change( fn=lambda cat: json.dumps(get_models_for_category(cat)), inputs=model_categories, outputs=category_model_choices ) # Update main model choice when category model is selected category_models.change( fn=lambda x: x, inputs=category_models, outputs=model_choice ) category_model_choices.change( fn=None, inputs=None, outputs=None, _js=""" function(choices_json) { // Parse JSON string to array const choices = JSON.parse(choices_json); // Find the dropdown element const dropdown = document.querySelector('.category-models select'); // Clear existing options dropdown.innerHTML = ''; // Add new options choices.forEach(model => { const option = document.createElement('option'); option.value = model; option.textContent = model; dropdown.appendChild(option); }); // Set the first option as selected if available if (choices.length > 0) { dropdown.value = choices[0]; // Update the main model dropdown const mainDropdown = document.querySelector('.model-choice select'); mainDropdown.value = choices[0]; // Trigger change events dropdown.dispatchEvent(new Event('change', { bubbles: true })); mainDropdown.dispatchEvent(new Event('change', { bubbles: true })); } } """ ) # Function to initialize the category models dropdown def init_category_models(): initial_category = MODELS[0]["category"] initial_models = get_models_for_category(initial_category) return json.dumps(initial_models) # Set initial choices for category models dropdown category_model_choices.value = init_category_models() # Process uploaded images image_upload_btn.upload( fn=lambda files: files, inputs=image_upload_btn, outputs=images ) # Set up events for the submit button submit_btn.click( fn=ask_ai, inputs=[ message, chatbot, model_choice, temperature, max_tokens, top_p, frequency_penalty, presence_penalty, repetition_penalty, top_k, min_p, seed, top_a, stream_output, response_format, images, documents, reasoning_effort, system_message, transforms ], outputs=chatbot, show_progress="minimal", ).then( fn=lambda: "", # Clear message box after sending inputs=None, outputs=message ) # Set up events for message submission (pressing Enter) message.submit( fn=ask_ai, inputs=[ message, chatbot, model_choice, temperature, max_tokens, top_p, frequency_penalty, presence_penalty, repetition_penalty, top_k, min_p, seed, top_a, stream_output, response_format, images, documents, reasoning_effort, system_message, transforms ], outputs=chatbot, show_progress="minimal", ).then( fn=lambda: "", # Clear message box after sending inputs=None, outputs=message ) # Set up events for the clear button clear_btn.click( fn=clear_chat, inputs=[], outputs=[ chatbot, message, images, documents, temperature, max_tokens, top_p, frequency_penalty, presence_penalty, repetition_penalty, top_k, min_p, seed, top_a, stream_output, response_format, reasoning_effort, system_message, transforms ] ) # Debug button (hidden in production) debug_btn = gr.Button("Debug Chatbot", visible=False) debug_btn.click( fn=test_chatbot, inputs=[message], outputs=[chatbot] ) # Enable debugging for key components # gr.debug(chatbot) return demo # Launch the app if __name__ == "__main__": # Check API key before starting if not OPENROUTER_API_KEY: logger.warning("WARNING: OPENROUTER_API_KEY environment variable is not set") print("WARNING: OpenRouter API key not found. Set OPENROUTER_API_KEY environment variable.") demo = create_app() demo.launch( server_name="0.0.0.0", server_port=7860, debug=True, show_error=True )