# CrispChat
#
# Sleeping
#
# File size: 37,277 Bytes

import os
import logging
import json
import base64
from io import BytesIO

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Graceful imports with fallbacks
try:
    import gradio as gr
except ImportError:
    logger.error("Gradio not found. Please install with 'pip install gradio'")
    raise

try:
    import requests
except ImportError:
    logger.error("Requests not found. Please install with 'pip install requests'")
    raise

# Optional libraries with fallbacks
try:
    from PIL import Image
    PIL_AVAILABLE = True
except ImportError:
    logger.warning("PIL not found. Image processing functionality will be limited.")
    PIL_AVAILABLE = False

# PDF processing
PDF_AVAILABLE = False
try:
    import PyPDF2
    PDF_AVAILABLE = True
except ImportError:
    logger.warning("PyPDF2 not found. Attempting to use pdfminer.six as fallback...")
    try:
        from pdfminer.high_level import extract_text as pdf_extract_text
        PDF_AVAILABLE = True
        
        # Create a wrapper to mimic PyPDF2 functionality
        def extract_text_from_pdf(file_path):
            return pdf_extract_text(file_path)
    except ImportError:
        logger.warning("No PDF processing libraries found. PDF support will be disabled.")

# Markdown processing
MD_AVAILABLE = False
try:
    import markdown
    MD_AVAILABLE = True
except ImportError:
    logger.warning("Markdown not found. Attempting to use markdownify as fallback...")
    try:
        from markdownify import markdownify as md
        MD_AVAILABLE = True
        
        # Create a wrapper for markdown
        def convert_markdown(text):
            return md(text)
    except ImportError:
        logger.warning("No Markdown processing libraries found. Markdown support will be limited.")

# API key
# Read once at import time; an unset variable yields an empty string, which is
# then sent as an empty bearer token by the request code.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Model list with context sizes - organized by capability
# Every entry is a (display name, model id, context size) tuple. The display
# name is the key used for all lookups (dropdown value -> model id), so it must
# be unique per model id; the same tuple may appear in several categories.
MODELS = [
    # Vision Models
    {"category": "Vision Models", "models": [
        ("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000),
        ("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000),
        ("Google: Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp:free", 1048576),
        ("Google: Gemini Flash 2.0 Experimental", "google/gemini-2.0-flash-exp:free", 1048576),
        ("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 1000000),
        # Suffix added so this entry does not share a display name with the
        # other Flash Thinking model above; with identical names the name-based
        # lookup could only ever resolve to the first one.
        ("Google: Gemini 2.0 Flash Thinking Experimental (1219)", "google/gemini-2.0-flash-thinking-exp-1219:free", 40000),
        ("Meta: Llama 3.2 11B Vision Instruct", "meta-llama/llama-3.2-11b-vision-instruct:free", 131072),
        ("Qwen: Qwen2.5 VL 72B Instruct", "qwen/qwen2.5-vl-72b-instruct:free", 131072),
        ("Qwen: Qwen2.5 VL 32B Instruct", "qwen/qwen2.5-vl-32b-instruct:free", 8192),
        ("Qwen: Qwen2.5 VL 7B Instruct", "qwen/qwen-2.5-vl-7b-instruct:free", 64000),
        ("Qwen: Qwen2.5 VL 3B Instruct", "qwen/qwen2.5-vl-3b-instruct:free", 64000),
        ("Bytedance: UI-TARS 72B", "bytedance-research/ui-tars-72b:free", 32768),
    ]},
    
    # Largest Context Models
    {"category": "Largest Context (500K+)", "models": [
        ("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000),
        ("Google: Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp:free", 1048576),
        ("Google: Gemini Flash 2.0 Experimental", "google/gemini-2.0-flash-exp:free", 1048576),
        ("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000),
        ("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 1000000),
    ]},
    
    # High-performance Models
    {"category": "High Performance", "models": [
        ("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000),
        ("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000),
        ("Google: Gemma 3 27B", "google/gemma-3-27b-it:free", 96000),
        ("Mistral: Mistral Small 3.1 24B", "mistralai/mistral-small-3.1-24b-instruct:free", 96000),
        ("Qwen: Qwen2.5 VL 72B Instruct", "qwen/qwen2.5-vl-72b-instruct:free", 131072),
    ]},
    
    # Mid-size Models
    {"category": "Mid-size Models", "models": [
        ("Google: Gemma 3 12B", "google/gemma-3-12b-it:free", 131072),
        ("Google: Gemma 3 4B", "google/gemma-3-4b-it:free", 131072),
        ("Google: LearnLM 1.5 Pro Experimental", "google/learnlm-1.5-pro-experimental:free", 40960),
        ("Meta: Llama 3.1 8B Instruct", "meta-llama/llama-3.1-8b-instruct:free", 131072),
    ]},
    
    # Smaller Models
    {"category": "Smaller Models", "models": [
        ("Google: Gemma 3 1B", "google/gemma-3-1b-it:free", 32768),
        ("Qwen: Qwen2.5 VL 3B Instruct", "qwen/qwen2.5-vl-3b-instruct:free", 64000),
        ("AllenAI: Molmo 7B D", "allenai/molmo-7b-d:free", 4096),
    ]},
    
    # Sorting Options
    # Not models: (label, sort key, 0) triples; the 0 is a placeholder so the
    # tuples have the same shape as real model entries.
    {"category": "Sort By", "models": [
        ("Context: High to Low", "sort_context_desc", 0),
        ("Context: Low to High", "sort_context_asc", 0),
        ("Newest", "sort_newest", 0),
        ("Throughput: High to Low", "sort_throughput", 0),
        ("Latency: Low to High", "sort_latency", 0),
    ]},
]

# Flatten model list for easy searching
# A model listed in several categories is kept once (identical tuples compare equal).
ALL_MODELS = []
for category in MODELS:
    if category["category"] != "Sort By":  # Skip the sorting options
        for model in category["models"]:
            if model not in ALL_MODELS:
                ALL_MODELS.append(model)

# Sort models by context size (descending) by default
ALL_MODELS.sort(key=lambda x: x[2], reverse=True)

def format_to_message_dict(history):
    """Flatten chat history pairs into a list of role/content message dicts.

    Each two-element history item is read as (user text, assistant text);
    falsy halves are dropped and items of any other length are ignored.
    """
    roles = ("user", "assistant")
    messages = []
    for turn in history:
        if len(turn) != 2:
            continue
        for role, content in zip(roles, turn):
            if content:
                messages.append({"role": role, "content": content})
    return messages

def encode_image_to_base64(image_path):
    """Encode an image as a ``data:`` URL with a base64 payload.

    Args:
        image_path: One of
            * a file path (``str``, ``bytes`` or ``os.PathLike``) - the file is
              read as-is and the MIME type is chosen from its extension;
            * an object with a ``save(buffer, format=...)`` method (e.g. a
              Pillow image) - it is re-encoded as PNG;
            * a binary file-like object with ``read()`` - its bytes are used
              unchanged.

    Returns:
        The data URL string, or ``None`` if the input type is unsupported or
        any error occurs (the error is logged, never raised).
    """
    # Extensions with a known MIME type; anything else falls back to JPEG.
    mime_by_extension = {
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "webp": "image/webp",
        "gif": "image/gif",
    }

    try:
        if isinstance(image_path, (str, bytes, os.PathLike)):
            path = os.fsdecode(image_path)
            with open(path, "rb") as image_file:
                encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
            # splitext only looks at the final path component, so dots in
            # directory names cannot be mistaken for an extension.
            file_extension = os.path.splitext(path)[1].lstrip('.').lower()
            mime_type = mime_by_extension.get(file_extension, "image/jpeg")
            return f"data:{mime_type};base64,{encoded_string}"

        if hasattr(image_path, 'save'):
            # Image object: serialise to PNG in memory.
            buffered = BytesIO()
            image_path.save(buffered, format="PNG")
            raw_bytes = buffered.getvalue()
        elif hasattr(image_path, 'read'):
            # Plain file-like object: no imaging library needed to read it.
            # NOTE(review): the bytes are labelled image/png without being
            # inspected - confirm callers only pass PNG data this way.
            raw_bytes = image_path.read()
        else:
            logger.error(
                f"Cannot process image: unsupported input type {type(image_path).__name__}"
            )
            return None

        encoded_string = base64.b64encode(raw_bytes).decode('utf-8')
        return f"data:image/png;base64,{encoded_string}"
    except Exception as e:
        logger.error(f"Error encoding image: {str(e)}")
        return None

def extract_text_from_file(file_path):
    """Extract text from a PDF, Markdown or plain-text file.

    Args:
        file_path: A file path (``str``, ``bytes`` or ``os.PathLike``), or an
            uploaded-file object exposing its on-disk path as ``.name``
            (the upload handler in this file reads ``.name`` the same way).

    Returns:
        The extracted text. Problems are reported in the returned string
        rather than raised: an "Unsupported file type" message, a
        "PDF support not available" message, or "Error processing file: ...".
    """
    try:
        if isinstance(file_path, (str, bytes, os.PathLike)):
            path = os.fsdecode(file_path)
        else:
            # Not a path: expect a file wrapper carrying its path in `.name`.
            path = os.fsdecode(file_path.name)

        file_extension = os.path.splitext(path)[1].lstrip('.').lower()

        if file_extension == 'pdf':
            if not PDF_AVAILABLE:
                return "PDF support not available. Please install PyPDF2 or pdfminer.six."
            if 'PyPDF2' in globals():
                with open(path, 'rb') as file:
                    pdf_reader = PyPDF2.PdfReader(file)
                    # extract_text() can yield None for pages without a text
                    # layer; treat those as empty instead of crashing.
                    page_texts = [(page.extract_text() or "") for page in pdf_reader.pages]
                return "".join(page_text + "\n\n" for page_text in page_texts)
            # Use pdfminer fallback
            return extract_text_from_pdf(path)

        if file_extension in ('md', 'txt'):
            # Markdown is passed through as raw text, exactly like .txt.
            with open(path, 'r', encoding='utf-8') as file:
                return file.read()

        return f"Unsupported file type: {file_extension or '(none)'}"

    except Exception as e:
        logger.error(f"Error extracting text from file: {str(e)}")
        return f"Error processing file: {str(e)}"

def prepare_message_with_media(text, images=None, documents=None):
    """Build the content of a user message from text, images and documents.

    Args:
        text: The user's message text (may be empty).
        images: Optional iterable of images; ``None`` items are skipped and
            each remaining item is passed to ``encode_image_to_base64``.
        documents: Optional iterable of documents; ``None`` items are skipped
            and each remaining item is passed to ``extract_text_from_file``.

    Returns:
        * ``text`` unchanged when there is no media;
        * a string (text plus extracted document text) when there are only
          documents;
        * a list of content parts (one ``text`` part followed by one
          ``image_url`` part per successfully encoded image) when there are
          images.
    """
    # If no media, return text only
    if not images and not documents:
        return text

    if documents:
        document_texts = []
        for doc in documents:
            if doc is None:
                continue
            doc_text = extract_text_from_file(doc)
            if doc_text:
                document_texts.append(doc_text)

        if document_texts:
            if text:
                header = f"{text}\n\nDocument content:"
            else:
                header = "Please analyze these documents:"
            # Always separate the header from the document text; previously the
            # default header was glued directly onto the first document.
            text = header + "\n\n" + "\n\n".join(document_texts)

        # If no images, return text only
        if not images:
            return text

    # From here on `images` is non-empty: build a multimodal content array.
    content = [{"type": "text", "text": text or "Please analyze these images:"}]

    for img in images:
        if img is None:
            continue

        encoded_image = encode_image_to_base64(img)
        # Images that failed to encode (None) are silently left out.
        if encoded_image:
            content.append({
                "type": "image_url",
                "image_url": {"url": encoded_image}
            })

    return content

def ask_ai(message, chatbot, model_choice, temperature, max_tokens, top_p, frequency_penalty, 
           presence_penalty, images, documents, reasoning_effort):
    """Send the conversation to OpenRouter and append the reply to the history.

    Returns a ``(chatbot, "")`` pair: the history (extended by one
    ``[message, reply-or-error]`` entry unless there was nothing to send) and
    an empty string.
    """
    # Nothing typed and nothing attached: leave the history untouched.
    if not (message.strip() or images or documents):
        return chatbot, ""

    # A sorting label was picked instead of a model.
    if model_choice.startswith("Sort By"):
        return chatbot + [[message, "Please select a model to chat with first."]], ""

    # Resolve the display name to its model id (first match wins).
    model_id = next(
        (candidate_id for name, candidate_id, _ctx in ALL_MODELS if name == model_choice),
        None,
    )
    if model_id is None:
        logger.error(f"Model not found: {model_choice}")
        return chatbot + [[message, "Error: Model not found"]], ""

    # Prior turns first, then the new user turn (with any media folded in).
    messages = format_to_message_dict(chatbot)
    messages.append({
        "role": "user",
        "content": prepare_message_with_media(message, images, documents),
    })

    try:
        logger.info(f"Sending request to model: {model_id}")

        payload = dict(
            model=model_id,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
        )

        # Optional parameters are only sent when they differ from the default.
        extras = [
            ("top_p", top_p, top_p < 1.0),
            ("frequency_penalty", frequency_penalty, frequency_penalty != 0),
            ("presence_penalty", presence_penalty, presence_penalty != 0),
            ("reasoning", {"effort": reasoning_effort}, reasoning_effort != "none"),
        ]
        payload.update({key: value for key, value, wanted in extras if wanted})

        logger.info(f"Request payload: {json.dumps(payload, default=str)}")

        request_headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "HTTP-Referer": "https://huggingface.co/spaces"
        }
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=request_headers,
            json=payload,
            timeout=120  # Longer timeout for document processing
        )

        logger.info(f"Response status: {response.status_code}")

        body_text = response.text
        logger.debug(f"Response body: {body_text}")

        if response.status_code != 200:
            failure = f"Error: Status code {response.status_code}\n\nResponse: {body_text}"
            chatbot = chatbot + [[message, failure]]
        else:
            result = response.json()
            first_choice = result.get("choices", [{}])[0]
            reply = first_choice.get("message", {}).get("content", "")
            chatbot = chatbot + [[message, reply]]

            # Token accounting is logged when the API reports it.
            if "usage" in result:
                logger.info(f"Token usage: {result['usage']}")
    except Exception as e:
        # Any failure (network, JSON, unexpected shape) becomes a chat entry.
        logger.error(f"Exception during API call: {str(e)}")
        chatbot = chatbot + [[message, f"Error: {str(e)}"]]

    return chatbot, ""

def clear_chat():
    """Return the ten reset values for a cleared chat.

    NOTE(review): positions presumably map to chat history, message, images,
    documents, temperature, max tokens, top-p, frequency penalty, presence
    penalty and reasoning effort - confirm against the handler's outputs.
    """
    reset_values = (
        [],
        "",
        [],
        [],
        0.7,
        1000,
        0.8,
        0.0,
        0.0,
        "none",
    )
    return reset_values

def apply_sort(sort_option):
    """Return a new list of ALL_MODELS ordered according to ``sort_option``.

    Unknown options (and "sort_context_desc") order by context size, largest
    first. ALL_MODELS itself is not modified.
    """
    def by_context(entry):
        return entry[2]

    def not_experimental(entry):
        # No timestamps are available; names containing "Experimental" sort
        # first as a rough stand-in for "newest".
        return "Experimental" not in entry[0]

    def digit_sum(entry):
        # No performance metrics are available; the sum of the digits in the
        # display name is used as a rough proxy for model size.
        return sum(int(ch) for ch in entry[0] if ch.isdigit())

    # sort key name -> (key function, descending?)
    strategies = {
        "sort_context_desc": (by_context, True),
        "sort_context_asc": (by_context, False),
        "sort_newest": (not_experimental, False),
        "sort_throughput": (digit_sum, False),
        "sort_latency": (digit_sum, False),
    }
    key_fn, descending = strategies.get(sort_option, (by_context, True))
    return sorted(ALL_MODELS, key=key_fn, reverse=descending)

def filter_models(search_term):
    """Build a dropdown update listing the models whose name contains ``search_term``.

    Matching is case-insensitive. An empty search, or one with no matches,
    restores the full list with the first model selected.
    """
    all_names = [model[0] for model in ALL_MODELS]

    if search_term:
        needle = search_term.lower()
        matches = [name for name in all_names if needle in name.lower()]
        if matches:
            return gr.Dropdown.update(choices=matches, value=matches[0])

    return gr.Dropdown.update(choices=all_names, value=ALL_MODELS[0][0])

def get_model_info(model_name):
    """Return the first ALL_MODELS entry whose display name equals ``model_name``, else None."""
    return next((entry for entry in ALL_MODELS if entry[0] == model_name), None)

def update_context_display(model_name):
    """Return the model's context size as text (e.g. "131,072 tokens"), or "Unknown"."""
    entry = get_model_info(model_name)
    if not entry:
        return "Unknown"
    _name, _model_id, context_size = entry
    return f"{context_size:,} tokens"

def update_models_from_sort(sort_option):
    """Build a dropdown update with the models re-ordered by the chosen sort label.

    ``sort_option`` is the human-readable label from the "Sort By" category;
    it is translated to its sort key and handed to ``apply_sort``. An unknown
    label falls back to the default ALL_MODELS order.
    """
    for group in MODELS:
        if group["category"] != "Sort By":
            continue
        for option in group["models"]:
            label, sort_key = option[0], option[1]
            if label != sort_option:
                continue
            ordered = apply_sort(sort_key)
            ordered_names = [entry[0] for entry in ordered]
            return gr.Dropdown.update(choices=ordered_names, value=ordered[0][0])

    # Label not recognised: keep the default ordering.
    default_names = [entry[0] for entry in ALL_MODELS]
    return gr.Dropdown.update(choices=default_names, value=ALL_MODELS[0][0])

# Create enhanced interface
# Layout: a single row with two columns. The wider left column (scale=2) holds
# the chat window, message box, Send/Clear buttons and the upload accordions;
# the right column (scale=1) holds model selection and all request parameters.
with gr.Blocks(css="""
    .context-size { 
        font-size: 0.9em;
        color: #666;
        margin-left: 10px;
    }
    footer { display: none !important; }
    .model-selection-row {
        display: flex;
        align-items: center;
    }
    .parameter-grid {
        display: grid;
        grid-template-columns: 1fr 1fr;
        gap: 10px;
    }
""") as demo:
    gr.Markdown("""
    # Vision AI Chat
    
    Chat with various AI vision models from OpenRouter with support for images and documents.
    """)
    
    with gr.Row():
        # Left column: conversation and attachments
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                height=500, 
                show_copy_button=True, 
                show_label=False,
                avatar_images=(None, "https://upload.wikimedia.org/wikipedia/commons/0/04/ChatGPT_logo.svg")
            )
            
            with gr.Row():
                message = gr.Textbox(
                    placeholder="Type your message here...",
                    label="Message",
                    lines=2
                )
            
            with gr.Row():
                with gr.Column(scale=3):
                    submit_btn = gr.Button("Send", variant="primary")
                
                with gr.Column(scale=1):
                    clear_btn = gr.Button("Clear Chat", variant="secondary")
            
            with gr.Row():
                # Image upload
                # The gallery shows what was uploaded through the button below
                # and is also what gets passed to the query function as `images`.
                with gr.Accordion("Upload Images", open=False):
                    images = gr.Gallery(
                        label="Uploaded Images", 
                        show_label=True,
                        columns=4, 
                        height="auto",
                        object_fit="contain"
                    )
                    
                    image_upload_btn = gr.UploadButton(
                        label="Upload Images",
                        file_types=["image"],
                        file_count="multiple"
                    )
                
                # Document upload
                with gr.Accordion("Upload Documents (PDF, MD, TXT)", open=False):
                    documents = gr.File(
                        label="Uploaded Documents",
                        file_types=[".pdf", ".md", ".txt"], 
                        file_count="multiple"
                    )
        
        # Right column: model selection and generation settings
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### Model Selection")
                
                with gr.Row(elem_classes="model-selection-row"):
                    model_search = gr.Textbox(
                        placeholder="Search models...",
                        label="",
                        show_label=False
                    )
                
                with gr.Row(elem_classes="model-selection-row"):
                    # Starts on the first entry of ALL_MODELS, i.e. the model
                    # with the largest context size after the default sort.
                    model_choice = gr.Dropdown(
                        [model[0] for model in ALL_MODELS],
                        value=ALL_MODELS[0][0],
                        label="Model"
                    )
                    context_display = gr.Textbox(
                        value=update_context_display(ALL_MODELS[0][0]),
                        label="Context",
                        interactive=False,
                        elem_classes="context-size"
                    )
                
                # Model category selection
                # Note: the category list is built from every MODELS group, so
                # it also contains the "Sort By" pseudo-category whose entries
                # are sort options rather than models.
                with gr.Accordion("Browse by Category", open=False):
                    model_categories = gr.Radio(
                        [category["category"] for category in MODELS],
                        label="Categories",
                        value=MODELS[0]["category"]
                    )
                    
                    category_models = gr.Radio(
                        [model[0] for model in MODELS[0]["models"]],
                        label="Models in Category"
                    )
                
                # Sort options
                # NOTE(review): no change handler for `sort_options` appears in
                # the visible part of this file; `update_models_from_sort`
                # looks like the intended callback - confirm.
                with gr.Accordion("Sort Models", open=False):
                    sort_options = gr.Radio(
                        ["Context: High to Low", "Context: Low to High", "Newest", 
                         "Throughput: High to Low", "Latency: Low to High"],
                        label="Sort By",
                        value="Context: High to Low"
                    )
            
            # Core sampling parameters; the initial values here are the same
            # ones `clear_chat` restores.
            with gr.Accordion("Generation Parameters", open=False):
                with gr.Group(elem_classes="parameter-grid"):
                    temperature = gr.Slider(
                        minimum=0.0, 
                        maximum=2.0, 
                        value=0.7, 
                        step=0.1,
                        label="Temperature"
                    )
                    
                    max_tokens = gr.Slider(
                        minimum=100, 
                        maximum=4000, 
                        value=1000, 
                        step=100,
                        label="Max Tokens"
                    )
                    
                    top_p = gr.Slider(
                        minimum=0.1, 
                        maximum=1.0, 
                        value=0.8, 
                        step=0.1,
                        label="Top P"
                    )
                    
                    frequency_penalty = gr.Slider(
                        minimum=-2.0, 
                        maximum=2.0, 
                        value=0.0, 
                        step=0.1,
                        label="Frequency Penalty"
                    )
                    
                    presence_penalty = gr.Slider(
                        minimum=-2.0, 
                        maximum=2.0, 
                        value=0.0, 
                        step=0.1,
                        label="Presence Penalty"
                    )
                    
                    # "none" means no `reasoning` field is added to the request.
                    reasoning_effort = gr.Radio(
                        ["none", "low", "medium", "high"],
                        value="none",
                        label="Reasoning Effort"
                    )
        
            # Less common sampling controls plus streaming and JSON-mode switches
            with gr.Accordion("Advanced Options", open=False):
                with gr.Row():
                    with gr.Column():
                        repetition_penalty = gr.Slider(
                            minimum=0.1, 
                            maximum=2.0, 
                            value=1.0, 
                            step=0.1,
                            label="Repetition Penalty"
                        )
                        
                        top_k = gr.Slider(
                            minimum=1, 
                            maximum=100, 
                            value=40, 
                            step=1,
                            label="Top K"
                        )
                        
                        min_p = gr.Slider(
                            minimum=0.0, 
                            maximum=1.0, 
                            value=0.1, 
                            step=0.05,
                            label="Min P"
                        )
                    
                    with gr.Column():
                        # A seed of 0 is not sent with the request (see the
                        # `seed > 0` check in the query function).
                        seed = gr.Number(
                            value=0,
                            label="Seed (0 for random)",
                            precision=0
                        )
                        
                        top_a = gr.Slider(
                            minimum=0.0, 
                            maximum=1.0, 
                            value=0.0, 
                            step=0.05,
                            label="Top A"
                        )
                        
                        stream_output = gr.Checkbox(
                            label="Stream Output",
                            value=False
                        )
                
                with gr.Row():
                    response_format = gr.Radio(
                        ["default", "json_object"],
                        value="default",
                        label="Response Format"
                    )
                    
                    gr.Markdown("""
                    * **json_object**: Forces the model to respond with valid JSON only.
                    * Only available on certain models - check model support on OpenRouter.
                    """)

            # Custom instructing options
            with gr.Accordion("Custom Instructions", open=False):
                system_message = gr.Textbox(
                    placeholder="Enter a system message to guide the model's behavior...",
                    label="System Message",
                    lines=3
                )
                
                # Selected values are forwarded as the request's `transforms` list.
                transforms = gr.CheckboxGroup(
                    ["prompt_optimize", "prompt_distill", "prompt_compress"],
                    label="Prompt Transforms (OpenRouter specific)"
                )
                
                gr.Markdown("""
                * **prompt_optimize**: Improve prompt for better responses.
                * **prompt_distill**: Compress prompt to use fewer tokens without changing meaning.
                * **prompt_compress**: Aggressively compress prompt to fit larger contexts.
                """)

# Typing in the search box narrows the choices offered by the model dropdown.
# NOTE(review): this registration sits at module level, outside the
# `with gr.Blocks(...) as demo:` block; Gradio expects event listeners to be
# attached inside the Blocks context - confirm the intended indentation.
model_search.change(filter_models, [model_search], [model_choice])

# Selecting a model refreshes the read-only context-size box.
model_choice.change(update_context_display, [model_choice], [context_display])

# Refresh the per-category model list when another category is picked
def update_category_models(category):
    """Build a radio update listing the models of ``category``, first one selected.

    An unknown category yields an empty list with no selection.
    """
    selected = next((cat for cat in MODELS if cat["category"] == category), None)
    if selected is None:
        return gr.Radio.update(choices=[], value=None)

    entries = selected["models"]
    return gr.Radio.update(choices=[entry[0] for entry in entries], value=entries[0][0])

# Picking a category repopulates the "Models in Category" radio.
# NOTE(review): registered at module level, outside the
# `with gr.Blocks(...) as demo:` block - confirm the intended indentation.
model_categories.change(update_category_models, [model_categories], [category_models])

# Picking a model inside a category copies that name into the main dropdown.
category_models.change(lambda selected: selected, [category_models], [model_choice])

# Process uploaded images
def process_uploaded_images(files):
    """Return the paths of the uploaded images, for display in the gallery.

    Args:
        files: What the upload button delivered: ``None`` or an empty list
            (nothing uploaded), a single item, or a list of items. Each item
            is either a path string or an object exposing its path as ``.name``.

    Returns:
        A list of path strings (empty when nothing was uploaded).
    """
    if not files:
        return []
    if not isinstance(files, (list, tuple)):
        files = [files]
    # Plain strings are already paths; file wrappers carry theirs in `.name`.
    return [item if isinstance(item, str) else item.name for item in files]

# Uploaded image files are shown in the gallery (replacing its contents).
# NOTE(review): registered at module level, outside the
# `with gr.Blocks(...) as demo:` block - confirm the intended indentation.
image_upload_btn.upload(process_uploaded_images, [image_upload_btn], [images])

# Enhanced AI query function with all advanced parameters
def ask_ai(message, chatbot, model_choice, temperature, max_tokens, top_p, 
           frequency_penalty, presence_penalty, repetition_penalty, top_k, 
           min_p, seed, top_a, stream_output, response_format, 
           images, documents, reasoning_effort, system_message, transforms):
    """Query OpenRouter with the full parameter set and report the result.

    This function is a generator (the streaming branch yields partial
    replies), so *every* outcome is delivered with ``yield``: a plain
    ``return value`` inside a generator is discarded by the caller, which
    previously meant non-streaming replies, errors and the early exits never
    reached the UI.

    Yields:
        ``(chatbot, "")`` pairs - the chat history to display and an empty
        string for the message box. When streaming, one pair is yielded per
        received content chunk; the final state is always yielded last.

    NOTE(review): Gradio versions that only run generator handlers with
    queuing enabled need ``demo.queue()`` in the launch code - confirm.
    """
    message = message or ""
    chatbot = chatbot or []

    # Nothing typed and nothing attached: report the unchanged history.
    if not message.strip() and not images and not documents:
        yield chatbot, ""
        return

    # Resolve the display name to its model id (first match wins).
    model_id = next(
        (candidate_id for name, candidate_id, _ctx in ALL_MODELS if name == model_choice),
        None,
    )
    if model_id is None:
        logger.error(f"Model not found: {model_choice}")
        yield chatbot + [[message, "Error: Model not found"]], ""
        return

    # Create messages from chatbot history
    messages = format_to_message_dict(chatbot)

    # A user-supplied system message replaces any existing one and goes first.
    if system_message and system_message.strip():
        messages = [msg for msg in messages if msg.get("role") != "system"]
        messages.insert(0, {"role": "system", "content": system_message.strip()})

    # Current user turn, with images/documents folded in.
    content = prepare_message_with_media(message, images, documents)
    messages.append({"role": "user", "content": content})

    try:
        logger.info(f"Sending request to model: {model_id}")

        # Parameters left at their neutral value are set to None here and
        # stripped below so they are not sent at all.
        payload = {
            "model": model_id,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "top_p": top_p,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty,
            "repetition_penalty": repetition_penalty if repetition_penalty != 1.0 else None,
            "top_k": top_k,
            "min_p": min_p if min_p and min_p > 0 else None,
            # An empty seed field arrives as None; treat it like 0 (random).
            "seed": seed if seed and seed > 0 else None,
            "top_a": top_a if top_a and top_a > 0 else None,
            "stream": stream_output
        }

        if response_format == "json_object":
            payload["response_format"] = {"type": "json_object"}

        if reasoning_effort != "none":
            payload["reasoning"] = {
                "effort": reasoning_effort
            }

        if transforms:
            payload["transforms"] = transforms

        # Remove None values
        payload = {k: v for k, v in payload.items() if v is not None}

        logger.info(f"Request payload: {json.dumps(payload, default=str)}")

        # The context manager closes the connection even when the stream is
        # abandoned early (e.g. on the [DONE] marker).
        with requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "HTTP-Referer": "https://huggingface.co/spaces"
            },
            json=payload,
            timeout=180,  # Longer timeout for document processing and streaming
            stream=stream_output
        ) as response:
            logger.info(f"Response status: {response.status_code}")

            if stream_output and response.status_code == 200:
                # Server-sent events: each payload line looks like "data: {json}".
                chatbot = chatbot + [[message, ""]]

                for line in response.iter_lines():
                    if not line:
                        continue
                    line = line.decode('utf-8')
                    if not line.startswith('data: '):
                        continue
                    data = line[6:]
                    if data.strip() == '[DONE]':
                        break
                    try:
                        chunk = json.loads(data)
                    except json.JSONDecodeError:
                        # Skip lines that are not valid JSON.
                        continue
                    if "choices" in chunk and len(chunk["choices"]) > 0:
                        delta = chunk["choices"][0].get("delta", {})
                        if "content" in delta and delta["content"]:
                            chatbot[-1][1] += delta["content"]
                            yield chatbot, ""

            elif response.status_code == 200:
                # Handle normal response
                result = response.json()
                ai_response = (
                    result.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
                )
                chatbot = chatbot + [[message, ai_response]]

                # Log token usage if available
                if "usage" in result:
                    logger.info(f"Token usage: {result['usage']}")
            else:
                response_text = response.text
                logger.info(f"Error response body: {response_text}")
                error_message = f"Error: Status code {response.status_code}\n\nResponse: {response_text}"
                chatbot = chatbot + [[message, error_message]]
    except Exception as e:
        # Any failure (network, JSON, unexpected shape) becomes a chat entry.
        logger.error(f"Exception during API call: {str(e)}")
        chatbot = chatbot + [[message, f"Error: {str(e)}"]]

    # Final state for every path that reached the API call.
    yield chatbot, ""

# Function to clear chat and reset parameters
def clear_chat():
    """Return the reset value for every component the clear button updates.

    The result is a 19-item tuple consumed positionally by the clear button's
    ``outputs`` list, so the order here must stay in step with that list:
    chatbot, message, images, documents, temperature, max_tokens, top_p,
    frequency_penalty, presence_penalty, repetition_penalty, top_k, min_p,
    seed, top_a, stream_output, response_format, reasoning_effort,
    system_message, transforms.

    Fresh list objects are created on every call.
    """
    # chatbot history, message box, uploaded images, uploaded documents
    conversation_state = ([], "", [], [])

    # temperature, max_tokens, top_p, frequency_penalty, presence_penalty,
    # repetition_penalty, top_k, min_p, seed, top_a
    sampling_defaults = (0.7, 1000, 0.8, 0.0, 0.0, 1.0, 40, 0.1, 0, 0.0)

    # stream_output, response_format, reasoning_effort, system_message, transforms
    request_options = (False, "default", "none", "", [])

    return conversation_state + sampling_defaults + request_options

# Set up events for the submit button and for message submission (pressing Enter).
# Both triggers call ask_ai with exactly the same components, so the component
# lists are written once and shared by the two registrations.
_ask_ai_inputs = [
    message, chatbot, model_choice, temperature, max_tokens,
    top_p, frequency_penalty, presence_penalty, repetition_penalty,
    top_k, min_p, seed, top_a, stream_output, response_format,
    images, documents, reasoning_effort, system_message, transforms,
]
# ask_ai hands back the updated chat history and the new message box content.
_ask_ai_outputs = [chatbot, message]

submit_btn.click(fn=ask_ai, inputs=_ask_ai_inputs, outputs=_ask_ai_outputs)
message.submit(fn=ask_ai, inputs=_ask_ai_inputs, outputs=_ask_ai_outputs)

# Set up events for the clear button.
# clear_chat() returns its values positionally, so the components below are
# listed in the same order as the values in its return tuple.
_clear_outputs = [
    chatbot, message, images, documents,
    temperature, max_tokens, top_p,
    frequency_penalty, presence_penalty, repetition_penalty,
    top_k, min_p, seed, top_a,
    stream_output, response_format, reasoning_effort,
    system_message, transforms,
]
clear_btn.click(fn=clear_chat, inputs=[], outputs=_clear_outputs)

# Add a model information section
with gr.Accordion("About Selected Model", open=False):
    model_info_display = gr.HTML(value="<p>Select a model to see details</p>")

    def update_model_info(model_name):
        """Build the HTML shown in the "About Selected Model" panel.

        The model is looked up with get_model_info(). A falsy lookup result
        yields a short "not available" paragraph; otherwise the result is
        unpacked as (name, model_id, context_size) and rendered as a small
        HTML card. The provider line is the part of the model ID before the
        first "/".
        """
        details = get_model_info(model_name)
        if not details:
            return "<p>Model information not available</p>"

        name, model_id, context_size = details
        return f"""
            <div class="model-info">
                <h3>{name}</h3>
                <p><strong>Model ID:</strong> {model_id}</p>
                <p><strong>Context Size:</strong> {context_size:,} tokens</p>
                <p><strong>Provider:</strong> {model_id.split('/')[0]}</p>
            </div>
            """

    # Update model info when model changes
    model_choice.change(
        fn=update_model_info,
        inputs=[model_choice],
        outputs=[model_info_display],
    )

# Add usage instructions
# The help text is kept in its own constant so the layout code below stays short.
_USAGE_INSTRUCTIONS_MD = """
    ## Basic Usage
    1. Type your message in the input box
    2. Select a model from the dropdown
    3. Click "Send" or press Enter
    
    ## Working with Files
    - **Images**: Upload images to use with vision-capable models like Llama 3.2 Vision
    - **Documents**: Upload PDF, Markdown, or text files to analyze their content
    
    ## Advanced Parameters
    - **Temperature**: Controls randomness (higher = more creative, lower = more deterministic)
    - **Max Tokens**: Maximum length of the response
    - **Top P**: Nucleus sampling threshold (higher = consider more tokens)
    - **Reasoning Effort**: Some models can show their reasoning process
    
    ## Tips
    - For code generation, use models like Qwen Coder
    - For visual tasks, choose vision-capable models
    - For long context, check the context window size next to the model name
    """

with gr.Accordion("Usage Instructions", open=False):
    gr.Markdown(_USAGE_INSTRUCTIONS_MD)

# Add a footer with version info
_FOOTER_TEXT = """
---
### OpenRouter AI Chat Interface v1.0
Built with ❤️ using Gradio and OpenRouter API | Context sizes shown next to model names
"""
footer_md = gr.Markdown(value=_FOOTER_TEXT)

# Launch directly with Gradio's built-in server (only when run as a script)
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    demo.launch(**launch_options)