# CrispChat / app.py
import os
import logging
import json
import base64
from io import BytesIO
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Graceful imports with fallbacks
try:
import gradio as gr
except ImportError:
logger.error("Gradio not found. Please install with 'pip install gradio'")
raise
try:
import requests
except ImportError:
logger.error("Requests not found. Please install with 'pip install requests'")
raise
# Optional libraries with fallbacks
try:
from PIL import Image
PIL_AVAILABLE = True
except ImportError:
logger.warning("PIL not found. Image processing functionality will be limited.")
PIL_AVAILABLE = False
# PDF processing
PDF_AVAILABLE = False
try:
import PyPDF2
PDF_AVAILABLE = True
except ImportError:
logger.warning("PyPDF2 not found. Attempting to use pdfminer.six as fallback...")
try:
from pdfminer.high_level import extract_text as pdf_extract_text
PDF_AVAILABLE = True
# Create a wrapper to mimic PyPDF2 functionality
def extract_text_from_pdf(file_path):
return pdf_extract_text(file_path)
except ImportError:
logger.warning("No PDF processing libraries found. PDF support will be disabled.")
# Markdown processing
MD_AVAILABLE = False
try:
import markdown
MD_AVAILABLE = True
except ImportError:
logger.warning("Markdown not found. Attempting to use markdownify as fallback...")
try:
from markdownify import markdownify as md
MD_AVAILABLE = True
# Create a wrapper (note: markdownify converts HTML to Markdown, so this only serves as a rough availability fallback)
def convert_markdown(text):
return md(text)
except ImportError:
logger.warning("No Markdown processing libraries found. Markdown support will be limited.")
# API key
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
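# Illustrative (not from this repo): the key is read from the environment before launch, e.g.
#   export OPENROUTER_API_KEY=<your-key>
# If it is unset, OpenRouter replies with 401 Unauthorized and the error text is surfaced in the chat window.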
# Model list with context sizes - organized by capability
MODELS = [
# Vision Models
{"category": "Vision Models", "models": [
("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000),
("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000),
("Google: Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp:free", 1048576),
("Google: Gemini Flash 2.0 Experimental", "google/gemini-2.0-flash-exp:free", 1048576),
("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 1000000),
("Google: Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp-1219:free", 40000),
("Meta: Llama 3.2 11B Vision Instruct", "meta-llama/llama-3.2-11b-vision-instruct:free", 131072),
("Qwen: Qwen2.5 VL 72B Instruct", "qwen/qwen2.5-vl-72b-instruct:free", 131072),
("Qwen: Qwen2.5 VL 32B Instruct", "qwen/qwen2.5-vl-32b-instruct:free", 8192),
("Qwen: Qwen2.5 VL 7B Instruct", "qwen/qwen-2.5-vl-7b-instruct:free", 64000),
("Qwen: Qwen2.5 VL 3B Instruct", "qwen/qwen2.5-vl-3b-instruct:free", 64000),
("Bytedance: UI-TARS 72B", "bytedance-research/ui-tars-72b:free", 32768),
]},
# Largest Context Models
{"category": "Largest Context (500K+)", "models": [
("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000),
("Google: Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp:free", 1048576),
("Google: Gemini Flash 2.0 Experimental", "google/gemini-2.0-flash-exp:free", 1048576),
("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000),
("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 1000000),
]},
# High-performance Models
{"category": "High Performance", "models": [
("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000),
("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000),
("Google: Gemma 3 27B", "google/gemma-3-27b-it:free", 96000),
("Mistral: Mistral Small 3.1 24B", "mistralai/mistral-small-3.1-24b-instruct:free", 96000),
("Qwen: Qwen2.5 VL 72B Instruct", "qwen/qwen2.5-vl-72b-instruct:free", 131072),
]},
# Mid-size Models
{"category": "Mid-size Models", "models": [
("Google: Gemma 3 12B", "google/gemma-3-12b-it:free", 131072),
("Google: Gemma 3 4B", "google/gemma-3-4b-it:free", 131072),
("Google: LearnLM 1.5 Pro Experimental", "google/learnlm-1.5-pro-experimental:free", 40960),
("Meta: Llama 3.1 8B Instruct", "meta-llama/llama-3.1-8b-instruct:free", 131072),
]},
# Smaller Models
{"category": "Smaller Models", "models": [
("Google: Gemma 3 1B", "google/gemma-3-1b-it:free", 32768),
("Qwen: Qwen2.5 VL 3B Instruct", "qwen/qwen2.5-vl-3b-instruct:free", 64000),
("AllenAI: Molmo 7B D", "allenai/molmo-7b-d:free", 4096),
]},
# Sorting Options
{"category": "Sort By", "models": [
("Context: High to Low", "sort_context_desc", 0),
("Context: Low to High", "sort_context_asc", 0),
("Newest", "sort_newest", 0),
("Throughput: High to Low", "sort_throughput", 0),
("Latency: Low to High", "sort_latency", 0),
]},
]
# Flatten model list for easy searching
ALL_MODELS = []
for category in MODELS:
if category["category"] != "Sort By": # Skip the sorting options
for model in category["models"]:
if model not in ALL_MODELS:
ALL_MODELS.append(model)
# Sort models by context size (descending) by default
ALL_MODELS.sort(key=lambda x: x[2], reverse=True)
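# Illustrative: each ALL_MODELS entry is a (display_name, model_id, context_size) tuple, e.g.
#   ("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000)
# so ALL_MODELS[0] is the largest-context model after the sort above.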
def format_to_message_dict(history):
"""Convert history to proper message format"""
messages = []
for pair in history:
if len(pair) == 2:
human, ai = pair
if human:
messages.append({"role": "user", "content": human})
if ai:
messages.append({"role": "assistant", "content": ai})
return messages
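# Illustrative example, assuming the Chatbot's list-of-pairs ("tuples") history format:
#   format_to_message_dict([["Hi there", "Hello! How can I help?"]])
#   -> [{"role": "user", "content": "Hi there"},
#       {"role": "assistant", "content": "Hello! How can I help?"}]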
def encode_image_to_base64(image_path):
"""Encode an image file to base64 string with fallback methods"""
try:
if isinstance(image_path, str): # File path as string
with open(image_path, "rb") as image_file:
encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
file_extension = image_path.split('.')[-1].lower()
mime_type = f"image/{file_extension}"
if file_extension in ["jpg", "jpeg"]:
mime_type = "image/jpeg"
elif file_extension == "png":
mime_type = "image/png"
elif file_extension in ["webp", "gif"]:
mime_type = f"image/{file_extension}"
else:
mime_type = "image/jpeg" # Default fallback
return f"data:{mime_type};base64,{encoded_string}"
elif PIL_AVAILABLE: # Pillow Image object
buffered = BytesIO()
# Handle if it's a PIL Image or file-like object
try:
image_path.save(buffered, format="PNG")
except AttributeError:
if hasattr(image_path, 'read'):
# It's a file-like object but not a PIL Image
buffered.write(image_path.read())
else:
raise
encoded_string = base64.b64encode(buffered.getvalue()).decode('utf-8')
return f"data:image/png;base64,{encoded_string}"
else:
logger.error("Cannot process image: PIL not available and input is not a file path")
return None
except Exception as e:
logger.error(f"Error encoding image: {str(e)}")
return None
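# Illustrative example: encode_image_to_base64("photo.png") returns a data URL such as
#   "data:image/png;base64,iVBORw0KGgo..."
# which is the format the image_url content parts below expect.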
def extract_text_from_file(file_path):
"""Extract text from various file types with fallbacks"""
try:
file_extension = file_path.split('.')[-1].lower()
if file_extension == 'pdf':
if PDF_AVAILABLE:
if 'PyPDF2' in globals():
text = ""
with open(file_path, 'rb') as file:
pdf_reader = PyPDF2.PdfReader(file)
for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num]
text += page.extract_text() + "\n\n"
return text
else:
# Use pdfminer fallback
return extract_text_from_pdf(file_path)
else:
return "PDF support not available. Please install PyPDF2 or pdfminer.six."
elif file_extension == 'md':
if MD_AVAILABLE:
with open(file_path, 'r', encoding='utf-8') as file:
md_text = file.read()
return md_text
else:
# Simple fallback - just read the file
with open(file_path, 'r', encoding='utf-8') as file:
return file.read()
elif file_extension == 'txt':
with open(file_path, 'r', encoding='utf-8') as file:
return file.read()
else:
return f"Unsupported file type: {file_extension}"
except Exception as e:
logger.error(f"Error extracting text from file: {str(e)}")
return f"Error processing file: {str(e)}"
def prepare_message_with_media(text, images=None, documents=None):
"""Prepare a message with text, images, and document content"""
# If no media, return text only
if not images and not documents:
return text
# Start with text content
if documents and len(documents) > 0:
# If there are documents, append their content to the text
document_texts = []
for doc in documents:
if doc is None:
continue
doc_text = extract_text_from_file(doc)
if doc_text:
document_texts.append(doc_text)
# Add document content to text
if document_texts:
if not text:
text = "Please analyze these documents:"
else:
text = f"{text}\n\nDocument content:\n\n"
text += "\n\n".join(document_texts)
# If no images, return text only
if not images:
return text
# If we have images, create a multimodal content array
content = [{"type": "text", "text": text or "Please analyze these images:"}]
# Add images if any
if images:
for img in images:
if img is None:
continue
encoded_image = encode_image_to_base64(img)
if encoded_image:
content.append({
"type": "image_url",
"image_url": {"url": encoded_image}
})
return content
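# Illustrative example of the multimodal content array returned when images are attached
# (OpenAI/OpenRouter-style content parts):
#   [{"type": "text", "text": "Describe this image"},
#    {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}]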
def ask_ai(message, chatbot, model_choice, temperature, max_tokens, top_p, frequency_penalty,
presence_penalty, images, documents, reasoning_effort):
"""Enhanced AI query function with comprehensive options and fallbacks"""
if not message.strip() and not images and not documents:
return chatbot, ""
# Check if this is a sorting option
if model_choice.startswith("Sort By"):
return chatbot + [[message, "Please select a model to chat with first."]], ""
# Get model ID and context size
model_id = None
context_size = 0
for name, model_id_value, ctx_size in ALL_MODELS:
if name == model_choice:
model_id = model_id_value
context_size = ctx_size
break
if model_id is None:
logger.error(f"Model not found: {model_choice}")
return chatbot + [[message, "Error: Model not found"]], ""
# Create messages from chatbot history
messages = format_to_message_dict(chatbot)
# Prepare message with images and documents if any
content = prepare_message_with_media(message, images, documents)
# Add current message
messages.append({"role": "user", "content": content})
# Call API
try:
logger.info(f"Sending request to model: {model_id}")
# Build the payload with all parameters
payload = {
"model": model_id,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
}
# Add optional parameters if they have non-default values
if top_p < 1.0:
payload["top_p"] = top_p
if frequency_penalty != 0:
payload["frequency_penalty"] = frequency_penalty
if presence_penalty != 0:
payload["presence_penalty"] = presence_penalty
# Add reasoning if selected
if reasoning_effort != "none":
payload["reasoning"] = {
"effort": reasoning_effort
}
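# Illustrative payload shape sent to OpenRouter (values shown here are examples only):
#   {"model": "google/gemini-2.0-flash-exp:free",
#    "messages": [{"role": "user", "content": "Hello"}],
#    "temperature": 0.7, "max_tokens": 1000, "reasoning": {"effort": "medium"}}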
logger.info(f"Request payload: {json.dumps(payload, default=str)}")
response = requests.post(
"https://openrouter.ai/api/v1/chat/completions",
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
"HTTP-Referer": "https://huggingface.co/spaces"
},
json=payload,
timeout=120 # Longer timeout for document processing
)
logger.info(f"Response status: {response.status_code}")
response_text = response.text
logger.debug(f"Response body: {response_text}")
if response.status_code == 200:
result = response.json()
ai_response = result.get("choices", [{}])[0].get("message", {}).get("content", "")
chatbot = chatbot + [[message, ai_response]]
# Log token usage if available
if "usage" in result:
logger.info(f"Token usage: {result['usage']}")
else:
error_message = f"Error: Status code {response.status_code}\n\nResponse: {response_text}"
chatbot = chatbot + [[message, error_message]]
except Exception as e:
logger.error(f"Exception during API call: {str(e)}")
chatbot = chatbot + [[message, f"Error: {str(e)}"]]
return chatbot, ""
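# NOTE: this ask_ai/clear_chat pair is a simpler baseline. The definitions inside the
# gr.Blocks context further below (streaming, system message, extra sampling options)
# rebind the same names and are the ones actually wired to the UI events.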
def clear_chat():
return [], "", [], [], 0.7, 1000, 0.8, 0.0, 0.0, "none"
def apply_sort(sort_option):
"""Apply sorting option to models list"""
if sort_option == "sort_context_desc":
# Sort by context size (high to low)
sorted_models = sorted(ALL_MODELS, key=lambda x: x[2], reverse=True)
elif sort_option == "sort_context_asc":
# Sort by context size (low to high)
sorted_models = sorted(ALL_MODELS, key=lambda x: x[2])
elif sort_option == "sort_newest":
# This would need a proper timestamp, using a rough approximation
# Models with "Experimental" in the name come first as they're likely newer
sorted_models = sorted(ALL_MODELS, key=lambda x: "Experimental" not in x[0])
elif sort_option == "sort_throughput" or sort_option == "sort_latency":
# These would need actual performance metrics
# For now, use model size as a rough proxy (smaller models generally have higher throughput and lower latency)
# Rough heuristic: models with smaller numbers in their names might be smaller
sorted_models = sorted(ALL_MODELS, key=lambda x: sum(int(s) for s in x[0] if s.isdigit()))
else:
# Default to context size sorting
sorted_models = sorted(ALL_MODELS, key=lambda x: x[2], reverse=True)
return sorted_models
def filter_models(search_term):
"""Filter models based on search term"""
if not search_term:
return gr.Dropdown.update(choices=[model[0] for model in ALL_MODELS], value=ALL_MODELS[0][0])
filtered_models = [model[0] for model in ALL_MODELS if search_term.lower() in model[0].lower()]
if filtered_models:
return gr.Dropdown.update(choices=filtered_models, value=filtered_models[0])
else:
return gr.Dropdown.update(choices=[model[0] for model in ALL_MODELS], value=ALL_MODELS[0][0])
def get_model_info(model_name):
"""Get model information by name"""
for model in ALL_MODELS:
if model[0] == model_name:
return model
return None
def update_context_display(model_name):
"""Update the context size display based on the selected model"""
model_info = get_model_info(model_name)
if model_info:
name, model_id, context_size = model_info
context_formatted = f"{context_size:,}"
return f"{context_formatted} tokens"
return "Unknown"
def update_models_from_sort(sort_option):
"""Update models list based on sorting option"""
for category in MODELS:
if category["category"] == "Sort By":
for option in category["models"]:
if option[0] == sort_option:
sort_key = option[1]
sorted_models = apply_sort(sort_key)
return gr.Dropdown.update(choices=[model[0] for model in sorted_models], value=sorted_models[0][0])
# Default sorting if option not found
return gr.Dropdown.update(choices=[model[0] for model in ALL_MODELS], value=ALL_MODELS[0][0])
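# NOTE: gr.Dropdown.update / gr.Radio.update are the Gradio 3.x idiom; on Gradio 4+ the
# equivalent (assumption, depending on the pinned version) is returning gr.update(choices=..., value=...)
# or a new component instance.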
# Create enhanced interface
with gr.Blocks(css="""
.context-size {
font-size: 0.9em;
color: #666;
margin-left: 10px;
}
footer { display: none !important; }
.model-selection-row {
display: flex;
align-items: center;
}
.parameter-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 10px;
}
""") as demo:
gr.Markdown("""
# Vision AI Chat
Chat with various AI vision models from OpenRouter with support for images and documents.
""")
with gr.Row():
with gr.Column(scale=2):
chatbot = gr.Chatbot(
height=500,
show_copy_button=True,
show_label=False,
avatar_images=(None, "https://upload.wikimedia.org/wikipedia/commons/0/04/ChatGPT_logo.svg")
)
with gr.Row():
message = gr.Textbox(
placeholder="Type your message here...",
label="Message",
lines=2
)
with gr.Row():
with gr.Column(scale=3):
submit_btn = gr.Button("Send", variant="primary")
with gr.Column(scale=1):
clear_btn = gr.Button("Clear Chat", variant="secondary")
with gr.Row():
# Image upload
with gr.Accordion("Upload Images", open=False):
images = gr.Gallery(
label="Uploaded Images",
show_label=True,
columns=4,
height="auto",
object_fit="contain"
)
image_upload_btn = gr.UploadButton(
label="Upload Images",
file_types=["image"],
file_count="multiple"
)
# Document upload
with gr.Accordion("Upload Documents (PDF, MD, TXT)", open=False):
documents = gr.File(
label="Uploaded Documents",
file_types=[".pdf", ".md", ".txt"],
file_count="multiple"
)
with gr.Column(scale=1):
with gr.Group():
gr.Markdown("### Model Selection")
with gr.Row(elem_classes="model-selection-row"):
model_search = gr.Textbox(
placeholder="Search models...",
label="",
show_label=False
)
with gr.Row(elem_classes="model-selection-row"):
model_choice = gr.Dropdown(
[model[0] for model in ALL_MODELS],
value=ALL_MODELS[0][0],
label="Model"
)
context_display = gr.Textbox(
value=update_context_display(ALL_MODELS[0][0]),
label="Context",
interactive=False,
elem_classes="context-size"
)
# Model category selection
with gr.Accordion("Browse by Category", open=False):
model_categories = gr.Radio(
[category["category"] for category in MODELS],
label="Categories",
value=MODELS[0]["category"]
)
category_models = gr.Radio(
[model[0] for model in MODELS[0]["models"]],
label="Models in Category"
)
# Sort options
with gr.Accordion("Sort Models", open=False):
sort_options = gr.Radio(
["Context: High to Low", "Context: Low to High", "Newest",
"Throughput: High to Low", "Latency: Low to High"],
label="Sort By",
value="Context: High to Low"
)
with gr.Accordion("Generation Parameters", open=False):
with gr.Group(elem_classes="parameter-grid"):
temperature = gr.Slider(
minimum=0.0,
maximum=2.0,
value=0.7,
step=0.1,
label="Temperature"
)
max_tokens = gr.Slider(
minimum=100,
maximum=4000,
value=1000,
step=100,
label="Max Tokens"
)
top_p = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.8,
step=0.1,
label="Top P"
)
frequency_penalty = gr.Slider(
minimum=-2.0,
maximum=2.0,
value=0.0,
step=0.1,
label="Frequency Penalty"
)
presence_penalty = gr.Slider(
minimum=-2.0,
maximum=2.0,
value=0.0,
step=0.1,
label="Presence Penalty"
)
reasoning_effort = gr.Radio(
["none", "low", "medium", "high"],
value="none",
label="Reasoning Effort"
)
with gr.Accordion("Advanced Options", open=False):
with gr.Row():
with gr.Column():
repetition_penalty = gr.Slider(
minimum=0.1,
maximum=2.0,
value=1.0,
step=0.1,
label="Repetition Penalty"
)
top_k = gr.Slider(
minimum=1,
maximum=100,
value=40,
step=1,
label="Top K"
)
min_p = gr.Slider(
minimum=0.0,
maximum=1.0,
value=0.1,
step=0.05,
label="Min P"
)
with gr.Column():
seed = gr.Number(
value=0,
label="Seed (0 for random)",
precision=0
)
top_a = gr.Slider(
minimum=0.0,
maximum=1.0,
value=0.0,
step=0.05,
label="Top A"
)
stream_output = gr.Checkbox(
label="Stream Output",
value=False
)
with gr.Row():
response_format = gr.Radio(
["default", "json_object"],
value="default",
label="Response Format"
)
gr.Markdown("""
* **json_object**: Forces the model to respond with valid JSON only.
* Only available on certain models - check model support on OpenRouter.
""")
# Custom instruction options
with gr.Accordion("Custom Instructions", open=False):
system_message = gr.Textbox(
placeholder="Enter a system message to guide the model's behavior...",
label="System Message",
lines=3
)
transforms = gr.CheckboxGroup(
["prompt_optimize", "prompt_distill", "prompt_compress"],
label="Prompt Transforms (OpenRouter specific)"
)
gr.Markdown("""
* **prompt_optimize**: Improve prompt for better responses.
* **prompt_distill**: Compress prompt to use fewer tokens without changing meaning.
* **prompt_compress**: Aggressively compress prompt to fit larger contexts.
""")
# Connect model search to dropdown filter
model_search.change(
fn=filter_models,
inputs=[model_search],
outputs=[model_choice]
)
# Update context display when model changes
model_choice.change(
fn=update_context_display,
inputs=[model_choice],
outputs=[context_display]
)
# Update model list when category changes
def update_category_models(category):
for cat in MODELS:
if cat["category"] == category:
return gr.Radio.update(choices=[model[0] for model in cat["models"]], value=cat["models"][0][0])
return gr.Radio.update(choices=[], value=None)
model_categories.change(
fn=update_category_models,
inputs=[model_categories],
outputs=[category_models]
)
# Update main model choice when category model is selected
category_models.change(
fn=lambda x: x,
inputs=[category_models],
outputs=[model_choice]
)
# Process uploaded images
def process_uploaded_images(files):
return [file.name for file in files]
image_upload_btn.upload(
fn=process_uploaded_images,
inputs=[image_upload_btn],
outputs=[images]
)
# Enhanced AI query function with all advanced parameters
def ask_ai(message, chatbot, model_choice, temperature, max_tokens, top_p,
frequency_penalty, presence_penalty, repetition_penalty, top_k,
min_p, seed, top_a, stream_output, response_format,
images, documents, reasoning_effort, system_message, transforms):
"""Comprehensive AI query function with all parameters"""
if not message.strip() and not images and not documents:
# Because this function contains `yield` (streaming branch) it is a generator, so yield the result and return
yield chatbot, ""
return
# Get model ID and context size
model_id = None
context_size = 0
for name, model_id_value, ctx_size in ALL_MODELS:
if name == model_choice:
model_id = model_id_value
context_size = ctx_size
break
if model_id is None:
logger.error(f"Model not found: {model_choice}")
yield chatbot + [[message, "Error: Model not found"]], ""
return
# Create messages from chatbot history
messages = format_to_message_dict(chatbot)
# Add system message if provided
if system_message and system_message.strip():
# Insert at the beginning to override any existing system message
for i, msg in enumerate(messages):
if msg.get("role") == "system":
messages.pop(i)
break
messages.insert(0, {"role": "system", "content": system_message.strip()})
# Prepare message with images and documents if any
content = prepare_message_with_media(message, images, documents)
# Add current message
messages.append({"role": "user", "content": content})
# Call API
try:
logger.info(f"Sending request to model: {model_id}")
# Build the comprehensive payload with all parameters
payload = {
"model": model_id,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
"top_p": top_p,
"frequency_penalty": frequency_penalty,
"presence_penalty": presence_penalty,
"repetition_penalty": repetition_penalty if repetition_penalty != 1.0 else None,
"top_k": top_k,
"min_p": min_p if min_p > 0 else None,
"seed": seed if seed > 0 else None,
"top_a": top_a if top_a > 0 else None,
"stream": stream_output
}
# Add response format if not default
if response_format == "json_object":
payload["response_format"] = {"type": "json_object"}
# Add reasoning if selected
if reasoning_effort != "none":
payload["reasoning"] = {
"effort": reasoning_effort
}
# Add transforms if selected
if transforms:
payload["transforms"] = transforms
# Remove None values
payload = {k: v for k, v in payload.items() if v is not None}
logger.info(f"Request payload: {json.dumps(payload, default=str)}")
response = requests.post(
"https://openrouter.ai/api/v1/chat/completions",
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
"HTTP-Referer": "https://huggingface.co/spaces"
},
json=payload,
timeout=180, # Longer timeout for document processing and streaming
stream=stream_output
)
logger.info(f"Response status: {response.status_code}")
if stream_output and response.status_code == 200:
# Handle streaming response
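# Illustrative SSE lines from the OpenAI-compatible stream (each prefixed with "data: "):
#   data: {"choices": [{"delta": {"content": "Hel"}}]}
#   data: [DONE]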
chatbot = chatbot + [[message, ""]]
for line in response.iter_lines():
if line:
line = line.decode('utf-8')
if line.startswith('data: '):
data = line[6:]
if data.strip() == '[DONE]':
break
try:
chunk = json.loads(data)
if "choices" in chunk and len(chunk["choices"]) > 0:
delta = chunk["choices"][0].get("delta", {})
if "content" in delta and delta["content"]:
chatbot[-1][1] += delta["content"]
yield chatbot, ""
except json.JSONDecodeError:
continue
yield chatbot, ""
return
elif response.status_code == 200:
# Handle normal response
result = response.json()
ai_response = result.get("choices", [{}])[0].get("message", {}).get("content", "")
chatbot = chatbot + [[message, ai_response]]
# Log token usage if available
if "usage" in result:
logger.info(f"Token usage: {result['usage']}")
else:
response_text = response.text
logger.info(f"Error response body: {response_text}")
error_message = f"Error: Status code {response.status_code}\n\nResponse: {response_text}"
chatbot = chatbot + [[message, error_message]]
except Exception as e:
logger.error(f"Exception during API call: {str(e)}")
chatbot = chatbot + [[message, f"Error: {str(e)}"]]
yield chatbot, ""
# Function to clear chat and reset parameters
def clear_chat():
return [], "", [], [], 0.7, 1000, 0.8, 0.0, 0.0, 1.0, 40, 0.1, 0, 0.0, False, "default", "none", "", []
# Set up events for the submit button
submit_btn.click(
fn=ask_ai,
inputs=[
message, chatbot, model_choice, temperature, max_tokens,
top_p, frequency_penalty, presence_penalty, repetition_penalty,
top_k, min_p, seed, top_a, stream_output, response_format,
images, documents, reasoning_effort, system_message, transforms
],
outputs=[chatbot, message]
)
# Set up events for message submission (pressing Enter)
message.submit(
fn=ask_ai,
inputs=[
message, chatbot, model_choice, temperature, max_tokens,
top_p, frequency_penalty, presence_penalty, repetition_penalty,
top_k, min_p, seed, top_a, stream_output, response_format,
images, documents, reasoning_effort, system_message, transforms
],
outputs=[chatbot, message]
)
# Set up events for the clear button
clear_btn.click(
fn=clear_chat,
inputs=[],
outputs=[
chatbot, message, images, documents, temperature,
max_tokens, top_p, frequency_penalty, presence_penalty,
repetition_penalty, top_k, min_p, seed, top_a, stream_output,
response_format, reasoning_effort, system_message, transforms
]
)
# Add a model information section
with gr.Accordion("About Selected Model", open=False):
model_info_display = gr.HTML(
value="<p>Select a model to see details</p>"
)
# Update model info when model changes
def update_model_info(model_name):
model_info = get_model_info(model_name)
if model_info:
name, model_id, context_size = model_info
return f"""
<div class="model-info">
<h3>{name}</h3>
<p><strong>Model ID:</strong> {model_id}</p>
<p><strong>Context Size:</strong> {context_size:,} tokens</p>
<p><strong>Provider:</strong> {model_id.split('/')[0]}</p>
</div>
"""
return "<p>Model information not available</p>"
model_choice.change(
fn=update_model_info,
inputs=[model_choice],
outputs=[model_info_display]
)
# Add usage instructions
with gr.Accordion("Usage Instructions", open=False):
gr.Markdown("""
## Basic Usage
1. Type your message in the input box
2. Select a model from the dropdown
3. Click "Send" or press Enter
## Working with Files
- **Images**: Upload images to use with vision-capable models like Llama 3.2 Vision
- **Documents**: Upload PDF, Markdown, or text files to analyze their content
## Advanced Parameters
- **Temperature**: Controls randomness (higher = more creative, lower = more deterministic)
- **Max Tokens**: Maximum length of the response
- **Top P**: Nucleus sampling threshold (higher = consider more tokens)
- **Reasoning Effort**: Some models can show their reasoning process
## Tips
- For code generation, use models like Qwen Coder
- For visual tasks, choose vision-capable models
- For long context, check the context window size next to the model name
""")
# Add a footer with version info
footer_md = gr.Markdown("""
---
### OpenRouter AI Chat Interface v1.0
Built with ❤️ using Gradio and OpenRouter API | Context sizes shown next to model names
""")
# Launch directly with Gradio's built-in server
if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=7860)