# CrispChat / app.py
import os
import logging
import json
import base64
from io import BytesIO
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Graceful imports with fallbacks
try:
import gradio as gr
except ImportError:
logger.error("Gradio not found. Please install with 'pip install gradio'")
raise
try:
import requests
except ImportError:
logger.error("Requests not found. Please install with 'pip install requests'")
raise
# Optional libraries with fallbacks
try:
from PIL import Image
PIL_AVAILABLE = True
except ImportError:
logger.warning("PIL not found. Image processing functionality will be limited.")
PIL_AVAILABLE = False
# PDF processing
PDF_AVAILABLE = False
try:
import PyPDF2
PDF_AVAILABLE = True
except ImportError:
logger.warning("PyPDF2 not found. Attempting to use pdfminer.six as fallback...")
try:
from pdfminer.high_level import extract_text as pdf_extract_text
PDF_AVAILABLE = True
# Create a wrapper to mimic PyPDF2 functionality
def extract_text_from_pdf(file_path):
return pdf_extract_text(file_path)
except ImportError:
logger.warning("No PDF processing libraries found. PDF support will be disabled.")
# Markdown processing
MD_AVAILABLE = False
try:
import markdown
MD_AVAILABLE = True
except ImportError:
logger.warning("Markdown not found. Attempting to use markdownify as fallback...")
try:
from markdownify import markdownify as md
MD_AVAILABLE = True
# Create a wrapper (note: markdownify converts HTML to Markdown, so this only serves as a rough availability fallback)
def convert_markdown(text):
return md(text)
except ImportError:
logger.warning("No Markdown processing libraries found. Markdown support will be limited.")
# API key
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
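# Illustrative (not from this repo): the key is read from the environment before launch, e.g.
#   export OPENROUTER_API_KEY=<your-key>
# If it is unset, OpenRouter replies with 401 Unauthorized and the error text is surfaced in the chat window.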
# Model list with context sizes - organized by capability
MODELS = [
# Vision Models
{"category": "Vision Models", "models": [
("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000),
("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000),
("Google: Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp:free", 1048576),
("Google: Gemini Flash 2.0 Experimental", "google/gemini-2.0-flash-exp:free", 1048576),
("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 1000000),
("Google: Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp-1219:free", 40000),
("Meta: Llama 3.2 11B Vision Instruct", "meta-llama/llama-3.2-11b-vision-instruct:free", 131072),
("Qwen: Qwen2.5 VL 72B Instruct", "qwen/qwen2.5-vl-72b-instruct:free", 131072),
("Qwen: Qwen2.5 VL 32B Instruct", "qwen/qwen2.5-vl-32b-instruct:free", 8192),
("Qwen: Qwen2.5 VL 7B Instruct", "qwen/qwen-2.5-vl-7b-instruct:free", 64000),
("Qwen: Qwen2.5 VL 3B Instruct", "qwen/qwen2.5-vl-3b-instruct:free", 64000),
("Bytedance: UI-TARS 72B", "bytedance-research/ui-tars-72b:free", 32768),
]},
# Largest Context Models
{"category": "Largest Context (500K+)", "models": [
("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000),
("Google: Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp:free", 1048576),
("Google: Gemini Flash 2.0 Experimental", "google/gemini-2.0-flash-exp:free", 1048576),
("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000),
("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 1000000),
]},
# High-performance Models
{"category": "High Performance", "models": [
("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000),
("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000),
("Google: Gemma 3 27B", "google/gemma-3-27b-it:free", 96000),
("Mistral: Mistral Small 3.1 24B", "mistralai/mistral-small-3.1-24b-instruct:free", 96000),
("Qwen: Qwen2.5 VL 72B Instruct", "qwen/qwen2.5-vl-72b-instruct:free", 131072),
]},
# Mid-size Models
{"category": "Mid-size Models", "models": [
("Google: Gemma 3 12B", "google/gemma-3-12b-it:free", 131072),
("Google: Gemma 3 4B", "google/gemma-3-4b-it:free", 131072),
("Google: LearnLM 1.5 Pro Experimental", "google/learnlm-1.5-pro-experimental:free", 40960),
("Meta: Llama 3.1 8B Instruct", "meta-llama/llama-3.1-8b-instruct:free", 131072),
]},
# Smaller Models
{"category": "Smaller Models", "models": [
("Google: Gemma 3 1B", "google/gemma-3-1b-it:free", 32768),
("Qwen: Qwen2.5 VL 3B Instruct", "qwen/qwen2.5-vl-3b-instruct:free", 64000),
("AllenAI: Molmo 7B D", "allenai/molmo-7b-d:free", 4096),
]},
# Sorting Options
{"category": "Sort By", "models": [
("Context: High to Low", "sort_context_desc", 0),
("Context: Low to High", "sort_context_asc", 0),
("Newest", "sort_newest", 0),
("Throughput: High to Low", "sort_throughput", 0),
("Latency: Low to High", "sort_latency", 0),
]},
]
# Flatten model list for easy searching
ALL_MODELS = []
for category in MODELS:
if category["category"] != "Sort By": # Skip the sorting options
for model in category["models"]:
if model not in ALL_MODELS:
ALL_MODELS.append(model)
# Sort models by context size (descending) by default
ALL_MODELS.sort(key=lambda x: x[2], reverse=True)
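# Illustrative: each ALL_MODELS entry is a (display_name, model_id, context_size) tuple, e.g.
#   ("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000)
# so ALL_MODELS[0] is the largest-context model after the sort above.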
def format_to_message_dict(history):
"""Convert history to proper message format"""
messages = []
for pair in history:
if len(pair) == 2:
human, ai = pair
if human:
messages.append({"role": "user", "content": human})
if ai:
messages.append({"role": "assistant", "content": ai})
return messages
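# Illustrative example, assuming the Chatbot's list-of-pairs ("tuples") history format:
#   format_to_message_dict([["Hi there", "Hello! How can I help?"]])
#   -> [{"role": "user", "content": "Hi there"},
#       {"role": "assistant", "content": "Hello! How can I help?"}]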
def encode_image_to_base64(image_path):
"""Encode an image file to base64 string with fallback methods"""
try:
if isinstance(image_path, str): # File path as string
with open(image_path, "rb") as image_file:
encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
file_extension = image_path.split('.')[-1].lower()
mime_type = f"image/{file_extension}"
if file_extension in ["jpg", "jpeg"]:
mime_type = "image/jpeg"
elif file_extension == "png":
mime_type = "image/png"
elif file_extension in ["webp", "gif"]:
mime_type = f"image/{file_extension}"
else:
mime_type = "image/jpeg" # Default fallback
return f"data:{mime_type};base64,{encoded_string}"
elif PIL_AVAILABLE: # Pillow Image object
buffered = BytesIO()
# Handle if it's a PIL Image or file-like object
try:
image_path.save(buffered, format="PNG")
except AttributeError:
if hasattr(image_path, 'read'):
# It's a file-like object but not a PIL Image
buffered.write(image_path.read())
else:
raise
encoded_string = base64.b64encode(buffered.getvalue()).decode('utf-8')
return f"data:image/png;base64,{encoded_string}"
else:
logger.error("Cannot process image: PIL not available and input is not a file path")
return None
except Exception as e:
logger.error(f"Error encoding image: {str(e)}")
return None
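# Illustrative example: encode_image_to_base64("photo.png") returns a data URL such as
#   "data:image/png;base64,iVBORw0KGgo..."
# which is the format the image_url content parts below expect.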
def extract_text_from_file(file_path):
"""Extract text from various file types with fallbacks"""
try:
file_extension = file_path.split('.')[-1].lower()
if file_extension == 'pdf':
if PDF_AVAILABLE:
if 'PyPDF2' in globals():
text = ""
with open(file_path, 'rb') as file:
pdf_reader = PyPDF2.PdfReader(file)
for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num]
text += page.extract_text() + "\n\n"
return text
else:
# Use pdfminer fallback
return extract_text_from_pdf(file_path)
else:
return "PDF support not available. Please install PyPDF2 or pdfminer.six."
elif file_extension == 'md':
if MD_AVAILABLE:
with open(file_path, 'r', encoding='utf-8') as file:
md_text = file.read()
return md_text
else:
# Simple fallback - just read the file
with open(file_path, 'r', encoding='utf-8') as file:
return file.read()
elif file_extension == 'txt':
with open(file_path, 'r', encoding='utf-8') as file:
return file.read()
else:
return f"Unsupported file type: {file_extension}"
except Exception as e:
logger.error(f"Error extracting text from file: {str(e)}")
return f"Error processing file: {str(e)}"
def prepare_message_with_media(text, images=None, documents=None):
"""Prepare a message with text, images, and document content"""
# If no media, return text only
if not images and not documents:
return text
# Start with text content
if documents and len(documents) > 0:
# If there are documents, append their content to the text
document_texts = []
for doc in documents:
if doc is None:
continue
doc_text = extract_text_from_file(doc)
if doc_text:
document_texts.append(doc_text)
# Add document content to text
if document_texts:
if not text:
text = "Please analyze these documents:"
else:
text = f"{text}\n\nDocument content:\n\n"
text += "\n\n".join(document_texts)
# If no images, return text only
if not images:
return text
# If we have images, create a multimodal content array
content = [{"type": "text", "text": text or "Please analyze these images:"}]
# Add images if any
if images:
for img in images:
if img is None:
continue
encoded_image = encode_image_to_base64(img)
if encoded_image:
content.append({
"type": "image_url",
"image_url": {"url": encoded_image}
})
return content
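# Illustrative example of the multimodal content array returned when images are attached
# (OpenAI/OpenRouter-style content parts):
#   [{"type": "text", "text": "Describe this image"},
#    {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}]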
def ask_ai(message, chatbot, model_choice, temperature, max_tokens, top_p, frequency_penalty,
presence_penalty, images, documents, reasoning_effort):
"""Enhanced AI query function with comprehensive options and fallbacks"""
if not message.strip() and not images and not documents:
return chatbot, ""
# Check if this is a sorting option
if model_choice.startswith("Sort By"):
return chatbot + [[message, "Please select a model to chat with first."]], ""
# Get model ID and context size
model_id = None
context_size = 0
for name, model_id_value, ctx_size in ALL_MODELS:
if name == model_choice:
model_id = model_id_value
context_size = ctx_size
break
if model_id is None:
logger.error(f"Model not found: {model_choice}")
return chatbot + [[message, "Error: Model not found"]], ""
# Create messages from chatbot history
messages = format_to_message_dict(chatbot)
# Prepare message with images and documents if any
content = prepare_message_with_media(message, images, documents)
# Add current message
messages.append({"role": "user", "content": content})
# Call API
try:
logger.info(f"Sending request to model: {model_id}")
# Build the payload with all parameters
payload = {
"model": model_id,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
}
# Add optional parameters if they have non-default values
if top_p < 1.0:
payload["top_p"] = top_p
if frequency_penalty != 0:
payload["frequency_penalty"] = frequency_penalty
if presence_penalty != 0:
payload["presence_penalty"] = presence_penalty
# Add reasoning if selected
if reasoning_effort != "none":
payload["reasoning"] = {
"effort": reasoning_effort
}
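# Illustrative payload shape sent to OpenRouter (values shown here are examples only):
#   {"model": "google/gemini-2.0-flash-exp:free",
#    "messages": [{"role": "user", "content": "Hello"}],
#    "temperature": 0.7, "max_tokens": 1000, "reasoning": {"effort": "medium"}}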
logger.info(f"Request payload: {json.dumps(payload, default=str)}")
response = requests.post(
"https://openrouter.ai/api/v1/chat/completions",
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
"HTTP-Referer": "https://huggingface.co/spaces"
},
json=payload,
timeout=120 # Longer timeout for document processing
)
logger.info(f"Response status: {response.status_code}")
response_text = response.text
logger.debug(f"Response body: {response_text}")
if response.status_code == 200:
result = response.json()
ai_response = result.get("choices", [{}])[0].get("message", {}).get("content", "")
chatbot = chatbot + [[message, ai_response]]
# Log token usage if available
if "usage" in result:
logger.info(f"Token usage: {result['usage']}")
else:
error_message = f"Error: Status code {response.status_code}\n\nResponse: {response_text}"
chatbot = chatbot + [[message, error_message]]
except Exception as e:
logger.error(f"Exception during API call: {str(e)}")
chatbot = chatbot + [[message, f"Error: {str(e)}"]]
return chatbot, ""
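# NOTE: this ask_ai/clear_chat pair is a simpler baseline. The definitions inside the
# gr.Blocks context further below (streaming, system message, extra sampling options)
# rebind the same names and are the ones actually wired to the UI events.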
def clear_chat():
return [], "", [], [], 0.7, 1000, 0.8, 0.0, 0.0, "none"
def apply_sort(sort_option):
"""Apply sorting option to models list"""
if sort_option == "sort_context_desc":
# Sort by context size (high to low)
sorted_models = sorted(ALL_MODELS, key=lambda x: x[2], reverse=True)
elif sort_option == "sort_context_asc":
# Sort by context size (low to high)
sorted_models = sorted(ALL_MODELS, key=lambda x: x[2])
elif sort_option == "sort_newest":
# This would need a proper timestamp, using a rough approximation
# Models with "Experimental" in the name come first as they're likely newer
sorted_models = sorted(ALL_MODELS, key=lambda x: "Experimental" not in x[0])
elif sort_option == "sort_throughput" or sort_option == "sort_latency":
# These would need actual performance metrics
# For now, use model size as a rough proxy (smaller models generally have higher throughput and lower latency)
# Rough heuristic: models with smaller numbers in their names might be smaller
sorted_models = sorted(ALL_MODELS, key=lambda x: sum(int(s) for s in x[0] if s.isdigit()))
else:
# Default to context size sorting
sorted_models = sorted(ALL_MODELS, key=lambda x: x[2], reverse=True)
return sorted_models
def filter_models(search_term):
"""Filter models based on search term"""
if not search_term:
return gr.Dropdown.update(choices=[model[0] for model in ALL_MODELS], value=ALL_MODELS[0][0])
filtered_models = [model[0] for model in ALL_MODELS if search_term.lower() in model[0].lower()]
if filtered_models:
return gr.Dropdown.update(choices=filtered_models, value=filtered_models[0])
else:
return gr.Dropdown.update(choices=[model[0] for model in ALL_MODELS], value=ALL_MODELS[0][0])
def get_model_info(model_name):
"""Get model information by name"""
for model in ALL_MODELS:
if model[0] == model_name:
return model
return None
def update_context_display(model_name):
"""Update the context size display based on the selected model"""
model_info = get_model_info(model_name)
if model_info:
name, model_id, context_size = model_info
context_formatted = f"{context_size:,}"
return f"{context_formatted} tokens"
return "Unknown"
def update_models_from_sort(sort_option):
"""Update models list based on sorting option"""
for category in MODELS:
if category["category"] == "Sort By":
for option in category["models"]:
if option[0] == sort_option:
sort_key = option[1]
sorted_models = apply_sort(sort_key)
return gr.Dropdown.update(choices=[model[0] for model in sorted_models], value=sorted_models[0][0])
# Default sorting if option not found
return gr.Dropdown.update(choices=[model[0] for model in ALL_MODELS], value=ALL_MODELS[0][0])
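# NOTE: gr.Dropdown.update / gr.Radio.update are the Gradio 3.x idiom; on Gradio 4+ the
# equivalent (assumption, depending on the pinned version) is returning gr.update(choices=..., value=...)
# or a new component instance.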
# Create enhanced interface
with gr.Blocks(css="""
.context-size {
font-size: 0.9em;
color: #666;
margin-left: 10px;
}
footer { display: none !important; }
.model-selection-row {
display: flex;
align-items: center;
}
.parameter-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 10px;
}
""") as demo:
gr.Markdown("""
# Vision AI Chat
Chat with various AI vision models from OpenRouter with support for images and documents.
""")
with gr.Row():
with gr.Column(scale=2):
chatbot = gr.Chatbot(
height=500,
show_copy_button=True,
show_label=False,
avatar_images=(None, "https://upload.wikimedia.org/wikipedia/commons/0/04/ChatGPT_logo.svg")
)
with gr.Row():
message = gr.Textbox(
placeholder="Type your message here...",
label="Message",
lines=2
)
with gr.Row():
with gr.Column(scale=3):
submit_btn = gr.Button("Send", variant="primary")
with gr.Column(scale=1):
clear_btn = gr.Button("Clear Chat", variant="secondary")
with gr.Row():
# Image upload
with gr.Accordion("Upload Images", open=False):
images = gr.Gallery(
label="Uploaded Images",
show_label=True,
columns=4,
height="auto",
object_fit="contain"
)
image_upload_btn = gr.UploadButton(
label="Upload Images",
file_types=["image"],
file_count="multiple"
)
# Document upload
with gr.Accordion("Upload Documents (PDF, MD, TXT)", open=False):
documents = gr.File(
label="Uploaded Documents",
file_types=[".pdf", ".md", ".txt"],
file_count="multiple"
)
with gr.Column(scale=1):
with gr.Group():
gr.Markdown("### Model Selection")
with gr.Row(elem_classes="model-selection-row"):
model_search = gr.Textbox(
placeholder="Search models...",
label="",
show_label=False
)
with gr.Row(elem_classes="model-selection-row"):
model_choice = gr.Dropdown(
[model[0] for model in ALL_MODELS],
value=ALL_MODELS[0][0],
label="Model"
)
context_display = gr.Textbox(
value=update_context_display(ALL_MODELS[0][0]),
label="Context",
interactive=False,
elem_classes="context-size"
)
# Model category selection
with gr.Accordion("Browse by Category", open=False):
model_categories = gr.Radio(
[category["category"] for category in MODELS],
label="Categories",
value=MODELS[0]["category"]
)
category_models = gr.Radio(
[model[0] for model in MODELS[0]["models"]],
label="Models in Category"
)
# Sort options
with gr.Accordion("Sort Models", open=False):
sort_options = gr.Radio(
["Context: High to Low", "Context: Low to High", "Newest",
"Throughput: High to Low", "Latency: Low to High"],
label="Sort By",
value="Context: High to Low"
)
with gr.Accordion("Generation Parameters", open=False):
with gr.Group(elem_classes="parameter-grid"):
temperature = gr.Slider(
minimum=0.0,
maximum=2.0,
value=0.7,
step=0.1,
label="Temperature"
)
max_tokens = gr.Slider(
minimum=100,
maximum=4000,
value=1000,
step=100,
label="Max Tokens"
)
top_p = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.8,
step=0.1,
label="Top P"
)
frequency_penalty = gr.Slider(
minimum=-2.0,
maximum=2.0,
value=0.0,
step=0.1,
label="Frequency Penalty"
)
presence_penalty = gr.Slider(
minimum=-2.0,
maximum=2.0,
value=0.0,
step=0.1,
label="Presence Penalty"
)
reasoning_effort = gr.Radio(
["none", "low", "medium", "high"],
value="none",
label="Reasoning Effort"
)
with gr.Accordion("Advanced Options", open=False):
with gr.Row():
with gr.Column():
repetition_penalty = gr.Slider(
minimum=0.1,
maximum=2.0,
value=1.0,
step=0.1,
label="Repetition Penalty"
)
top_k = gr.Slider(
minimum=1,
maximum=100,
value=40,
step=1,
label="Top K"
)
min_p = gr.Slider(
minimum=0.0,
maximum=1.0,
value=0.1,
step=0.05,
label="Min P"
)
with gr.Column():
seed = gr.Number(
value=0,
label="Seed (0 for random)",
precision=0
)
top_a = gr.Slider(
minimum=0.0,
maximum=1.0,
value=0.0,
step=0.05,
label="Top A"
)
stream_output = gr.Checkbox(
label="Stream Output",
value=False
)
with gr.Row():
response_format = gr.Radio(
["default", "json_object"],
value="default",
label="Response Format"
)
gr.Markdown("""
* **json_object**: Forces the model to respond with valid JSON only.
* Only available on certain models - check model support on OpenRouter.
""")
# Custom instruction options
with gr.Accordion("Custom Instructions", open=False):
system_message = gr.Textbox(
placeholder="Enter a system message to guide the model's behavior...",
label="System Message",
lines=3
)
transforms = gr.CheckboxGroup(
["prompt_optimize", "prompt_distill", "prompt_compress"],
label="Prompt Transforms (OpenRouter specific)"
)
gr.Markdown("""
* **prompt_optimize**: Improve prompt for better responses.
* **prompt_distill**: Compress prompt to use fewer tokens without changing meaning.
* **prompt_compress**: Aggressively compress prompt to fit larger contexts.
""")
# Connect model search to dropdown filter
model_search.change(
fn=filter_models,
inputs=[model_search],
outputs=[model_choice]
)
# Update context display when model changes
model_choice.change(
fn=update_context_display,
inputs=[model_choice],
outputs=[context_display]
)
# Update model list when category changes
def update_category_models(category):
for cat in MODELS:
if cat["category"] == category:
return gr.Radio.update(choices=[model[0] for model in cat["models"]], value=cat["models"][0][0])
return gr.Radio.update(choices=[], value=None)
model_categories.change(
fn=update_category_models,
inputs=[model_categories],
outputs=[category_models]
)
# Update main model choice when category model is selected
category_models.change(
fn=lambda x: x,
inputs=[category_models],
outputs=[model_choice]
)
# Process uploaded images
def process_uploaded_images(files):
return [file.name for file in files]
image_upload_btn.upload(
fn=process_uploaded_images,
inputs=[image_upload_btn],
outputs=[images]
)
# Enhanced AI query function with all advanced parameters
def ask_ai(message, chatbot, model_choice, temperature, max_tokens, top_p,
frequency_penalty, presence_penalty, repetition_penalty, top_k,
min_p, seed, top_a, stream_output, response_format,
images, documents, reasoning_effort, system_message, transforms):
"""Comprehensive AI query function with all parameters"""
if not message.strip() and not images and not documents:
# Because this function contains `yield` (streaming branch) it is a generator, so yield the result and return
yield chatbot, ""
return
# Get model ID and context size
model_id = None
context_size = 0
for name, model_id_value, ctx_size in ALL_MODELS:
if name == model_choice:
model_id = model_id_value
context_size = ctx_size
break
if model_id is None:
logger.error(f"Model not found: {model_choice}")
yield chatbot + [[message, "Error: Model not found"]], ""
return
# Create messages from chatbot history
messages = format_to_message_dict(chatbot)
# Add system message if provided
if system_message and system_message.strip():
# Insert at the beginning to override any existing system message
for i, msg in enumerate(messages):
if msg.get("role") == "system":
messages.pop(i)
break
messages.insert(0, {"role": "system", "content": system_message.strip()})
# Prepare message with images and documents if any
content = prepare_message_with_media(message, images, documents)
# Add current message
messages.append({"role": "user", "content": content})
# Call API
try:
logger.info(f"Sending request to model: {model_id}")
# Build the comprehensive payload with all parameters
payload = {
"model": model_id,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
"top_p": top_p,
"frequency_penalty": frequency_penalty,
"presence_penalty": presence_penalty,
"repetition_penalty": repetition_penalty if repetition_penalty != 1.0 else None,
"top_k": top_k,
"min_p": min_p if min_p > 0 else None,
"seed": seed if seed > 0 else None,
"top_a": top_a if top_a > 0 else None,
"stream": stream_output
}
# Add response format if not default
if response_format == "json_object":
payload["response_format"] = {"type": "json_object"}
# Add reasoning if selected
if reasoning_effort != "none":
payload["reasoning"] = {
"effort": reasoning_effort
}
# Add transforms if selected
if transforms:
payload["transforms"] = transforms
# Remove None values
payload = {k: v for k, v in payload.items() if v is not None}
logger.info(f"Request payload: {json.dumps(payload, default=str)}")
response = requests.post(
"https://openrouter.ai/api/v1/chat/completions",
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
"HTTP-Referer": "https://huggingface.co/spaces"
},
json=payload,
timeout=180, # Longer timeout for document processing and streaming
stream=stream_output
)
logger.info(f"Response status: {response.status_code}")
if stream_output and response.status_code == 200:
# Handle streaming response
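# Illustrative SSE lines from the OpenAI-compatible stream (each prefixed with "data: "):
#   data: {"choices": [{"delta": {"content": "Hel"}}]}
#   data: [DONE]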
chatbot = chatbot + [[message, ""]]
for line in response.iter_lines():
if line:
line = line.decode('utf-8')
if line.startswith('data: '):
data = line[6:]
if data.strip() == '[DONE]':
break
try:
chunk = json.loads(data)
if "choices" in chunk and len(chunk["choices"]) > 0:
delta = chunk["choices"][0].get("delta", {})
if "content" in delta and delta["content"]:
chatbot[-1][1] += delta["content"]
yield chatbot, ""
except json.JSONDecodeError:
continue
yield chatbot, ""
return
elif response.status_code == 200:
# Handle normal response
result = response.json()
ai_response = result.get("choices", [{}])[0].get("message", {}).get("content", "")
chatbot = chatbot + [[message, ai_response]]
# Log token usage if available
if "usage" in result:
logger.info(f"Token usage: {result['usage']}")
else:
response_text = response.text
logger.info(f"Error response body: {response_text}")
error_message = f"Error: Status code {response.status_code}\n\nResponse: {response_text}"
chatbot = chatbot + [[message, error_message]]
except Exception as e:
logger.error(f"Exception during API call: {str(e)}")
chatbot = chatbot + [[message, f"Error: {str(e)}"]]
yield chatbot, ""
# Function to clear chat and reset parameters
def clear_chat():
return [], "", [], [], 0.7, 1000, 0.8, 0.0, 0.0, 1.0, 40, 0.1, 0, 0.0, False, "default", "none", "", []
# Set up events for the submit button
submit_btn.click(
fn=ask_ai,
inputs=[
message, chatbot, model_choice, temperature, max_tokens,
top_p, frequency_penalty, presence_penalty, repetition_penalty,
top_k, min_p, seed, top_a, stream_output, response_format,
images, documents, reasoning_effort, system_message, transforms
],
outputs=[chatbot, message]
)
# Set up events for message submission (pressing Enter)
message.submit(
fn=ask_ai,
inputs=[
message, chatbot, model_choice, temperature, max_tokens,
top_p, frequency_penalty, presence_penalty, repetition_penalty,
top_k, min_p, seed, top_a, stream_output, response_format,
images, documents, reasoning_effort, system_message, transforms
],
outputs=[chatbot, message]
)
# Set up events for the clear button
clear_btn.click(
fn=clear_chat,
inputs=[],
outputs=[
chatbot, message, images, documents, temperature,
max_tokens, top_p, frequency_penalty, presence_penalty,
repetition_penalty, top_k, min_p, seed, top_a, stream_output,
response_format, reasoning_effort, system_message, transforms
]
)
# Add a model information section
with gr.Accordion("About Selected Model", open=False):
model_info_display = gr.HTML(
value="<p>Select a model to see details</p>"
)
# Update model info when model changes
def update_model_info(model_name):
model_info = get_model_info(model_name)
if model_info:
name, model_id, context_size = model_info
return f"""
<div class="model-info">
<h3>{name}</h3>
<p><strong>Model ID:</strong> {model_id}</p>
<p><strong>Context Size:</strong> {context_size:,} tokens</p>
<p><strong>Provider:</strong> {model_id.split('/')[0]}</p>
</div>
"""
return "<p>Model information not available</p>"
model_choice.change(
fn=update_model_info,
inputs=[model_choice],
outputs=[model_info_display]
)
# Add usage instructions
with gr.Accordion("Usage Instructions", open=False):
gr.Markdown("""
## Basic Usage
1. Type your message in the input box
2. Select a model from the dropdown
3. Click "Send" or press Enter
## Working with Files
- **Images**: Upload images to use with vision-capable models like Llama 3.2 Vision
- **Documents**: Upload PDF, Markdown, or text files to analyze their content
## Advanced Parameters
- **Temperature**: Controls randomness (higher = more creative, lower = more deterministic)
- **Max Tokens**: Maximum length of the response
- **Top P**: Nucleus sampling threshold (higher = consider more tokens)
- **Reasoning Effort**: Some models can show their reasoning process
## Tips
- For code generation, use models like Qwen Coder
- For visual tasks, choose vision-capable models
- For long context, check the context window size next to the model name
""")
# Add a footer with version info
footer_md = gr.Markdown("""
---
### OpenRouter AI Chat Interface v1.0
Built with ❤️ using Gradio and OpenRouter API | Context sizes shown next to model names
""")
# Launch directly with Gradio's built-in server
if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=7860)