# CrispChat / app.py
import os
import base64
import gradio as gr
import requests
import json
from io import BytesIO
from PIL import Image
import time
# Get API key from environment variable for security
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
# Model information
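# Each entry is (display name, OpenRouter model id, 0, 0, context length in tokens);
# the two zero fields are ignored below — only the name, id, and context length are used.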
free_models = [
("Google: Gemini Pro 2.0 Experimental (free)", "google/gemini-2.0-pro-exp-02-05:free", 0, 0, 2000000),
("Google: Gemini 2.0 Flash Thinking Experimental 01-21 (free)", "google/gemini-2.0-flash-thinking-exp:free", 0, 0, 1048576),
("Google: Gemini Flash 2.0 Experimental (free)", "google/gemini-2.0-flash-exp:free", 0, 0, 1048576),
("Google: Gemini Pro 2.5 Experimental (free)", "google/gemini-2.5-pro-exp-03-25:free", 0, 0, 1000000),
("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 0, 0, 1000000),
("DeepSeek: DeepSeek R1 Zero (free)", "deepseek/deepseek-r1-zero:free", 0, 0, 163840),
("DeepSeek: R1 (free)", "deepseek/deepseek-r1:free", 0, 0, 163840),
("DeepSeek: DeepSeek V3 Base (free)", "deepseek/deepseek-v3-base:free", 0, 0, 131072),
("DeepSeek: DeepSeek V3 0324 (free)", "deepseek/deepseek-chat-v3-0324:free", 0, 0, 131072),
("Google: Gemma 3 4B (free)", "google/gemma-3-4b-it:free", 0, 0, 131072),
("Google: Gemma 3 12B (free)", "google/gemma-3-12b-it:free", 0, 0, 131072),
("Nous: DeepHermes 3 Llama 3 8B Preview (free)", "nousresearch/deephermes-3-llama-3-8b-preview:free", 0, 0, 131072),
("Qwen: Qwen2.5 VL 72B Instruct (free)", "qwen/qwen2.5-vl-72b-instruct:free", 0, 0, 131072),
("DeepSeek: DeepSeek V3 (free)", "deepseek/deepseek-chat:free", 0, 0, 131072),
("NVIDIA: Llama 3.1 Nemotron 70B Instruct (free)", "nvidia/llama-3.1-nemotron-70b-instruct:free", 0, 0, 131072),
("Meta: Llama 3.2 1B Instruct (free)", "meta-llama/llama-3.2-1b-instruct:free", 0, 0, 131072),
("Meta: Llama 3.2 11B Vision Instruct (free)", "meta-llama/llama-3.2-11b-vision-instruct:free", 0, 0, 131072),
("Meta: Llama 3.1 8B Instruct (free)", "meta-llama/llama-3.1-8b-instruct:free", 0, 0, 131072),
("Mistral: Mistral Nemo (free)", "mistralai/mistral-nemo:free", 0, 0, 128000),
("Mistral: Mistral Small 3.1 24B (free)", "mistralai/mistral-small-3.1-24b-instruct:free", 0, 0, 96000),
("Google: Gemma 3 27B (free)", "google/gemma-3-27b-it:free", 0, 0, 96000),
("Qwen: Qwen2.5 VL 3B Instruct (free)", "qwen/qwen2.5-vl-3b-instruct:free", 0, 0, 64000),
("DeepSeek: R1 Distill Qwen 14B (free)", "deepseek/deepseek-r1-distill-qwen-14b:free", 0, 0, 64000),
("Qwen: Qwen2.5-VL 7B Instruct (free)", "qwen/qwen-2.5-vl-7b-instruct:free", 0, 0, 64000),
("Google: LearnLM 1.5 Pro Experimental (free)", "google/learnlm-1.5-pro-experimental:free", 0, 0, 40960),
("Qwen: QwQ 32B (free)", "qwen/qwq-32b:free", 0, 0, 40000),
("Google: Gemini 2.0 Flash Thinking Experimental (free)", "google/gemini-2.0-flash-thinking-exp-1219:free", 0, 0, 40000),
("Bytedance: UI-TARS 72B (free)", "bytedance-research/ui-tars-72b:free", 0, 0, 32768),
("Qwerky 72b (free)", "featherless/qwerky-72b:free", 0, 0, 32768),
("OlympicCoder 7B (free)", "open-r1/olympiccoder-7b:free", 0, 0, 32768),
("OlympicCoder 32B (free)", "open-r1/olympiccoder-32b:free", 0, 0, 32768),
("Google: Gemma 3 1B (free)", "google/gemma-3-1b-it:free", 0, 0, 32768),
("Reka: Flash 3 (free)", "rekaai/reka-flash-3:free", 0, 0, 32768),
("Dolphin3.0 R1 Mistral 24B (free)", "cognitivecomputations/dolphin3.0-r1-mistral-24b:free", 0, 0, 32768),
("Dolphin3.0 Mistral 24B (free)", "cognitivecomputations/dolphin3.0-mistral-24b:free", 0, 0, 32768),
("Mistral: Mistral Small 3 (free)", "mistralai/mistral-small-24b-instruct-2501:free", 0, 0, 32768),
("Qwen2.5 Coder 32B Instruct (free)", "qwen/qwen-2.5-coder-32b-instruct:free", 0, 0, 32768),
("Qwen2.5 72B Instruct (free)", "qwen/qwen-2.5-72b-instruct:free", 0, 0, 32768),
("Meta: Llama 3.2 3B Instruct (free)", "meta-llama/llama-3.2-3b-instruct:free", 0, 0, 20000),
("Qwen: QwQ 32B Preview (free)", "qwen/qwq-32b-preview:free", 0, 0, 16384),
("DeepSeek: R1 Distill Qwen 32B (free)", "deepseek/deepseek-r1-distill-qwen-32b:free", 0, 0, 16000),
("Qwen: Qwen2.5 VL 32B Instruct (free)", "qwen/qwen2.5-vl-32b-instruct:free", 0, 0, 8192),
("Moonshot AI: Moonlight 16B A3B Instruct (free)", "moonshotai/moonlight-16b-a3b-instruct:free", 0, 0, 8192),
("DeepSeek: R1 Distill Llama 70B (free)", "deepseek/deepseek-r1-distill-llama-70b:free", 0, 0, 8192),
("Qwen 2 7B Instruct (free)", "qwen/qwen-2-7b-instruct:free", 0, 0, 8192),
("Google: Gemma 2 9B (free)", "google/gemma-2-9b-it:free", 0, 0, 8192),
("Mistral: Mistral 7B Instruct (free)", "mistralai/mistral-7b-instruct:free", 0, 0, 8192),
("Microsoft: Phi-3 Mini 128K Instruct (free)", "microsoft/phi-3-mini-128k-instruct:free", 0, 0, 8192),
("Microsoft: Phi-3 Medium 128K Instruct (free)", "microsoft/phi-3-medium-128k-instruct:free", 0, 0, 8192),
("Meta: Llama 3 8B Instruct (free)", "meta-llama/llama-3-8b-instruct:free", 0, 0, 8192),
("OpenChat 3.5 7B (free)", "openchat/openchat-7b:free", 0, 0, 8192),
("Meta: Llama 3.3 70B Instruct (free)", "meta-llama/llama-3.3-70b-instruct:free", 0, 0, 8000),
("AllenAI: Molmo 7B D (free)", "allenai/molmo-7b-d:free", 0, 0, 4096),
("Rogue Rose 103B v0.2 (free)", "sophosympatheia/rogue-rose-103b-v0.2:free", 0, 0, 4096),
("Toppy M 7B (free)", "undi95/toppy-m-7b:free", 0, 0, 4096),
("Hugging Face: Zephyr 7B (free)", "huggingfaceh4/zephyr-7b-beta:free", 0, 0, 4096),
("MythoMax 13B (free)", "gryphe/mythomax-l2-13b:free", 0, 0, 4096),
]
# Filter for vision models
vision_model_ids = [
"meta-llama/llama-3.2-11b-vision-instruct:free",
"qwen/qwen2.5-vl-72b-instruct:free",
"qwen/qwen2.5-vl-3b-instruct:free",
"qwen/qwen2.5-vl-32b-instruct:free",
"qwen/qwen-2.5-vl-7b-instruct:free",
"google/gemini-2.0-pro-exp-02-05:free",
"google/gemini-2.5-pro-exp-03-25:free"
]
# Format model names to include context size
def format_model_name(name, context_size):
if context_size >= 1000000:
context_str = f"{context_size/1000000:.1f}M tokens"
else:
context_str = f"{context_size/1000:.0f}K tokens"
return f"{name} ({context_str})"
# Prefilter vision models
vision_models = [(format_model_name(name, context_size), model_id, context_size)
for name, model_id, _, _, context_size in free_models
if model_id in vision_model_ids]
text_models = [(format_model_name(name, context_size), model_id, context_size)
for name, model_id, _, _, context_size in free_models]
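# Note: text_models lists every free model (vision-capable ones included);
# vision_models is the image-capable subset shown when "Using image" is checked.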
def encode_image(image):
    """Convert a PIL Image to a base64-encoded JPEG string."""
    # JPEG cannot store an alpha channel, so convert e.g. RGBA/P images first
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
def encode_file(file_path):
"""Convert text file to string"""
try:
with open(file_path, 'r', encoding='utf-8') as file:
return file.read()
except Exception as e:
return f"Error reading file: {str(e)}"
def process_message_stream(message, chat_history, model_name, uploaded_image=None, uploaded_file=None,
temperature=0.7, top_p=1.0, max_tokens=None, stream=True):
"""Process message and stream the model response"""
    # Resolve the OpenRouter model id from the display name selected in the dropdown
    model_id = next(
        (mid for display_name, mid, _ in text_models + vision_models if display_name == model_name),
        model_name,
    )
    # Check that the API key is set
    if not OPENROUTER_API_KEY:
        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": "Please set your OpenRouter API key in the OPENROUTER_API_KEY environment variable."})
        yield chat_history
        return
# Setup headers and URL
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
"HTTP-Referer": "https://huggingface.co/spaces", # Replace with your actual space URL in production
}
url = "https://openrouter.ai/api/v1/chat/completions"
# Build message content
messages = []
# Add chat history
for item in chat_history:
if isinstance(item, tuple):
# Old format compatibility
human_msg, ai_msg = item
messages.append({"role": "user", "content": human_msg})
messages.append({"role": "assistant", "content": ai_msg})
else:
# New message format
messages.append(item)
# Add current message with any attachments
if uploaded_image:
# Image processing for vision models
base64_image = encode_image(uploaded_image)
content = [
{"type": "text", "text": message}
]
# Add text from file if provided
if uploaded_file:
file_content = encode_file(uploaded_file)
content[0]["text"] = f"{message}\n\nFile content:\n```\n{file_content}\n```"
# Add image
content.append({
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
}
})
messages.append({"role": "user", "content": content})
else:
if uploaded_file:
file_content = encode_file(uploaded_file)
content = f"{message}\n\nFile content:\n```\n{file_content}\n```"
messages.append({"role": "user", "content": content})
else:
messages.append({"role": "user", "content": message})
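    # For reference, a multimodal user turn sent to OpenRouter looks like:
    #   {"role": "user", "content": [
    #       {"type": "text", "text": "Describe this image"},
    #       {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}]}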
    # Look up the context length for the selected model (default 4096 if unknown)
    context_length = next(
        (context for _, mid, context in text_models if mid == model_id),
        4096,
    )
    # Default max_tokens to 25% of the context length, capped at 4000
    if not max_tokens:
        max_tokens = min(4000, int(context_length * 0.25))
# Build request data
data = {
"model": model_id,
"messages": messages,
"stream": stream,
"temperature": temperature,
"top_p": top_p,
"max_tokens": max_tokens
}
try:
# Create a new message pair in the chat history
user_msg = {"role": "user", "content": message}
ai_msg = {"role": "assistant", "content": ""}
chat_history.append(user_msg)
chat_history.append(ai_msg)
full_response = ""
        if stream:
            # Stream the response via server-sent events ("data: ..." lines)
            with requests.post(url, headers=headers, json=data, stream=True) as response:
                response.raise_for_status()
                buffer = ""
                done = False
                for chunk in response.iter_content(chunk_size=1024, decode_unicode=False):
                    if not chunk:
                        continue
                    buffer += chunk.decode('utf-8')
                    while True:
                        line_end = buffer.find('\n')
                        if line_end == -1:
                            break
                        line = buffer[:line_end].strip()
                        buffer = buffer[line_end + 1:]
                        if not line.startswith('data: '):
                            continue
                        payload = line[6:]  # avoid shadowing the request body held in `data`
                        if payload == '[DONE]':
                            done = True
                            break
                        try:
                            data_obj = json.loads(payload)
                            delta_content = data_obj["choices"][0]["delta"].get("content", "")
                            if delta_content:
                                full_response += delta_content
                                # Update the last assistant message in place
                                chat_history[-1]["content"] = full_response
                                yield chat_history
                        except json.JSONDecodeError:
                            pass
                    if done:
                        break
else:
# Non-streaming API call
response = requests.post(url, headers=headers, json=data)
response.raise_for_status()
result = response.json()
full_response = result.get("choices", [{}])[0].get("message", {}).get("content", "No response")
chat_history[-1]["content"] = full_response
yield chat_history
return chat_history
except Exception as e:
error_msg = f"Error: {str(e)}"
chat_history[-1]["content"] = error_msg
yield chat_history
# Create a nice CSS theme
css = """
.gradio-container {
font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
}
.chat-message {
padding: 15px;
border-radius: 10px;
margin-bottom: 10px;
}
.user-message {
background-color: #f0f4f8;
}
.assistant-message {
background-color: #e9f5ff;
}
#chat-container {
height: 600px;
overflow-y: auto;
}
#chat-input {
min-height: 120px;
border-radius: 8px;
padding: 10px;
}
#model-select-container {
border-radius: 8px;
padding: 15px;
background-color: #f8fafc;
}
.app-header {
text-align: center;
margin-bottom: 20px;
}
.app-header h1 {
font-weight: 700;
color: #2C3E50;
margin-bottom: 5px;
}
.app-header p {
color: #7F8C8D;
margin-top: 0;
}
.parameter-container {
background-color: #f8fafc;
padding: 10px;
border-radius: 8px;
margin-top: 10px;
}
.file-upload-container {
margin-top: 10px;
}
"""
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
gr.HTML("""
<div class="app-header">
<h1>🔆 CrispChat</h1>
<p>Chat with free OpenRouter AI models - supports text, images, and files</p>
</div>
""")
with gr.Row():
with gr.Column(scale=4):
chatbot = gr.Chatbot(
height=600,
show_copy_button=True,
show_share_button=False,
elem_id="chatbot",
layout="bubble",
avatar_images=("👤", "🤖"),
bubble_full_width=False,
type="messages" # Use new message format
)
with gr.Row():
with gr.Column(scale=10):
user_message = gr.Textbox(
placeholder="Type your message here...",
show_label=False,
elem_id="chat-input",
lines=3
)
with gr.Row():
image_upload = gr.Image(
type="pil",
label="Image (optional)",
show_label=True,
scale=1
)
file_upload = gr.File(
label="Text File (optional)",
file_types=[".txt", ".md", ".py", ".js", ".html", ".css", ".json"],
scale=1
)
submit_btn = gr.Button("Send", scale=1, variant="primary")
with gr.Column(scale=2):
with gr.Accordion("Model Selection", open=True):
using_vision = gr.Checkbox(label="Using image", value=False)
model_selector = gr.Dropdown(
choices=[name for name, _, _ in text_models],
value=text_models[0][0],
label="Select Model",
elem_id="model-selector"
)
context_info = gr.Markdown(value=f"Context: {text_models[0][2]:,} tokens")
with gr.Accordion("Parameters", open=False):
with gr.Group():
temperature = gr.Slider(
minimum=0.0,
maximum=2.0,
value=0.7,
step=0.1,
label="Temperature",
info="Higher = more creative, Lower = more deterministic"
)
top_p = gr.Slider(
minimum=0.1,
maximum=1.0,
value=1.0,
step=0.1,
label="Top P",
info="Controls token diversity"
)
max_tokens = gr.Slider(
minimum=100,
maximum=8000,
value=1000,
step=100,
label="Max Tokens",
info="Maximum length of the response"
)
use_streaming = gr.Checkbox(
label="Stream Response",
value=True,
info="Show response as it's generated"
)
with gr.Accordion("Tips", open=False):
gr.Markdown("""
* Select a vision-capable model for images
* Upload text files to include their content
* Check model context window sizes
* Adjust temperature for creativity level
* Top P controls diversity of responses
""")
# Define events
def update_model_selector(use_vision):
if use_vision:
return (
gr.Dropdown(choices=[name for name, _, _ in vision_models], value=vision_models[0][0]),
f"Context: {vision_models[0][2]:,} tokens"
)
else:
return (
gr.Dropdown(choices=[name for name, _, _ in text_models], value=text_models[0][0]),
f"Context: {text_models[0][2]:,} tokens"
)
def update_context_info(model_name):
# Extract context size from model name
for name, _, context_size in text_models:
if name == model_name:
return f"Context: {context_size:,} tokens"
for name, _, context_size in vision_models:
if name == model_name:
return f"Context: {context_size:,} tokens"
return "Context size unknown"
using_vision.change(
fn=update_model_selector,
inputs=using_vision,
outputs=[model_selector, context_info]
)
model_selector.change(
fn=update_context_info,
inputs=model_selector,
outputs=context_info
)
    # Submit function: a generator, so streamed updates reach the Chatbot as they arrive
    def on_submit(message, history, model, image, file, temp, top_p_val, max_tok, stream):
        if not message and not image and not file:
            yield "", history
            return
        for updated_history in process_message_stream(
            message,
            history,
            model,
            image,
            file.name if file else None,
            temperature=temp,
            top_p=top_p_val,
            max_tokens=max_tok,
            stream=stream,
        ):
            yield "", updated_history
# Set up submission events
submit_btn.click(
on_submit,
inputs=[
user_message, chatbot, model_selector,
image_upload, file_upload,
temperature, top_p, max_tokens, use_streaming
],
outputs=[user_message, chatbot]
)
user_message.submit(
on_submit,
inputs=[
user_message, chatbot, model_selector,
image_upload, file_upload,
temperature, top_p, max_tokens, use_streaming
],
outputs=[user_message, chatbot]
)
# Define FastAPI endpoint
from typing import Optional

from fastapi import FastAPI
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

app = FastAPI()
class GenerateRequest(BaseModel):
    message: str
    model: Optional[str] = None
    image_data: Optional[str] = None
@app.post("/api/generate")
async def api_generate(request: GenerateRequest):
"""API endpoint for generating responses"""
try:
message = request.message
model_name = request.model
image_data = request.image_data
# Process image if provided
image = None
if image_data:
try:
# Decode base64 image
image_bytes = base64.b64decode(image_data)
image = Image.open(BytesIO(image_bytes))
except Exception as e:
return JSONResponse(
status_code=400,
content={"error": f"Image processing error: {str(e)}"}
)
# Generate response
try:
# Setup headers and URL
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
"HTTP-Referer": "https://huggingface.co/spaces",
}
url = "https://openrouter.ai/api/v1/chat/completions"
# Get model_id from model_name
model_id = None
if model_name:
for _, mid, _ in text_models + vision_models:
if model_name in mid or model_name == mid:
model_id = mid
break
if not model_id:
model_id = text_models[0][1]
# Build messages
messages = []
if image:
# Image processing for vision models
base64_image = encode_image(image)
content = [
{"type": "text", "text": message},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
}
}
]
messages.append({"role": "user", "content": content})
else:
messages.append({"role": "user", "content": message})
# Build request data
data = {
"model": model_id,
"messages": messages,
"temperature": 0.7
}
# Make API call
response = requests.post(url, headers=headers, json=data)
response.raise_for_status()
# Parse response
result = response.json()
reply = result.get("choices", [{}])[0].get("message", {}).get("content", "No response")
return {"response": reply}
except Exception as e:
return JSONResponse(
status_code=500,
content={"error": f"Error generating response: {str(e)}"}
)
except Exception as e:
return JSONResponse(
status_code=500,
content={"error": f"Server error: {str(e)}"}
)
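# A minimal client sketch for the endpoint above (assumes the app is reachable at
# http://localhost:7860; the model id is one of the free ids listed in free_models):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/api/generate",
#       json={"message": "Hello!", "model": "mistralai/mistral-7b-instruct:free"},
#   )
#   print(resp.json()["response"])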
# Add CORS middleware to allow cross-origin requests
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Mount the Gradio app onto the FastAPI server (gr is already imported at the top)
app = gr.mount_gradio_app(app, demo, path="/")
# Start the app
if __name__ == "__main__":
# Use 'uvicorn' directly in HF Spaces
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)
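# Local run sketch (assumes uvicorn is installed and the API key is exported):
#   OPENROUTER_API_KEY=<your key> python app.py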