grok / app.py
merterbak's picture
UI update and added multiple file and streaming support
202de3a verified
raw
history blame
6.37 kB
import os
import base64
import markdown
import gradio as gr
from openai import OpenAI
from dotenv import load_dotenv
from typing import List, Dict
# Run python-dotenv's loader before the API key is read from the environment.
load_dotenv()

# May be None when the variable is not set; it is handed to the client as-is.
XAI_API_KEY = os.getenv("XAI_API_KEY")

# OpenAI SDK client pointed at the xAI endpoint instead of the default one.
client = OpenAI(base_url="https://api.x.ai/v1", api_key=XAI_API_KEY)
# TODO: the system prompt is provisional; experiment with it and revise later.
def build_system_prompt() -> dict:
    """Return the system message placed at the start of every request.

    Returns:
        A chat message dict with ``role`` set to ``"system"`` and the
        instruction text under ``content``.
    """
    instructions = (
        "You are Grok Vision, created by xAI. You're designed to understand and describe images and answer text-based queries.",
        "Use all previous conversation context to provide clear, positive, and helpful responses.",
        "Respond in markdown format when appropriate.",
    )
    # The pieces are joined with single spaces into one prompt string.
    prompt = " ".join(instructions)
    return {"role": "system", "content": prompt}
def encode_image(image_path: str) -> str:
    """Encode a local JPEG/PNG file as a base64 ``data:`` URL.

    Args:
        image_path: Path of the image file on disk.

    Returns:
        A string of the form ``data:<mime type>;base64,<payload>``.

    Raises:
        ValueError: If the file is larger than 10 MiB, or its extension is
            not ``.jpg``, ``.jpeg`` or ``.png`` (compared case-insensitively).
    """
    size_limit = 10 * 1024 * 1024
    # The size check runs first, so a missing file fails here (os.path.getsize).
    if os.path.getsize(image_path) > size_limit:
        raise ValueError("Image exceeds maximum size of 10MB.")

    mime_by_suffix = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
    }
    _, suffix = os.path.splitext(image_path)
    mime_type = mime_by_suffix.get(suffix.lower())
    if mime_type is None:
        raise ValueError("Unsupported image format. Only JPEG and PNG are allowed.")

    with open(image_path, "rb") as handle:
        payload = base64.b64encode(handle.read())
    return "data:" + mime_type + ";base64," + payload.decode("utf-8")
def process_input(user_text: str, user_image_paths: List[str]) -> tuple[str, List[str]]:
    """Split a user submission into prompt text and image references.

    Whitespace-separated words that start with an ``http://`` or ``https://``
    scheme (case-insensitive) are taken out of the text and treated as image
    URLs. Ordinary words that merely begin with "http" (``httpd``, ``http``)
    stay in the prompt. The remaining words are re-joined with single spaces.
    Uploaded files are converted with ``encode_image`` and appended after the
    URLs found in the text.

    Args:
        user_text: Raw message text; ``None`` or empty is treated as no text.
        user_image_paths: Paths of uploaded files; ``None``, an empty list and
            falsy entries are ignored.

    Returns:
        ``(text, image_urls)``: the prompt text without URLs, and the list of
        image URLs / data URLs.

    Raises:
        ValueError: Propagated from ``encode_image`` for rejected uploads.
    """
    url_schemes = ("http://", "https://")
    image_urls: List[str] = []
    kept_words: List[str] = []

    for word in (user_text.split() if user_text else []):
        # Require a full scheme so plain words such as "httpd" are not
        # mistaken for links and silently dropped from the question.
        if word.lower().startswith(url_schemes):
            image_urls.append(word)
        else:
            kept_words.append(word)

    image_urls.extend(encode_image(path) for path in (user_image_paths or []) if path)
    return " ".join(kept_words), image_urls
def create_message_content(text: str, image_urls: List[str]) -> list[dict]:
    """Build the multi-part ``content`` list for one user message.

    Args:
        text: Prompt text; an empty string adds no text part.
        image_urls: Image URLs or data URLs, one image part each.

    Returns:
        All image parts in input order (each requesting ``"high"`` detail),
        followed by a single text part when ``text`` is non-empty.
    """
    parts = [
        {"type": "image_url", "image_url": {"url": url, "detail": "high"}}
        for url in image_urls
    ]
    if text:
        parts.append({"type": "text", "text": text})
    return parts
def stream_response(history: List[Dict], user_text: str, user_image_paths: List[str]):
    """Send the conversation to the model and stream the reply to the chat.

    This is a generator. Each yielded value is a list of message dicts to
    display in the chatbot.

    Args:
        history: Conversation so far as ``{"role", "content"}`` dicts; user
            entries may also carry ``"image_urls"``. The list is mutated in
            place: the new user turn, and then the completed assistant reply,
            are appended to it.
        user_text: Text typed by the user (may contain image URLs).
        user_image_paths: Paths of uploaded image files, or ``None``.

    Yields:
        The message list to display, updated as response chunks arrive.
    """
    try:
        user_text, image_urls = process_input(user_text, user_image_paths)
    except ValueError as exc:
        # Image validation failures (size / format) carry a user-facing
        # message; show it in the chat the same way as the empty-input hint
        # below instead of failing the whole event.
        history.append({"role": "assistant", "content": str(exc)})
        yield history
        return

    if not user_text and not image_urls:
        history.append({"role": "assistant", "content": "Please provide text or at least one image (JPEG/PNG only)."})
        yield history
        return

    # Rebuild the full request: system prompt, prior turns, then the new turn.
    messages = [build_system_prompt()]
    for entry in history:
        if entry["role"] == "user":
            content = create_message_content(entry["content"], entry.get("image_urls", []))
            messages.append({"role": "user", "content": content})
        elif entry["role"] == "assistant":
            messages.append({"role": "assistant", "content": entry["content"]})
    messages.append({"role": "user", "content": create_message_content(user_text, image_urls)})
    history.append({"role": "user", "content": user_text, "image_urls": image_urls})

    stream = client.chat.completions.create(
        model="grok-2-vision-1212",
        messages=messages,
        stream=True,
        temperature=0.01,
    )

    response_text = ""
    # Stream into a copy so `history` never holds a half-written reply.
    temp_history = history.copy()
    temp_history.append({"role": "assistant", "content": ""})
    for chunk in stream:
        # Some stream chunks carry no choices; indexing them would raise.
        if not chunk.choices:
            continue
        delta_content = chunk.choices[0].delta.content
        if delta_content is not None:
            response_text += delta_content
            temp_history[-1] = {"role": "assistant", "content": response_text}
            yield temp_history

    # Record the finished reply so the next request includes it and the user
    # turn is not left as the last history entry. Skip an empty reply rather
    # than storing a blank assistant message.
    if response_text:
        history.append({"role": "assistant", "content": response_text})
def clear_inputs_and_chat():
    """Return reset values for the four components wired to the Clear button.

    Returns:
        A 4-tuple: two separate empty lists, an empty string and ``None``.
    """
    empty_chat: list = []
    empty_state: list = []
    return empty_chat, empty_state, "", None
def update_and_clear(history: List[Dict], streamed_response: List[Dict]) -> tuple[List[Dict], str, None]:
    """Sync the stored history with the streamed chat and reset the inputs.

    The last streamed message is merged into ``history`` as follows:

    * ``history`` is empty, or its last entry has a different role (the usual
      case: last stored entry is the user turn, last streamed message is the
      assistant reply): the message is appended, so the user turn and its
      images are kept.
    * same role, different content: the stale entry is replaced.
    * same role, same content: nothing changes.

    Args:
        history: Stored conversation; mutated in place and returned.
        streamed_response: Messages currently shown in the chatbot.

    Returns:
        ``(history, "", None)``: the updated history plus the cleared values
        for the message box and the file upload.
    """
    if streamed_response:
        latest = streamed_response[-1]
        previous = history[-1] if history else None
        if previous is None or previous.get("role") != latest.get("role"):
            # Appending (not overwriting) keeps the preceding user message.
            history.append(latest)
        elif previous.get("content") != latest.get("content"):
            history[-1] = latest
    return history, "", None
# Page layout: intro text, conversation view, input row (files + text), buttons.
with gr.Blocks(
    theme=gr.themes.Soft(),
    css="""
    .chatbot-container {max-height: 80vh; overflow-y: auto;}
    .input-container {margin-top: 20px;}
    .title {text-align: center; margin-bottom: 20px;}
    """,
) as demo:
    gr.Markdown(
        """
        # Grok 2 Vision Chatbot 𝕏
        Interact with Grok 2 Vision you can do:
        - 📸 Upload one or more images (Max 10MB each)
        - 🔗 Provide image URLs in your message (`https://example.com/image1.jpg`)
        - ✍️ Ask text-only questions
        - 💬 Chat history is preserved.
        """
    )

    with gr.Column(elem_classes="chatbot-container"):
        chatbot = gr.Chatbot(
            label="Conversation",
            type="messages",
            bubble_full_width=False,
        )

    with gr.Row(elem_classes="input-container"):
        with gr.Column(scale=1):
            image_input = gr.File(
                file_count="multiple",
                file_types=[".jpg", ".jpeg", ".png"],
                label="Upload JPEG or PNG Images",
                height=300,
                interactive=True,
            )
        with gr.Column(scale=3):
            message_input = gr.Textbox(
                label="Your Message",
                placeholder="Type your question or paste JPEG/PNG image URLs",
                lines=3,
            )

    with gr.Row():
        submit_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")

    # Conversation history passed to the handlers; starts as an empty list.
    state = gr.State([])

    # Send: stream the reply into the chatbot, then sync the state and clear
    # the message box and file upload.
    submit_btn.click(
        fn=stream_response,
        inputs=[state, message_input, image_input],
        outputs=chatbot,
        queue=True,
    ).then(
        fn=update_and_clear,
        inputs=[state, chatbot],
        outputs=[state, message_input, image_input],
    )

    # Clear: reset the conversation view, the stored history and both inputs.
    clear_btn.click(
        fn=clear_inputs_and_chat,
        inputs=[],
        outputs=[chatbot, state, message_input, image_input],
    )

if __name__ == "__main__":
    demo.launch()