Spaces:

merterbak
/

grok

Running

App Files Files Community

grok / app.py

merterbak

UI update and added multiple file and streaming support

202de3a verified 5 months ago

raw

history blame

6.37 kB

	import os
	import base64
	import markdown
	import gradio as gr
	from openai import OpenAI
	from dotenv import load_dotenv
	from typing import List, Dict

	# Load variables from a .env file (python-dotenv) before reading the key.
	load_dotenv()
	XAI_API_KEY = os.environ.get("XAI_API_KEY")

	# The OpenAI client is pointed at the xAI base URL instead of its default.
	client = OpenAI(base_url="https://api.x.ai/v1", api_key=XAI_API_KEY)

	# TODO: experiment with different system prompts and update this one later.
	def build_system_prompt() -> dict:
	    """Return the system message placed at the start of every request."""
	    prompt_sentences = [
	        "You are Grok Vision, created by xAI. You're designed to understand and describe images and answer text-based queries. ",
	        "Use all previous conversation context to provide clear, positive, and helpful responses. ",
	        "Respond in markdown format when appropriate.",
	    ]
	    return {"role": "system", "content": "".join(prompt_sentences)}

	def encode_image(image_path: str) -> str:
	    """Encode a local JPEG or PNG file as a base64 ``data:`` URL.

	    Args:
	        image_path: Path of the image file on disk.

	    Returns:
	        A string of the form ``data:<mime type>;base64,<payload>``.

	    Raises:
	        ValueError: If the file is larger than 10MB, or its extension is not
	            ``.jpg``, ``.jpeg`` or ``.png`` (compared case-insensitively).
	    """
	    size_limit = 10 * 1024 * 1024
	    if os.path.getsize(image_path) > size_limit:
	        raise ValueError("Image exceeds maximum size of 10MB.")

	    # The MIME type is chosen from the file extension alone; the file
	    # contents are not inspected.
	    mime_by_suffix = {
	        ".jpg": "image/jpeg",
	        ".jpeg": "image/jpeg",
	        ".png": "image/png",
	    }
	    _, suffix = os.path.splitext(image_path)
	    mime_type = mime_by_suffix.get(suffix.lower())
	    if mime_type is None:
	        raise ValueError("Unsupported image format. Only JPEG and PNG are allowed.")

	    with open(image_path, "rb") as image_file:
	        raw_bytes = image_file.read()
	    payload = base64.b64encode(raw_bytes).decode("utf-8")
	    return "data:" + mime_type + ";base64," + payload

	def process_input(user_text: str, user_image_paths: List[str]) -> tuple[str, List[str]]:
	    """Split a user turn into prompt text and a list of image references.

	    Whitespace-separated tokens that start with ``http://`` or ``https://``
	    (case-insensitive) are taken out of the text and treated as image URLs.
	    Uploaded files are appended after them as base64 data URLs produced by
	    ``encode_image``.

	    Args:
	        user_text: Raw textbox content; ``None`` or empty is allowed.
	        user_image_paths: Paths of uploaded files; ``None`` or empty is
	            allowed, and falsy entries are skipped.

	    Returns:
	        ``(text, image_urls)`` where ``text`` is the remaining tokens joined
	        by single spaces and ``image_urls`` lists pasted URLs first, then
	        encoded uploads.

	    Raises:
	        ValueError: Propagated from ``encode_image`` for an oversized or
	            unsupported upload.
	    """
	    # Require a full scheme prefix: a bare "http" check would also swallow
	    # ordinary words such as "http-based" and drop them from the prompt.
	    url_prefixes = ("http://", "https://")

	    image_urls = []
	    remaining_text = []
	    for part in (user_text or "").split():
	        if part.lower().startswith(url_prefixes):
	            image_urls.append(part)
	        else:
	            remaining_text.append(part)

	    image_urls.extend(
	        encode_image(path) for path in (user_image_paths or []) if path
	    )
	    return " ".join(remaining_text), image_urls

	def create_message_content(text: str, image_urls: List[str]) -> list[dict]:
	    """Build the multi-part ``content`` list for one user message.

	    Each image URL becomes an ``image_url`` part with ``detail`` set to
	    ``"high"``; a non-empty ``text`` is added last as a single ``text`` part.
	    """
	    parts = [
	        {"type": "image_url", "image_url": {"url": url, "detail": "high"}}
	        for url in image_urls
	    ]
	    if text:
	        parts.append({"type": "text", "text": text})
	    return parts

	def stream_response(history: List[Dict], user_text: str, user_image_paths: List[str]):
	    """Stream the model's reply to a new user turn as successive chat histories.

	    Args:
	        history: Stored conversation; a list of dicts with ``role`` and
	            ``content`` keys (user entries may also carry ``image_urls``).
	            The new user turn is appended to this list in place.
	        user_text: Raw textbox content for the new turn.
	        user_image_paths: Paths of files uploaded with the new turn.

	    Yields:
	        A message list to display. While streaming, this is a copy of
	        ``history`` whose last entry is the assistant reply received so far.
	        For empty or invalid input it is ``history`` itself with a single
	        assistant notice appended.
	    """
	    try:
	        user_text, image_urls = process_input(user_text, user_image_paths)
	    except ValueError as err:
	        # encode_image rejects oversized or unsupported uploads with a
	        # ValueError; show its message in the chat, mirroring the
	        # empty-input notice below, instead of letting the generator fail.
	        history.append({"role": "assistant", "content": str(err)})
	        yield history
	        return

	    if not user_text and not image_urls:
	        history.append({"role": "assistant", "content": "Please provide text or at least one image (JPEG/PNG only)."})
	        yield history
	        return

	    # Replay the stored conversation, then add the new user turn.
	    messages = [build_system_prompt()]
	    for entry in history:
	        if entry["role"] == "user":
	            content = create_message_content(entry["content"], entry.get("image_urls", []))
	            messages.append({"role": "user", "content": content})
	        elif entry["role"] == "assistant":
	            messages.append({"role": "assistant", "content": entry["content"]})
	    new_content = create_message_content(user_text, image_urls)
	    messages.append({"role": "user", "content": new_content})
	    history.append({"role": "user", "content": user_text, "image_urls": image_urls})

	    stream = client.chat.completions.create(
	        model="grok-2-vision-1212",
	        messages=messages,
	        stream=True,
	        temperature=0.01,
	    )

	    response_text = ""
	    # Work on a shallow copy so the partial reply is not added to `history`.
	    temp_history = history.copy()
	    temp_history.append({"role": "assistant", "content": ""})
	    for chunk in stream:
	        # A chunk with an empty `choices` list would make `choices[0]`
	        # raise IndexError; skip it.
	        if not chunk.choices:
	            continue
	        delta_content = chunk.choices[0].delta.content
	        if delta_content is not None:
	            response_text += delta_content
	            temp_history[-1] = {"role": "assistant", "content": response_text}
	            yield temp_history

	    if not response_text:
	        # Nothing was streamed: still emit one update so the new user turn
	        # (with an empty reply) is displayed.
	        yield temp_history

	def clear_inputs_and_chat():
	    """Return reset values: two empty lists, an empty string, and ``None``."""
	    empty_chat = []
	    empty_state = []
	    return empty_chat, empty_state, "", None

	def update_and_clear(history: List[Dict], streamed_response: List[Dict]) -> tuple[List[Dict], str, None]:
	    """Fold the last displayed message into ``history`` and reset the inputs.

	    Args:
	        history: Stored conversation (list of dicts with ``role`` and
	            ``content``); updated in place.
	        streamed_response: Message list currently shown in the chat.

	    Returns:
	        ``(history, "", None)``.

	    The last message of ``streamed_response`` is appended to ``history`` when
	    ``history`` is empty or ends with a message of a different role. When the
	    roles match, the stored message is replaced only if its content differs.
	    Appending (rather than always overwriting ``history[-1]``) keeps the user
	    turn that precedes the reply, and avoids an ``IndexError`` on an empty
	    ``history``.
	    """
	    if streamed_response:
	        final_message = streamed_response[-1]
	        if not history or history[-1]["role"] != final_message["role"]:
	            history.append(final_message)
	        elif history[-1]["content"] != final_message["content"]:
	            history[-1] = final_message
	    return history, "", None

	# Gradio UI: intro text, chat display, upload/text inputs and event wiring.
	# NOTE(review): the nesting below was reconstructed from a listing whose
	# indentation was lost. In particular, confirm that the button row belongs
	# inside the message column.
	with gr.Blocks(
	    theme=gr.themes.Soft(),
	    css="""
	    .chatbot-container {max-height: 80vh; overflow-y: auto;}
	    .input-container {margin-top: 20px;}
	    .title {text-align: center; margin-bottom: 20px;}
	    """
	) as demo:
	    # The example URL is wrapped in a balanced pair of backticks so it
	    # renders as inline code.
	    gr.Markdown(
	        """
	        # Grok 2 Vision Chatbot 𝕏

	        Interact with Grok 2 Vision you can do:
	        - 📸 Upload one or more images (Max 10MB each)
	        - 🔗 Provide image URLs in your message (`https://example.com/image1.jpg`)
	        - ✍️ Ask text-only questions
	        - 💬 Chat history is preserved.
	        """
	    )

	    with gr.Column(elem_classes="chatbot-container"):
	        chatbot = gr.Chatbot(
	            label="Conversation",
	            type="messages",
	            bubble_full_width=False
	        )

	    with gr.Row(elem_classes="input-container"):
	        with gr.Column(scale=1):
	            image_input = gr.File(
	                file_count="multiple",
	                file_types=[".jpg", ".jpeg", ".png"],
	                label="Upload JPEG or PNG Images",
	                height=300,
	                interactive=True
	            )
	        with gr.Column(scale=3):
	            message_input = gr.Textbox(
	                label="Your Message",
	                placeholder="Type your question or paste JPEG/PNG image URLs",
	                lines=3
	            )
	            with gr.Row():
	                submit_btn = gr.Button("Send", variant="primary")
	                clear_btn = gr.Button("Clear", variant="secondary")

	    # Stored conversation, passed to the handlers alongside the visible chat.
	    state = gr.State([])

	    # Send: stream the reply into the chatbot, then sync the stored history
	    # and clear the text and file inputs.
	    submit_btn.click(
	        fn=stream_response,
	        inputs=[state, message_input, image_input],
	        outputs=chatbot,
	        queue=True
	    ).then(
	        fn=update_and_clear,
	        inputs=[state, chatbot],
	        outputs=[state, message_input, image_input]
	    )

	    # Clear: reset the chat display, stored history and both inputs.
	    clear_btn.click(
	        fn=clear_inputs_and_chat,
	        inputs=[],
	        outputs=[chatbot, state, message_input, image_input]
	    )

	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()