# Hugging Face Space: chat demo for Meta Llama 3.2 11B Vision, served
# for free through the Together AI API.
import gradio as gr | |
from PIL import Image | |
import requests | |
import os | |
from together import Together | |
import base64 | |
from threading import Thread | |
import time | |
import io | |
# Module-level Together client. Created lazily by initialize_client() on the
# first chat request, once a TOGETHER_API_KEY is available.
client = None
def initialize_client(api_key=None):
    """Create the module-level Together client.

    If *api_key* is provided it is exported as ``TOGETHER_API_KEY`` first;
    the ``Together()`` constructor then picks the key up from the
    environment.

    Raises:
        ValueError: if no key was supplied and none is in the environment.
    """
    global client
    if api_key:
        os.environ["TOGETHER_API_KEY"] = api_key
    # Guard clause: fail fast when no key is available from any source.
    if "TOGETHER_API_KEY" not in os.environ:
        raise ValueError("Please provide a Together API Key")
    client = Together()
def encode_image(image_path, max_size=(800, 800), quality=85):
    """Load an image, downscale it, and return it as a base64 JPEG string.

    Args:
        image_path: Path to the image file on disk.
        max_size: Maximum (width, height); the image is shrunk in place
            (aspect ratio preserved) when it exceeds these bounds.
        quality: JPEG quality (1-95) used when re-encoding.

    Returns:
        Base64-encoded JPEG bytes decoded to an ASCII ``str``, suitable for
        a ``data:image/jpeg;base64,...`` URL.
    """
    with Image.open(image_path) as img:
        # thumbnail() resizes in place, never enlarges, keeps aspect ratio.
        img.thumbnail(max_size)
        # Palette images may carry transparency in the palette; promote to
        # RGBA so the alpha-flattening branch below handles them too.
        if img.mode == "P":
            img = img.convert("RGBA")
        if img.mode in ("RGBA", "LA"):
            # Flatten transparency onto a white background. The previous
            # code built the canvas with Image.new(img.mode[:-1], ...) and
            # an RGB tuple fill, which raises for 'LA' images (an 'L'
            # canvas needs a scalar fill); an RGB canvas works for both.
            background = Image.new("RGB", img.size, (255, 255, 255))
            background.paste(img, mask=img.split()[-1])  # alpha as mask
            img = background
        elif img.mode != "RGB":
            # JPEG cannot store CMYK/I/F etc.; convert anything left over.
            img = img.convert("RGB")
        buffered = io.BytesIO()
        img.save(buffered, format="JPEG", quality=quality)
        return base64.b64encode(buffered.getvalue()).decode("utf-8")
def bot_streaming(message, history, together_api_key, max_new_tokens=250, max_history=5):
    """Stream a chat completion for a (possibly multimodal) gradio message.

    Args:
        message: Gradio multimodal payload with ``"text"`` and ``"files"``.
        history: Gradio tuple-format history; an image upload appears as a
            separate ``((path,), ...)`` user turn just before the text turn
            it belongs to.
        together_api_key: Key used to lazily initialize the Together client.
        max_new_tokens: ``max_tokens`` passed to the completion call.
        max_history: Number of most-recent history entries replayed.

    Yields:
        The accumulated assistant reply, growing as chunks stream in, or a
        user-facing error string if the API call fails.
    """
    if client is None:
        initialize_client(together_api_key)

    txt = message["text"]
    messages = []

    # Rebuild the recent conversation for the API. The original loop
    # enumerated the slice history[-max_history:] but indexed the FULL
    # history with the slice index (history[i+1], history[i-1]), pairing
    # the wrong turns and risking IndexError on the last entry. Instead,
    # remember a pending image tuple and attach it to the next text turn.
    pending_image = None
    for user_turn, assistant_turn in history[-max_history:]:
        if isinstance(user_turn, tuple):
            pending_image = user_turn[0]
            continue
        content = [{"type": "text", "text": user_turn}]
        if pending_image is not None:
            content.append({
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encode_image(pending_image)}"},
            })
            pending_image = None
        messages.append({"role": "user", "content": content})
        if assistant_turn is not None:
            messages.append({
                "role": "assistant",
                "content": [{"type": "text", "text": assistant_turn}],
            })

    # Current turn: attach the uploaded file, if any.
    if len(message["files"]) == 1:
        file_entry = message["files"][0]
        # Examples pass a plain path string; live uploads pass a dict.
        image_path = file_entry if isinstance(file_entry, str) else file_entry["path"]
        messages.append({"role": "user", "content": [
            {"type": "text", "text": txt},
            {"type": "image_url",
             "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"}},
        ]})
    else:
        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})

    try:
        stream = client.chat.completions.create(
            model="meta-llama/Llama-Vision-Free",
            messages=messages,
            max_tokens=max_new_tokens,
            stream=True,
        )
        buffer = ""
        for chunk in stream:
            delta = chunk.choices[0].delta.content
            if delta is not None:
                buffer += delta
                time.sleep(0.01)  # small pause smooths the streaming UI
                yield buffer
    except Exception as e:
        # Surface API errors in the chat instead of crashing the app.
        if "Request Entity Too Large" in str(e):
            yield "The image is too large. Please try with a smaller image or compress the existing one."
        else:
            yield f"An error occurred: {str(e)}"
with gr.Blocks() as demo:
    gr.Markdown("# Meta Llama-3.2-11B-Vision-Instruct (FREE)")
    gr.Markdown("Try the new Llama 3.2 11B Vision API by Meta for free through Together AI. Upload an image, and start chatting about it. Just paste in your [Together AI API key](https://api.together.xyz/settings/api-keys) and get started!")

    together_api_key = gr.Textbox(
        label="Together API Key",
        placeholder="Enter your TOGETHER_API_KEY here",
        type="password",
    )

    # bot_streaming's signature is (message, history, together_api_key,
    # max_new_tokens); gradio appends additional_inputs in order after
    # (message, history). The original listed only the slider — so the
    # slider's number arrived as the "API key" — and patched around it
    # with a .change() handler that overwrote the slider's value with the
    # key string. Listing both components in signature order fixes the
    # wiring and makes that hack unnecessary.
    chatbot = gr.ChatInterface(
        fn=bot_streaming,
        textbox=gr.MultimodalTextbox(),
        additional_inputs=[
            together_api_key,
            gr.Slider(
                minimum=10,
                maximum=500,
                value=250,
                step=10,
                label="Maximum number of new tokens to generate",
            ),
        ],
        cache_examples=False,
        stop_btn="Stop Generation",
        fill_height=True,
        multimodal=True,
    )

if __name__ == "__main__":
    demo.launch(debug=True)