import os

import gradio as gr
from dotenv import load_dotenv
from pydantic_ai.messages import ToolCallPart, ToolReturnPart
from pydantic_ai.models.openai import OpenAIModel

from src.agents.image_edit_agent import image_edit_agent, ImageEditDeps, EditImageResult
from src.hopter.client import Hopter, Environment
from src.services.generate_mask import GenerateMaskService
from src.utils import upload_image

# Load environment variables before any API keys are read.
load_dotenv()

model = OpenAIModel(
    "gpt-4o",
    api_key=os.environ.get("OPENAI_API_KEY"),
)

INTRO = """
# Image Editing Assistant
### Experience seamless image editing using natural language in a chat-based interface.

With this demo, you can:
- Enhance or upscale an image
- Remove objects from an image
- Replace elements within an image
- Change the background
"""
EXAMPLES = [
    {
        "text": "Replace the background with space, stars, and planets",
        "files": [
            "https://cdn.prod.website-files.com/66f230993926deadc0ac3a44/66f370d65f158cbbcfbcc532_Crossed%20Arms%20Levi%20Meir%20Clancy.jpg"
        ],
    },
    {
        "text": "Change all the balloons to red in the image",
        "files": [
            "https://www.apple.com/tv-pr/articles/2024/10/apple-tv-unveils-severance-season-two-teaser-ahead-of-the-highly-anticipated-return-of-the-emmy-and-peabody-award-winning-phenomenon/images/big-image/big-image-01/1023024_Severance_Season_Two_Official_Trailer_Big_Image_01_big_image_post.jpg.large_2x.jpg"
        ],
    },
    {
        "text": "Change coffee to a glass of water",
        "files": [
            "https://previews.123rf.com/images/vadymvdrobot/vadymvdrobot1812/vadymvdrobot181201149/113217373-image-of-smiling-woman-holding-takeaway-coffee-in-paper-cup-and-taking-selfie-while-walking-through.jpg"
        ],
    },
    {
        "text": "ENHANCE!",
        "files": [
            "https://m.media-amazon.com/images/M/MV5BNzM3ODc5NzEtNzJkOC00MDM4LWI0MTYtZTkyNmY3ZTBhYzkxXkEyXkFqcGc@._V1_QL75_UX1000_CR0,52,1000,563_.jpg"
        ],
    },
]
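
# build_user_message converts the MultimodalTextbox payload
# ({"text": ..., "files": [...]}) into Chatbot "messages"-format entries:
# one message for the text and one per attached file.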
def build_user_message(chat_input):
    text = chat_input["text"]
    images = chat_input["files"]
    messages = [{"role": "user", "content": text}]
    if images:
        messages.extend(
            [{"role": "user", "content": {"path": image}} for image in images]
        )
    return messages
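
# build_messages_for_agent assembles the multimodal content parts sent to the agent
# (a text part plus an optional image_url part). It is not wired into the UI below;
# stream_from_agent currently builds its agent input inline.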
def build_messages_for_agent(chat_input, past_messages):
    # start from the prior message history; image parts could be filtered here to save tokens
    messages = past_messages
    # add the user's text message
    if chat_input["text"]:
        messages.append({"type": "text", "text": chat_input["text"]})
    # add the user's image message
    files = chat_input.get("files", [])
    image_url = upload_image(files[0]) if files else None
    if image_url:
        messages.append({"type": "image_url", "image_url": {"url": image_url}})
    return messages

def select_example(x: gr.SelectData, chat_input):
    chat_input["text"] = x.value["text"]
    chat_input["files"] = x.value["files"]
    return chat_input
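
# stream_from_agent drives one chat turn. It is an async generator whose yields map,
# in order, to the outputs wired below: (chat_input, chatbot, past_messages,
# current_image). gr.skip() leaves an output unchanged between updates; the final
# yield re-enables the input box and persists the agent message history and the
# latest edited image URL into gr.State.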
async def stream_from_agent(chat_input, chatbot, past_messages, current_image):
    # Prepare messages for the UI
    chatbot.extend(build_user_message(chat_input))
    yield {"text": "", "files": []}, chatbot, gr.skip(), gr.skip()

    # Prepare messages for the agent
    text = chat_input["text"]
    files = chat_input.get("files", [])
    image_url = upload_image(files[0]) if files else None
    messages = [
        {"type": "text", "text": text},
    ]
    if image_url:
        messages.append({"type": "image_url", "image_url": {"url": image_url}})
        current_image = image_url
    # Dependencies
    hopter = Hopter(os.environ.get("HOPTER_API_KEY"), environment=Environment.STAGING)
    mask_service = GenerateMaskService(hopter=hopter)
    deps = ImageEditDeps(
        edit_instruction=text,
        image_url=current_image,
        hopter_client=hopter,
        mask_service=mask_service,
    )
    # Run the agent
    async with image_edit_agent.run_stream(
        messages, deps=deps, message_history=past_messages
    ) as result:
        for message in result.new_messages():
            for call in message.parts:
                if isinstance(call, ToolCallPart):
                    call_args = (
                        call.args.args_json
                        if hasattr(call.args, "args_json")
                        else call.args
                    )
                    metadata = {
                        "title": f"🛠️ Using {call.tool_name}",
                    }
                    # set the tool call id so that when the tool returns
                    # we can find this message and update it with the result
                    if call.tool_call_id is not None:
                        metadata["id"] = call.tool_call_id
                    # Create a tool call message to show on the UI
                    gr_message = {
                        "role": "assistant",
                        "content": f"Parameters: {call_args}",
                        "metadata": metadata,
                    }
                    chatbot.append(gr_message)
                if isinstance(call, ToolReturnPart):
                    for gr_message in chatbot:
                        # Skip messages without metadata
                        if not gr_message.get("metadata"):
                            continue
                        if gr_message["metadata"].get("id", "") == call.tool_call_id:
                            if isinstance(call.content, EditImageResult):
                                chatbot.append(
                                    {
                                        "role": "assistant",
                                        "content": gr.Image(
                                            call.content.edited_image_url
                                        ),
                                    }
                                )
                                current_image = call.content.edited_image_url
                            else:
                                gr_message["content"] += f"\nOutput: {call.content}"
                yield gr.skip(), chatbot, gr.skip(), gr.skip()
chatbot.append({"role": "assistant", "content": ""}) | |
async for message in result.stream_text(): | |
chatbot[-1]["content"] = message | |
yield gr.skip(), chatbot, gr.skip(), gr.skip() | |
past_messages = result.all_messages() | |
        yield gr.MultimodalTextbox(interactive=True), gr.skip(), past_messages, current_image
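
# UI layout: a Markdown intro, two gr.State holders (the working image URL and the
# pydantic-ai message history), a messages-type Chatbot, and a single-file
# MultimodalTextbox. Submitting text and selecting an example both route through
# stream_from_agent with the same inputs and outputs.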
with gr.Blocks() as demo:
    gr.Markdown(INTRO)
    current_image = gr.State(None)
    past_messages = gr.State([])
    chatbot = gr.Chatbot(
        elem_id="chatbot",
        label="Image Editing Assistant",
        type="messages",
        avatar_images=(None, "https://ai.pydantic.dev/img/logo-white.svg"),
        examples=EXAMPLES,
    )
    with gr.Row():
        chat_input = gr.MultimodalTextbox(
            interactive=True,
            file_count="single",
            show_label=False,
            placeholder="How would you like to edit this image?",
            sources=["upload"],
        )
    generation = chat_input.submit(
        stream_from_agent,
        inputs=[chat_input, chatbot, past_messages, current_image],
        outputs=[chat_input, chatbot, past_messages, current_image],
    )
    chatbot.example_select(
        select_example,
        inputs=[chat_input],
        outputs=[chat_input],
    ).then(
        stream_from_agent,
        inputs=[chat_input, chatbot, past_messages, current_image],
        outputs=[chat_input, chatbot, past_messages, current_image],
    )
    examples = gr.Examples(
        examples=EXAMPLES,
        inputs=[chat_input],
        outputs=[chat_input],
    )

if __name__ == "__main__":
    demo.launch()