"""Gradio chat UI for editing images with natural-language instructions."""

import os

import gradio as gr
from dotenv import load_dotenv
from pydantic_ai.messages import ToolCallPart, ToolReturnPart
from pydantic_ai.models.openai import OpenAIModel

from src.agents.image_edit_agent import image_edit_agent, ImageEditDeps, EditImageResult
from src.hopter.client import Hopter, Environment
from src.services.generate_mask import GenerateMaskService
from src.utils import upload_image

# Load .env before anything below reads os.environ; otherwise keys that are
# only defined in the .env file are not visible when the model is created.
load_dotenv()

model = OpenAIModel(
    "gpt-4o",
    api_key=os.environ.get("OPENAI_API_KEY"),
)

INTRO = """
# Image Editing Assistant

### Experience seamless image editing using natural language in a chat-based interface.

With this demo, you can:
- Enhance or upscale an image
- Remove objects from an image
- Replace elements within an image
- Change the background
"""

EXAMPLES = [
    {
        "text": "Replace the background to the space with stars and planets",
        "files": [
            "https://cdn.prod.website-files.com/66f230993926deadc0ac3a44/66f370d65f158cbbcfbcc532_Crossed%20Arms%20Levi%20Meir%20Clancy.jpg"
        ],
    },
    {
        "text": "Change all the balloons to red in the image",
        "files": [
            "https://www.apple.com/tv-pr/articles/2024/10/apple-tv-unveils-severance-season-two-teaser-ahead-of-the-highly-anticipated-return-of-the-emmy-and-peabody-award-winning-phenomenon/images/big-image/big-image-01/1023024_Severance_Season_Two_Official_Trailer_Big_Image_01_big_image_post.jpg.large_2x.jpg"
        ],
    },
    {
        "text": "Change coffee to a glass of water",
        "files": [
            "https://previews.123rf.com/images/vadymvdrobot/vadymvdrobot1812/vadymvdrobot181201149/113217373-image-of-smiling-woman-holding-takeaway-coffee-in-paper-cup-and-taking-selfie-while-walking-through.jpg"
        ],
    },
    {
        "text": "ENHANCE!",
        "files": [
            "https://m.media-amazon.com/images/M/MV5BNzM3ODc5NzEtNzJkOC00MDM4LWI0MTYtZTkyNmY3ZTBhYzkxXkEyXkFqcGc@._V1_QL75_UX1000_CR0,52,1000,563_.jpg"
        ],
    },
]


def build_user_message(chat_input):
    """Convert a multimodal textbox value into chatbot messages.

    Args:
        chat_input: Mapping with a ``"text"`` entry and a ``"files"`` entry
            (a list of image paths/URLs, possibly empty).

    Returns:
        A list of ``{"role": "user", ...}`` message dicts: one carrying the
        text, followed by one per attached image.
    """
    text = chat_input["text"]
    images = chat_input["files"]
    messages = [{"role": "user", "content": text}]
    if images:
        messages.extend(
            {"role": "user", "content": {"path": image}} for image in images
        )
    return messages


def build_messages_for_agent(chat_input, past_messages):
    """Return the agent message list extended with the current user input.

    Args:
        chat_input: Mapping with a ``"text"`` entry and an optional
            ``"files"`` list; only the first file is uploaded and attached.
        past_messages: Messages accumulated so far. The list is copied, so
            the caller's list is left untouched.

    Returns:
        A new list: the past messages followed by a text part (when the text
        is non-empty) and an image part (when an upload yields a URL).
    """
    # NOTE: past image messages are NOT filtered out here, even though that
    # would save tokens; they are passed through unchanged.
    messages = list(past_messages)

    # add the user's text message
    if chat_input["text"]:
        messages.append({"type": "text", "text": chat_input["text"]})

    # add the user's image message
    files = chat_input.get("files", [])
    image_url = upload_image(files[0]) if files else None
    if image_url:
        messages.append({"type": "image_url", "image_url": {"url": image_url}})

    return messages


def select_example(x: gr.SelectData, chat_input):
    """Copy the selected example's text and files into the chat input value."""
    chat_input["text"] = x.value["text"]
    chat_input["files"] = x.value["files"]
    return chat_input


async def stream_from_agent(chat_input, chatbot, past_messages, current_image):
    """Run the image-edit agent on the user's input and stream UI updates.

    Each yielded 4-tuple maps onto the outputs
    ``(chat_input, chatbot, past_messages, current_image)``; ``gr.skip()``
    leaves the corresponding output unchanged.

    Args:
        chat_input: Multimodal textbox value (``"text"`` and ``"files"``).
        chatbot: Chat history shown in the UI; it is extended in place.
        past_messages: Agent message history from previous turns.
        current_image: URL of the image currently being edited, or ``None``.
    """
    # Prepare messages for the UI
    chatbot.extend(build_user_message(chat_input))
    # Clear the input box. Every skipped slot must be the *result* of
    # gr.skip(); yielding the bare function would overwrite the state.
    yield {"text": "", "files": []}, chatbot, gr.skip(), gr.skip()

    # Prepare messages for the agent
    text = chat_input["text"]
    files = chat_input.get("files", [])
    image_url = upload_image(files[0]) if files else None
    messages = [
        {"type": "text", "text": text},
    ]
    if image_url:
        messages.append({"type": "image_url", "image_url": {"url": image_url}})
        # A newly uploaded image replaces the one being edited.
        current_image = image_url

    # Dependencies
    hopter = Hopter(os.environ.get("HOPTER_API_KEY"), environment=Environment.STAGING)
    mask_service = GenerateMaskService(hopter=hopter)
    deps = ImageEditDeps(
        edit_instruction=text,
        image_url=current_image,
        hopter_client=hopter,
        mask_service=mask_service,
    )

    # Run the agent
    async with image_edit_agent.run_stream(
        messages, deps=deps, message_history=past_messages
    ) as result:
        for message in result.new_messages():
            for call in message.parts:
                if isinstance(call, ToolCallPart):
                    call_args = getattr(call.args, "args_json", call.args)
                    metadata = {
                        "title": f"🛠️ Using {call.tool_name}",
                    }
                    # set the tool call id so that when the tool returns
                    # we can find this message and update with the result
                    if call.tool_call_id is not None:
                        metadata["id"] = call.tool_call_id

                    # Create a tool call message to show on the UI.
                    # The args may be a JSON string or a dict, so format
                    # rather than concatenate to avoid a TypeError.
                    chatbot.append(
                        {
                            "role": "assistant",
                            "content": f"Parameters: {call_args}",
                            "metadata": metadata,
                        }
                    )
                if isinstance(call, ToolReturnPart):
                    for gr_message in chatbot:
                        # Skip messages without metadata
                        if not gr_message.get("metadata"):
                            continue
                        if gr_message["metadata"].get("id", "") != call.tool_call_id:
                            continue
                        if isinstance(call.content, EditImageResult):
                            chatbot.append(
                                {
                                    "role": "assistant",
                                    "content": gr.Image(
                                        call.content.edited_image_url
                                    ),
                                    "files": [call.content.edited_image_url],
                                }
                            )
                            current_image = call.content.edited_image_url
                        else:
                            gr_message["content"] += f"\nOutput: {call.content}"
                        # The matching tool-call message has been handled;
                        # stop so we don't keep iterating a list we appended to.
                        break
            yield gr.skip(), chatbot, gr.skip(), gr.skip()

        # Placeholder assistant message that is overwritten as text streams in.
        chatbot.append({"role": "assistant", "content": ""})
        async for message in result.stream_text():
            chatbot[-1]["content"] = message
            yield gr.skip(), chatbot, gr.skip(), gr.skip()
        past_messages = result.all_messages()

        yield gr.Textbox(interactive=True), gr.skip(), past_messages, current_image


with gr.Blocks() as demo:
    gr.Markdown(INTRO)

    current_image = gr.State(None)
    past_messages = gr.State([])
    chatbot = gr.Chatbot(
        elem_id="chatbot",
        label="Image Editing Assistant",
        type="messages",
        avatar_images=(None, "https://ai.pydantic.dev/img/logo-white.svg"),
        examples=EXAMPLES,
    )

    with gr.Row():
        chat_input = gr.MultimodalTextbox(
            interactive=True,
            file_count="single",
            show_label=False,
            placeholder="How would you like to edit this image?",
            sources=["upload"],
        )

    generation = chat_input.submit(
        stream_from_agent,
        inputs=[chat_input, chatbot, past_messages, current_image],
        outputs=[chat_input, chatbot, past_messages, current_image],
    )

    # Selecting an example fills the input box, then runs the agent on it.
    chatbot.example_select(
        select_example,
        inputs=[chat_input],
        outputs=[chat_input],
    ).then(
        stream_from_agent,
        inputs=[chat_input, chatbot, past_messages, current_image],
        outputs=[chat_input, chatbot, past_messages, current_image],
    )

    examples = gr.Examples(
        examples=EXAMPLES,
        inputs=[chat_input],
        outputs=[chat_input],
    )


if __name__ == "__main__":
    demo.launch()