Spaces:

simonlee-cb
/

chat-image-edit

Running

App Files Files Community

simonlee-cb committed on Mar 10

Commit

fcb8f25

1 Parent(s): 2a2c2ad

refactor: formatting

Browse files

Files changed (17) hide show

app.py +1 -1
image_edit_chat.py +53 -71
image_edit_demo.py +30 -38
server.py +36 -49
src/agents/generic_agent.py +2 -7
src/agents/image_edit_agent.py +31 -26
src/agents/mask_generation_agent.py +4 -12
src/hopter/client.py +54 -43
src/models/generate_mask_instruction.py +6 -8
src/services/generate_mask.py +24 -26
src/services/google_cloud_image_upload.py +25 -9
src/services/image_uploader.py +37 -31
src/services/openai_file_upload.py +1 -0
src/utils.py +16 -6
stream_utils.py +92 -35
test_edit_stream.py +39 -25
test_generic_stream.py +16 -7

app.py CHANGED Viewed

@@ -8,4 +8,4 @@ with demo.route("PicEdit"):
     image_edit_demo.demo.render()
 if __name__ == "__main__":
-    demo.launch()

     image_edit_demo.demo.render()
 if __name__ == "__main__":
+    demo.launch()

image_edit_chat.py CHANGED Viewed

@@ -5,13 +5,10 @@ from src.hopter.client import Hopter, Environment
 from src.services.generate_mask import GenerateMaskService
 from dotenv import load_dotenv
 from src.utils import upload_image
-from pydantic_ai.messages import (
-    ToolCallPart,
-    ToolReturnPart
-)
 from pydantic_ai.models.openai import OpenAIModel
-model = OpenAIModel(
     "gpt-4o",
     api_key=os.environ.get("OPENAI_API_KEY"),
 )
@@ -33,76 +30,65 @@ EXAMPLES = [
         "text": "Replace the background to the space with stars and planets",
         "files": [
             "https://cdn.prod.website-files.com/66f230993926deadc0ac3a44/66f370d65f158cbbcfbcc532_Crossed%20Arms%20Levi%20Meir%20Clancy.jpg"
-        ]
     },
     {
         "text": "Change all the balloons to red in the image",
         "files": [
             "https://www.apple.com/tv-pr/articles/2024/10/apple-tv-unveils-severance-season-two-teaser-ahead-of-the-highly-anticipated-return-of-the-emmy-and-peabody-award-winning-phenomenon/images/big-image/big-image-01/1023024_Severance_Season_Two_Official_Trailer_Big_Image_01_big_image_post.jpg.large_2x.jpg"
-        ]
     },
     {
         "text": "Change coffee to a glass of water",
         "files": [
             "https://previews.123rf.com/images/vadymvdrobot/vadymvdrobot1812/vadymvdrobot181201149/113217373-image-of-smiling-woman-holding-takeaway-coffee-in-paper-cup-and-taking-selfie-while-walking-through.jpg"
-        ]
     },
     {
         "text": "ENHANCE!",
         "files": [
             "https://m.media-amazon.com/images/M/MV5BNzM3ODc5NzEtNzJkOC00MDM4LWI0MTYtZTkyNmY3ZTBhYzkxXkEyXkFqcGc@._V1_QL75_UX1000_CR0,52,1000,563_.jpg"
-        ]
-    }
 ]
 load_dotenv()
 def build_user_message(chat_input):
     text = chat_input["text"]
     images = chat_input["files"]
-    messages = [
-        {
-            "role": "user",
-            "content": text
-        }
-    ]
     if images:
-        messages.extend([
-            {
-                "role": "user",
-                "content": {"path": image}
-            }
-            for image in images
-        ])
     return messages
 def build_messages_for_agent(chat_input, past_messages):
     # filter out image messages from past messages to save on tokens
     messages = past_messages
     # add the user's text message
     if chat_input["text"]:
-        messages.append({
-            "type": "text",
-            "text": chat_input["text"]
-        })
     # add the user's image message
     files = chat_input.get("files", [])
     image_url = upload_image(files[0]) if files else None
     if image_url:
-        messages.append({
-            "type": "image_url",
-            "image_url": {"url": image_url}
-        })
     return messages
 def select_example(x: gr.SelectData, chat_input):
     chat_input["text"] = x.value["text"]
     chat_input["files"] = x.value["files"]
     return chat_input
 async def stream_from_agent(chat_input, chatbot, past_messages, current_image):
     # Prepare messages for the UI
     chatbot.extend(build_user_message(chat_input))
@@ -113,15 +99,10 @@ async def stream_from_agent(chat_input, chatbot, past_messages, current_image):
     files = chat_input.get("files", [])
     image_url = upload_image(files[0]) if files else None
     messages = [
-        {
-            "type": "text",
-            "text": text
-        },
     ]
     if image_url:
-        messages.append(
-            {"type": "image_url", "image_url": {"url": image_url}}
-        )
         current_image = image_url
     # Dependencies
@@ -131,66 +112,67 @@ async def stream_from_agent(chat_input, chatbot, past_messages, current_image):
         edit_instruction=text,
         image_url=current_image,
         hopter_client=hopter,
-        mask_service=mask_service
     )
     # Run the agent
     async with image_edit_agent.run_stream(
-        messages,
-        deps=deps,
-        message_history=past_messages
     ) as result:
         for message in result.new_messages():
             for call in message.parts:
                 if isinstance(call, ToolCallPart):
                     call_args = (
                         call.args.args_json
-                        if hasattr(call.args, 'args_json')
                         else call.args
                     )
                     metadata = {
-                        'title': f'🛠️ Using {call.tool_name}',
                     }
                     # set the tool call id so that when the tool returns
                     # we can find this message and update with the result
                     if call.tool_call_id is not None:
-                        metadata['id'] = call.tool_call_id
                     # Create a tool call message to show on the UI
                     gr_message = {
-                        'role': 'assistant',
-                        'content': 'Parameters: ' + call_args,
-                        'metadata': metadata,
                     }
                     chatbot.append(gr_message)
                 if isinstance(call, ToolReturnPart):
                     for gr_message in chatbot:
                         # Skip messages without metadata
-                        if not gr_message.get('metadata'):
                             continue
-                        if gr_message['metadata'].get('id', '') == call.tool_call_id:
                             if isinstance(call.content, EditImageResult):
-                                chatbot.append({
-                                    "role": "assistant",
-                                    "content": gr.Image(call.content.edited_image_url),
-                                    "files": [call.content.edited_image_url]
-                                })
                                 current_image = call.content.edited_image_url
                             else:
-                                gr_message['content'] += (
-                                    f'\nOutput: {call.content}'
-                                )
                 yield gr.skip(), chatbot, gr.skip(), gr.skip()
-        chatbot.append({'role': 'assistant', 'content': ''})
         async for message in result.stream_text():
-            chatbot[-1]['content'] = message
             yield gr.skip(), chatbot, gr.skip(), gr.skip()
         past_messages = result.all_messages()
         yield gr.Textbox(interactive=True), gr.skip(), past_messages, current_image
 with gr.Blocks() as demo:
     gr.Markdown(INTRO)
@@ -198,10 +180,10 @@ with gr.Blocks() as demo:
     past_messages = gr.State([])
     chatbot = gr.Chatbot(
         elem_id="chatbot",
-        label='Image Editing Assistant',
-        type='messages',
-        avatar_images=(None, 'https://ai.pydantic.dev/img/logo-white.svg'),
-        examples=EXAMPLES
     )
     with gr.Row():
@@ -209,8 +191,8 @@ with gr.Blocks() as demo:
             interactive=True,
             file_count="single",
             show_label=False,
-            placeholder='How would you like to edit this image?',
-            sources=["upload"]
         )
     generation = chat_input.submit(
         stream_from_agent,
@@ -233,7 +215,7 @@ with gr.Blocks() as demo:
         inputs=[chat_input],
         outputs=[chat_input],
     )
-if __name__ == '__main__':
-    demo.launch()

 from src.services.generate_mask import GenerateMaskService
 from dotenv import load_dotenv
 from src.utils import upload_image
+from pydantic_ai.messages import ToolCallPart, ToolReturnPart
 from pydantic_ai.models.openai import OpenAIModel
+model = OpenAIModel(
     "gpt-4o",
     api_key=os.environ.get("OPENAI_API_KEY"),
 )
         "text": "Replace the background to the space with stars and planets",
         "files": [
             "https://cdn.prod.website-files.com/66f230993926deadc0ac3a44/66f370d65f158cbbcfbcc532_Crossed%20Arms%20Levi%20Meir%20Clancy.jpg"
+        ],
     },
     {
         "text": "Change all the balloons to red in the image",
         "files": [
             "https://www.apple.com/tv-pr/articles/2024/10/apple-tv-unveils-severance-season-two-teaser-ahead-of-the-highly-anticipated-return-of-the-emmy-and-peabody-award-winning-phenomenon/images/big-image/big-image-01/1023024_Severance_Season_Two_Official_Trailer_Big_Image_01_big_image_post.jpg.large_2x.jpg"
+        ],
     },
     {
         "text": "Change coffee to a glass of water",
         "files": [
             "https://previews.123rf.com/images/vadymvdrobot/vadymvdrobot1812/vadymvdrobot181201149/113217373-image-of-smiling-woman-holding-takeaway-coffee-in-paper-cup-and-taking-selfie-while-walking-through.jpg"
+        ],
     },
     {
         "text": "ENHANCE!",
         "files": [
             "https://m.media-amazon.com/images/M/MV5BNzM3ODc5NzEtNzJkOC00MDM4LWI0MTYtZTkyNmY3ZTBhYzkxXkEyXkFqcGc@._V1_QL75_UX1000_CR0,52,1000,563_.jpg"
+        ],
+    },
 ]
 load_dotenv()
 def build_user_message(chat_input):
     text = chat_input["text"]
     images = chat_input["files"]
+    messages = [{"role": "user", "content": text}]
     if images:
+        messages.extend(
+            [{"role": "user", "content": {"path": image}} for image in images]
+        )
     return messages
 def build_messages_for_agent(chat_input, past_messages):
     # filter out image messages from past messages to save on tokens
     messages = past_messages
     # add the user's text message
     if chat_input["text"]:
+        messages.append({"type": "text", "text": chat_input["text"]})
     # add the user's image message
     files = chat_input.get("files", [])
     image_url = upload_image(files[0]) if files else None
     if image_url:
+        messages.append({"type": "image_url", "image_url": {"url": image_url}})
     return messages
 def select_example(x: gr.SelectData, chat_input):
     chat_input["text"] = x.value["text"]
     chat_input["files"] = x.value["files"]
     return chat_input
 async def stream_from_agent(chat_input, chatbot, past_messages, current_image):
     # Prepare messages for the UI
     chatbot.extend(build_user_message(chat_input))
     files = chat_input.get("files", [])
     image_url = upload_image(files[0]) if files else None
     messages = [
+        {"type": "text", "text": text},
     ]
     if image_url:
+        messages.append({"type": "image_url", "image_url": {"url": image_url}})
         current_image = image_url
     # Dependencies
         edit_instruction=text,
         image_url=current_image,
         hopter_client=hopter,
+        mask_service=mask_service,
     )
     # Run the agent
     async with image_edit_agent.run_stream(
+        messages, deps=deps, message_history=past_messages
     ) as result:
         for message in result.new_messages():
             for call in message.parts:
                 if isinstance(call, ToolCallPart):
                     call_args = (
                         call.args.args_json
+                        if hasattr(call.args, "args_json")
                         else call.args
                     )
                     metadata = {
+                        "title": f"🛠️ Using {call.tool_name}",
                     }
                     # set the tool call id so that when the tool returns
                     # we can find this message and update with the result
                     if call.tool_call_id is not None:
+                        metadata["id"] = call.tool_call_id
                     # Create a tool call message to show on the UI
                     gr_message = {
+                        "role": "assistant",
+                        "content": "Parameters: " + call_args,
+                        "metadata": metadata,
                     }
                     chatbot.append(gr_message)
                 if isinstance(call, ToolReturnPart):
                     for gr_message in chatbot:
                         # Skip messages without metadata
+                        if not gr_message.get("metadata"):
                             continue
+                        if gr_message["metadata"].get("id", "") == call.tool_call_id:
                             if isinstance(call.content, EditImageResult):
+                                chatbot.append(
+                                    {
+                                        "role": "assistant",
+                                        "content": gr.Image(
+                                            call.content.edited_image_url
+                                        ),
+                                        "files": [call.content.edited_image_url],
+                                    }
+                                )
                                 current_image = call.content.edited_image_url
                             else:
+                                gr_message["content"] += f"\nOutput: {call.content}"
                 yield gr.skip(), chatbot, gr.skip(), gr.skip()
+        chatbot.append({"role": "assistant", "content": ""})
         async for message in result.stream_text():
+            chatbot[-1]["content"] = message
             yield gr.skip(), chatbot, gr.skip(), gr.skip()
         past_messages = result.all_messages()
         yield gr.Textbox(interactive=True), gr.skip(), past_messages, current_image
 with gr.Blocks() as demo:
     gr.Markdown(INTRO)
     past_messages = gr.State([])
     chatbot = gr.Chatbot(
         elem_id="chatbot",
+        label="Image Editing Assistant",
+        type="messages",
+        avatar_images=(None, "https://ai.pydantic.dev/img/logo-white.svg"),
+        examples=EXAMPLES,
     )
     with gr.Row():
             interactive=True,
             file_count="single",
             show_label=False,
+            placeholder="How would you like to edit this image?",
+            sources=["upload"],
         )
     generation = chat_input.submit(
         stream_from_agent,
         inputs=[chat_input],
         outputs=[chat_input],
     )
+if __name__ == "__main__":
+    demo.launch()

image_edit_demo.py CHANGED Viewed

@@ -4,50 +4,47 @@ import os
 from src.hopter.client import Hopter, Environment
 from src.services.generate_mask import GenerateMaskService
 from dotenv import load_dotenv
-from pydantic_ai.messages import (
-    ToolReturnPart
-)
 from src.utils import upload_image
 load_dotenv()
 async def process_edit(image, instruction):
     hopter = Hopter(os.environ.get("HOPTER_API_KEY"), environment=Environment.STAGING)
     mask_service = GenerateMaskService(hopter=hopter)
     image_url = upload_image(image)
     messages = [
-        {
-            "type": "text",
-            "text": instruction
-        },
     ]
     if image:
-        messages.append(
-            {"type": "image_url", "image_url": {"url": image_url}}
-        )
     deps = ImageEditDeps(
         edit_instruction=instruction,
         image_url=image_url,
         hopter_client=hopter,
-        mask_service=mask_service
-    )
-    result = await image_edit_agent.run(
-        messages,
-        deps=deps
     )
     # Extract the edited image URL from the tool return
     for message in result.new_messages():
         for part in message.parts:
-            if isinstance(part, ToolReturnPart) and isinstance(part.content, EditImageResult):
                 return part.content.edited_image_url
     return None
 async def use_edited_image(edited_image):
     return edited_image
 def clear_instruction():
     # Only clear the instruction text.
     return ""
 # Create the Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# PicEdit")
@@ -55,57 +52,52 @@ with gr.Blocks() as demo:
     Welcome to PicEdit - an AI-powered image editing tool.
     Simply upload an image and describe the changes you want to make in natural language.
     """)
     with gr.Row():
         # Input image on the left
         input_image = gr.Image(label="Original Image", type="filepath")
         with gr.Column():
             # Output image on the right
-            output_image = gr.Image(label="Edited Image", type="filepath", interactive=False, scale=3)
             use_edited_btn = gr.Button("👈 Use Edited Image 👈")
     # Text input for editing instructions
     instruction = gr.Textbox(
         label="Editing Instructions",
-        placeholder="Describe the changes you want to make to the image..."
     )
     # Clear button
     with gr.Row():
         clear_btn = gr.Button("Clear")
         submit_btn = gr.Button("Apply Edit", variant="primary")
     # Set up the event handlers
     submit_btn.click(
-        fn=process_edit,
-        inputs=[input_image, instruction],
-        outputs=output_image
     )
     use_edited_btn.click(
-        fn=use_edited_image,
-        inputs=[output_image],
-        outputs=[input_image]
     )
     # Bind the clear button's click event to only clear the instruction textbox.
-    clear_btn.click(
-        fn=clear_instruction,
-        inputs=[],
-        outputs=[instruction]
-    )
     examples = gr.Examples(
         examples=[
             ["https://i.ibb.co/qYwhcc6j/c837c212afbf.jpg", "remove the pole"],
             ["https://i.ibb.co/2Mrxztw/image.png", "replace the cat with a dog"],
-            ["https://i.ibb.co/9mT4cvnt/resized-78-B40-C09-1037-4-DD3-9-F48-D73637-EE4-E51.png", "ENHANCE!"]
         ],
-        inputs=[input_image, instruction]
     )
 if __name__ == "__main__":
     demo.launch()

 from src.hopter.client import Hopter, Environment
 from src.services.generate_mask import GenerateMaskService
 from dotenv import load_dotenv
+from pydantic_ai.messages import ToolReturnPart
 from src.utils import upload_image
 load_dotenv()
 async def process_edit(image, instruction):
     hopter = Hopter(os.environ.get("HOPTER_API_KEY"), environment=Environment.STAGING)
     mask_service = GenerateMaskService(hopter=hopter)
     image_url = upload_image(image)
     messages = [
+        {"type": "text", "text": instruction},
     ]
     if image:
+        messages.append({"type": "image_url", "image_url": {"url": image_url}})
     deps = ImageEditDeps(
         edit_instruction=instruction,
         image_url=image_url,
         hopter_client=hopter,
+        mask_service=mask_service,
     )
+    result = await image_edit_agent.run(messages, deps=deps)
     # Extract the edited image URL from the tool return
     for message in result.new_messages():
         for part in message.parts:
+            if isinstance(part, ToolReturnPart) and isinstance(
+                part.content, EditImageResult
+            ):
                 return part.content.edited_image_url
     return None
 async def use_edited_image(edited_image):
     return edited_image
 def clear_instruction():
     # Only clear the instruction text.
     return ""
 # Create the Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# PicEdit")
     Welcome to PicEdit - an AI-powered image editing tool.
     Simply upload an image and describe the changes you want to make in natural language.
     """)
     with gr.Row():
         # Input image on the left
         input_image = gr.Image(label="Original Image", type="filepath")
         with gr.Column():
             # Output image on the right
+            output_image = gr.Image(
+                label="Edited Image", type="filepath", interactive=False, scale=3
+            )
             use_edited_btn = gr.Button("👈 Use Edited Image 👈")
     # Text input for editing instructions
     instruction = gr.Textbox(
         label="Editing Instructions",
+        placeholder="Describe the changes you want to make to the image...",
     )
     # Clear button
     with gr.Row():
         clear_btn = gr.Button("Clear")
         submit_btn = gr.Button("Apply Edit", variant="primary")
     # Set up the event handlers
     submit_btn.click(
+        fn=process_edit, inputs=[input_image, instruction], outputs=output_image
     )
     use_edited_btn.click(
+        fn=use_edited_image, inputs=[output_image], outputs=[input_image]
     )
     # Bind the clear button's click event to only clear the instruction textbox.
+    clear_btn.click(fn=clear_instruction, inputs=[], outputs=[instruction])
     examples = gr.Examples(
         examples=[
             ["https://i.ibb.co/qYwhcc6j/c837c212afbf.jpg", "remove the pole"],
             ["https://i.ibb.co/2Mrxztw/image.png", "replace the cat with a dog"],
+            [
+                "https://i.ibb.co/9mT4cvnt/resized-78-B40-C09-1037-4-DD3-9-F48-D73637-EE4-E51.png",
+                "ENHANCE!",
+            ],
         ],
+        inputs=[input_image, instruction],
     )
 if __name__ == "__main__":
     demo.launch()

server.py CHANGED Viewed

@@ -1,10 +1,9 @@
-from fastapi import FastAPI, UploadFile, File, Form, HTTPException, BackgroundTasks
 from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
-import asyncio
 import os
 from dotenv import load_dotenv
-from typing import Optional, List, Dict, Any
 import json
 from pydantic import BaseModel
@@ -13,7 +12,7 @@ from src.agents.image_edit_agent import image_edit_agent, ImageEditDeps
 from src.agents.generic_agent import generic_agent
 from src.hopter.client import Hopter, Environment
 from src.services.generate_mask import GenerateMaskService
-from src.utils import upload_file_to_base64, upload_image
 # Load environment variables
 load_dotenv()
@@ -29,15 +28,18 @@ app.add_middleware(
     allow_headers=["*"],  # Allows all headers
 )
 class EditRequest(BaseModel):
     edit_instruction: str
     image_url: Optional[str] = None
 class MessageContent(BaseModel):
     type: str
     text: Optional[str] = None
     image_url: Optional[Dict[str, str]] = None
 class Message(BaseModel):
     content: List[MessageContent]
@@ -48,9 +50,10 @@ async def test(query: str):
         async with generic_agent.run_stream(query) as result:
             async for message in result.stream(debounce_by=0.01):
                 yield json.dumps(message) + "\n"
     return StreamingResponse(stream_messages(), media_type="text/plain")
 @app.post("/edit")
 async def edit_image(request: EditRequest):
     """
@@ -60,8 +63,7 @@ async def edit_image(request: EditRequest):
     try:
         # Initialize services
         hopter = Hopter(
-            api_key=os.environ.get("HOPTER_API_KEY"),
-            environment=Environment.STAGING
         )
         mask_service = GenerateMaskService(hopter=hopter)
@@ -70,34 +72,27 @@ async def edit_image(request: EditRequest):
             edit_instruction=request.edit_instruction,
             image_url=request.image_url,
             hopter_client=hopter,
-            mask_service=mask_service
         )
         # Create messages
-        messages = [
-            {
-                "type": "text",
-                "text": request.edit_instruction
-            }
-        ]
         if request.image_url:
-            messages.append({
-                "type": "image_url",
-                "image_url": {
-                    "url": request.image_url
-                }
-            })
         # Run the agent
         result = await image_edit_agent.run(messages, deps=deps)
         # Return the result
         return {"edited_image_url": result.edited_image_url}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/edit/stream")
 async def edit_image_stream(request: EditRequest):
     """
@@ -107,8 +102,7 @@ async def edit_image_stream(request: EditRequest):
     try:
         # Initialize services
         hopter = Hopter(
-            api_key=os.environ.get("HOPTER_API_KEY"),
-            environment=Environment.STAGING
         )
         mask_service = GenerateMaskService(hopter=hopter)
@@ -117,24 +111,16 @@ async def edit_image_stream(request: EditRequest):
             edit_instruction=request.edit_instruction,
             image_url=request.image_url,
             hopter_client=hopter,
-            mask_service=mask_service
         )
         # Create messages
-        messages = [
-            {
-                "type": "text",
-                "text": request.edit_instruction
-            }
-        ]
         if request.image_url:
-            messages.append({
-                "type": "image_url",
-                "image_url": {
-                    "url": request.image_url
-                }
-            })
         async def stream_generator():
             async with image_edit_agent.run_stream(messages, deps=deps) as result:
@@ -142,14 +128,12 @@ async def edit_image_stream(request: EditRequest):
                     # Convert message to JSON and yield
                     yield json.dumps(message) + "\n"
-        return StreamingResponse(
-            stream_generator(),
-            media_type="application/x-ndjson"
-        )
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/upload")
 async def upload_image_file(file: UploadFile = File(...)):
     """
@@ -160,18 +144,19 @@ async def upload_image_file(file: UploadFile = File(...)):
         temp_file_path = f"/tmp/{file.filename}"
         with open(temp_file_path, "wb") as buffer:
             buffer.write(await file.read())
         # Upload the image to Google Cloud Storage
         image_url = upload_image(temp_file_path)
         # Remove the temporary file
         os.remove(temp_file_path)
         return {"image_url": image_url}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.get("/health")
 async def health_check():
     """
@@ -179,6 +164,8 @@ async def health_check():
     """
     return {"status": "ok"}
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)

+from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 import os
 from dotenv import load_dotenv
+from typing import Optional, List, Dict
 import json
 from pydantic import BaseModel
 from src.agents.generic_agent import generic_agent
 from src.hopter.client import Hopter, Environment
 from src.services.generate_mask import GenerateMaskService
+from src.utils import upload_image
 # Load environment variables
 load_dotenv()
     allow_headers=["*"],  # Allows all headers
 )
 class EditRequest(BaseModel):
     edit_instruction: str
     image_url: Optional[str] = None
 class MessageContent(BaseModel):
     type: str
     text: Optional[str] = None
     image_url: Optional[Dict[str, str]] = None
 class Message(BaseModel):
     content: List[MessageContent]
         async with generic_agent.run_stream(query) as result:
             async for message in result.stream(debounce_by=0.01):
                 yield json.dumps(message) + "\n"
     return StreamingResponse(stream_messages(), media_type="text/plain")
 @app.post("/edit")
 async def edit_image(request: EditRequest):
     """
     try:
         # Initialize services
         hopter = Hopter(
+            api_key=os.environ.get("HOPTER_API_KEY"), environment=Environment.STAGING
         )
         mask_service = GenerateMaskService(hopter=hopter)
             edit_instruction=request.edit_instruction,
             image_url=request.image_url,
             hopter_client=hopter,
+            mask_service=mask_service,
         )
         # Create messages
+        messages = [{"type": "text", "text": request.edit_instruction}]
         if request.image_url:
+            messages.append(
+                {"type": "image_url", "image_url": {"url": request.image_url}}
+            )
         # Run the agent
         result = await image_edit_agent.run(messages, deps=deps)
         # Return the result
         return {"edited_image_url": result.edited_image_url}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/edit/stream")
 async def edit_image_stream(request: EditRequest):
     """
     try:
         # Initialize services
         hopter = Hopter(
+            api_key=os.environ.get("HOPTER_API_KEY"), environment=Environment.STAGING
         )
         mask_service = GenerateMaskService(hopter=hopter)
             edit_instruction=request.edit_instruction,
             image_url=request.image_url,
             hopter_client=hopter,
+            mask_service=mask_service,
         )
         # Create messages
+        messages = [{"type": "text", "text": request.edit_instruction}]
         if request.image_url:
+            messages.append(
+                {"type": "image_url", "image_url": {"url": request.image_url}}
+            )
         async def stream_generator():
             async with image_edit_agent.run_stream(messages, deps=deps) as result:
                     # Convert message to JSON and yield
                     yield json.dumps(message) + "\n"
+        return StreamingResponse(stream_generator(), media_type="application/x-ndjson")
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/upload")
 async def upload_image_file(file: UploadFile = File(...)):
     """
         temp_file_path = f"/tmp/{file.filename}"
         with open(temp_file_path, "wb") as buffer:
             buffer.write(await file.read())
         # Upload the image to Google Cloud Storage
         image_url = upload_image(temp_file_path)
         # Remove the temporary file
         os.remove(temp_file_path)
         return {"image_url": image_url}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.get("/health")
 async def health_check():
     """
     """
     return {"status": "ok"}
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)

src/agents/generic_agent.py CHANGED Viewed

@@ -1,14 +1,11 @@
-from pydantic_ai import Agent, RunContext
 from pydantic_ai.models.openai import OpenAIModel
 from dotenv import load_dotenv
 import os
 load_dotenv()
-model = OpenAIModel(
-    "gpt-4o",
-    api_key=os.environ.get("OPENAI_API_KEY")
-)
 system_prompt = """
 You are a helpful assistant that can answer questions and help with tasks.
@@ -19,5 +16,3 @@ generic_agent = Agent(
     system_prompt=system_prompt,
     tools=[],
 )

+from pydantic_ai import Agent
 from pydantic_ai.models.openai import OpenAIModel
 from dotenv import load_dotenv
 import os
 load_dotenv()
+model = OpenAIModel("gpt-4o", api_key=os.environ.get("OPENAI_API_KEY"))
 system_prompt = """
 You are a helpful assistant that can answer questions and help with tasks.
     system_prompt=system_prompt,
     tools=[],
 )

src/agents/image_edit_agent.py CHANGED Viewed

@@ -7,7 +7,12 @@ from dataclasses import dataclass
 from typing import Optional
 import logfire
 from src.services.generate_mask import GenerateMaskService
-from src.hopter.client import Hopter, Environment, MagicReplaceInput, SuperResolutionInput
 from src.utils import image_path_to_uri, download_image_to_data_uri, upload_image
 import base64
 import tempfile
@@ -23,6 +28,7 @@ if the edit instruction involved modifying parts of the image, please generate a
 if images are not provided, ask the user to provide an image.
 """
 @dataclass
 class ImageEditDeps:
     edit_instruction: str
@@ -30,6 +36,7 @@ class ImageEditDeps:
     mask_service: GenerateMaskService
     image_url: Optional[str] = None
 model = OpenAIModel(
     "gpt-4o",
     api_key=os.environ.get("OPENAI_API_KEY"),
@@ -40,11 +47,9 @@ model = OpenAIModel(
 class EditImageResult:
     edited_image_url: str
-image_edit_agent = Agent(
-    model,
-    system_prompt=system_prompt,
-    deps_type=ImageEditDeps
-)
 def upload_image_from_base64(base64_image: str) -> str:
     image_format = base64_image.split(",")[0]
@@ -56,6 +61,7 @@ def upload_image_from_base64(base64_image: str) -> str:
             f.write(image_data)
     return upload_image(temp_filename)
 @image_edit_agent.tool
 async def edit_object(ctx: RunContext[ImageEditDeps]) -> EditImageResult:
     """
@@ -75,15 +81,20 @@ async def edit_object(ctx: RunContext[ImageEditDeps]) -> EditImageResult:
     image_uri = download_image_to_data_uri(image_url)
     # Generate mask
-    mask_instruction = mask_service.get_mask_generation_instruction(edit_instruction, image_url)
     mask = mask_service.generate_mask(mask_instruction, image_uri)
     # Magic replace
-    input = MagicReplaceInput(image=image_uri, mask=mask, prompt=mask_instruction.target_caption)
     result = hopter_client.magic_replace(input)
     uploaded_image = upload_image_from_base64(result.base64_image)
     return EditImageResult(edited_image_url=uploaded_image)
 @image_edit_agent.tool
 async def super_resolution(ctx: RunContext[ImageEditDeps]) -> EditImageResult:
     """
@@ -94,31 +105,28 @@ async def super_resolution(ctx: RunContext[ImageEditDeps]) -> EditImageResult:
     image_uri = download_image_to_data_uri(image_url)
-    input = SuperResolutionInput(image_b64=image_uri, scale=4, use_face_enhancement=False)
     result = hopter_client.super_resolution(input)
     uploaded_image = upload_image_from_base64(result.scaled_image)
     return EditImageResult(edited_image_url=uploaded_image)
 async def main():
     image_file_path = "./assets/lakeview.jpg"
     image_url = image_path_to_uri(image_file_path)
     prompt = "remove the light post"
     messages = [
-        {
-            "type": "text",
-            "text": prompt
-        },
-        {
-            "type": "image_url",
-            "image_url": {
-                "url": image_url
-            }
-        }
     ]
     # Initialize services
-    hopter = Hopter(api_key=os.environ.get("HOPTER_API_KEY"), environment=Environment.STAGING)
     mask_service = GenerateMaskService(hopter=hopter)
     # Initialize dependencies
@@ -126,15 +134,12 @@ async def main():
         edit_instruction=prompt,
         image_url=image_url,
         hopter_client=hopter,
-        mask_service=mask_service
     )
-    async with image_edit_agent.run_stream(
-        messages,
-        deps=deps
-    ) as result:
         async for message in result.stream():
             print(message)
 if __name__ == "__main__":
-    asyncio.run(main())

 from typing import Optional
 import logfire
 from src.services.generate_mask import GenerateMaskService
+from src.hopter.client import (
+    Hopter,
+    Environment,
+    MagicReplaceInput,
+    SuperResolutionInput,
+)
 from src.utils import image_path_to_uri, download_image_to_data_uri, upload_image
 import base64
 import tempfile
 if images are not provided, ask the user to provide an image.
 """
 @dataclass
 class ImageEditDeps:
     edit_instruction: str
     mask_service: GenerateMaskService
     image_url: Optional[str] = None
 model = OpenAIModel(
     "gpt-4o",
     api_key=os.environ.get("OPENAI_API_KEY"),
 class EditImageResult:
     edited_image_url: str
+image_edit_agent = Agent(model, system_prompt=system_prompt, deps_type=ImageEditDeps)
 def upload_image_from_base64(base64_image: str) -> str:
     image_format = base64_image.split(",")[0]
             f.write(image_data)
     return upload_image(temp_filename)
 @image_edit_agent.tool
 async def edit_object(ctx: RunContext[ImageEditDeps]) -> EditImageResult:
     """
     image_uri = download_image_to_data_uri(image_url)
     # Generate mask
+    mask_instruction = mask_service.get_mask_generation_instruction(
+        edit_instruction, image_url
+    )
     mask = mask_service.generate_mask(mask_instruction, image_uri)
     # Magic replace
+    input = MagicReplaceInput(
+        image=image_uri, mask=mask, prompt=mask_instruction.target_caption
+    )
     result = hopter_client.magic_replace(input)
     uploaded_image = upload_image_from_base64(result.base64_image)
     return EditImageResult(edited_image_url=uploaded_image)
 @image_edit_agent.tool
 async def super_resolution(ctx: RunContext[ImageEditDeps]) -> EditImageResult:
     """
     image_uri = download_image_to_data_uri(image_url)
+    input = SuperResolutionInput(
+        image_b64=image_uri, scale=4, use_face_enhancement=False
+    )
     result = hopter_client.super_resolution(input)
     uploaded_image = upload_image_from_base64(result.scaled_image)
     return EditImageResult(edited_image_url=uploaded_image)
 async def main():
     image_file_path = "./assets/lakeview.jpg"
     image_url = image_path_to_uri(image_file_path)
     prompt = "remove the light post"
     messages = [
+        {"type": "text", "text": prompt},
+        {"type": "image_url", "image_url": {"url": image_url}},
     ]
     # Initialize services
+    hopter = Hopter(
+        api_key=os.environ.get("HOPTER_API_KEY"), environment=Environment.STAGING
+    )
     mask_service = GenerateMaskService(hopter=hopter)
     # Initialize dependencies
         edit_instruction=prompt,
         image_url=image_url,
         hopter_client=hopter,
+        mask_service=mask_service,
     )
+    async with image_edit_agent.run_stream(messages, deps=deps) as result:
         async for message in result.stream():
             print(message)
 if __name__ == "__main__":
+    asyncio.run(main())

src/agents/mask_generation_agent.py CHANGED Viewed

@@ -1,16 +1,9 @@
-from pydantic_ai import Agent, RunContext
 from pydantic_ai.models.openai import OpenAIModel
 from dotenv import load_dotenv
 import os
-import asyncio
 from dataclasses import dataclass
-from typing import Optional
 import logfire
-from src.services.generate_mask import GenerateMaskService
-from src.hopter.client import Hopter, Environment, MagicReplaceInput, SuperResolutionInput
-from src.utils import image_path_to_uri, download_image_to_data_uri, upload_image
-import base64
-import tempfile
 load_dotenv()
@@ -56,10 +49,9 @@ model = OpenAIModel(
 class MaskGenerationResult:
     mask_image_base64: str
-mask_generation_agent = Agent(
-    model,
-    system_prompt=system_prompt
-)
 @mask_generation_agent.tool
 async def generate_mask(edit_instruction: str, image_url: str) -> MaskGenerationResult:

+from pydantic_ai import Agent
 from pydantic_ai.models.openai import OpenAIModel
 from dotenv import load_dotenv
 import os
 from dataclasses import dataclass
 import logfire
 load_dotenv()
 class MaskGenerationResult:
     mask_image_base64: str
+mask_generation_agent = Agent(model, system_prompt=system_prompt)
 @mask_generation_agent.tool
 async def generate_mask(edit_instruction: str, image_url: str) -> MaskGenerationResult:

src/hopter/client.py CHANGED Viewed

@@ -9,6 +9,7 @@ from typing import List
 load_dotenv()
 class Environment(Enum):
     STAGING = "staging"
     PRODUCTION = "production"
@@ -20,39 +21,50 @@ class Environment(Enum):
                 return "https://serving.hopter.staging.picc.co"
             case Environment.PRODUCTION:
                 return "https://serving.hopter.picc.co"
 class RamGroundedSamInput(BaseModel):
-    text_prompt: str = Field(..., description="The text prompt for the mask generation.")
     image_b64: str = Field(..., description="The image in base64 format.")
 class RamGroundedSamResult(BaseModel):
     mask_b64: str = Field(..., description="The mask image in base64 format.")
     class_label: str = Field(..., description="The class label of the mask.")
     confidence: float = Field(..., description="The confidence score of the mask.")
-    bbox: List[float] = Field(..., description="The bounding box of the mask in the format [x1, y1, x2, y2].")
 class MagicReplaceInput(BaseModel):
     image: str = Field(..., description="The image in base64 format.")
     mask: str = Field(..., description="The mask in base64 format.")
     prompt: str = Field(..., description="The prompt for the magic replace.")
 class MagicReplaceResult(BaseModel):
     base64_image: str = Field(..., description="The edited image in base64 format.")
 class SuperResolutionInput(BaseModel):
     image_b64: str = Field(..., description="The image in base64 format.")
     scale: int = Field(4, description="The scale of the image to upscale to.")
-    use_face_enhancement: bool = Field(False, description="Whether to use face enhancement.")
 class SuperResolutionResult(BaseModel):
-    scaled_image: str = Field(..., description="The super-resolved image in base64 format.")
 class Hopter:
-    def __init__(
-        self,
-        api_key: str,
-        environment: Environment = Environment.PRODUCTION
-    ):
         self.api_key = api_key
         self.base_url = environment.base_url
         self.client = httpx.Client()
@@ -64,22 +76,22 @@ class Hopter:
                 f"{self.base_url}/api/v1/services/ram-grounded-sam-api/predictions",
                 headers={
                     "Authorization": f"Bearer {self.api_key}",
-                    "Content-Type": "application/json"
-                },
-                json={
-                    "input": input.model_dump()
                 },
-                timeout=None
             )
             response.raise_for_status()  # Raise an error for bad responses
             instance = response.json().get("output").get("instances")[0]
             print("Generated mask.")
             return RamGroundedSamResult(**instance)
         except httpx.HTTPStatusError as exc:
-            print(f"HTTP error occurred: {exc.response.status_code} - {exc.response.text}")
         except Exception as exc:
             print(f"An unexpected error occurred: {exc}")
     def magic_replace(self, input: MagicReplaceInput) -> MagicReplaceResult:
         print(f"Magic replacing with input: {input.prompt}")
         try:
@@ -87,19 +99,19 @@ class Hopter:
                 f"{self.base_url}/api/v1/services/sdxl-magic-replace/predictions",
                 headers={
                     "Authorization": f"Bearer {self.api_key}",
-                    "Content-Type": "application/json"
                 },
-                json={
-                    "input": input.model_dump()
-                },
-                timeout=None
             )
             response.raise_for_status()  # Raise an error for bad responses
             instance = response.json().get("output")
             print("Magic replaced.")
             return MagicReplaceResult(**instance)
         except httpx.HTTPStatusError as exc:
-            print(f"HTTP error occurred: {exc.response.status_code} - {exc.response.text}")
         except Exception as exc:
             print(f"An unexpected error occurred: {exc}")
@@ -109,51 +121,50 @@ class Hopter:
                 f"{self.base_url}/api/v1/services/super-resolution-esrgan/predictions",
                 headers={
                     "Authorization": f"Bearer {self.api_key}",
-                    "Content-Type": "application/json"
                 },
-                json={
-                    "input": input.model_dump()
-                },
-                timeout=None
             )
             response.raise_for_status()  # Raise an error for bad responses
             instance = response.json().get("output")
             print("Super-resolutin done")
             return SuperResolutionResult(**instance)
         except httpx.HTTPStatusError as exc:
-            print(f"HTTP error occurred: {exc.response.status_code} - {exc.response.text}")
         except Exception as exc:
             print(f"An unexpected error occurred: {exc}")
 async def test_generate_mask(hopter: Hopter, image_url: str) -> str:
-    input = RamGroundedSamInput(
-        text_prompt="pole",
-        image_b64=image_url
-    )
     mask = hopter.generate_mask(input)
     return mask.mask_b64
-async def test_magic_replace(hopter: Hopter, image_url: str, mask: str, prompt: str) -> str:
-    input = MagicReplaceInput(
-        image=image_url,
-        mask=mask,
-        prompt=prompt
-    )
     result = hopter.magic_replace(input)
     return result.base64_image
 async def main():
     hopter = Hopter(
-        api_key=os.getenv("HOPTER_API_KEY"),
-        environment=Environment.STAGING
     )
     image_file_path = "./assets/lakeview.jpg"
     image_url = image_path_to_uri(image_file_path)
     mask = await test_generate_mask(hopter, image_url)
-    magic_replace_result = await test_magic_replace(hopter, image_url, mask, "remove the pole")
     print(magic_replace_result)
 if __name__ == "__main__":
     asyncio.run(main())

 load_dotenv()
 class Environment(Enum):
     STAGING = "staging"
     PRODUCTION = "production"
                 return "https://serving.hopter.staging.picc.co"
             case Environment.PRODUCTION:
                 return "https://serving.hopter.picc.co"
 class RamGroundedSamInput(BaseModel):
     # Request payload for Hopter.generate_mask: the client serialises it with
     # model_dump() and posts it to the ram-grounded-sam-api service.
+    text_prompt: str = Field(
+        ..., description="The text prompt for the mask generation."
+    )
     # NOTE(review): callers in this commit pass the result of
     # image_path_to_uri / download_image_to_data_uri here, so the value looks
     # like a data URI rather than bare base64 -- confirm against the service.
     image_b64: str = Field(..., description="The image in base64 format.")
 class RamGroundedSamResult(BaseModel):
     # One detected instance from the ram-grounded-sam-api response. The client
     # builds this from the first entry of output.instances only; any further
     # instances in the response are not read.
     mask_b64: str = Field(..., description="The mask image in base64 format.")
     class_label: str = Field(..., description="The class label of the mask.")
     confidence: float = Field(..., description="The confidence score of the mask.")
+    bbox: List[float] = Field(
+        ..., description="The bounding box of the mask in the format [x1, y1, x2, y2]."
+    )
 class MagicReplaceInput(BaseModel):
     # Request payload for Hopter.magic_replace; sent as {"input": model_dump()}
     # to the sdxl-magic-replace service. All three fields are required.
     image: str = Field(..., description="The image in base64 format.")
     mask: str = Field(..., description="The mask in base64 format.")
     prompt: str = Field(..., description="The prompt for the magic replace.")
 class MagicReplaceResult(BaseModel):
     # Built by Hopter.magic_replace from the "output" object of the
     # sdxl-magic-replace response.
     base64_image: str = Field(..., description="The edited image in base64 format.")
 class SuperResolutionInput(BaseModel):
     # Request payload posted to the super-resolution-esrgan service.
     # Only image_b64 is required; scale falls back to 4 and
     # use_face_enhancement to False when omitted.
     image_b64: str = Field(..., description="The image in base64 format.")
     scale: int = Field(4, description="The scale of the image to upscale to.")
+    use_face_enhancement: bool = Field(
+        False, description="Whether to use face enhancement."
+    )
 class SuperResolutionResult(BaseModel):
     # Built from the "output" object of the super-resolution-esrgan response.
+    scaled_image: str = Field(
+        ..., description="The super-resolved image in base64 format."
+    )
 class Hopter:
+    def __init__(self, api_key: str, environment: Environment = Environment.PRODUCTION):
         """Create a client bound to one Hopter environment.
         Args:
             api_key: Token sent as ``Authorization: Bearer <api_key>`` by the
                 request methods shown in this diff.
             environment: Selects the service host through
                 ``Environment.base_url``; defaults to production.
         """
         self.api_key = api_key
         self.base_url = environment.base_url
         # One synchronous httpx client is created per Hopter instance.
         # NOTE(review): nothing visible in this diff closes it -- confirm
         # whether a close()/context-manager hook exists elsewhere.
         self.client = httpx.Client()
                 f"{self.base_url}/api/v1/services/ram-grounded-sam-api/predictions",
                 headers={
                     "Authorization": f"Bearer {self.api_key}",
+                    "Content-Type": "application/json",
                 },
+                json={"input": input.model_dump()},
+                timeout=None,
             )
             response.raise_for_status()  # Raise an error for bad responses
             instance = response.json().get("output").get("instances")[0]
             print("Generated mask.")
             return RamGroundedSamResult(**instance)
         except httpx.HTTPStatusError as exc:
+            print(
+                f"HTTP error occurred: {exc.response.status_code} - {exc.response.text}"
+            )
         except Exception as exc:
             print(f"An unexpected error occurred: {exc}")
     def magic_replace(self, input: MagicReplaceInput) -> MagicReplaceResult:
         print(f"Magic replacing with input: {input.prompt}")
         try:
                 f"{self.base_url}/api/v1/services/sdxl-magic-replace/predictions",
                 headers={
                     "Authorization": f"Bearer {self.api_key}",
+                    "Content-Type": "application/json",
                 },
+                json={"input": input.model_dump()},
+                timeout=None,
             )
             response.raise_for_status()  # Raise an error for bad responses
             instance = response.json().get("output")
             print("Magic replaced.")
             return MagicReplaceResult(**instance)
         except httpx.HTTPStatusError as exc:
+            print(
+                f"HTTP error occurred: {exc.response.status_code} - {exc.response.text}"
+            )
         except Exception as exc:
             print(f"An unexpected error occurred: {exc}")
                 f"{self.base_url}/api/v1/services/super-resolution-esrgan/predictions",
                 headers={
                     "Authorization": f"Bearer {self.api_key}",
+                    "Content-Type": "application/json",
                 },
+                json={"input": input.model_dump()},
+                timeout=None,
             )
             response.raise_for_status()  # Raise an error for bad responses
             instance = response.json().get("output")
             print("Super-resolutin done")
             return SuperResolutionResult(**instance)
         except httpx.HTTPStatusError as exc:
+            print(
+                f"HTTP error occurred: {exc.response.status_code} - {exc.response.text}"
+            )
         except Exception as exc:
             print(f"An unexpected error occurred: {exc}")
 async def test_generate_mask(hopter: Hopter, image_url: str) -> str:
     """Manual smoke check: request a mask for the prompt "pole" and return its mask_b64.
     Despite the ``image_url`` name, the value is forwarded as ``image_b64``.
     """
     # `input` shadows the builtin for the rest of this function.
+    input = RamGroundedSamInput(text_prompt="pole", image_b64=image_url)
     # Plain (non-awaited) call on the synchronous client inside a coroutine.
     mask = hopter.generate_mask(input)
     # NOTE(review): the except branches of the client method shown in this diff
     # only print, so on failure `mask` is presumably None and this line raises
     # AttributeError -- confirm.
     return mask.mask_b64
+async def test_magic_replace(
+    hopter: Hopter, image_url: str, mask: str, prompt: str
+) -> str:
     # Manual smoke check: run magic replace on `image_url` with `mask` and
     # `prompt`, returning the edited image string from the result.
     # `input` shadows the builtin for the rest of this function.
+    input = MagicReplaceInput(image=image_url, mask=mask, prompt=prompt)
     # Plain (non-awaited) call on the synchronous client inside a coroutine.
     result = hopter.magic_replace(input)
     # NOTE(review): magic_replace's except branches only print, so `result`
     # can be None here and the attribute access would then fail.
     return result.base64_image
 async def main():
     # Ad-hoc end-to-end run against the STAGING environment: build a client,
     # turn the bundled sample image into a URI, generate a mask for it, then
     # run magic replace with the prompt "remove the pole" and print the result.
     # os.getenv returns None when HOPTER_API_KEY is unset; the client would
     # then send the literal header value "Bearer None".
     hopter = Hopter(
+        api_key=os.getenv("HOPTER_API_KEY"), environment=Environment.STAGING
     )
     # Path is relative to the current working directory.
     image_file_path = "./assets/lakeview.jpg"
     image_url = image_path_to_uri(image_file_path)
     mask = await test_generate_mask(hopter, image_url)
+    magic_replace_result = await test_magic_replace(
+        hopter, image_url, mask, "remove the pole"
+    )
     print(magic_replace_result)
 if __name__ == "__main__":
     asyncio.run(main())

src/models/generate_mask_instruction.py CHANGED Viewed

@@ -1,19 +1,17 @@
 from pydantic import BaseModel, Field
 class GenerateMaskInstruction(BaseModel):
     category: str = Field(
         ...,
-        description="The editing category based on the instruction. Must be one of: Addition, Remove, Local, Global, Background."
     )
     subject: str = Field(
         ...,
-        description="The subject of the editing instruction. Must be a noun in no more than 5 words."
-    )
-    caption: str = Field(
-        ...,
-        description="The detailed description of the image."
     )
     target_caption: str = Field(
         ...,
-        description="Apply the editing instruction to the image caption. The target caption should describe the image after the editing instruction is applied."
-    )

 from pydantic import BaseModel, Field
 class GenerateMaskInstruction(BaseModel):
     # Structured result of the mask-instruction LLM call: this class is passed
     # as response_format to the OpenAI parse call in GenerateMaskService.
     # NOTE(review): the Field descriptions are presumably part of the schema
     # the model sees, so treat them as prompt text -- confirm before rewording.
     # `category` is a plain str; the allowed values are listed only in the
     # description and are not enforced by the type.
     category: str = Field(
         ...,
+        description="The editing category based on the instruction. Must be one of: Addition, Remove, Local, Global, Background.",
     )
     # Used as the text prompt for mask generation (RamGroundedSamInput.text_prompt).
     subject: str = Field(
         ...,
+        description="The subject of the editing instruction. Must be a noun in no more than 5 words.",
     )
+    caption: str = Field(..., description="The detailed description of the image.")
     # Used as the prompt for the magic-replace step in image_edit_agent.edit_object.
     target_caption: str = Field(
         ...,
+        description="Apply the editing instruction to the image caption. The target caption should describe the image after the editing instruction is applied.",
+    )

src/services/generate_mask.py CHANGED Viewed

@@ -6,6 +6,7 @@ from src.hopter.client import Hopter, RamGroundedSamInput, Environment
 from src.models.generate_mask_instruction import GenerateMaskInstruction
 from src.services.openai_file_upload import OpenAIFileUpload
 from src.utils import download_image_to_data_uri
 load_dotenv()
 system_prompt = """
@@ -37,6 +38,7 @@ Do not output 'sorry, xxx', even if it's a guess, directly output the answer you
 </task_3>
 """
 class GenerateMaskService:
     def __init__(self, hopter: Hopter):
         self.llm = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
@@ -44,38 +46,29 @@ class GenerateMaskService:
         self.openai_file_upload = OpenAIFileUpload()
         self.hopter = hopter
-    def get_mask_generation_instruction(self, edit_instruction: str, image_url: str) -> GenerateMaskInstruction:
         messages = [
-            {
-                "role": "system",
-                "content": system_prompt
-            },
             {
                 "role": "user",
                 "content": [
-                    {
-                        "type": "text",
-                        "text": edit_instruction
-                    },
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": image_url
-                        }
-                    }
-                ]
-            }
         ]
         response = self.llm.beta.chat.completions.parse(
-            model=self.model,
-            messages=messages,
-            response_format=GenerateMaskInstruction
         )
         instruction = response.choices[0].message.parsed
         return instruction
-    def generate_mask(self, mask_instruction: GenerateMaskInstruction, image_url: str) -> str:
         """
         Generate a mask for the image editing instruction.
@@ -87,14 +80,18 @@ class GenerateMaskService:
         """
         image_uri = download_image_to_data_uri(image_url)
         input = RamGroundedSamInput(
-            text_prompt=mask_instruction.subject,
-            image_b64=image_uri
         )
         generate_mask_result = self.hopter.generate_mask(input)
         return generate_mask_result.mask_b64
 async def main():
-    service = GenerateMaskService(Hopter(api_key=os.environ.get("HOPTER_API_KEY"), environment=Environment.STAGING))
     edit_instruction = "remove the light post"
     image_file_path = "./assets/lakeview.jpg"
     with open(image_file_path, "rb") as image_file:
@@ -105,5 +102,6 @@ async def main():
     mask = service.generate_mask(instruction, image_url)
     print(mask)
 if __name__ == "__main__":
     asyncio.run(main())

 from src.models.generate_mask_instruction import GenerateMaskInstruction
 from src.services.openai_file_upload import OpenAIFileUpload
 from src.utils import download_image_to_data_uri
 load_dotenv()
 system_prompt = """
 </task_3>
 """
 class GenerateMaskService:
     def __init__(self, hopter: Hopter):
         self.llm = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
         self.openai_file_upload = OpenAIFileUpload()
         self.hopter = hopter
+    def get_mask_generation_instruction(
+        self, edit_instruction: str, image_url: str
+    ) -> GenerateMaskInstruction:
         """Ask the LLM to turn an edit instruction plus image into a GenerateMaskInstruction.
         Args:
             edit_instruction: The user's free-text edit request.
             image_url: Passed to the model as the ``image_url`` content part.
         Returns:
             The parsed message of the first completion choice.
         """
         # System prompt first, then a single user turn carrying both the text
         # instruction and the image reference.
         messages = [
+            {"role": "system", "content": system_prompt},
             {
                 "role": "user",
                 "content": [
+                    {"type": "text", "text": edit_instruction},
+                    {"type": "image_url", "image_url": {"url": image_url}},
+                ],
+            },
         ]
         # self.model is not assigned in the lines visible in this diff;
         # presumably set in __init__ -- confirm.
         response = self.llm.beta.chat.completions.parse(
+            model=self.model, messages=messages, response_format=GenerateMaskInstruction
         )
         # NOTE(review): `parsed` may be None (e.g. on a refusal) despite the
         # return annotation -- verify against the OpenAI SDK and the callers.
         instruction = response.choices[0].message.parsed
         return instruction
+    def generate_mask(
+        self, mask_instruction: GenerateMaskInstruction, image_url: str
+    ) -> str:
         """
         Generate a mask for the image editing instruction.
         """
         # NOTE(review): edit_object in image_edit_agent.py already runs
         # download_image_to_data_uri and passes the resulting data URI in as
         # `image_url`, so the helper is applied twice on that path -- confirm
         # it tolerates a data URI as input.
         image_uri = download_image_to_data_uri(image_url)
         # Only the instruction's `subject` is used as the text prompt.
         input = RamGroundedSamInput(
+            text_prompt=mask_instruction.subject, image_b64=image_uri
         )
         generate_mask_result = self.hopter.generate_mask(input)
         # NOTE(review): the client's except branches only print, so the result
         # can be None here and this attribute access would then fail.
         return generate_mask_result.mask_b64
 async def main():
+    service = GenerateMaskService(
+        Hopter(
+            api_key=os.environ.get("HOPTER_API_KEY"), environment=Environment.STAGING
+        )
+    )
     edit_instruction = "remove the light post"
     image_file_path = "./assets/lakeview.jpg"
     with open(image_file_path, "rb") as image_file:
     mask = service.generate_mask(instruction, image_url)
     print(mask)
 if __name__ == "__main__":
     asyncio.run(main())

src/services/google_cloud_image_upload.py CHANGED Viewed

@@ -7,14 +7,18 @@ from dotenv import load_dotenv
 load_dotenv()
 def get_credentials():
     credentials_json_string = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
     # create a temp file with the credentials
-    with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json") as temp_file:
         temp_file.write(credentials_json_string)
         temp_file_path = temp_file.name
     return temp_file_path
 class GoogleCloudImageUploadService:
     BUCKET_NAME = "picchat-assets"
     MAX_DIMENSION = 1024
@@ -39,37 +43,49 @@ class GoogleCloudImageUploadService:
             # Open and optionally resize the image, then save to a temporary file.
             with Image.open(source_file_name) as image:
                 # Determine the original format. If it's not JPEG or PNG, default to JPEG.
-                original_format = image.format.upper() if image.format in ['JPEG', 'PNG'] else "JPEG"
                 # Resize if needed.
-                if image.width > self.MAX_DIMENSION or image.height > self.MAX_DIMENSION:
                     image.thumbnail((self.MAX_DIMENSION, self.MAX_DIMENSION))
                 # Choose the file extension based on the image format.
                 suffix = ".jpg" if original_format == "JPEG" else ".png"
                 # Create a temporary file with the appropriate suffix.
-                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
                     temp_filename = temp_file.name
                     image.save(temp_filename, format=original_format)
             try:
                 # Set content type based on the image format.
-                content_type = "image/jpeg" if original_format == "JPEG" else "image/png"
                 blob.upload_from_filename(temp_filename, content_type=content_type)
                 blob.make_public()
             finally:
                 # Remove the temporary file.
                 os.remove(temp_filename)
-            print(f"File {source_file_name} uploaded to {blob_name} in bucket {self.BUCKET_NAME}.")
             return blob.public_url
         except Exception as e:
             print(f"An error occurred: {e}")
             return None
 if __name__ == "__main__":
     image = "./assets/lakeview.jpg"  # Replace with your JPEG or PNG image path.
     upload_service = GoogleCloudImageUploadService()
     url = upload_service.upload_image_to_gcs(image)
-    print(url)

 load_dotenv()
 def get_credentials():
     """Write the credentials JSON from the environment to a temp file and return its path.
     Reads GOOGLE_APPLICATION_CREDENTIALS_JSON and stores its text in a new
     ``.json`` temporary file. The file is created with ``delete=False`` and is
     not removed by this function, so it outlives the call.
     """
     # os.getenv returns None when the variable is unset; write() below would
     # then raise TypeError.
     credentials_json_string = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
     # create a temp file with the credentials
+    with tempfile.NamedTemporaryFile(
+        mode="w+", delete=False, suffix=".json"
+    ) as temp_file:
         temp_file.write(credentials_json_string)
         temp_file_path = temp_file.name
     return temp_file_path
 class GoogleCloudImageUploadService:
     BUCKET_NAME = "picchat-assets"
     MAX_DIMENSION = 1024
             # Open and optionally resize the image, then save to a temporary file.
             with Image.open(source_file_name) as image:
                 # Determine the original format. If it's not JPEG or PNG, default to JPEG.
+                original_format = (
+                    image.format.upper() if image.format in ["JPEG", "PNG"] else "JPEG"
+                )
                 # Resize if needed.
+                if (
+                    image.width > self.MAX_DIMENSION
+                    or image.height > self.MAX_DIMENSION
+                ):
                     image.thumbnail((self.MAX_DIMENSION, self.MAX_DIMENSION))
                 # Choose the file extension based on the image format.
                 suffix = ".jpg" if original_format == "JPEG" else ".png"
                 # Create a temporary file with the appropriate suffix.
+                with tempfile.NamedTemporaryFile(
+                    delete=False, suffix=suffix
+                ) as temp_file:
                     temp_filename = temp_file.name
                     image.save(temp_filename, format=original_format)
             try:
                 # Set content type based on the image format.
+                content_type = (
+                    "image/jpeg" if original_format == "JPEG" else "image/png"
+                )
                 blob.upload_from_filename(temp_filename, content_type=content_type)
                 blob.make_public()
             finally:
                 # Remove the temporary file.
                 os.remove(temp_filename)
+            print(
+                f"File {source_file_name} uploaded to {blob_name} in bucket {self.BUCKET_NAME}."
+            )
             return blob.public_url
         except Exception as e:
             print(f"An error occurred: {e}")
             return None
 if __name__ == "__main__":
     image = "./assets/lakeview.jpg"  # Replace with your JPEG or PNG image path.
     upload_service = GoogleCloudImageUploadService()
     url = upload_service.upload_image_to_gcs(image)
+    print(url)

src/services/image_uploader.py CHANGED Viewed

@@ -5,6 +5,7 @@ from pathlib import Path
 import os
 from pydantic import BaseModel
 class ImageInfo(BaseModel):
     filename: str
     name: str
@@ -12,6 +13,7 @@ class ImageInfo(BaseModel):
     extension: str
     url: str
 class ImgBBData(BaseModel):
     id: str
     title: str
@@ -28,33 +30,35 @@ class ImgBBData(BaseModel):
     medium: ImageInfo
     delete_url: str
 class ImgBBResponse(BaseModel):
     # Top-level response model that ImageUploader.upload is annotated to return.
     # NOTE(review): presumably mirrors the ImgBB upload JSON envelope
     # (payload, success flag, status code) -- confirm against the ImgBB API.
     data: ImgBBData
     success: bool
     status: int
 class ImageUploader:
     """A class to handle image uploads to ImgBB service."""
     def __init__(self, api_key: str):
         """
         Initialize the ImageUploader with an API key.
         Args:
             api_key (str): The ImgBB API key
         """
         self.api_key = api_key
         self.base_url = "https://api.imgbb.com/1/upload"
     def upload(
         self,
         image: Union[str, bytes, Path],
         name: Optional[str] = None,
-        expiration: Optional[int] = None
     ) -> ImgBBResponse:
         """
         Upload an image to ImgBB.
         Args:
             image: Can be:
                 - A file path (str or Path)
@@ -64,20 +68,20 @@ class ImageUploader:
                 - Bytes of an image
             name: Optional name for the uploaded file
             expiration: Optional expiration time in seconds (60-15552000)
         Returns:
             ImgBBResponse containing the parsed upload response from ImgBB
         Raises:
             ValueError: If the image format is invalid or upload fails
             requests.RequestException: If the API request fails
         """
         # Prepare the parameters
-        params = {'key': self.api_key}
         if expiration:
             if not 60 <= expiration <= 15552000:
                 raise ValueError("Expiration must be between 60 and 15552000 seconds")
-            params['expiration'] = expiration
         # Handle different image input types
         if isinstance(image, (str, Path)):
@@ -85,38 +89,40 @@ class ImageUploader:
             files = {}
             if os.path.isfile(image_str):
                 # It's a file path
-                with open(image_str, 'rb') as file:
-                    files['image'] = file
-            elif image_str.startswith(('http://', 'https://')):
                 # It's a URL
-                files['image'] = (None, image_str)
-            elif image_str.startswith('data:image/'):
                 # It's a data URI
                 # Extract the base64 part after the comma
-                base64_data = image_str.split(',', 1)[1]
-                files['image'] = (None, base64_data)
             else:
                 # Assume it's base64 data
-                files['image'] = (None, image_str)
             if name:
-                files['name'] = (None, name)
             response = requests.post(self.base_url, params=params, files=files)
         elif isinstance(image, bytes):
             # Convert bytes to base64
-            base64_image = base64.b64encode(image).decode('utf-8')
-            files = {
-                'image': (None, base64_image)
-            }
             if name:
-                files['name'] = (None, name)
             response = requests.post(self.base_url, params=params, files=files)
         else:
-            raise ValueError("Invalid image format. Must be file path, URL, base64 string, or bytes")
         # Check the response
         if response.status_code != 200:
-            raise ValueError(f"Upload failed with status {response.status_code}: {response.text}")
         # Parse the response using Pydantic model
         response_json = response.json()
@@ -126,16 +132,16 @@ class ImageUploader:
         self,
         file_path: Union[str, Path],
         name: Optional[str] = None,
-        expiration: Optional[int] = None
     ) -> ImgBBResponse:
         """
         Convenience method to upload an image file.
         Args:
             file_path: Path to the image file
             name: Optional name for the uploaded file
             expiration: Optional expiration time in seconds (60-15552000)
         Returns:
             ImgBBResponse containing the parsed upload response from ImgBB
         """
@@ -145,16 +151,16 @@ class ImageUploader:
         self,
         image_url: str,
         name: Optional[str] = None,
-        expiration: Optional[int] = None
     ) -> ImgBBResponse:
         """
         Convenience method to upload an image from a URL.
         Args:
             image_url: URL of the image to upload
             name: Optional name for the uploaded file
             expiration: Optional expiration time in seconds (60-15552000)
         Returns:
             ImgBBResponse containing the parsed upload response from ImgBB
         """

 import os
 from pydantic import BaseModel
 class ImageInfo(BaseModel):
     filename: str
     name: str
     extension: str
     url: str
 class ImgBBData(BaseModel):
     id: str
     title: str
     medium: ImageInfo
     delete_url: str
 # Top-level model for the JSON body returned by an ImgBB upload; the
 # ImageUploader methods are annotated as returning it.
 class ImgBBResponse(BaseModel):
     # Nested ImgBBData model describing the uploaded image.
     data: ImgBBData
     # NOTE(review): presumably ImgBB's own success flag and status code, as
     # opposed to the HTTP-level ones — confirm against the ImgBB API docs.
     success: bool
     status: int
 class ImageUploader:
     """A class to handle image uploads to ImgBB service."""
     def __init__(self, api_key: str):
         """
         Initialize the ImageUploader with an API key.
         Args:
             api_key (str): The ImgBB API key
         """
         self.api_key = api_key
         self.base_url = "https://api.imgbb.com/1/upload"
     def upload(
         self,
         image: Union[str, bytes, Path],
         name: Optional[str] = None,
+        expiration: Optional[int] = None,
     ) -> ImgBBResponse:
         """
         Upload an image to ImgBB.
         Args:
             image: Can be:
                 - A file path (str or Path)
                 - Bytes of an image
             name: Optional name for the uploaded file
             expiration: Optional expiration time in seconds (60-15552000)
         Returns:
             ImgBBResponse containing the parsed upload response from ImgBB
         Raises:
             ValueError: If the image format is invalid or upload fails
             requests.RequestException: If the API request fails
         """
         # Prepare the parameters
+        params = {"key": self.api_key}
         if expiration:
             if not 60 <= expiration <= 15552000:
                 raise ValueError("Expiration must be between 60 and 15552000 seconds")
+            params["expiration"] = expiration
         # Handle different image input types
         if isinstance(image, (str, Path)):
             files = {}
             if os.path.isfile(image_str):
                 # It's a file path
+                with open(image_str, "rb") as file:
+                    files["image"] = file
+            elif image_str.startswith(("http://", "https://")):
                 # It's a URL
+                files["image"] = (None, image_str)
+            elif image_str.startswith("data:image/"):
                 # It's a data URI
                 # Extract the base64 part after the comma
+                base64_data = image_str.split(",", 1)[1]
+                files["image"] = (None, base64_data)
             else:
                 # Assume it's base64 data
+                files["image"] = (None, image_str)
             if name:
+                files["name"] = (None, name)
             response = requests.post(self.base_url, params=params, files=files)
         elif isinstance(image, bytes):
             # Convert bytes to base64
+            base64_image = base64.b64encode(image).decode("utf-8")
+            files = {"image": (None, base64_image)}
             if name:
+                files["name"] = (None, name)
             response = requests.post(self.base_url, params=params, files=files)
         else:
+            raise ValueError(
+                "Invalid image format. Must be file path, URL, base64 string, or bytes"
+            )
         # Check the response
         if response.status_code != 200:
+            raise ValueError(
+                f"Upload failed with status {response.status_code}: {response.text}"
+            )
         # Parse the response using Pydantic model
         response_json = response.json()
         self,
         file_path: Union[str, Path],
         name: Optional[str] = None,
+        expiration: Optional[int] = None,
     ) -> ImgBBResponse:
         """
         Convenience method to upload an image file.
         Args:
             file_path: Path to the image file
             name: Optional name for the uploaded file
             expiration: Optional expiration time in seconds (60-15552000)
         Returns:
             ImgBBResponse containing the parsed upload response from ImgBB
         """
         self,
         image_url: str,
         name: Optional[str] = None,
+        expiration: Optional[int] = None,
     ) -> ImgBBResponse:
         """
         Convenience method to upload an image from a URL.
         Args:
             image_url: URL of the image to upload
             name: Optional name for the uploaded file
             expiration: Optional expiration time in seconds (60-15552000)
         Returns:
             ImgBBResponse containing the parsed upload response from ImgBB
         """

src/services/openai_file_upload.py CHANGED Viewed

@@ -4,6 +4,7 @@ import os
 load_dotenv()
 class OpenAIFileUpload:
     def __init__(self):
         """Create the OpenAI client, keyed by the OPENAI_API_KEY environment variable."""
         # os.environ.get yields None when the variable is unset; that value is
         # passed through to the client unchanged.
         openai_key = os.environ.get("OPENAI_API_KEY")
         self.client = OpenAI(api_key=openai_key)

 load_dotenv()
 class OpenAIFileUpload:
     def __init__(self):
         """Create the OpenAI client, keyed by the OPENAI_API_KEY environment variable."""
         # os.environ.get yields None when the variable is unset; that value is
         # passed through to the client unchanged.
         openai_key = os.environ.get("OPENAI_API_KEY")
         self.client = OpenAI(api_key=openai_key)

src/utils.py CHANGED Viewed

@@ -4,16 +4,21 @@ from src.services.google_cloud_image_upload import GoogleCloudImageUploadService
 from PIL import Image
 from urllib.request import urlopen
 import io
 def image_path_to_base64(image_path: str) -> str:
     """Read the file at ``image_path`` in binary mode and return its bytes as base64 text."""
     with open(image_path, "rb") as handle:
         raw_bytes = handle.read()
     encoded = base64.b64encode(raw_bytes)
     return encoded.decode("utf-8")
 def upload_file_to_base64(file: UploadFile) -> str:
     """Return whatever ``file.file.read()`` yields, encoded as base64 text."""
     payload = file.file.read()
     encoded = base64.b64encode(payload)
     return encoded.decode("utf-8")
 def image_path_to_uri(image_path: str) -> str:
     """
     Return the image file at ``image_path`` as a base64 ``data:`` URI.
     The MIME type is guessed from the file extension so that non-JPEG files
     (PNG, GIF, ...) are labelled correctly instead of always being tagged
     ``image/jpeg``. When the extension is missing, unknown, or does not map
     to an image type, ``image/jpeg`` is used, so ``.jpg``/``.jpeg`` paths
     produce exactly the same URI as before.
     Args:
         image_path: Path to the image file on disk
     Returns:
         A string of the form ``data:<mime>;base64,<payload>``
     """
     # Function-scope import keeps the block self-contained (stdlib only).
     import mimetypes
     guessed_type, _ = mimetypes.guess_type(image_path)
     if guessed_type is None or not guessed_type.startswith("image/"):
         guessed_type = "image/jpeg"
     return f"data:{guessed_type};base64,{image_path_to_base64(image_path)}"
 def upload_image(image_path: str) -> str:
     """
     Upload an image to Google Cloud Storage and return the public URL.
@@ -27,6 +32,7 @@ def upload_image(image_path: str) -> str:
     upload_service = GoogleCloudImageUploadService()
     return upload_service.upload_image_to_gcs(image_path)
 def download_image_to_data_uri(image_url: str) -> str:
     # Open the image from the URL
     response = urlopen(image_url)
@@ -34,16 +40,20 @@ def download_image_to_data_uri(image_url: str) -> str:
     # Determine the image format; default to 'JPEG' if not found
     image_format = img.format if img.format is not None else "JPEG"
     # Build the MIME type; for 'JPEG', use 'image/jpeg'
-    mime_type = "image/jpeg" if image_format.upper() == "JPEG" else f"image/{image_format.lower()}"
     # Save the image to an in-memory buffer using the detected format
     buffered = io.BytesIO()
     img.save(buffered, format=image_format)
     # Encode the image bytes to base64
     img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
     # Return the data URI with the correct MIME type
-    return f"data:{mime_type};base64,{img_base64}"

 from PIL import Image
 from urllib.request import urlopen
 import io
 def image_path_to_base64(image_path: str) -> str:
     """Read the file at ``image_path`` in binary mode and return its bytes as base64 text."""
     with open(image_path, "rb") as handle:
         raw_bytes = handle.read()
     encoded = base64.b64encode(raw_bytes)
     return encoded.decode("utf-8")
 def upload_file_to_base64(file: UploadFile) -> str:
     """Return whatever ``file.file.read()`` yields, encoded as base64 text."""
     payload = file.file.read()
     encoded = base64.b64encode(payload)
     return encoded.decode("utf-8")
 def image_path_to_uri(image_path: str) -> str:
     """
     Return the image file at ``image_path`` as a base64 ``data:`` URI.
     The MIME type is guessed from the file extension so that non-JPEG files
     (PNG, GIF, ...) are labelled correctly instead of always being tagged
     ``image/jpeg``. When the extension is missing, unknown, or does not map
     to an image type, ``image/jpeg`` is used, so ``.jpg``/``.jpeg`` paths
     produce exactly the same URI as before.
     Args:
         image_path: Path to the image file on disk
     Returns:
         A string of the form ``data:<mime>;base64,<payload>``
     """
     # Function-scope import keeps the block self-contained (stdlib only).
     import mimetypes
     guessed_type, _ = mimetypes.guess_type(image_path)
     if guessed_type is None or not guessed_type.startswith("image/"):
         guessed_type = "image/jpeg"
     return f"data:{guessed_type};base64,{image_path_to_base64(image_path)}"
 def upload_image(image_path: str) -> str:
     """
     Upload an image to Google Cloud Storage and return the public URL.
     upload_service = GoogleCloudImageUploadService()
     return upload_service.upload_image_to_gcs(image_path)
 def download_image_to_data_uri(image_url: str) -> str:
     # Open the image from the URL
     response = urlopen(image_url)
     # Determine the image format; default to 'JPEG' if not found
     image_format = img.format if img.format is not None else "JPEG"
     # Build the MIME type; for 'JPEG', use 'image/jpeg'
+    mime_type = (
+        "image/jpeg"
+        if image_format.upper() == "JPEG"
+        else f"image/{image_format.lower()}"
+    )
     # Save the image to an in-memory buffer using the detected format
     buffered = io.BytesIO()
     img.save(buffered, format=image_format)
     # Encode the image bytes to base64
     img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
     # Return the data URI with the correct MIME type
+    return f"data:{mime_type};base64,{img_base64}"

stream_utils.py CHANGED Viewed

@@ -6,28 +6,29 @@ from rich.markdown import Markdown
 from rich.panel import Panel
 from rich.text import Text
 class StreamResponseHandler:
     """
     A utility class for handling streaming responses from API endpoints.
     Provides rich formatting and real-time updates of the response content.
     """
     def __init__(self, console=None):
         """
         Initialize the stream response handler.
         Args:
             console (Console, optional): A Rich console instance. If not provided, a new one will be created.
         """
         self.console = console or Console()
     def check_server_health(self, health_url="http://localhost:8000/health"):
         """
         Check if the server is running and accessible.
         Args:
             health_url (str, optional): The URL to check server health. Defaults to "http://localhost:8000/health".
         Returns:
             bool: True if the server is running and accessible, False otherwise.
         """
@@ -38,26 +39,32 @@ class StreamResponseHandler:
                 self.console.print("[bold green]✓ Server is running and accessible.[/]")
                 return True
             else:
-                self.console.print(f"[bold red]✗ Server health check failed[/] with status code: {response.status_code}")
                 return False
         except requests.exceptions.ConnectionError:
-            self.console.print("[bold red]✗ Error:[/] Could not connect to the server. Make sure it's running.")
             return False
         except Exception as e:
             self.console.print(f"[bold red]✗ Error checking server health:[/] {e}")
             return False
-    def stream_response(self, url, payload=None, params=None, method="POST", title="AI Response"):
         """
         Send a request to an endpoint and stream the output to the terminal.
         Args:
             url (str): The URL of the endpoint to send the request to.
             payload (dict, optional): The JSON payload to send in the request body. Defaults to None.
             params (dict, optional): The query parameters to send in the request. Defaults to None.
             method (str, optional): The HTTP method to use. Defaults to "POST".
             title (str, optional): The title to display in the panel. Defaults to "AI Response".
         Returns:
             bool: True if the streaming was successful, False otherwise.
         """
@@ -69,39 +76,42 @@ class StreamResponseHandler:
         if params:
             self.console.print("Parameters:", style="bold")
             self.console.print(json.dumps(params, indent=2))
         try:
             # Prepare the request
-            request_kwargs = {
-                "stream": True
-            }
             if payload:
                 request_kwargs["json"] = payload
             if params:
                 request_kwargs["params"] = params
             # Make the request
             with getattr(requests, method.lower())(url, **request_kwargs) as response:
                 # Check if the request was successful
                 if response.status_code != 200:
-                    self.console.print(f"[bold red]Error:[/] Received status code {response.status_code}")
                     self.console.print(f"Response: {response.text}")
                     return False
                 # Initialize an empty response text
                 full_response = ""
                 # Use Rich's Live display to update the content in place
-                with Live(Panel("Waiting for response...", title=title, border_style="blue"), refresh_per_second=10) as live:
                     # Process the streaming response
                     for line in response.iter_lines():
                         if line:
                             # Decode the line and parse it as JSON
-                            decoded_line = line.decode('utf-8')
                             try:
                                 # Parse the JSON
                                 data = json.loads(decoded_line)
                                 # Extract and display the content
                                 if isinstance(data, dict):
                                     if "content" in data:
@@ -111,35 +121,82 @@ class StreamResponseHandler:
                                                 # Append to the full response
                                                 full_response += text_content
                                                 # Update the live display with the current full response
-                                                live.update(Panel(Markdown(full_response), title=title, border_style="green"))
                                             elif content.get("type") == "image_url":
-                                                image_url = content.get("image_url", {}).get("url", "")
                                                 # Add a note about the image URL
-                                                image_note = f"\n\n[Image URL: {image_url}]"
                                                 full_response += image_note
-                                                live.update(Panel(Markdown(full_response), title=title, border_style="green"))
                                     elif "edited_image_url" in data:
                                         # Handle edited image URL from edit endpoint
                                         image_url = data.get("edited_image_url", "")
-                                        image_note = f"\n\n[Edited Image URL: {image_url}]"
                                         full_response += image_note
-                                        live.update(Panel(Markdown(full_response), title=title, border_style="green"))
                                     else:
                                         # For other types of data, just show the JSON
-                                        live.update(Panel(Text(json.dumps(data, indent=2)), title="Raw JSON Response", border_style="yellow"))
                                 else:
-                                    live.update(Panel(Text(decoded_line), title="Raw Response", border_style="yellow"))
                             except json.JSONDecodeError:
                                 # If it's not valid JSON, just show the raw line
-                                live.update(Panel(Text(f"Raw response: {decoded_line}"), title="Invalid JSON", border_style="red"))
                 self.console.print("[bold green]Stream completed.[/]")
                 return True
         except requests.exceptions.ConnectionError:
-            self.console.print(f"[bold red]Error:[/] Could not connect to the server at {url}", style="red")
-            self.console.print("Make sure the server is running and accessible.", style="red")
             return False
         except requests.exceptions.RequestException as e:
             self.console.print(f"[bold red]Error:[/] {e}", style="red")
-            return False

 from rich.panel import Panel
 from rich.text import Text
 class StreamResponseHandler:
     """
     A utility class for handling streaming responses from API endpoints.
     Provides rich formatting and real-time updates of the response content.
     """
     def __init__(self, console=None):
         """
         Initialize the stream response handler.
         Args:
             console (Console, optional): A Rich console instance. If not provided, a new one will be created.
         """
         self.console = console or Console()
     def check_server_health(self, health_url="http://localhost:8000/health"):
         """
         Check if the server is running and accessible.
         Args:
             health_url (str, optional): The URL to check server health. Defaults to "http://localhost:8000/health".
         Returns:
             bool: True if the server is running and accessible, False otherwise.
         """
                 self.console.print("[bold green]✓ Server is running and accessible.[/]")
                 return True
             else:
+                self.console.print(
+                    f"[bold red]✗ Server health check failed[/] with status code: {response.status_code}"
+                )
                 return False
         except requests.exceptions.ConnectionError:
+            self.console.print(
+                "[bold red]✗ Error:[/] Could not connect to the server. Make sure it's running."
+            )
             return False
         except Exception as e:
             self.console.print(f"[bold red]✗ Error checking server health:[/] {e}")
             return False
+    def stream_response(
+        self, url, payload=None, params=None, method="POST", title="AI Response"
+    ):
         """
         Send a request to an endpoint and stream the output to the terminal.
         Args:
             url (str): The URL of the endpoint to send the request to.
             payload (dict, optional): The JSON payload to send in the request body. Defaults to None.
             params (dict, optional): The query parameters to send in the request. Defaults to None.
             method (str, optional): The HTTP method to use. Defaults to "POST".
             title (str, optional): The title to display in the panel. Defaults to "AI Response".
         Returns:
             bool: True if the streaming was successful, False otherwise.
         """
         if params:
             self.console.print("Parameters:", style="bold")
             self.console.print(json.dumps(params, indent=2))
         try:
             # Prepare the request
+            request_kwargs = {"stream": True}
             if payload:
                 request_kwargs["json"] = payload
             if params:
                 request_kwargs["params"] = params
             # Make the request
             with getattr(requests, method.lower())(url, **request_kwargs) as response:
                 # Check if the request was successful
                 if response.status_code != 200:
+                    self.console.print(
+                        f"[bold red]Error:[/] Received status code {response.status_code}"
+                    )
                     self.console.print(f"Response: {response.text}")
                     return False
                 # Initialize an empty response text
                 full_response = ""
                 # Use Rich's Live display to update the content in place
+                with Live(
+                    Panel("Waiting for response...", title=title, border_style="blue"),
+                    refresh_per_second=10,
+                ) as live:
                     # Process the streaming response
                     for line in response.iter_lines():
                         if line:
                             # Decode the line and parse it as JSON
+                            decoded_line = line.decode("utf-8")
                             try:
                                 # Parse the JSON
                                 data = json.loads(decoded_line)
                                 # Extract and display the content
                                 if isinstance(data, dict):
                                     if "content" in data:
                                                 # Append to the full response
                                                 full_response += text_content
                                                 # Update the live display with the current full response
+                                                live.update(
+                                                    Panel(
+                                                        Markdown(full_response),
+                                                        title=title,
+                                                        border_style="green",
+                                                    )
+                                                )
                                             elif content.get("type") == "image_url":
+                                                image_url = content.get(
+                                                    "image_url", {}
+                                                ).get("url", "")
                                                 # Add a note about the image URL
+                                                image_note = (
+                                                    f"\n\n[Image URL: {image_url}]"
+                                                )
                                                 full_response += image_note
+                                                live.update(
+                                                    Panel(
+                                                        Markdown(full_response),
+                                                        title=title,
+                                                        border_style="green",
+                                                    )
+                                                )
                                     elif "edited_image_url" in data:
                                         # Handle edited image URL from edit endpoint
                                         image_url = data.get("edited_image_url", "")
+                                        image_note = (
+                                            f"\n\n[Edited Image URL: {image_url}]"
+                                        )
                                         full_response += image_note
+                                        live.update(
+                                            Panel(
+                                                Markdown(full_response),
+                                                title=title,
+                                                border_style="green",
+                                            )
+                                        )
                                     else:
                                         # For other types of data, just show the JSON
+                                        live.update(
+                                            Panel(
+                                                Text(json.dumps(data, indent=2)),
+                                                title="Raw JSON Response",
+                                                border_style="yellow",
+                                            )
+                                        )
                                 else:
+                                    live.update(
+                                        Panel(
+                                            Text(decoded_line),
+                                            title="Raw Response",
+                                            border_style="yellow",
+                                        )
+                                    )
                             except json.JSONDecodeError:
                                 # If it's not valid JSON, just show the raw line
+                                live.update(
+                                    Panel(
+                                        Text(f"Raw response: {decoded_line}"),
+                                        title="Invalid JSON",
+                                        border_style="red",
+                                    )
+                                )
                 self.console.print("[bold green]Stream completed.[/]")
                 return True
         except requests.exceptions.ConnectionError:
+            self.console.print(
+                f"[bold red]Error:[/] Could not connect to the server at {url}",
+                style="red",
+            )
+            self.console.print(
+                "Make sure the server is running and accessible.", style="red"
+            )
             return False
         except requests.exceptions.RequestException as e:
             self.console.print(f"[bold red]Error:[/] {e}", style="red")
+            return False

test_edit_stream.py CHANGED Viewed

@@ -2,19 +2,20 @@ import argparse
 import os
 import sys
 import requests
-import json
 from dotenv import load_dotenv
 from stream_utils import StreamResponseHandler
 # Load environment variables
 load_dotenv()
 def get_default_image():
     """Get the default image path and convert it to a data URI."""
     image_path = "./assets/lakeview.jpg"
     if os.path.exists(image_path):
         try:
             from src.utils import image_path_to_uri
             image_uri = image_path_to_uri(image_path)
             print(f"Using default image: {image_path}")
             return image_uri
@@ -25,80 +26,93 @@ def get_default_image():
         print(f"Warning: Default image not found at {image_path}")
         return None
 def upload_image(handler, image_path):
     """
     Upload an image to the local server's /upload endpoint.
     Args:
         handler (StreamResponseHandler): The stream response handler; only its console is used, for status output.
         image_path (str): Path to the image file to upload.
     Returns:
         str: The URL of the uploaded image, or None if upload failed.
     """
     # Bail out before any network traffic when the local file is missing.
     if not os.path.exists(image_path):
-        handler.console.print(f"[bold red]Error:[/] Image file not found at {image_path}")
         return None
     try:
         handler.console.print(f"Uploading image: [bold]{image_path}[/]")
-        with open(image_path, 'rb') as f:
-            files = {'file': (os.path.basename(image_path), f)}
             # The POST happens inside the with-block, so the file is still open while it is sent.
             response = requests.post("http://localhost:8000/upload", files=files)
             if response.status_code == 200:
                 # .get() yields None when the response JSON has no "image_url" key.
                 image_url = response.json().get("image_url")
-                handler.console.print(f"Image uploaded successfully. URL: [bold green]{image_url}[/]")
                 return image_url
             else:
-                handler.console.print(f"[bold red]Failed to upload image.[/] Status code: {response.status_code}")
                 handler.console.print(f"Response: {response.text}")
                 return None
     except Exception as e:
         # Best-effort helper: any failure is printed and reported to the caller as None.
         handler.console.print(f"[bold red]Error uploading image:[/] {e}")
         return None
 def main():
     """
     Command-line entry point: send one edit instruction to the local /edit/stream endpoint and render the streamed reply.
     The image comes from --upload if given, otherwise --image, otherwise get_default_image().
     """
     # Create a stream response handler
     handler = StreamResponseHandler()
     # Parse command line arguments
     parser = argparse.ArgumentParser(description="Test the image edit streaming API.")
-    parser.add_argument("--instruction", "-i", required=True, help="The edit instruction.")
     parser.add_argument("--image", "-img", help="The URL of the image to edit.")
     parser.add_argument("--upload", "-u", help="Path to an image file to upload first.")
     args = parser.parse_args()
     # Check if the server is running
     if not handler.check_server_health():
         sys.exit(1)
     image_url = args.image
     # If upload is specified, upload the image first
     # NOTE: an --upload value replaces any --image value; when the upload fails
     # image_url becomes None, so the default-image fallback below is tried.
     if args.upload:
         image_url = upload_image(handler, args.upload)
         if not image_url:
-            handler.console.print("[yellow]Warning:[/] Failed to upload image. Continuing without image URL.")
     # Use the default image if no image URL is provided
     # NOTE: get_default_image() supplies a data URI rather than an http(s) URL.
     if not image_url:
         image_url = get_default_image()
         if not image_url:
-            handler.console.print("[yellow]No image URL provided and default image not available.[/]")
             handler.console.print("The agent may ask for an image if needed.")
     # Prepare the payload for the edit request
-    payload = {
-        "edit_instruction": args.instruction
-    }
     # "image_url" is only included when an image was actually resolved above.
     if image_url:
         payload["image_url"] = image_url
     # Stream the edit request
     endpoint_url = "http://localhost:8000/edit/stream"
     handler.stream_response(endpoint_url, payload=payload, title="Image Edit Response")
 if __name__ == "__main__":
-    main()

 import os
 import sys
 import requests
 from dotenv import load_dotenv
 from stream_utils import StreamResponseHandler
 # Load environment variables
 load_dotenv()
 def get_default_image():
     """Get the default image path and convert it to a data URI."""
     image_path = "./assets/lakeview.jpg"
     if os.path.exists(image_path):
         try:
             from src.utils import image_path_to_uri
             image_uri = image_path_to_uri(image_path)
             print(f"Using default image: {image_path}")
             return image_uri
         print(f"Warning: Default image not found at {image_path}")
         return None
 def upload_image(handler, image_path):
     """
     Upload an image to the local server's /upload endpoint.
     Args:
         handler (StreamResponseHandler): The stream response handler; only its console is used, for status output.
         image_path (str): Path to the image file to upload.
     Returns:
         str: The URL of the uploaded image, or None if upload failed.
     """
     # Bail out before any network traffic when the local file is missing.
     if not os.path.exists(image_path):
+        handler.console.print(
+            f"[bold red]Error:[/] Image file not found at {image_path}"
+        )
         return None
     try:
         handler.console.print(f"Uploading image: [bold]{image_path}[/]")
+        with open(image_path, "rb") as f:
+            files = {"file": (os.path.basename(image_path), f)}
             # The POST happens inside the with-block, so the file is still open while it is sent.
             response = requests.post("http://localhost:8000/upload", files=files)
             if response.status_code == 200:
                 # .get() yields None when the response JSON has no "image_url" key.
                 image_url = response.json().get("image_url")
+                handler.console.print(
+                    f"Image uploaded successfully. URL: [bold green]{image_url}[/]"
+                )
                 return image_url
             else:
+                handler.console.print(
+                    f"[bold red]Failed to upload image.[/] Status code: {response.status_code}"
+                )
                 handler.console.print(f"Response: {response.text}")
                 return None
     except Exception as e:
         # Best-effort helper: any failure is printed and reported to the caller as None.
         handler.console.print(f"[bold red]Error uploading image:[/] {e}")
         return None
 def main():
     """
     Command-line entry point: send one edit instruction to the local /edit/stream endpoint and render the streamed reply.
     The image comes from --upload if given, otherwise --image, otherwise get_default_image().
     """
     # Create a stream response handler
     handler = StreamResponseHandler()
     # Parse command line arguments
     parser = argparse.ArgumentParser(description="Test the image edit streaming API.")
+    parser.add_argument(
+        "--instruction", "-i", required=True, help="The edit instruction."
+    )
     parser.add_argument("--image", "-img", help="The URL of the image to edit.")
     parser.add_argument("--upload", "-u", help="Path to an image file to upload first.")
     args = parser.parse_args()
     # Check if the server is running
     if not handler.check_server_health():
         sys.exit(1)
     image_url = args.image
     # If upload is specified, upload the image first
     # NOTE: an --upload value replaces any --image value; when the upload fails
     # image_url becomes None, so the default-image fallback below is tried.
     if args.upload:
         image_url = upload_image(handler, args.upload)
         if not image_url:
+            handler.console.print(
+                "[yellow]Warning:[/] Failed to upload image. Continuing without image URL."
+            )
     # Use the default image if no image URL is provided
     # NOTE: get_default_image() supplies a data URI rather than an http(s) URL.
     if not image_url:
         image_url = get_default_image()
         if not image_url:
+            handler.console.print(
+                "[yellow]No image URL provided and default image not available.[/]"
+            )
             handler.console.print("The agent may ask for an image if needed.")
     # Prepare the payload for the edit request
+    payload = {"edit_instruction": args.instruction}
     # "image_url" is only included when an image was actually resolved above.
     if image_url:
         payload["image_url"] = image_url
     # Stream the edit request
     endpoint_url = "http://localhost:8000/edit/stream"
     handler.stream_response(endpoint_url, payload=payload, title="Image Edit Response")
 if __name__ == "__main__":
+    main()

test_generic_stream.py CHANGED Viewed

@@ -6,24 +6,33 @@ from stream_utils import StreamResponseHandler
 # Load environment variables
 load_dotenv()
 def main():
     """Command-line entry point: send --query to the local /test/stream endpoint and render the streamed reply."""
     # Create a stream response handler (it prints through a Rich console)
     handler = StreamResponseHandler()
     # Parse command line arguments
-    parser = argparse.ArgumentParser(description="Test the generic agent streaming API.")
-    parser.add_argument("--query", "-q", required=True, help="The query or message to send to the generic agent.")
     args = parser.parse_args()
     # Check if the server is running
     if not handler.check_server_health():
         sys.exit(1)
     # Stream the generic request
     # The query is passed via params= (URL query string); no JSON payload is given.
     endpoint_url = "http://localhost:8000/test/stream"
     params = {"query": args.query}
     handler.stream_response(endpoint_url, params=params, title="Generic Agent Response")
 if __name__ == "__main__":
-    main()

 # Load environment variables
 load_dotenv()
 def main():
     """Command-line entry point: send --query to the local /test/stream endpoint and render the streamed reply."""
     # Create a stream response handler (it prints through a Rich console)
     handler = StreamResponseHandler()
     # Parse command line arguments
+    parser = argparse.ArgumentParser(
+        description="Test the generic agent streaming API."
+    )
+    parser.add_argument(
+        "--query",
+        "-q",
+        required=True,
+        help="The query or message to send to the generic agent.",
+    )
     args = parser.parse_args()
     # Check if the server is running
     if not handler.check_server_health():
         sys.exit(1)
     # Stream the generic request
     # The query is passed via params= (URL query string); no JSON payload is given.
     endpoint_url = "http://localhost:8000/test/stream"
     params = {"query": args.query}
     handler.stream_response(endpoint_url, params=params, title="Generic Agent Response")
 if __name__ == "__main__":
+    main()