# Spaces: Running / Running  (web-page header residue, not program code)
import os
from dataclasses import dataclass

import logfire
from dotenv import load_dotenv
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIModel

# Load variables from a .env file first, so the os.environ lookups below
# (LOGFIRE_TOKEN here, OPENAI_API_KEY further down) can see them.
load_dotenv()

# Configure logfire with the token from the environment (None is passed when
# LOGFIRE_TOKEN is unset), then enable its OpenAI instrumentation.
logfire.configure(token=os.environ.get("LOGFIRE_TOKEN"))
logfire.instrument_openai()
# System prompt handed to the agent. It asks the model to do three things for
# a given image-editing instruction:
#   task_1 - classify the edit into one of five categories
#            (Addition / Remove / Local / Global / Background),
#   task_2 - name the subject to be edited in at most five words,
#   task_3 - describe the content of the image after the edit is applied.
# The text is sent to the model verbatim, so it is kept flush-left and free of
# any trailing markup.
system_prompt = """
I will give you an editing instruction of the image. Perform the following tasks:
<task_1>
Please output which type of editing category it is in.
You can choose from the following categories:
1. Addition: Adding new objects within the images, e.g., add a bird
2. Remove: Removing objects, e.g., remove the mask
3. Local: Replace local parts of an object and later the object's attributes (e.g., make it smile) or alter an object's visual appearance without affecting its structure (e.g., change the cat to a dog)
4. Global: Edit the entire image, e.g., let's see it in winter
5. Background: Change the scene's background, e.g., have her walk on water, change the background to a beach, make the hedgehog in France, etc.
Only output a single word, e.g., 'Addition'.
</task_1>
<task_2>
Please output the subject needed to be edited. You only need to output the basic description of the object in no more than 5 words. The output should only contain one noun.
For example, the editing instruction is 'Change the white cat to a black dog'. Then you need to output: 'white cat'. Only output the new content. Do not output anything else.
</task_2>
<task_3>
Please describe the new content that should be present in the image after applying the instruction.
For example, if the original image content shows a grandmother wearing a mask and the instruction is 'remove the mask', your output should be: 'a grandmother'.
The output should only include elements that remain in the image after the edit and should not mention elements that have been changed or removed, such as 'mask' in this example.
Do not output 'sorry, xxx', even if it's a guess, directly output the answer you think is correct.
</task_3>
"""
# Chat model used by the agent below: OpenAI "gpt-4o", authenticated with the
# OPENAI_API_KEY environment variable (None is passed if it is unset).
model = OpenAIModel(
    "gpt-4o",
    api_key=os.environ.get("OPENAI_API_KEY"),
)
@dataclass
class MaskGenerationResult:
    """Result container returned by mask generation.

    Declared as a dataclass so it can be constructed with
    ``MaskGenerationResult(mask_image_base64=...)`` and gets a generated
    ``__repr__`` and ``__eq__``.
    """

    # The generated mask image as a base64 string (per the field name; the
    # underlying image format is not specified here).
    mask_image_base64: str
# Agent that runs the configured model with the editing-instruction system prompt.
mask_generation_agent = Agent(model, system_prompt=system_prompt)
async def generate_mask(edit_instruction: str, image_url: str) -> MaskGenerationResult:
    """
    Use this tool to generate a mask for the image.
    """
    # TODO: not implemented yet. The body is a placeholder, so awaiting this
    # coroutine currently yields None rather than the annotated
    # MaskGenerationResult, and neither argument is read.
    # NOTE(review): the docstring reads like a tool description, but no tool
    # registration for this function is visible here - confirm how it is wired
    # before changing the docstring text or the placeholder behaviour.
    pass