File size: 4,511 Bytes
3db8e9e
 
 
 
 
 
 
 
 
4abd9be
 
 
d054694
4abd9be
 
3db8e9e
 
97dbe65
3db8e9e
 
4abd9be
97dbe65
3db8e9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from PIL import Image, ImageDraw, ImageFont
import gradio as gr
from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool, Tool
from gradio_client import Client

#%% Tool Wrapper for the Hugging Face Space
class TextToImageTool(Tool):
    """smolagents tool that sends a text prompt to the ``m-ric/text-to-image`` Space.

    A ``gradio_client.Client`` for the Space is created when the tool is
    instantiated; each call forwards the prompt to the Space's ``/predict``
    endpoint and returns whatever the client returns.
    """

    name = "text_to_image"
    description = "Generate an image from a text prompt using m-ric/text-to-image."

    # smolagents expects ``inputs`` to map every argument name to a spec dict
    # holding "type" and "description"; a bare Python type such as ``str``
    # does not satisfy the tool validation.
    inputs = {
        "prompt": {
            "type": "string",
            "description": "Text description of the image to generate.",
        }
    }
    output_type = "image"
    # NOTE(review): ``outputs`` is not one of the documented Tool attributes;
    # it is kept only so any existing reference to it keeps working.
    outputs = {"image": "image"}

    def __init__(self):
        super().__init__()
        self.client = Client("m-ric/text-to-image")

    def forward(self, prompt: str):
        """Generate an image for ``prompt`` via the remote Space.

        This is the method smolagents invokes when the agent calls the tool.

        Args:
            prompt: Text description of the desired image.

        Returns:
            The value returned by ``Client.predict`` for the ``/predict``
            endpoint.  NOTE(review): gradio_client usually hands back a local
            file path for image outputs rather than a PIL image -- confirm
            against the Space's API page.
        """
        return self.client.predict(prompt, api_name="/predict")

    def run(self, prompt: str):
        """Backward-compatible alias for :meth:`forward`."""
        return self.forward(prompt)


#%% Utility functions
def add_label_to_image(image, label):
    """Draw ``label`` on a dark box in the bottom-right corner of ``image``.

    The drawing happens directly on the object that is passed in, and that
    same object is returned; no copy is made.

    Args:
        image: PIL image to annotate (modified in place).
        label: Text to render.

    Returns:
        The annotated ``image`` object.
    """
    draw = ImageDraw.Draw(image)
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font_size = 30
    try:
        font = ImageFont.truetype(font_path, font_size)
    except (OSError, ImportError):
        # Font file missing/unreadable, or FreeType support unavailable:
        # fall back to Pillow's built-in font instead of failing.  Anything
        # else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        font = ImageFont.load_default()

    # Measure the rendered text so it can be anchored 20 px from the
    # right and bottom edges.
    left, top, right, bottom = draw.textbbox((0, 0), label, font=font)
    text_width = right - left
    text_height = bottom - top
    position = (image.width - text_width - 20, image.height - text_height - 20)

    # Background box extends 10 px beyond the text on every side.
    rect_margin = 10
    rect_position = [
        position[0] - rect_margin, position[1] - rect_margin,
        position[0] + text_width + rect_margin, position[1] + text_height + rect_margin
    ]
    # NOTE(review): the fourth component looks like an intended alpha value;
    # confirm it has the desired effect for the image modes passed in.
    draw.rectangle(rect_position, fill=(0, 0, 0, 128))
    draw.text(position, label, fill="white", font=font)
    return image

def plot_and_save_agent_image(image, label, save_path=None):
    """Label ``image``, display it, and optionally write it to disk.

    Args:
        image: Image handed to ``add_label_to_image`` for annotation.
        label: Text drawn onto the image.
        save_path: Destination file; when falsy nothing is saved.
    """
    annotated = add_label_to_image(image, label)
    annotated.show()
    if not save_path:
        return
    annotated.save(save_path)
    print(f"Image saved to {save_path}")

def generate_prompts_for_object(object_name):
    """Build the three image prompts (past, present, future) for an object.

    Args:
        object_name: Name of the object inserted into each prompt.

    Returns:
        Dict with keys ``"past"``, ``"present"`` and ``"future"`` (in that
        order), each mapped to its prompt string.
    """
    past_prompt = f"Show an old version of a {object_name} from its early days."
    present_prompt = f"Show a {object_name} with current features/design/technology."
    future_prompt = f"Show a futuristic version of a {object_name}, by predicting advanced features and futuristic design."
    return dict(past=past_prompt, present=present_prompt, future=future_prompt)

def generate_object_history(object_name):
    """Generate past/present/future images of an object plus an animated GIF.

    For each time period the module-level ``agent`` is run on the matching
    prompt, the resulting image is labelled, shown and saved as
    ``<name>_<period>.png``; the three frames are then combined into
    ``<name>_evolution.gif``.

    Args:
        object_name: Object to illustrate, as typed by the user.

    Returns:
        Tuple ``(images, gif_path)``: the list of labelled images in
        past/present/future order and the path of the saved GIF.

    Raises:
        gr.Error: If the name contains no letters or digits, or if the agent
            does not return something usable as an image.
    """
    object_name = (object_name or "").strip()
    if not any(ch.isalnum() for ch in object_name):
        raise gr.Error("Please enter an object name.")

    # The name comes straight from the UI and ends up in file names, so
    # replace anything that could act as a path separator or traversal
    # component.  Plain names such as "car" are left unchanged.
    file_stem = "".join(
        ch if ch.isalnum() or ch in " -_" else "_" for ch in object_name
    )

    prompts = generate_prompts_for_object(object_name)
    labels = {
        "past": f"{object_name} - Past",
        "present": f"{object_name} - Present",
        "future": f"{object_name} - Future"
    }

    images = []
    for time_period, prompt in prompts.items():
        print(f"Generating {time_period} frame: {prompt}")
        result = agent.run(prompt)  # Runs tool
        if hasattr(result, "to_raw"):  # If wrapped output
            result = result.to_raw()
        if isinstance(result, str):
            # The agent may answer with a file path instead of an image object.
            try:
                result = Image.open(result)
            except (OSError, ValueError) as err:
                raise gr.Error(
                    f"The agent did not return an image for the {time_period} frame."
                ) from err
        if not isinstance(result, Image.Image):
            raise gr.Error(
                f"The agent did not return an image for the {time_period} frame."
            )
        images.append(result)
        # Labelling happens in place, so the frames collected in ``images``
        # (and therefore the GIF) carry the labels as well.
        plot_and_save_agent_image(result, labels[time_period], save_path=f"{file_stem}_{time_period}.png")

    gif_path = f"{file_stem}_evolution.gif"
    images[0].save(gif_path, save_all=True, append_images=images[1:], duration=1000, loop=0)
    return images, gif_path

#%% Tool & Agent Setup
# Instantiate the two tools and the language model that the agent will use.
image_generation_tool = TextToImageTool()
search_tool = DuckDuckGoSearchTool()
llm_engine = InferenceClientModel("Qwen/Qwen2.5-72B-Instruct")

# Module-level agent; generate_object_history() reads this global when it
# calls agent.run(prompt).
agent = CodeAgent(tools=[image_generation_tool, search_tool], model=llm_engine)

#%% Gradio Interface
def create_gradio_interface():
    """Assemble the Gradio Blocks UI and return it without launching.

    The page holds a textbox for the object name, a button that triggers
    ``generate_object_history``, a three-column gallery and a GIF viewer.
    Both outputs are pre-filled with the "car" example files.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    # Example content displayed before the user generates anything.
    example_gif = "car_evolution.gif"
    example_frames = [
        (f"car_{period}.png", f"Car - {period.capitalize()}")
        for period in ("past", "present", "future")
    ]

    intro_text = (
        "\n"
        "        ## Unlocking the secrets of time!\n"
        "        Enter an object name (like bicycle or smartphone), and this app will generate its visual evolution.\n"
        "        "
    )

    with gr.Blocks() as app:
        gr.Markdown("# TimeMetamorphy: an object Evolution Generator")
        gr.Markdown(intro_text)

        with gr.Row(), gr.Column():
            name_box = gr.Textbox(
                label="Enter an object name",
                placeholder="e.g., bicycle, phone",
            )
            run_button = gr.Button("Generate Evolution")
            gallery = gr.Gallery(
                label="Generated Images",
                columns=3,
                rows=1,
                value=example_frames,
            )
            gif_view = gr.Image(label="Generated GIF", value=example_gif)

        # Clicking the button feeds the textbox value to the generator and
        # routes its two return values to the gallery and the GIF viewer.
        run_button.click(
            fn=generate_object_history,
            inputs=[name_box],
            outputs=[gallery, gif_view],
        )
    return app

# Launch app
# The interface is built and launched at module level, so both steps run
# whenever this file is executed or imported.
# NOTE(review): there is no `if __name__ == "__main__":` guard -- confirm that
# launching on import is intended.
demo = create_gradio_interface()
# NOTE(review): share=True presumably requests a public share link from
# Gradio -- confirm this is wanted for the deployment target.
demo.launch(share=True)