File size: 4,357 Bytes
3db8e9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from PIL import Image, ImageDraw, ImageFont
import gradio as gr
from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool, Tool
from gradio_client import Client

#%% Tool Wrapper for the Hugging Face Space
class TextToImageTool(Tool):
    """smolagents tool that sends a text prompt to the ``m-ric/text-to-image`` Space.

    smolagents validates a tool when it is instantiated and expects the class
    attributes ``name``, ``description``, ``inputs`` and ``output_type`` plus a
    ``forward`` method whose parameters match the keys of ``inputs``.
    """

    name = "text_to_image"
    description = "Generate an image from a text prompt using m-ric/text-to-image."
    # Schema of the arguments the agent may pass; keys must match ``forward``.
    inputs = {
        "prompt": {
            "type": "string",
            "description": "Text description of the image to generate.",
        }
    }
    # Declares the result as an image so smolagents can wrap it accordingly.
    output_type = "image"

    def __init__(self):
        super().__init__()
        self.client = Client("m-ric/text-to-image")  # Calls HF Space

    def forward(self, prompt: str):
        """Call the Space's ``/predict`` endpoint with ``prompt`` and return its result.

        NOTE(review): gradio_client usually returns a local file path for image
        outputs rather than a PIL image -- confirm against the Space's API page.
        """
        return self.client.predict(prompt, api_name="/predict")

    def run(self, prompt: str):
        """Backward-compatible alias for :meth:`forward`."""
        return self.forward(prompt)

#%% Utility functions
def add_label_to_image(image, label):
    """Draw ``label`` on a dark box in the bottom-right corner of ``image``.

    The image is modified in place and the same object is returned.

    Args:
        image: Image to annotate; it must be accepted by ``ImageDraw.Draw``
            and expose ``width`` and ``height``.
        label: Text to render.

    Returns:
        The same ``image`` object, now carrying the label.
    """
    draw = ImageDraw.Draw(image)
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font_size = 30
    try:
        font = ImageFont.truetype(font_path, font_size)
    except (OSError, ImportError):
        # OSError: the font file is missing or unreadable on this machine.
        # ImportError: Pillow was built without FreeType support.
        # Either way, fall back to Pillow's built-in font instead of failing.
        font = ImageFont.load_default()

    # Measure the rendered text so it can be anchored 20 px from the
    # right and bottom edges.
    text_bbox = draw.textbbox((0, 0), label, font=font)
    text_width = text_bbox[2] - text_bbox[0]
    text_height = text_bbox[3] - text_bbox[1]
    position = (image.width - text_width - 20, image.height - text_height - 20)

    # Background box extends 10 px beyond the text on every side.
    rect_margin = 10
    rect_position = [
        position[0] - rect_margin, position[1] - rect_margin,
        position[0] + text_width + rect_margin, position[1] + text_height + rect_margin
    ]
    draw.rectangle(rect_position, fill=(0, 0, 0, 128))
    draw.text(position, label, fill="white", font=font)
    return image

def plot_and_save_agent_image(image, label, save_path=None):
    """Label ``image``, display it, and optionally write it to ``save_path``.

    Args:
        image: Image passed on to ``add_label_to_image``.
        label: Text to draw on the image.
        save_path: Destination file; when falsy, nothing is saved.
    """
    annotated = add_label_to_image(image, label)
    annotated.show()

    # Nothing more to do when the caller did not ask for a file.
    if not save_path:
        return

    annotated.save(save_path)
    print(f"Image saved to {save_path}")

def generate_prompts_for_object(object_name):
    """Return the image prompts for ``object_name``, keyed by time period.

    The mapping has the keys ``"past"``, ``"present"`` and ``"future"``,
    in that order.
    """
    past_prompt = f"Show an old version of a {object_name} from its early days."
    present_prompt = f"Show a {object_name} with current features/design/technology."
    future_prompt = (
        f"Show a futuristic version of a {object_name}, "
        "by predicting advanced features and futuristic design."
    )
    return dict(past=past_prompt, present=present_prompt, future=future_prompt)

def generate_object_history(object_name):
    """Generate past/present/future images of an object plus an animated GIF.

    For each prompt from ``generate_prompts_for_object`` the module-level
    ``agent`` is run, the result is labelled, shown and saved as a PNG, and
    finally all frames are written to one looping GIF.

    Args:
        object_name: Name of the object, as typed by the user.

    Returns:
        A tuple ``(images, gif_path)``: the list of labelled frames and the
        path of the GIF that was written.

    Raises:
        ValueError: If ``object_name`` is empty or only whitespace.
        TypeError: If the agent returns something that is not a PIL image.
    """
    object_name = (object_name or "").strip()
    if not object_name:
        # Fail fast instead of sending meaningless prompts to the agent.
        raise ValueError("Please enter an object name.")

    # The name comes straight from a text box and is used to build file names,
    # so replace anything that could act as a path component ("/", "\\", ".").
    file_stem = "".join(
        ch if ch.isalnum() or ch in " -_" else "_" for ch in object_name
    )

    prompts = generate_prompts_for_object(object_name)

    images = []
    for time_period, prompt in prompts.items():
        print(f"Generating {time_period} frame: {prompt}")
        result = agent.run(prompt)  # Runs tool
        if hasattr(result, "to_raw"):  # If wrapped output
            result = result.to_raw()
        if not isinstance(result, Image.Image):
            # The agent may answer with text; report that clearly rather than
            # failing later inside the drawing code.
            raise TypeError(
                f"Expected an image for the {time_period} frame, "
                f"got {type(result).__name__}."
            )
        images.append(result)
        plot_and_save_agent_image(
            result,
            f"{object_name} - {time_period.capitalize()}",
            save_path=f"{file_stem}_{time_period}.png",
        )

    # One second per frame, looping forever.
    gif_path = f"{file_stem}_evolution.gif"
    images[0].save(gif_path, save_all=True, append_images=images[1:], duration=1000, loop=0)
    return images, gif_path

#%% Tool & Agent Setup
# Tools made available to the agent: the image-generation Space wrapper and web search.
image_generation_tool = TextToImageTool()
search_tool = DuckDuckGoSearchTool()

# Language model that drives the agent.
llm_engine = InferenceClientModel("Qwen/Qwen2.5-72B-Instruct")

# Module-level agent used by generate_object_history().
agent = CodeAgent(
    model=llm_engine,
    tools=[image_generation_tool, search_tool],
)

#%% Gradio Interface
def create_gradio_interface():
    """Build and return the Gradio Blocks UI (the app is not launched here).

    The page has a text box for the object name, a button that calls
    ``generate_object_history``, a gallery for the frames and an image slot
    for the GIF. Both outputs start with the "car" example files.
    """
    # Example content shown before the first generation.
    # NOTE(review): these files are presumably shipped next to the app -- confirm.
    default_images = [
        ("car_past.png", "Car - Past"),
        ("car_present.png", "Car - Present"),
        ("car_future.png", "Car - Future")
    ]
    default_gif_path = "car_evolution.gif"

    with gr.Blocks() as demo:
        gr.Markdown("# TimeMetamorphy: an object Evolution Generator")
        gr.Markdown("""
        ## Unlocking the secrets of time!
        Enter an object name (like bicycle or smartphone), and this app will generate its visual evolution.
        """)

        # Single column holding the input, the trigger and both outputs.
        with gr.Row(), gr.Column():
            name_box = gr.Textbox(label="Enter an object name", placeholder="e.g., bicycle, phone")
            run_button = gr.Button("Generate Evolution")
            gallery = gr.Gallery(label="Generated Images", columns=3, rows=1, value=default_images)
            gif_view = gr.Image(label="Generated GIF", value=default_gif_path)

        # The handler returns (images, gif_path), matching the two outputs.
        run_button.click(
            fn=generate_object_history,
            inputs=[name_box],
            outputs=[gallery, gif_view],
        )
    return demo

# Launch app: build the Blocks UI when the module is executed and start the server.
# share=True asks Gradio for a public share link (see Gradio's launch() docs).
demo = create_gradio_interface()
demo.launch(share=True)