File size: 4,504 Bytes
3db8e9e
 
 
 
 
 
 
 
 
91bb920
3545efb
203d34d
43b06bc
203d34d
 
3545efb
203d34d
3545efb
203d34d
3db8e9e
 
97dbe65
3db8e9e
91bb920
203d34d
3db8e9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
from pathlib import Path

from PIL import Image, ImageDraw, ImageFont
import gradio as gr
from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool, Tool
from gradio_client import Client

#%% Tool Wrapper for the Hugging Face Space
class TextToImageTool(Tool):
    """smolagents tool that sends a text prompt to a Hugging Face Space.

    The Space ``m-ric/text-to-image`` is reached through ``gradio_client``;
    one client is created per tool instance in ``__init__``.
    """

    name = "text_to_image"
    description = "Generate an image from a text prompt using m-ric/text-to-image."

    inputs = {
        "prompt": {
            "type": "string",
            "description": "Text prompt to generate the image"
        }
    }

    output_type = "image"

    def __init__(self):
        super().__init__()
        self.client = Client("m-ric/text-to-image")

    def forward(self, prompt):
        """Generate an image for *prompt* via the Space's ``/predict`` endpoint.

        smolagents invokes a tool through ``forward`` and checks that its
        parameter names match the keys of ``inputs``, so the inference code
        has to live in a method with exactly this name and signature.

        Args:
            prompt: Text prompt describing the image to generate.

        Returns:
            Whatever the Space's ``/predict`` endpoint returns (presumably a
            path to the generated image file -- confirm against the Space).
        """
        return self.client.predict(prompt, api_name="/predict")

    def run(self, prompt):
        """Backward-compatible alias for :meth:`forward`."""
        return self.forward(prompt)

#%% Utility functions
def add_label_to_image(image, label):
    """Draw *label* on a dark box in the bottom-right corner of *image*.

    The image is modified in place; the same object is returned so the call
    can be chained.

    Args:
        image: PIL image to annotate (drawn on directly).
        label: Text to render.

    Returns:
        The same ``image`` object, now carrying the label.
    """
    draw = ImageDraw.Draw(image)
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font_size = 30
    try:
        font = ImageFont.truetype(font_path, font_size)
    except (OSError, ImportError):
        # The font file is missing/unreadable or FreeType support is not
        # available: fall back to Pillow's built-in font instead of failing.
        font = ImageFont.load_default()

    # Measure the rendered text so it can be anchored 20 px from the
    # right and bottom edges.
    text_bbox = draw.textbbox((0, 0), label, font=font)
    text_width = text_bbox[2] - text_bbox[0]
    text_height = text_bbox[3] - text_bbox[1]
    position = (image.width - text_width - 20, image.height - text_height - 20)

    # Background box: the text area grown by rect_margin on every side.
    rect_margin = 10
    rect_position = [
        position[0] - rect_margin, position[1] - rect_margin,
        position[0] + text_width + rect_margin, position[1] + text_height + rect_margin
    ]
    # NOTE(review): the fill carries an alpha of 128; whether that produces
    # any translucency depends on the image mode -- confirm the intent.
    draw.rectangle(rect_position, fill=(0, 0, 0, 128))
    draw.text(position, label, fill="white", font=font)
    return image

def plot_and_save_agent_image(image, label, save_path=None):
    """Label *image*, display it, and optionally write it to disk.

    Args:
        image: Image handed to ``add_label_to_image``.
        label: Text to draw on the image.
        save_path: Destination file; when falsy nothing is saved.
    """
    annotated = add_label_to_image(image, label)
    annotated.show()

    # Nothing more to do when the caller did not ask for a file.
    if not save_path:
        return

    annotated.save(save_path)
    print(f"Image saved to {save_path}")

def generate_prompts_for_object(object_name):
    """Build the three image prompts (past, present, future) for an object.

    Args:
        object_name: Name of the object interpolated into every prompt.

    Returns:
        A dict with the keys ``"past"``, ``"present"`` and ``"future"``, in
        that order, each mapped to its prompt string.
    """
    past_prompt = f"Show an old version of a {object_name} from its early days."
    present_prompt = f"Show a {object_name} with current features/design/technology."
    future_prompt = (
        f"Show a futuristic version of a {object_name}, "
        "by predicting advanced features and futuristic design."
    )
    return dict(past=past_prompt, present=present_prompt, future=future_prompt)

def _safe_file_stem(name):
    """Return *name* with every character unsafe in a file name replaced by "_"."""
    cleaned = "".join(ch if ch.isalnum() or ch in " -_" else "_" for ch in name)
    return cleaned.strip() or "object"


def generate_object_history(object_name):
    """Generate past/present/future images of an object plus an animated GIF.

    For each time period the module-level ``agent`` is run on the matching
    prompt, the result is labelled, shown and saved as
    ``<stem>_<period>.png``; the three frames are then combined into
    ``<stem>_evolution.gif``.

    Args:
        object_name: Free-text object name (e.g. ``"bicycle"``). Surrounding
            whitespace is ignored.

    Returns:
        A ``(images, gif_path)`` tuple: the list of generated images in
        past/present/future order and the path of the written GIF.

    Raises:
        ValueError: If ``object_name`` is empty or only whitespace.
        TypeError: If the agent returns something that is not a PIL image.
    """
    name = (object_name or "").strip()
    if not name:
        # Fail fast instead of spending three agent runs on a blank prompt.
        raise ValueError("object_name must be a non-empty string")

    prompts = generate_prompts_for_object(name)
    labels = {
        "past": f"{name} - Past",
        "present": f"{name} - Present",
        "future": f"{name} - Future"
    }
    # The name comes straight from user input, so path separators and dots
    # must not reach the file system (e.g. "../../etc/x").
    file_stem = _safe_file_stem(name)

    images = []
    for time_period, prompt in prompts.items():
        print(f"Generating {time_period} frame: {prompt}")
        result = agent.run(prompt)  # Runs tool
        if hasattr(result, "to_raw"):  # If wrapped output
            result = result.to_raw()
        if not isinstance(result, Image.Image):
            raise TypeError(
                f"Agent returned {type(result).__name__} for the {time_period} "
                "frame; expected an image"
            )
        images.append(result)
        plot_and_save_agent_image(
            result, labels[time_period], save_path=f"{file_stem}_{time_period}.png"
        )

    gif_path = f"{file_stem}_evolution.gif"
    # First frame is the base image; the rest are appended, 1 s per frame,
    # looping forever.
    images[0].save(gif_path, save_all=True, append_images=images[1:], duration=1000, loop=0)
    return images, gif_path

#%% Tool & Agent Setup
# Tool that forwards prompts to the "m-ric/text-to-image" Space. Its client is
# created inside TextToImageTool.__init__, so that happens when this line runs.
image_generation_tool = TextToImageTool()
# Search tool imported from smolagents, offered to the agent alongside the
# image tool.
search_tool = DuckDuckGoSearchTool()
# Model object handed to the agent, configured with the
# "Qwen/Qwen2.5-72B-Instruct" model id.
llm_engine = InferenceClientModel("Qwen/Qwen2.5-72B-Instruct")

# Module-level agent; generate_object_history() calls agent.run() on each prompt.
agent = CodeAgent(tools=[image_generation_tool, search_tool], model=llm_engine)

#%% Gradio Interface
def create_gradio_interface():
    """Build the Gradio UI for the evolution generator.

    The layout is a single column with a text box, a button, a three-column
    gallery and a GIF viewer. Clicking the button calls
    ``generate_object_history`` with the text box content and routes its two
    return values to the gallery and the GIF viewer.

    Pre-generated "car" results are shown as initial content, but only when
    the files are actually present, so a missing asset cannot break start-up.

    Returns:
        The constructed ``gr.Blocks`` app (not launched).
    """
    default_images = [
        ("car_past.png", "Car - Past"),
        ("car_present.png", "Car - Present"),
        ("car_future.png", "Car - Future")
    ]
    default_gif_path = "car_evolution.gif"

    # Keep only the defaults that exist on disk; fall back to empty
    # components (value=None) otherwise.
    available_images = [
        (path, caption) for path, caption in default_images if Path(path).is_file()
    ]
    gallery_value = available_images or None
    gif_value = default_gif_path if Path(default_gif_path).is_file() else None

    with gr.Blocks() as demo:
        gr.Markdown("# TimeMetamorphy: an object Evolution Generator")
        gr.Markdown("""
        ## Unlocking the secrets of time!
        Enter an object name (like bicycle or smartphone), and this app will generate its visual evolution.
        """)

        with gr.Row():
            with gr.Column():
                object_name_input = gr.Textbox(label="Enter an object name", placeholder="e.g., bicycle, phone")
                generate_button = gr.Button("Generate Evolution")
                image_gallery = gr.Gallery(label="Generated Images", columns=3, rows=1, value=gallery_value)
                gif_output = gr.Image(label="Generated GIF", value=gif_value)

        generate_button.click(fn=generate_object_history, inputs=[object_name_input], outputs=[image_gallery, gif_output])
    return demo

# Launch app
# NOTE(review): there is no `if __name__ == "__main__":` guard, so the UI is
# built and launched whenever this module is executed or imported.
demo = create_gradio_interface()
# share=True is forwarded to Gradio's launch(); presumably this requests a
# public share link -- confirm against the Gradio docs for the version in use.
demo.launch(share=True)