File size: 6,601 Bytes
860760c
 
 
 
 
1fd7a59
062d16c
5167fb6
860760c
 
 
 
 
84abbea
062d16c
860760c
1fd7a59
 
860760c
 
3a2a66c
fad0d14
3a2a66c
 
fad0d14
 
860760c
fad0d14
 
 
 
 
 
 
860760c
fad0d14
1fd7a59
 
062d16c
 
860760c
 
062d16c
 
fad0d14
062d16c
fad0d14
 
 
062d16c
 
fad0d14
860760c
fad0d14
860760c
fad0d14
 
062d16c
6d28680
 
860760c
fad0d14
6d28680
fad0d14
 
 
 
 
 
 
860760c
 
 
062d16c
860760c
 
 
 
 
 
 
 
062d16c
860760c
 
 
 
 
 
 
 
 
 
 
 
062d16c
fad0d14
860760c
062d16c
5167fb6
860760c
 
1fd7a59
860760c
 
 
 
 
 
 
 
 
fad0d14
860760c
 
 
 
5167fb6
860760c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5167fb6
 
062d16c
 
fad0d14
062d16c
 
 
860760c
062d16c
 
860760c
062d16c
 
fad0d14
 
 
 
 
 
 
 
5420ab6
fad0d14
 
860760c
 
 
 
 
fad0d14
860760c
 
 
062d16c
860760c
fad0d14
062d16c
5167fb6
 
860760c
 
5167fb6
860760c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import os
import tempfile
from io import BytesIO

import gradio as gr
import requests
from huggingface_hub import InferenceClient
from langchain.agents import create_react_agent
from langchain.tools import BaseTool, DuckDuckGoSearchResults
from langchain_huggingface import HuggingFaceHub
from PIL import Image, ImageDraw, ImageFont

# Your HF API token here (set your actual token)
#HF_TOKEN 

#%% Methods

def add_label_to_image(image, label):
    """Draw *label* in the bottom-right corner of *image* over a dark box.

    Args:
        image: PIL Image to annotate (modified in place).
        label: Text to render.

    Returns:
        The same PIL Image object, with the label drawn on it.
    """
    draw = ImageDraw.Draw(image)
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font_size = 30
    try:
        font = ImageFont.truetype(font_path, font_size)
    except OSError:
        # Font file missing/unreadable on this system (truetype raises OSError);
        # fall back to PIL's built-in bitmap font instead of swallowing all errors.
        font = ImageFont.load_default()
    # Measure the rendered text so we can anchor it 20px from the corner.
    text_bbox = draw.textbbox((0, 0), label, font=font)
    text_width, text_height = text_bbox[2] - text_bbox[0], text_bbox[3] - text_bbox[1]
    position = (image.width - text_width - 20, image.height - text_height - 20)
    # Semi-transparent-looking backing rectangle (alpha is ignored on RGB images).
    rect_margin = 10
    rect_position = [
        position[0] - rect_margin,
        position[1] - rect_margin,
        position[0] + text_width + rect_margin,
        position[1] + text_height + rect_margin,
    ]
    draw.rectangle(rect_position, fill=(0, 0, 0, 128))
    draw.text(position, label, fill="white", font=font)
    return image


def plot_and_save_agent_image(agent_image, label, save_path=None):
    """Overlay *label* on a PIL image, display it, and optionally save it.

    Args:
        agent_image: PIL Image produced by the agent (already decoded).
        label: Caption to stamp onto the image.
        save_path: Destination file path; if falsy, the image is not saved.
    """
    # agent_image is already a PIL Image in this refactor — no conversion needed.
    labeled = add_label_to_image(agent_image, label)
    labeled.show()

    if not save_path:
        print("No save path provided. Image not saved.")
    else:
        labeled.save(save_path)
        print(f"Image saved to {save_path}")


def generate_prompts_for_object(object_name):
    """Build the three era-specific text-to-image prompts for *object_name*.

    Returns:
        dict with keys "past", "present", "future" mapping to prompt strings.
    """
    past_prompt = f"Show an old version of a {object_name} from its early days."
    present_prompt = f"Show a {object_name} with current features/design/technology."
    future_prompt = (
        f"Show a futuristic version of a {object_name}, "
        "by predicting advanced features and futuristic design."
    )
    return {"past": past_prompt, "present": present_prompt, "future": future_prompt}


def generate_object_history(object_name):
    """Generate past/present/future images for *object_name* and bundle a GIF.

    Invokes the module-level ``agent`` once per era prompt, saves each labeled
    frame as ``{object_name}_{era}.png``, then writes an animated GIF.

    Returns:
        (frames, gif_path): list of PIL Images and the GIF file path.
    """
    prompts = generate_prompts_for_object(object_name)
    labels = {
        "past": f"{object_name} - Past",
        "present": f"{object_name} - Present",
        "future": f"{object_name} - Future",
    }
    frames = []

    for era, prompt in prompts.items():
        print(f"Generating {era} frame: {prompt}")
        output = agent.invoke(prompt)  # returns PIL Image or string output

        # The tool yields a PIL Image; anything else is a fallback string.
        if isinstance(output, Image.Image):
            frames.append(output)
            plot_and_save_agent_image(
                output, labels[era], save_path=f"{object_name}_{era}.png"
            )
        else:
            print(f"Unexpected output for {era}: {output}")

    gif_path = f"{object_name}_evolution.gif"
    if not frames:
        print("No images generated, GIF not created.")
    else:
        first, *rest = frames
        first.save(
            gif_path,
            save_all=True,
            append_images=rest,
            duration=1000,
            loop=0,
        )
        print(f"GIF saved to {gif_path}")

    return frames, gif_path


#%% Initialization of tools and AI_Agent

# Initialize HuggingFace Inference Client for text-to-image.
# BUG FIX: InferenceClient has no `repo_id` parameter — the model id is passed
# via `model`; the old call raised TypeError at import time.
text_to_image_client = InferenceClient(model="m-ric/text-to-image")

def run_text_to_image(prompt: str) -> Image.Image:
    """Generate an image for *prompt* via the HF Inference API.

    BUG FIX: ``InferenceClient.text_to_image`` returns a decoded ``PIL.Image``
    directly, not a list of URLs — the old code indexed into it and tried to
    download the result, which could never work.

    Raises:
        ValueError: if the model returned nothing.
    """
    image = text_to_image_client.text_to_image(prompt)
    if image is None:
        raise ValueError("No image returned from the model.")
    # Normalize mode so downstream drawing/GIF code sees a plain RGB image.
    return image.convert("RGB")

# Custom LangChain tool wrapper for text-to-image
class TextToImageTool(BaseTool):
    """LangChain tool that turns a text prompt into a PIL image."""

    # BUG FIX: BaseTool is a pydantic model in current langchain; class fields
    # must carry type annotations or model construction fails.
    name: str = "text-to-image"
    description: str = "Generates an image from a prompt using HuggingFace model"

    def _run(self, prompt: str):
        """Synchronous entry point used by the agent executor."""
        return run_text_to_image(prompt)

    async def _arun(self, prompt: str):
        """Async execution is not supported for this tool."""
        raise NotImplementedError()

image_generation_tool = TextToImageTool()

# DuckDuckGo Search Tool from LangChain
search_tool = DuckDuckGoSearchResults()

# BUG FIX: HF_TOKEN was never defined (only a commented-out placeholder near
# the top of the file), so referencing it here raised NameError at import.
# Read it from the environment instead; None falls back to anonymous access.
HF_TOKEN = os.environ.get("HF_TOKEN")

# HuggingFace LLM for Qwen2.5
llm_engine = HuggingFaceHub(
    repo_id="Qwen/Qwen2.5-72B-Instruct",
    huggingfacehub_api_token=HF_TOKEN,
    model_kwargs={"temperature": 0.7}
)

# Create agent with the tools and LLM.
# NOTE(review): langchain.agents.create_react_agent also takes a required
# `prompt` template argument in recent versions — confirm against the
# installed langchain release.
agent = create_react_agent(llm_engine, tools=[image_generation_tool, search_tool])


#%% Gradio interface
def create_gradio_interface():
    """Assemble and return the Gradio Blocks UI (caller invokes .launch())."""
    with gr.Blocks() as demo:
        gr.Markdown("# TimeMetamorphy: an object Evolution Generator")
        
        gr.Markdown("""
        ## Unlocking the secrets of time!
        This app unveils these mysteries by offering a unique/magic lens that allows us "time travel". 
        Powered by AI agents equipped with cutting-edge tools, it provides the superpower to explore the past, witness the present, and dream up the future like never before.

        This system allows you to generate visualizations of how an object/concept, like a bicycle or a car, may have evolved over time. 
        It generates images of the object in the past, present, and future based on your input.

        ### Default Example: Evolution of a Car
        Below, you can see a precomputed example of a "car" evolution. Enter another object to generate its evolution.
        """)

        # NOTE(review): these precomputed assets are assumed to ship alongside
        # the app — confirm car_past/present/future.png and the GIF exist.
        default_images = [
            ("car_past.png", "Car - Past"),
            ("car_present.png", "Car - Present"),
            ("car_future.png", "Car - Future")
        ]
        default_gif_path = "car_evolution.gif"

        with gr.Row():
            with gr.Column():
                object_name_input = gr.Textbox(
                    label="Enter an object name (e.g., bicycle, phone)", 
                    placeholder="Enter an object name", 
                    lines=1
                )
                generate_button = gr.Button("Generate Evolution")
                image_gallery = gr.Gallery(
                    label="Generated Images", show_label=True, columns=3, rows=1, value=default_images
                )
                gif_output = gr.Image(label="Generated GIF", show_label=True, value=default_gif_path)

        # generate_object_history returns (images, gif_path), matching the
        # (gallery, image) outputs positionally.
        generate_button.click(fn=generate_object_history, inputs=[object_name_input], outputs=[image_gallery, gif_output])
    
    return demo


# Launch the Gradio app.
# share=True additionally exposes a temporary public tunnel URL.
demo = create_gradio_interface()
demo.launch(share=True)