from huggingface_hub import InferenceClient
from langchain import hub
from langchain_community.tools import DuckDuckGoSearchResults
from langchain.agents import create_react_agent, AgentExecutor
from langchain_core.tools import BaseTool
from pydantic import Field
from PIL import Image, ImageDraw, ImageFont
import gradio as gr
from typing import List, Optional
from langchain_core.language_models.llms import LLM
from transformers import pipeline

# === Image generation tool ===
class TextToImageTool(BaseTool):
    name: str = "text_to_image"
    description: str = "Generate an image from a text prompt."
    client: InferenceClient = Field(exclude=True)

    def _run(self, prompt: str) -> Image.Image:
        print(f"[Tool] Generating image for prompt: {prompt}")
        # InferenceClient.text_to_image already returns a PIL.Image.Image,
        # so no byte decoding is needed here
        return self.client.text_to_image(prompt)

    def _arun(self, prompt: str):
        raise NotImplementedError("This tool does not support async.")


# === Labeling Function ===
def add_label_to_image(image, label):
    """Draw the label on a dark banner in the bottom-right corner of the image."""
    draw = ImageDraw.Draw(image)
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font_size = 30
    try:
        font = ImageFont.truetype(font_path, font_size)
    except OSError:
        # Fall back to Pillow's built-in font if DejaVu is not installed
        font = ImageFont.load_default()
    # draw.textsize() was removed in Pillow 10; textbbox() is the replacement
    bbox = draw.textbbox((0, 0), label, font=font)
    text_width, text_height = bbox[2] - bbox[0], bbox[3] - bbox[1]
    position = (image.width - text_width - 20, image.height - text_height - 20)
    rect_position = [position[0] - 10, position[1] - 10,
                     position[0] + text_width + 10, position[1] + text_height + 10]
    # Solid background; a semi-transparent banner would need an RGBA overlay
    draw.rectangle(rect_position, fill="black")
    draw.text(position, label, fill="white", font=font)
    return image


# === Prompt Generator ===
def generate_prompts_for_object(object_name):
    return {
        "past": f"Show an old version of a {object_name} from its early days.",
        "present": f"Show a {object_name} with current features/design/technology.",
        "future": f"Show a futuristic version of a {object_name}, predicting future features/designs.",
    }
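
# For example (hypothetical input), generate_prompts_for_object("bicycle") returns:
# {"past": "Show an old version of a bicycle from its early days.",
#  "present": "Show a bicycle with current features/design/technology.",
#  "future": "Show a futuristic version of a bicycle, predicting future features/designs."}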


# === Agent Setup ===
# Set up the tools
text_to_image_client = InferenceClient("m-ric/text-to-image")
text_to_image_tool = TextToImageTool(client=text_to_image_client)
search_tool = DuckDuckGoSearchResults()

# Load a public, token-free model locally via a transformers pipeline.
# Note: Qwen/Qwen2.5-72B-Instruct is a 72B-parameter model and far too large
# for most local machines; on constrained hardware, swap in a smaller instruct
# model such as tiiuae/falcon-7b-instruct.
text_gen_pipeline = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-72B-Instruct",
    max_new_tokens=512,
    return_full_text=False,  # return only the completion, not the echoed prompt
)

# Wrap the pipeline in a minimal LangChain LLM
class PipelineLLM(LLM):
    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str:
        return text_gen_pipeline(prompt)[0]["generated_text"]

    @property
    def _llm_type(self) -> str:
        return "pipeline_llm"

llm = PipelineLLM()

# Create the agent and executor. create_react_agent also needs a ReAct prompt
# template; here we pull the standard one ("hwchase17/react") from the
# LangChain hub.
tools = [text_to_image_tool, search_tool]
react_prompt = hub.pull("hwchase17/react")
agent = create_react_agent(llm=llm, tools=tools, prompt=react_prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)
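
# A minimal usage sketch (the Gradio UI below calls the image tool directly
# and bypasses the agent): AgentExecutor.invoke expects an "input" key and
# returns a dict with an "output" key, e.g.
#   result = agent_executor.invoke({"input": "When was the first car built?"})
#   print(result["output"])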


# === History Generator ===
def generate_object_history(object_name: str):
    prompts = generate_prompts_for_object(object_name)
    images = []
    labels = {
        "past": f"{object_name} - Past",
        "present": f"{object_name} - Present",
        "future": f"{object_name} - Future"
    }
    for period, prompt in prompts.items():
        result = text_to_image_tool._run(prompt)
        labeled = add_label_to_image(result, labels[period])
        file_path = f"{object_name}_{period}.png"
        labeled.save(file_path)
        images.append((file_path, labels[period]))
    gif_path = f"{object_name}_evolution.gif"
    pil_images = [Image.open(img[0]) for img in images]
    pil_images[0].save(gif_path, save_all=True, append_images=pil_images[1:], duration=1000, loop=0)
    return images, gif_path
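
# Example (hypothetical object name): generate_object_history("car") saves
# car_past.png, car_present.png, car_future.png plus car_evolution.gif and
# returns ([(path, label), ...], "car_evolution.gif") for the Gradio outputs.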


# === Gradio UI ===
def create_gradio_interface():
    with gr.Blocks() as demo:
        gr.Markdown("# TimeMetamorphy: Evolution Visualizer")

        with gr.Row():
            with gr.Column():
                object_input = gr.Textbox(label="Enter Object (e.g., car, phone)")
                generate_button = gr.Button("Generate Evolution")
                gallery = gr.Gallery(label="Generated Images", columns=3)  # .style() was removed in Gradio 4
                gif_display = gr.Image(label="Generated GIF")

        generate_button.click(fn=generate_object_history, inputs=object_input, outputs=[gallery, gif_display])

    return demo


# === Launch App ===
demo = create_gradio_interface()
demo.launch(share=True)