from PIL import Image, ImageDraw, ImageFont
import gradio as gr
from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool, Tool
from gradio_client import Client


#%% Tool Wrapper for the Hugging Face Space
class TextToImageTool(Tool):
    name = "text_to_image"
    description = "Generate an image from a text prompt using the m-ric/text-to-image Space."
    inputs = {
        "prompt": {
            "type": "string",
            "description": "The text prompt describing the image to generate.",
        }
    }
    output_type = "image"

    def __init__(self):
        super().__init__()
        self.client = Client("m-ric/text-to-image")  # Connects to the HF Space

    def forward(self, prompt: str):
        result = self.client.predict(prompt, api_name="/predict")
        # gradio_client typically returns a local file path for image outputs,
        # so load it into a PIL image before handing it back to the agent.
        return Image.open(result) if isinstance(result, str) else result


#%% Utility functions
def add_label_to_image(image, label):
    draw = ImageDraw.Draw(image)
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font_size = 30
    try:
        font = ImageFont.truetype(font_path, font_size)
    except OSError:
        font = ImageFont.load_default()

    text_bbox = draw.textbbox((0, 0), label, font=font)
    text_width = text_bbox[2] - text_bbox[0]
    text_height = text_bbox[3] - text_bbox[1]
    position = (image.width - text_width - 20, image.height - text_height - 20)

    rect_margin = 10
    rect_position = [
        position[0] - rect_margin,
        position[1] - rect_margin,
        position[0] + text_width + rect_margin,
        position[1] + text_height + rect_margin,
    ]
    draw.rectangle(rect_position, fill=(0, 0, 0, 128))  # alpha is ignored on RGB images
    draw.text(position, label, fill="white", font=font)
    return image


def plot_and_save_agent_image(image, label, save_path=None):
    labeled_image = add_label_to_image(image, label)
    labeled_image.show()
    if save_path:
        labeled_image.save(save_path)
        print(f"Image saved to {save_path}")


def generate_prompts_for_object(object_name):
    return {
        "past": f"Show an old version of a {object_name} from its early days.",
        "present": f"Show a {object_name} with current features/design/technology.",
        "future": f"Show a futuristic version of a {object_name}, predicting advanced features and a futuristic design.",
    }


def generate_object_history(object_name):
    prompts = generate_prompts_for_object(object_name)
    labels = {
        "past": f"{object_name} - Past",
        "present": f"{object_name} - Present",
        "future": f"{object_name} - Future",
    }

    images = []
    for time_period, prompt in prompts.items():
        print(f"Generating {time_period} frame: {prompt}")
        result = agent.run(prompt)  # The agent decides when to call the image tool
        if hasattr(result, "to_raw"):  # Wrapped outputs (e.g. AgentImage) expose the raw PIL image
            result = result.to_raw()
        images.append(result)
        plot_and_save_agent_image(result, labels[time_period], save_path=f"{object_name}_{time_period}.png")

    gif_path = f"{object_name}_evolution.gif"
    images[0].save(gif_path, save_all=True, append_images=images[1:], duration=1000, loop=0)
    return images, gif_path


#%% Tool & Agent Setup
image_generation_tool = TextToImageTool()
search_tool = DuckDuckGoSearchTool()
llm_engine = InferenceClientModel("Qwen/Qwen2.5-72B-Instruct")
agent = CodeAgent(tools=[image_generation_tool, search_tool], model=llm_engine)


#%% Gradio Interface
def create_gradio_interface():
    with gr.Blocks() as demo:
        gr.Markdown("# TimeMetamorphy: an Object Evolution Generator")
        gr.Markdown("""
        ## Unlocking the secrets of time!
        Enter an object name (like bicycle or smartphone), and this app will generate its visual
        evolution across the past, present, and future.
        """)

        default_images = [
            ("car_past.png", "Car - Past"),
            ("car_present.png", "Car - Present"),
            ("car_future.png", "Car - Future"),
        ]
        default_gif_path = "car_evolution.gif"

        with gr.Row():
            with gr.Column():
                object_name_input = gr.Textbox(label="Enter an object name", placeholder="e.g., bicycle, phone")
                generate_button = gr.Button("Generate Evolution")
                image_gallery = gr.Gallery(label="Generated Images", columns=3, rows=1, value=default_images)
                gif_output = gr.Image(label="Generated GIF", value=default_gif_path)

        generate_button.click(
            fn=generate_object_history,
            inputs=[object_name_input],
            outputs=[image_gallery, gif_output],
        )

    return demo


# Launch app
demo = create_gradio_interface()
demo.launch(share=True)