"""TimeMetamorphy: generate past/present/future images of an object and a GIF.

Uses a HuggingFace text-to-image model (via ``InferenceClient``) to render an
object at three points in time, labels each frame, assembles them into an
animated GIF, and serves everything through a Gradio UI.
"""

from io import BytesIO  # kept: part of the original import surface
import os
import tempfile  # kept: imported by the original file

import gradio as gr
import requests  # kept: part of the original import surface
from huggingface_hub import InferenceClient
from langchain.agents import AgentExecutor, create_react_agent
from langchain.prompts import PromptTemplate
from langchain.tools import BaseTool
from langchain_community.llms import HuggingFaceHub
from langchain_community.tools import DuckDuckGoSearchResults
from PIL import Image, ImageDraw, ImageFont

# Read the HF API token from the environment instead of hard-coding it.
# (The original file had the assignment commented out, which made every later
# reference to HF_TOKEN a NameError.)
HF_TOKEN = os.getenv("HF_TOKEN")

#%% Methods


def add_label_to_image(image, label):
    """Draw *label* in the bottom-right corner of *image* on a dark box.

    Args:
        image: PIL Image to annotate (modified in place and returned).
        label: Text to render.

    Returns:
        The same PIL Image, with the label drawn on it.
    """
    draw = ImageDraw.Draw(image)
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font_size = 30
    try:
        font = ImageFont.truetype(font_path, font_size)
    except OSError:
        # Font file not installed on this system — fall back to PIL's default.
        font = ImageFont.load_default()

    text_bbox = draw.textbbox((0, 0), label, font=font)
    text_width = text_bbox[2] - text_bbox[0]
    text_height = text_bbox[3] - text_bbox[1]
    # Anchor the text 20px from the bottom-right corner.
    position = (image.width - text_width - 20, image.height - text_height - 20)

    # Background rectangle with a 10px margin around the text.
    # NOTE(review): the (0, 0, 0, 128) alpha is ignored on RGB images — the
    # box renders fully opaque unless the image is RGBA.
    rect_margin = 10
    rect_position = [
        position[0] - rect_margin,
        position[1] - rect_margin,
        position[0] + text_width + rect_margin,
        position[1] + text_height + rect_margin,
    ]
    draw.rectangle(rect_position, fill=(0, 0, 0, 128))
    draw.text(position, label, fill="white", font=font)
    return image


def plot_and_save_agent_image(agent_image, label, save_path=None):
    """Label *agent_image*, preview it, and optionally save it to disk.

    Args:
        agent_image: PIL Image to label.
        label: Caption drawn onto the image.
        save_path: File path to save to; if falsy, the image is not saved.
    """
    labeled_image = add_label_to_image(agent_image, label)
    # Opens the platform image viewer — harmless headless, useful locally.
    labeled_image.show()
    if save_path:
        labeled_image.save(save_path)
        print(f"Image saved to {save_path}")
    else:
        print("No save path provided. Image not saved.")


def generate_prompts_for_object(object_name):
    """Return the past/present/future text-to-image prompts for *object_name*."""
    return {
        "past": f"Show an old version of a {object_name} from its early days.",
        "present": f"Show a {object_name} with current features/design/technology.",
        "future": f"Show a futuristic version of a {object_name}, by predicting advanced features and futuristic design.",
    }


def generate_object_history(object_name):
    """Generate past/present/future images of *object_name* and an animated GIF.

    Args:
        object_name: Name of the object/concept to visualize.

    Returns:
        (images, gif_path): the list of generated PIL Images and the path of
        the saved GIF, or ``None`` for gif_path if no image was generated.
    """
    images = []
    prompts = generate_prompts_for_object(object_name)
    labels = {
        "past": f"{object_name} - Past",
        "present": f"{object_name} - Present",
        "future": f"{object_name} - Future",
    }

    for time_period, prompt in prompts.items():
        print(f"Generating {time_period} frame: {prompt}")
        # Call the image tool directly: a ReAct agent's final answer is text,
        # so routing image generation through agent.invoke() can never yield
        # the PIL Image this pipeline needs.
        try:
            result = run_text_to_image(prompt)
        except Exception as exc:  # network/model failures: skip this frame
            print(f"Image generation failed for {time_period}: {exc}")
            continue

        if isinstance(result, Image.Image):
            images.append(result)
            image_filename = f"{object_name}_{time_period}.png"
            plot_and_save_agent_image(result, labels[time_period], save_path=image_filename)
        else:
            print(f"Unexpected output for {time_period}: {result}")

    if images:
        gif_path = f"{object_name}_evolution.gif"
        images[0].save(
            gif_path,
            save_all=True,
            append_images=images[1:],
            duration=1000,
            loop=0,
        )
        print(f"GIF saved to {gif_path}")
    else:
        # Do not return a path to a file that was never written.
        gif_path = None
        print("No images generated, GIF not created.")

    return images, gif_path


#%% Initialization of tools and AI_Agent

# HuggingFace Inference client for text-to-image.
# InferenceClient takes `model=`, not `repo_id=`.
text_to_image_client = InferenceClient(model="m-ric/text-to-image", token=HF_TOKEN)


def run_text_to_image(prompt: str) -> Image.Image:
    """Generate one image for *prompt* via the HF Inference API.

    Raises:
        ValueError: if the API returns nothing usable.
    """
    # InferenceClient.text_to_image returns a PIL Image directly (the original
    # code wrongly treated the result as a list of URLs and then fetched one).
    output = text_to_image_client.text_to_image(prompt)
    if not isinstance(output, Image.Image):
        raise ValueError("No image returned from the model.")
    return output.convert("RGB")


class TextToImageTool(BaseTool):
    """LangChain tool wrapper around :func:`run_text_to_image`."""

    # Type annotations are required by the pydantic-v2-based BaseTool.
    name: str = "text-to-image"
    description: str = "Generates an image from a prompt using HuggingFace model"

    def _run(self, prompt: str):
        return run_text_to_image(prompt)

    async def _arun(self, prompt: str):
        raise NotImplementedError()


image_generation_tool = TextToImageTool()

# DuckDuckGo Search Tool from LangChain
search_tool = DuckDuckGoSearchResults()

# HuggingFace LLM (Qwen2.5) used as the agent's reasoning engine.
llm_engine = HuggingFaceHub(
    repo_id="Qwen/Qwen2.5-72B-Instruct",
    huggingfacehub_api_token=HF_TOKEN,
    model_kwargs={"temperature": 0.7},
)

# Standard ReAct prompt — create_react_agent requires an explicit prompt with
# {tools}, {tool_names}, {input} and {agent_scratchpad} placeholders.
_REACT_PROMPT = PromptTemplate.from_template(
    """Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}"""
)

_tools = [image_generation_tool, search_tool]
# The agent is kept for conversational/search use; image frames are produced
# by calling run_text_to_image directly (see generate_object_history).
agent = AgentExecutor(
    agent=create_react_agent(llm_engine, _tools, _REACT_PROMPT),
    tools=_tools,
    handle_parsing_errors=True,
)


#%% Gradio interface


def create_gradio_interface():
    """Build and return the Gradio Blocks UI for the evolution generator."""
    with gr.Blocks() as demo:
        gr.Markdown("# TimeMetamorphy: an object Evolution Generator")
        gr.Markdown("""
        ## Unlocking the secrets of time!
        This app unveils these mysteries by offering a unique/magic lens that allows us "time travel".
        Powered by AI agents equipped with cutting-edge tools, it provides the superpower to explore the past, witness the present, and dream up the future like never before.

        This system allows you to generate visualizations of how an object/concept, like a bicycle or a car, may have evolved over time.
        It generates images of the object in the past, present, and future based on your input.

        ### Default Example: Evolution of a Car
        Below, you can see a precomputed example of a "car" evolution. Enter another object to generate its evolution.
        """)

        # Only show precomputed defaults that actually exist on disk —
        # otherwise Gradio errors on missing files.
        default_images = [
            (path, caption)
            for path, caption in [
                ("car_past.png", "Car - Past"),
                ("car_present.png", "Car - Present"),
                ("car_future.png", "Car - Future"),
            ]
            if os.path.exists(path)
        ] or None
        default_gif_path = "car_evolution.gif"
        if not os.path.exists(default_gif_path):
            default_gif_path = None

        with gr.Row():
            with gr.Column():
                object_name_input = gr.Textbox(
                    label="Enter an object name (e.g., bicycle, phone)",
                    placeholder="Enter an object name",
                    lines=1,
                )
                generate_button = gr.Button("Generate Evolution")
                image_gallery = gr.Gallery(
                    label="Generated Images",
                    show_label=True,
                    columns=3,
                    rows=1,
                    value=default_images,
                )
                gif_output = gr.Image(
                    label="Generated GIF", show_label=True, value=default_gif_path
                )

        generate_button.click(
            fn=generate_object_history,
            inputs=[object_name_input],
            outputs=[image_gallery, gif_output],
        )

    return demo


# Launch only when run as a script, not when imported as a module.
if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch(share=True)