File size: 4,509 Bytes
860760c 4bdfa75 1509d22 4bdfa75 1fd7a59 062d16c 5167fb6 860760c 4bdfa75 27a219a 84abbea 4bdfa75 860760c 4bdfa75 1fd7a59 860760c 3a2a66c fad0d14 3a2a66c 4bdfa75 860760c 4bdfa75 860760c fad0d14 1fd7a59 062d16c 4bdfa75 fad0d14 860760c fad0d14 4bdfa75 fad0d14 860760c 4bdfa75 4bdd365 f003f38 4bdfa75 860760c 4bdd365 11cd28e 4bdd365 860760c 4bdd365 4bdfa75 860760c 4bdfa75 5167fb6 4bdfa75 5420ab6 fad0d14 4bdfa75 fad0d14 4bdfa75 860760c 5167fb6 860760c 4bdfa75 5167fb6 860760c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import tempfile
from io import BytesIO
from pathlib import Path
from typing import Optional

import gradio as gr
from huggingface_hub import InferenceClient
from langchain.agents import create_react_agent, AgentExecutor
from langchain_community.llms import HuggingFaceHub
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_core.language_models.llms import LLM
from langchain_core.prompts import PromptTemplate
from langchain_core.tools import BaseTool
from PIL import Image, ImageDraw, ImageFont
from pydantic import Field
from transformers import pipeline
# === Image generation tool ===
class TextToImageTool(BaseTool):
    """LangChain tool that turns a text prompt into a PIL image.

    The image is produced by calling ``text_to_image`` on the wrapped
    ``InferenceClient``.
    """

    name: str = "text_to_image"
    description: str = "Generate an image from a text prompt."
    # The client is a live API handle rather than data, so it is kept out of
    # the model's exported fields.
    client: InferenceClient = Field(exclude=True)

    def _run(self, prompt: str) -> Image.Image:
        """Generate an image for ``prompt``.

        Args:
            prompt: Text description of the image to generate.

        Returns:
            The generated image as a ``PIL.Image.Image``.
        """
        print(f"[Tool] Generating image for prompt: {prompt}")
        result = self.client.text_to_image(prompt)
        # huggingface_hub returns a ready-made PIL image; wrapping that in
        # BytesIO raises TypeError. Only raw bytes need to be decoded.
        if isinstance(result, Image.Image):
            return result
        return Image.open(BytesIO(result))

    def _arun(self, prompt: str):
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError("This tool does not support async.")
# === Labeling Function ===
def _measure_label(draw, label, font):
    """Return ``(x_offset, y_offset, width, height)`` of ``label`` drawn with ``font``."""
    if hasattr(draw, "textbbox"):
        try:
            left, top, right, bottom = draw.textbbox((0, 0), label, font=font)
        except ValueError:
            # Some older Pillow releases reject bitmap fonts in textbbox;
            # fall through to the legacy measurement below.
            pass
        else:
            return left, top, right - left, bottom - top
    # Legacy API, only present on Pillow releases that predate its removal.
    width, height = draw.textsize(label, font=font)
    return 0, 0, width, height


def add_label_to_image(image, label):
    """Draw ``label`` on a dark box in the bottom-right corner of ``image``.

    The image is modified in place and also returned for convenience.

    Args:
        image: PIL image to annotate.
        label: Text to render.

    Returns:
        The same ``image`` object, now carrying the label.
    """
    draw = ImageDraw.Draw(image)
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font_size = 30
    try:
        font = ImageFont.truetype(font_path, font_size)
    except (OSError, ImportError):
        # Font file missing/unreadable or FreeType support unavailable:
        # fall back to Pillow's built-in font instead of failing.
        font = ImageFont.load_default()

    x_offset, y_offset, text_width, text_height = _measure_label(draw, label, font)
    position = (image.width - text_width - 20, image.height - text_height - 20)
    rect_position = [
        position[0] - 10,
        position[1] - 10,
        position[0] + text_width + 10,
        position[1] + text_height + 10,
    ]
    # NOTE(review): the alpha component is presumably ignored on non-RGBA
    # images, giving an opaque box - confirm whether translucency is wanted.
    draw.rectangle(rect_position, fill=(0, 0, 0, 128))
    # Compensate for the bounding-box origin so the glyphs sit inside the box.
    draw.text(
        (position[0] - x_offset, position[1] - y_offset),
        label,
        fill="white",
        font=font,
    )
    return image
# === Prompt Generator ===
def generate_prompts_for_object(object_name):
    """Build the image prompts for the three eras of ``object_name``.

    Args:
        object_name: Name of the object to depict (e.g. ``"car"``).

    Returns:
        A dict mapping ``"past"``, ``"present"`` and ``"future"`` (in that
        order) to the prompt text for that era.
    """
    past_prompt = f"Show an old version of a {object_name} from its early days."
    present_prompt = f"Show a {object_name} with current features/design/technology."
    future_prompt = f"Show a futuristic version of a {object_name}, predicting future features/designs."
    return dict(past=past_prompt, present=present_prompt, future=future_prompt)
# === Agent Setup ===
# Build the two tools: an image generator backed by a Hugging Face
# InferenceClient, and a DuckDuckGo web search.
text_to_image_client = InferenceClient("m-ric/text-to-image")
text_to_image_tool = TextToImageTool(client=text_to_image_client)
search_tool = DuckDuckGoSearchResults()
# Create a transformers text-generation pipeline at import time; each call
# generates at most 512 new tokens.
# NOTE(review): the model name suggests a 72B-parameter checkpoint, which is
# presumably too large for typical local hardware - confirm this is intended.
text_gen_pipeline = pipeline("text-generation", model="Qwen/Qwen2.5-72B-Instruct", max_new_tokens=512)
# Previously tried alternative model: tiiuae/falcon-7b-instruct
# Wrap pipeline into a LangChain LLM
class PipelineLLM(LLM):
    """LangChain LLM adapter around the module-level ``text_gen_pipeline``."""

    def _call(self, prompt, stop=None, run_manager=None, **kwargs):
        """Generate a completion for ``prompt``.

        Args:
            prompt: Text to complete.
            stop: Optional list of strings; the completion is cut at the
                first occurrence of any of them.
            run_manager: Accepted for LangChain compatibility; unused.
            **kwargs: Accepted for LangChain compatibility; unused.

        Returns:
            Only the newly generated text, without the echoed prompt.
        """
        output = text_gen_pipeline(prompt)[0]["generated_text"]
        # Text-generation pipelines echo the prompt by default; returning it
        # would make the agent re-parse its own instructions.
        if output.startswith(prompt):
            output = output[len(prompt):]
        # The ReAct agent relies on stop sequences to keep the model from
        # inventing tool observations, so enforce them here.
        if stop:
            cut = len(output)
            for marker in stop:
                if not marker:
                    continue
                index = output.find(marker)
                if index != -1:
                    cut = min(cut, index)
            output = output[:cut]
        return output

    @property
    def _llm_type(self):
        """Identifier LangChain uses for this LLM implementation."""
        return "pipeline_llm"
llm = PipelineLLM()

# Create agent and executor.
# create_react_agent requires a prompt exposing {tools}, {tool_names} and
# {agent_scratchpad}; calling it without one raises TypeError at import time.
REACT_PROMPT = PromptTemplate.from_template(
    """Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}"""
)
# Single list so the agent and its executor always see the same tools.
agent_tools = [text_to_image_tool, search_tool]
agent = create_react_agent(llm=llm, tools=agent_tools, prompt=REACT_PROMPT)
agent_executor = AgentExecutor(agent=agent, tools=agent_tools, verbose=True)
# === History Generator ===
def _safe_file_stem(name):
    """Return ``name`` reduced to characters that are safe in a file name."""
    cleaned = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in name)
    return cleaned.strip("_") or "object"


def generate_object_history(object_name: str):
    """Generate labeled past/present/future images of an object plus a GIF.

    Args:
        object_name: Name of the object to visualize (e.g. ``"car"``).

    Returns:
        A tuple ``(images, gif_path)`` where ``images`` is a list of
        ``(file_path, label)`` pairs, one per era, and ``gif_path`` is the
        path of an animated GIF cycling through those images.

    Raises:
        ValueError: If ``object_name`` is empty or only whitespace.
    """
    object_name = (object_name or "").strip()
    if not object_name:
        raise ValueError("object_name must be a non-empty string")

    prompts = generate_prompts_for_object(object_name)
    # The name comes straight from the UI: never splice it into a path
    # unsanitized, and write into a fresh temp directory so concurrent
    # requests cannot overwrite each other's files.
    file_stem = _safe_file_stem(object_name)
    output_dir = Path(tempfile.mkdtemp(prefix="evolution_"))

    images = []
    frames = []
    for period, prompt in prompts.items():
        label = f"{object_name} - {period.capitalize()}"
        labeled = add_label_to_image(text_to_image_tool._run(prompt), label)
        file_path = output_dir / f"{file_stem}_{period}.png"
        labeled.save(file_path)
        images.append((str(file_path), label))
        frames.append(labeled)

    gif_path = output_dir / f"{file_stem}_evolution.gif"
    # Reuse the in-memory frames rather than re-opening the PNGs, which
    # left file handles open.
    frames[0].save(
        gif_path,
        save_all=True,
        append_images=frames[1:],
        duration=1000,
        loop=0,
    )
    return images, str(gif_path)
# === Gradio UI ===
def create_gradio_interface():
    """Build the Gradio Blocks UI for the evolution visualizer.

    The UI has a text box for the object name, a button that runs
    ``generate_object_history``, a gallery for the generated images and an
    image component for the GIF.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    # NOTE(review): the nesting below is reconstructed; confirm it matches
    # the intended layout.
    with gr.Blocks() as demo:
        gr.Markdown("# TimeMetamorphy: Evolution Visualizer")
        with gr.Row():
            with gr.Column():
                object_input = gr.Textbox(label="Enter Object (e.g., car, phone)")
                generate_button = gr.Button("Generate Evolution")
        # `.style(grid=...)` is no longer available on Gradio components;
        # the column count is passed to the constructor instead.
        gallery = gr.Gallery(label="Generated Images", columns=3)
        gif_display = gr.Image(label="Generated GIF")
        generate_button.click(
            fn=generate_object_history,
            inputs=object_input,
            outputs=[gallery, gif_display],
        )
    return demo
# === Launch App ===
# Build the UI once at module level and start serving it immediately.
demo = create_gradio_interface()
# share=True asks Gradio for a public share link in addition to the local server.
demo.launch(share=True)
|