# Source: Hugging Face file view of app.py (raw / history / blame), 4.51 kB.
# Last commit: "Update app.py" (11cd28e, verified) by AMfeta99.
import tempfile
from io import BytesIO
from typing import Optional

import gradio as gr
from huggingface_hub import InferenceClient
from langchain.agents import create_react_agent, AgentExecutor
from langchain_community.llms import HuggingFaceHub
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_core.language_models.llms import LLM
from langchain_core.prompts import PromptTemplate
from langchain_core.tools import BaseTool
from PIL import Image, ImageDraw, ImageFont
from pydantic import Field
from transformers import pipeline
# === Image generation tool ===
class TextToImageTool(BaseTool):
    """LangChain tool that turns a text prompt into a PIL image.

    Generation is delegated to the ``InferenceClient`` stored in ``client``.
    """

    name: str = "text_to_image"
    description: str = "Generate an image from a text prompt."
    # The client is excluded from the tool's serialized fields.
    client: InferenceClient = Field(exclude=True)

    def _run(self, prompt: str) -> Image.Image:
        """Generate and return an image for *prompt*.

        Args:
            prompt: Text description of the image to generate.

        Returns:
            The generated image as a ``PIL.Image.Image``.
        """
        print(f"[Tool] Generating image for prompt: {prompt}")
        result = self.client.text_to_image(prompt)
        # InferenceClient.text_to_image returns a PIL image; wrapping that in
        # BytesIO raises TypeError. Raw bytes are still accepted in case a
        # different client implementation hands back an encoded payload.
        if isinstance(result, Image.Image):
            return result
        return Image.open(BytesIO(result))

    def _arun(self, prompt: str):
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError("This tool does not support async.")
# === Labeling Function ===
def add_label_to_image(image, label):
    """Draw *label* on a dark box in the bottom-right corner of *image*.

    The image is modified in place and also returned.

    Args:
        image: PIL image to annotate.
        label: Text to render.

    Returns:
        The same ``image`` object, with the label drawn on it.
    """
    draw = ImageDraw.Draw(image)
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font_size = 30
    try:
        font = ImageFont.truetype(font_path, font_size)
    except (OSError, ImportError):
        # Font file missing/unreadable, or FreeType support unavailable:
        # fall back to Pillow's built-in font instead of failing.
        font = ImageFont.load_default()

    # ImageDraw.textsize() was removed in Pillow 10; textbbox() is its
    # replacement. Older releases either lack textbbox() (AttributeError) or
    # reject bitmap fonts in it (ValueError), so fall back to textsize() there.
    try:
        left, top, right, bottom = draw.textbbox((0, 0), label, font=font)
    except (AttributeError, ValueError):
        width, height = draw.textsize(label, font=font)
        left, top, right, bottom = 0, 0, width, height
    text_width = right - left
    text_height = bottom - top

    # Anchor the text box 20 px in from the bottom-right corner.
    position = (image.width - text_width - 20, image.height - text_height - 20)
    rect_position = [
        position[0] - 10,
        position[1] - 10,
        position[0] + text_width + 10,
        position[1] + text_height + 10,
    ]
    # NOTE(review): ImageDraw writes pixels rather than blending, so the 128
    # alpha likely does not yield a translucent box - confirm intended look.
    draw.rectangle(rect_position, fill=(0, 0, 0, 128))
    # The bounding box may start at a non-zero offset from the drawing origin;
    # shift the origin so the rendered glyphs sit inside the padded rectangle.
    draw.text((position[0] - left, position[1] - top), label, fill="white", font=font)
    return image
# === Prompt Generator ===
def generate_prompts_for_object(object_name):
    """Build the image prompts for the three eras of an object.

    Args:
        object_name: Name of the object to depict (e.g. "car").

    Returns:
        A dict with the keys "past", "present" and "future", in that order,
        each mapped to the prompt text for that era.
    """
    past_prompt = f"Show an old version of a {object_name} from its early days."
    present_prompt = f"Show a {object_name} with current features/design/technology."
    future_prompt = f"Show a futuristic version of a {object_name}, predicting future features/designs."
    return dict(past=past_prompt, present=present_prompt, future=future_prompt)
# === Agent Setup ===
# Set up the tools
# Inference client constructed for "m-ric/text-to-image"; the image tool
# delegates generation to it.
text_to_image_client = InferenceClient("m-ric/text-to-image")
text_to_image_tool = TextToImageTool(client=text_to_image_client)
# Web-search tool handed to the agent alongside the image tool.
search_tool = DuckDuckGoSearchResults()
# Load a public, token-free model locally via transformers pipeline
# NOTE(review): this is a 72B-parameter checkpoint loaded in-process at import
# time - confirm the host has the memory for it.
text_gen_pipeline = pipeline("text-generation", model="Qwen/Qwen2.5-72B-Instruct", max_new_tokens=512)
# Alternative model id noted by the author: tiiuae/falcon-7b-instruct
# Wrap pipeline into a LangChain LLM
class PipelineLLM(LLM):
    """LangChain ``LLM`` adapter around the module-level ``text_gen_pipeline``."""

    def _call(self, prompt, stop=None, run_manager=None, **kwargs):
        """Generate a completion for *prompt*.

        Args:
            prompt: Text to complete.
            stop: Optional sequences; the completion is cut before the first
                occurrence of any of them.
            run_manager: Accepted for compatibility with the base-class
                signature; not used.
            **kwargs: Accepted for compatibility; not used.

        Returns:
            The generated text, without the echoed prompt.
        """
        generated = text_gen_pipeline(prompt)[0]["generated_text"]
        # The text-generation pipeline returns prompt + completion by default.
        # Agents parse only the completion, so drop the echoed prompt.
        if generated.startswith(prompt):
            generated = generated[len(prompt):]
        # Honour stop sequences (e.g. the ReAct agent's "\nObservation"), so
        # the model cannot run on and invent tool output.
        for marker in stop or ():
            cut = generated.find(marker)
            if cut != -1:
                generated = generated[:cut]
        return generated

    @property
    def _llm_type(self):
        """Identifier string for this LLM implementation."""
        return "pipeline_llm"
llm = PipelineLLM()

# create_react_agent requires a prompt that exposes the `tools`, `tool_names`
# and `agent_scratchpad` variables; calling it without one raises TypeError.
react_prompt = PromptTemplate.from_template(
    """Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}"""
)

# Create agent and executor
agent = create_react_agent(llm=llm, tools=[text_to_image_tool, search_tool], prompt=react_prompt)
agent_executor = AgentExecutor(agent=agent, tools=[text_to_image_tool, search_tool], verbose=True)
# === History Generator ===
def generate_object_history(object_name: str):
    """Generate labelled past/present/future images of an object plus a GIF.

    One image per era is generated, labelled, and saved as a PNG in the
    current working directory. The three frames are then combined into a
    looping GIF (1 s per frame).

    Args:
        object_name: Name of the object to visualise (e.g. "car").

    Returns:
        A tuple ``(images, gif_path)``, where ``images`` is a list of
        ``(png_path, label)`` tuples in past/present/future order and
        ``gif_path`` is the path of the animated GIF.
    """
    import re  # local import: only needed for file-name sanitising

    prompts = generate_prompts_for_object(object_name)
    labels = {
        "past": f"{object_name} - Past",
        "present": f"{object_name} - Present",
        "future": f"{object_name} - Future",
    }

    # object_name is user input: keep path separators, "..", and other
    # characters that are awkward in file names out of the output paths.
    # Plain names such as "car" are unchanged.
    file_stem = re.sub(r"[^\w.-]+", "_", object_name).strip("._") or "object"

    images = []
    frames = []
    for period, prompt in prompts.items():
        result = text_to_image_tool._run(prompt)
        labeled = add_label_to_image(result, labels[period])
        file_path = f"{file_stem}_{period}.png"
        labeled.save(file_path)
        images.append((file_path, labels[period]))
        # Keep the in-memory frame for the GIF rather than reopening the PNG,
        # which would leave file handles open.
        frames.append(labeled)

    gif_path = f"{file_stem}_evolution.gif"
    frames[0].save(gif_path, save_all=True, append_images=frames[1:], duration=1000, loop=0)
    return images, gif_path
# === Gradio UI ===
def create_gradio_interface():
    """Build the Gradio Blocks UI for the evolution visualizer.

    The UI has a textbox for the object name, a button that runs
    ``generate_object_history``, a three-column gallery for the generated
    images, and an image component for the animated GIF.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("# TimeMetamorphy: Evolution Visualizer")
        with gr.Row():
            with gr.Column():
                object_input = gr.Textbox(label="Enter Object (e.g., car, phone)")
                generate_button = gr.Button("Generate Evolution")
                # Component.style() has been removed from Gradio; the grid
                # width is now the `columns` constructor argument.
                gallery = gr.Gallery(label="Generated Images", columns=3)
                gif_display = gr.Image(label="Generated GIF")
        generate_button.click(
            fn=generate_object_history,
            inputs=object_input,
            outputs=[gallery, gif_display],
        )
    return demo
# === Launch App ===
# Build the UI at import time and start serving it immediately.
demo = create_gradio_interface()
# NOTE(review): share=True asks Gradio for a public share link - confirm this
# is wanted in the deployment environment.
demo.launch(share=True)