# Source: Hugging Face Space file "app.py" by AMfeta99 (commit 1509d22, 6.62 kB).
import os
import tempfile
from io import BytesIO

import gradio as gr
import requests
from huggingface_hub import InferenceClient
from langchain.agents import create_react_agent
from langchain.tools import BaseTool
from langchain_community.llms import HuggingFaceHub
from langchain_community.tools import DuckDuckGoSearchResults
from PIL import Image, ImageDraw, ImageFont
# Your HF API token here (set your actual token)
#HF_TOKEN
#%% Methods
def add_label_to_image(image, label):
    """Draw `label` on a dark rectangle in the bottom-right corner of `image`.

    Args:
        image: PIL.Image.Image to annotate (drawn on in place).
        label: Text to render onto the image.

    Returns:
        The same PIL.Image.Image, with the label drawn on it.
    """
    draw = ImageDraw.Draw(image)
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font_size = 30
    try:
        font = ImageFont.truetype(font_path, font_size)
    except OSError:
        # Bug fix: was a bare `except:` that swallowed every error (including
        # KeyboardInterrupt). Only a missing/unreadable font file should
        # trigger the fallback to PIL's built-in font.
        font = ImageFont.load_default()
    # Measure the rendered text so it can be anchored to the bottom-right corner
    # with a 20px margin from the edges.
    text_bbox = draw.textbbox((0, 0), label, font=font)
    text_width = text_bbox[2] - text_bbox[0]
    text_height = text_bbox[3] - text_bbox[1]
    position = (image.width - text_width - 20, image.height - text_height - 20)
    # Background rectangle slightly larger than the text for readability.
    # NOTE(review): the RGBA fill (alpha=128) renders fully opaque on RGB
    # images; compose an RGBA overlay if real transparency is wanted.
    rect_margin = 10
    rect_position = [
        position[0] - rect_margin,
        position[1] - rect_margin,
        position[0] + text_width + rect_margin,
        position[1] + text_height + rect_margin,
    ]
    draw.rectangle(rect_position, fill=(0, 0, 0, 128))
    draw.text(position, label, fill="white", font=font)
    return image
def plot_and_save_agent_image(agent_image, label, save_path=None):
    """Label an agent-produced PIL image, display it, and optionally save it."""
    # The agent already hands us a PIL Image in this refactor, so no
    # conversion step is needed before labeling.
    labeled = add_label_to_image(agent_image, label)
    labeled.show()
    if save_path:
        labeled.save(save_path)
        print(f"Image saved to {save_path}")
    else:
        print("No save path provided. Image not saved.")
def generate_prompts_for_object(object_name):
    """Build the past/present/future text-to-image prompts for `object_name`."""
    past = f"Show an old version of a {object_name} from its early days."
    present = f"Show a {object_name} with current features/design/technology."
    future = (
        f"Show a futuristic version of a {object_name}, "
        "by predicting advanced features and futuristic design."
    )
    return {"past": past, "present": present, "future": future}
def generate_object_history(object_name):
    """Generate past/present/future images of `object_name` plus an animated GIF.

    Args:
        object_name: Name of the object/concept to visualize (e.g. "car").

    Returns:
        Tuple ``(images, gif_path)``: a list of PIL images (one per time
        period that produced a valid image) and the saved GIF's path, or
        ``None`` when no image could be generated.
    """
    images = []
    prompts = generate_prompts_for_object(object_name)
    labels = {
        "past": f"{object_name} - Past",
        "present": f"{object_name} - Present",
        "future": f"{object_name} - Future"
    }
    for time_period, prompt in prompts.items():
        print(f"Generating {time_period} frame: {prompt}")
        result = agent.invoke(prompt)  # returns PIL Image or string output
        # Only keep proper PIL images; the agent may fall back to a string.
        if isinstance(result, Image.Image):
            images.append(result)
            image_filename = f"{object_name}_{time_period}.png"
            plot_and_save_agent_image(result, labels[time_period], save_path=image_filename)
        else:
            print(f"Unexpected output for {time_period}: {result}")
    gif_path = f"{object_name}_evolution.gif"
    if images:
        # One frame per second, looping forever.
        images[0].save(
            gif_path,
            save_all=True,
            append_images=images[1:],
            duration=1000,
            loop=0
        )
        print(f"GIF saved to {gif_path}")
    else:
        print("No images generated, GIF not created.")
        # Bug fix: previously the path of a GIF that was never written was
        # still returned, so Gradio's gr.Image received a nonexistent file.
        gif_path = None
    return images, gif_path
#%% Initialization of tools and AI_Agent
# Initialize HuggingFace Inference Client for text-to-image.
# Bug fix: InferenceClient takes `model=`, not `repo_id=` -- the original
# call raised TypeError at import time.
# NOTE(review): confirm "m-ric/text-to-image" is a model id the Inference
# API can serve for text-to-image; swap in a known diffusion model if not.
text_to_image_client = InferenceClient(model="m-ric/text-to-image")
def run_text_to_image(prompt: str) -> Image.Image:
    """Generate an image for `prompt` via the HF Inference API client.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        The generated image as a PIL.Image.Image in RGB mode.

    Raises:
        ValueError: If the model returns no image.
    """
    # Bug fix: InferenceClient.text_to_image() returns a PIL.Image directly,
    # not a list of URLs -- there is nothing to index or download with
    # requests. The old code would have crashed on the returned Image.
    image = text_to_image_client.text_to_image(prompt)
    if image is None:
        raise ValueError("No image returned from the model.")
    return image.convert("RGB")
# Custom LangChain tool wrapper for text-to-image
class TextToImageTool(BaseTool):
    """LangChain tool that turns a text prompt into a PIL image."""

    # Bug fix: BaseTool is a pydantic model, so `name` and `description`
    # must be annotated class fields; un-annotated assignments are rejected
    # (or silently dropped) at class creation on current langchain versions.
    name: str = "text-to-image"
    description: str = "Generates an image from a prompt using HuggingFace model"

    def _run(self, prompt: str):
        # Delegate to the module-level helper that calls the HF Inference API.
        return run_text_to_image(prompt)

    async def _arun(self, prompt: str):
        # Async execution is not supported for this tool.
        raise NotImplementedError()

image_generation_tool = TextToImageTool()
# DuckDuckGo Search Tool from LangChain
search_tool = DuckDuckGoSearchResults()

# HuggingFace LLM for Qwen2.5.
# Bug fix: HF_TOKEN was referenced but never defined (NameError at import
# time); read it from the environment, where HF Spaces exposes secrets.
HF_TOKEN = os.environ.get("HF_TOKEN")
llm_engine = HuggingFaceHub(
    repo_id="Qwen/Qwen2.5-72B-Instruct",
    huggingfacehub_api_token=HF_TOKEN,
    model_kwargs={"temperature": 0.7}
)

# Create agent with the tools and LLM.
# NOTE(review): langchain's create_react_agent normally also takes a
# `prompt` and is wrapped in an AgentExecutor before .invoke(); confirm
# this call matches the installed langchain version.
agent = create_react_agent(llm_engine, tools=[image_generation_tool, search_tool])
#%% Gradio interface
def create_gradio_interface():
    """Build the Gradio Blocks UI for the object-evolution generator.

    Returns:
        A gr.Blocks app with a text input, a generate button, a 3-image
        gallery (past/present/future), and a GIF preview, pre-populated
        with a precomputed "car" example.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# TimeMetamorphy: an object Evolution Generator")
        gr.Markdown("""
        ## Unlocking the secrets of time!
        This app unveils these mysteries by offering a unique/magic lens that allows us "time travel".
        Powered by AI agents equipped with cutting-edge tools, it provides the superpower to explore the past, witness the present, and dream up the future like never before.
        This system allows you to generate visualizations of how an object/concept, like a bicycle or a car, may have evolved over time.
        It generates images of the object in the past, present, and future based on your input.
        ### Default Example: Evolution of a Car
        Below, you can see a precomputed example of a "car" evolution. Enter another object to generate its evolution.
        """)
        # Precomputed example shown before the user generates anything.
        # NOTE(review): assumes car_past.png / car_present.png / car_future.png
        # and car_evolution.gif exist in the working directory -- confirm.
        default_images = [
            ("car_past.png", "Car - Past"),
            ("car_present.png", "Car - Present"),
            ("car_future.png", "Car - Future")
        ]
        default_gif_path = "car_evolution.gif"
        with gr.Row():
            with gr.Column():
                object_name_input = gr.Textbox(
                    label="Enter an object name (e.g., bicycle, phone)",
                    placeholder="Enter an object name",
                    lines=1
                )
                generate_button = gr.Button("Generate Evolution")
                image_gallery = gr.Gallery(
                    label="Generated Images", show_label=True, columns=3, rows=1, value=default_images
                )
                gif_output = gr.Image(label="Generated GIF", show_label=True, value=default_gif_path)
        # Wire the button to the generator; it returns (images, gif_path),
        # which map onto the gallery and the GIF preview respectively.
        generate_button.click(fn=generate_object_history, inputs=[object_name_input], outputs=[image_gallery, gif_output])
    return demo
# Launch the Gradio app.
# share=True additionally exposes a temporary public gradio.live URL.
demo = create_gradio_interface()
demo.launch(share=True)