# Source: Hugging Face Space file "app.py" by AMfeta99 (commit 1509d22, 6.62 kB).
import os
import tempfile
from io import BytesIO

import gradio as gr
import requests
from huggingface_hub import InferenceClient
from langchain.agents import create_react_agent
from langchain.tools import BaseTool
from langchain_community.llms import HuggingFaceHub
from langchain_community.tools import DuckDuckGoSearchResults
from PIL import Image, ImageDraw, ImageFont
# Your HF API token here (set your actual token)
#HF_TOKEN
#%% Methods
def add_label_to_image(image, label):
    """Draw `label` on a dark rectangle in the bottom-right corner of `image`.

    Args:
        image: PIL.Image.Image to annotate (drawn on in place).
        label: Text to render onto the image.

    Returns:
        The same PIL.Image.Image, with the label drawn on it.
    """
    draw = ImageDraw.Draw(image)
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font_size = 30
    try:
        font = ImageFont.truetype(font_path, font_size)
    except OSError:
        # Bug fix: was a bare `except:` that swallowed every error (including
        # KeyboardInterrupt). Only a missing/unreadable font file should
        # trigger the fallback to PIL's built-in font.
        font = ImageFont.load_default()
    # Measure the rendered text so it can be anchored to the bottom-right corner
    # with a 20px margin from the edges.
    text_bbox = draw.textbbox((0, 0), label, font=font)
    text_width = text_bbox[2] - text_bbox[0]
    text_height = text_bbox[3] - text_bbox[1]
    position = (image.width - text_width - 20, image.height - text_height - 20)
    # Background rectangle slightly larger than the text for readability.
    # NOTE(review): the RGBA fill (alpha=128) renders fully opaque on RGB
    # images; compose an RGBA overlay if real transparency is wanted.
    rect_margin = 10
    rect_position = [
        position[0] - rect_margin,
        position[1] - rect_margin,
        position[0] + text_width + rect_margin,
        position[1] + text_height + rect_margin,
    ]
    draw.rectangle(rect_position, fill=(0, 0, 0, 128))
    draw.text(position, label, fill="white", font=font)
    return image
def plot_and_save_agent_image(agent_image, label, save_path=None):
    """Label an agent-produced PIL image, display it, and optionally save it."""
    # The agent already hands us a PIL Image in this refactor, so no
    # conversion step is needed before labeling.
    labeled = add_label_to_image(agent_image, label)
    labeled.show()
    if save_path:
        labeled.save(save_path)
        print(f"Image saved to {save_path}")
    else:
        print("No save path provided. Image not saved.")
def generate_prompts_for_object(object_name):
    """Build the past/present/future text-to-image prompts for `object_name`."""
    past = f"Show an old version of a {object_name} from its early days."
    present = f"Show a {object_name} with current features/design/technology."
    future = (
        f"Show a futuristic version of a {object_name}, "
        "by predicting advanced features and futuristic design."
    )
    return {"past": past, "present": present, "future": future}
def generate_object_history(object_name):
    """Generate past/present/future images of `object_name` plus an animated GIF.

    Args:
        object_name: Name of the object/concept to visualize (e.g. "car").

    Returns:
        Tuple ``(images, gif_path)``: a list of PIL images (one per time
        period that produced a valid image) and the saved GIF's path, or
        ``None`` when no image could be generated.
    """
    images = []
    prompts = generate_prompts_for_object(object_name)
    labels = {
        "past": f"{object_name} - Past",
        "present": f"{object_name} - Present",
        "future": f"{object_name} - Future"
    }
    for time_period, prompt in prompts.items():
        print(f"Generating {time_period} frame: {prompt}")
        result = agent.invoke(prompt)  # returns PIL Image or string output
        # Only keep proper PIL images; the agent may fall back to a string.
        if isinstance(result, Image.Image):
            images.append(result)
            image_filename = f"{object_name}_{time_period}.png"
            plot_and_save_agent_image(result, labels[time_period], save_path=image_filename)
        else:
            print(f"Unexpected output for {time_period}: {result}")
    gif_path = f"{object_name}_evolution.gif"
    if images:
        # One frame per second, looping forever.
        images[0].save(
            gif_path,
            save_all=True,
            append_images=images[1:],
            duration=1000,
            loop=0
        )
        print(f"GIF saved to {gif_path}")
    else:
        print("No images generated, GIF not created.")
        # Bug fix: previously the path of a GIF that was never written was
        # still returned, so Gradio's gr.Image received a nonexistent file.
        gif_path = None
    return images, gif_path
#%% Initialization of tools and AI_Agent
# Initialize HuggingFace Inference Client for text-to-image.
# Bug fix: InferenceClient takes `model=`, not `repo_id=` -- the original
# call raised TypeError at import time.
# NOTE(review): confirm "m-ric/text-to-image" is a model id the Inference
# API can serve for text-to-image; swap in a known diffusion model if not.
text_to_image_client = InferenceClient(model="m-ric/text-to-image")
def run_text_to_image(prompt: str) -> Image.Image:
    """Generate an image for `prompt` via the HF Inference API client.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        The generated image as a PIL.Image.Image in RGB mode.

    Raises:
        ValueError: If the model returns no image.
    """
    # Bug fix: InferenceClient.text_to_image() returns a PIL.Image directly,
    # not a list of URLs -- there is nothing to index or download with
    # requests. The old code would have crashed on the returned Image.
    image = text_to_image_client.text_to_image(prompt)
    if image is None:
        raise ValueError("No image returned from the model.")
    return image.convert("RGB")
# Custom LangChain tool wrapper for text-to-image
class TextToImageTool(BaseTool):
    """LangChain tool that turns a text prompt into a PIL image."""

    # Bug fix: BaseTool is a pydantic model, so `name` and `description`
    # must be annotated class fields; un-annotated assignments are rejected
    # (or silently dropped) at class creation on current langchain versions.
    name: str = "text-to-image"
    description: str = "Generates an image from a prompt using HuggingFace model"

    def _run(self, prompt: str):
        # Delegate to the module-level helper that calls the HF Inference API.
        return run_text_to_image(prompt)

    async def _arun(self, prompt: str):
        # Async execution is not supported for this tool.
        raise NotImplementedError()

image_generation_tool = TextToImageTool()
# DuckDuckGo Search Tool from LangChain
search_tool = DuckDuckGoSearchResults()

# HuggingFace LLM for Qwen2.5.
# Bug fix: HF_TOKEN was referenced but never defined (NameError at import
# time); read it from the environment, where HF Spaces exposes secrets.
HF_TOKEN = os.environ.get("HF_TOKEN")
llm_engine = HuggingFaceHub(
    repo_id="Qwen/Qwen2.5-72B-Instruct",
    huggingfacehub_api_token=HF_TOKEN,
    model_kwargs={"temperature": 0.7}
)

# Create agent with the tools and LLM.
# NOTE(review): langchain's create_react_agent normally also takes a
# `prompt` and is wrapped in an AgentExecutor before .invoke(); confirm
# this call matches the installed langchain version.
agent = create_react_agent(llm_engine, tools=[image_generation_tool, search_tool])
#%% Gradio interface
def create_gradio_interface():
    """Build the Gradio Blocks UI for the object-evolution generator.

    Returns:
        A gr.Blocks app with a text input, a generate button, a 3-image
        gallery (past/present/future), and a GIF preview, pre-populated
        with a precomputed "car" example.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# TimeMetamorphy: an object Evolution Generator")
        gr.Markdown("""
        ## Unlocking the secrets of time!
        This app unveils these mysteries by offering a unique/magic lens that allows us "time travel".
        Powered by AI agents equipped with cutting-edge tools, it provides the superpower to explore the past, witness the present, and dream up the future like never before.
        This system allows you to generate visualizations of how an object/concept, like a bicycle or a car, may have evolved over time.
        It generates images of the object in the past, present, and future based on your input.
        ### Default Example: Evolution of a Car
        Below, you can see a precomputed example of a "car" evolution. Enter another object to generate its evolution.
        """)
        # Precomputed example shown before the user generates anything.
        # NOTE(review): assumes car_past.png / car_present.png / car_future.png
        # and car_evolution.gif exist in the working directory -- confirm.
        default_images = [
            ("car_past.png", "Car - Past"),
            ("car_present.png", "Car - Present"),
            ("car_future.png", "Car - Future")
        ]
        default_gif_path = "car_evolution.gif"
        with gr.Row():
            with gr.Column():
                object_name_input = gr.Textbox(
                    label="Enter an object name (e.g., bicycle, phone)",
                    placeholder="Enter an object name",
                    lines=1
                )
                generate_button = gr.Button("Generate Evolution")
                image_gallery = gr.Gallery(
                    label="Generated Images", show_label=True, columns=3, rows=1, value=default_images
                )
                gif_output = gr.Image(label="Generated GIF", show_label=True, value=default_gif_path)
        # Wire the button to the generator; it returns (images, gif_path),
        # which map onto the gallery and the GIF preview respectively.
        generate_button.click(fn=generate_object_history, inputs=[object_name_input], outputs=[image_gallery, gif_output])
    return demo
# Launch the Gradio app.
# share=True additionally exposes a temporary public gradio.live URL.
demo = create_gradio_interface()
demo.launch(share=True)