import torch
import gradio as gr
from transformers import pipeline
from typing import Dict, List, Tuple
import csv
from io import BytesIO, StringIO
from PIL import Image, ImageDraw, ImageFont
import requests
import os
from pathlib import Path
import logging
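# Assumption: configure logging so the info-level cache messages below are visible;
# the original snippet does not set this up, so adjust or remove as needed.
logging.basicConfig(level=logging.INFO)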
# Create a font cache directory
FONT_CACHE_DIR = Path("./font_cache")
FONT_CACHE_DIR.mkdir(exist_ok=True)
# Define common font URLs and their corresponding filenames
FONT_SOURCES = {
"Arial": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial.ttf",
"Arial Bold": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial_Bold.ttf",
"Arial Bold Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial_Bold_Italic.ttf",
"Arial Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial_Italic.ttf",
"Courier New": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Courier_New.ttf",
"Verdana": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana.ttf",
"Verdana Bold": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana_Bold.ttf",
"Verdana Bold Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana_Bold_Italic.ttf",
"Verdana Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana_Italic.ttf",
}
# Font cache dictionary
font_cache = {}
def load_and_cache_fonts():
    """Load and cache fonts from known sources."""
    for font_name, url in FONT_SOURCES.items():
        font_path = FONT_CACHE_DIR / f"{font_name}.ttf"
        # Check if font is already cached
        if font_path.exists():
            try:
                font_cache[font_name] = str(font_path)
                logging.info(f"Loaded cached font: {font_name}")
            except Exception as e:
                logging.error(f"Error loading cached font {font_name}: {e}")
            # Cached font found, skip the download
            continue
        # Download and cache font (timeout keeps a dead mirror from hanging startup)
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            with open(font_path, "wb") as f:
                f.write(response.content)
            font_cache[font_name] = str(font_path)
            logging.info(f"Downloaded and cached font: {font_name}")
        except Exception as e:
            logging.error(f"Error downloading font {font_name}: {e}")
# Initialize font cache at startup
load_and_cache_fonts()
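# For reference: after load_and_cache_fonts() runs, font_cache maps display names to
# local .ttf paths, e.g. font_cache["Arial"] -> "font_cache/Arial.ttf" (illustrative).
# Fonts that failed to download are simply absent, and overlay_text_on_image() then
# falls back to a system font with the same name or to PIL's built-in default.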
# Initialize the pipeline (doing it here means it will be loaded only once when the script starts)
pipe = pipeline(
    "text-generation",
    model="alpindale/Llama-3.2-3B-Instruct",
    torch_dtype=torch.bfloat16,
    device="cuda",
)
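# Note: in recent transformers versions the text-generation pipeline accepts a list of
# chat messages and applies the model's chat template itself. A minimal sketch of the
# call shape used in generate_response() below (prompt text is illustrative only):
#   pipe(
#       [{"role": "system", "content": "You are a helpful assistant."},
#        {"role": "user", "content": "Write a tagline for {title}."}],
#       max_new_tokens=64,
#   )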
def read_feed_data(feed_text: str) -> List[Dict[str, str]]:
    """Read all rows of feed data and return as list of dictionaries.
    Automatically detects the delimiter from common options (|, ,, ;, \t)."""
    feed_io = StringIO(feed_text)
    # Get first line to detect delimiter
    first_line = feed_io.readline().strip()
    # Common delimiters to check
    delimiters = ["|", ",", ";", "\t"]
    delimiter = "|"  # default
    max_count = 0
    # Find the delimiter that splits the line into the most fields
    for d in delimiters:
        count = len(first_line.split(d))
        if count > max_count:
            max_count = count
            delimiter = d
    # Reset the StringIO buffer to start
    feed_io.seek(0)
    reader = csv.reader(feed_io, delimiter=delimiter)
    headers = next(reader)  # Get header row
    return [dict(zip(headers, row)) for row in reader]
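# Example (illustrative feed only): with "id|title|image_link\n1|Blue Shoe|https://example.com/shoe.jpg"
# the delimiter "|" is detected and the result is
#   [{"id": "1", "title": "Blue Shoe", "image_link": "https://example.com/shoe.jpg"}].
# Note that zip() pairs only as many fields as header and row share, so short rows
# simply omit their trailing keys.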
def overlay_text_on_image(
    image_url: str,
    text: str,
    position: Tuple[int, int],
    font_size: int,
    font_color: str,
    font_family: str,
) -> Image.Image:
    """Add text overlay to image with specified properties."""
    # Download image
    response = requests.get(image_url, timeout=30)
    img = Image.open(BytesIO(response.content))
    # Create draw object
    draw = ImageDraw.Draw(img)
    try:
        # Try to use cached font first
        if font_family in font_cache:
            font = ImageFont.truetype(font_cache[font_family], font_size)
        else:
            # Fallback to system font or default
            font = ImageFont.truetype(font_family, font_size)
    except OSError:
        # Ultimate fallback to default font
        font = ImageFont.load_default()
        logging.warning(f"Failed to load font {font_family}, using default")
    # Convert RGBA color format to hex if needed
    if font_color.startswith("rgba"):
        try:
            # Parse RGBA values
            rgba = font_color.strip("rgba()").split(",")
            r = int(float(rgba[0]))
            g = int(float(rgba[1]))
            b = int(float(rgba[2]))
            a = int(float(rgba[3]) * 255)  # Alpha scaled to 0-255 (not applied; text is drawn opaque)
            font_color = f"#{r:02x}{g:02x}{b:02x}"
        except (ValueError, IndexError):
            logging.warning(
                f"Invalid RGBA color format: {font_color}, falling back to white"
            )
            font_color = "#FFFFFF"
    # Add text to image
    draw.text(position, text, font=font, fill=font_color)
    return img
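# Minimal usage sketch (hypothetical URL and values):
#   img = overlay_text_on_image(
#       image_url="https://example.com/product.jpg",
#       text="20% off today",
#       position=(10, 10),
#       font_size=24,
#       font_color="#FFFFFF",
#       font_family="Arial",
#   )
#   img.save("annotated.jpg")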
def generate_response(
    prompt: str,
    feed_text: str,
    text_x: int = 10,
    text_y: int = 10,
    font_size: int = 24,
    font_color: str = "#FFFFFF",
    font_family: str = "Arial",
    max_new_tokens: int = 256,
    temperature: float = 0.7,
) -> List[Image.Image]:
    # Read feed data
    feed_data_list = read_feed_data(feed_text)
    images = []
    for feed_data in feed_data_list:
        # Fill the {field_name} placeholders in the prompt with this row's values
        formatted_prompt = prompt.format(**feed_data)
        system_prompt = "You are a helpful assistant that processes Meta Product Feeds."
        print(formatted_prompt)  # Debug: show the rendered prompt
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": formatted_prompt},
        ]
        # Generate response
        outputs = pipe(
            messages,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )
        # For chat-style input, generated_text holds the full conversation;
        # the last message is the assistant's reply.
        response = outputs[0]["generated_text"]
        generated_text = str(response[-1]["content"]) if response else ""
        # Get image with text overlay
        image_with_text = overlay_text_on_image(
            image_url=feed_data.get("image_link", ""),
            text=generated_text,
            position=(text_x, text_y),
            font_size=font_size,
            font_color=font_color,
            font_family=font_family,
        )
        images.append(image_with_text)
    return images
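# Example wiring (illustrative feed columns; use whatever headers your feed provides):
#   prompt    = "Write a short ad headline for {title} priced at {price}."
#   feed_text = "title|price|image_link\nBlue Shoe|49.99|https://example.com/shoe.jpg"
#   images    = generate_response(prompt, feed_text)
# Each feed row yields one annotated PIL image in the returned list.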
# Create Gradio interface
demo = gr.Interface(
    title="Meta Product Feed Chat",
    description="Chat with the Llama 3.2 model using feed data. Use {field_name} in your prompt to include feed data. The feed should be in CSV format with headers in the first row.",
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Enter your prompt (use {field_name} for feed data)", lines=3),
        gr.Textbox(
            label="Feed data (CSV with auto-detected delimiter)", lines=10, value=""
        ),
        gr.Number(label="Text X Position", value=10),
        gr.Number(label="Text Y Position", value=10),
        gr.Number(label="Font Size", value=24),
        gr.ColorPicker(label="Font Color", value="#FFFFFF"),
        gr.Dropdown(
            label="Font Family",
            choices=list(FONT_SOURCES.keys()),
            value="Arial",
        ),
        gr.Slider(minimum=1, maximum=512, value=256, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
    ],
    outputs=[
        gr.Gallery(label="Product Images with Text", columns=2),
    ],
)
if __name__ == "__main__":
    demo.launch(share=True)