import torch
import gradio as gr
from transformers import pipeline
from typing import Dict, List, Tuple
import csv
from io import BytesIO, StringIO
from PIL import Image, ImageDraw, ImageFont
import requests
import os
from pathlib import Path
import logging
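# Assumption: configure logging so the info-level cache messages below are visible;
# the original snippet does not set this up, so adjust or remove as needed.
logging.basicConfig(level=logging.INFO)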
# Create a font cache directory
FONT_CACHE_DIR = Path("./font_cache")
FONT_CACHE_DIR.mkdir(exist_ok=True)
# Define common font URLs and their corresponding filenames
FONT_SOURCES = {
"Arial": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial.ttf",
"Arial Bold": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial_Bold.ttf",
"Arial Bold Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial_Bold_Italic.ttf",
"Arial Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial_Italic.ttf",
"Courier New": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Courier_New.ttf",
"Verdana": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana.ttf",
"Verdana Bold": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana_Bold.ttf",
"Verdana Bold Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana_Bold_Italic.ttf",
"Verdana Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana_Italic.ttf",
}
# Font cache dictionary
font_cache = {}
def load_and_cache_fonts():
    """Load and cache fonts from known sources."""
    for font_name, url in FONT_SOURCES.items():
        font_path = FONT_CACHE_DIR / f"{font_name}.ttf"
        # Check if font is already cached
        if font_path.exists():
            try:
                font_cache[font_name] = str(font_path)
                logging.info(f"Loaded cached font: {font_name}")
            except Exception as e:
                logging.error(f"Error loading cached font {font_name}: {e}")
            # Cached font found, skip the download
            continue
        # Download and cache font (timeout keeps a dead mirror from hanging startup)
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            with open(font_path, "wb") as f:
                f.write(response.content)
            font_cache[font_name] = str(font_path)
            logging.info(f"Downloaded and cached font: {font_name}")
        except Exception as e:
            logging.error(f"Error downloading font {font_name}: {e}")
# Initialize font cache at startup
load_and_cache_fonts()
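# For reference: after load_and_cache_fonts() runs, font_cache maps display names to
# local .ttf paths, e.g. font_cache["Arial"] -> "font_cache/Arial.ttf" (illustrative).
# Fonts that failed to download are simply absent, and overlay_text_on_image() then
# falls back to a system font with the same name or to PIL's built-in default.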
# Initialize the pipeline (doing it here means it will be loaded only once when the script starts)
pipe = pipeline(
    "text-generation",
    model="alpindale/Llama-3.2-3B-Instruct",
    torch_dtype=torch.bfloat16,
    device="cuda",
)
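# Note: in recent transformers versions the text-generation pipeline accepts a list of
# chat messages and applies the model's chat template itself. A minimal sketch of the
# call shape used in generate_response() below (prompt text is illustrative only):
#   pipe(
#       [{"role": "system", "content": "You are a helpful assistant."},
#        {"role": "user", "content": "Write a tagline for {title}."}],
#       max_new_tokens=64,
#   )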
def read_feed_data(feed_text: str) -> List[Dict[str, str]]:
    """Read all rows of feed data and return as list of dictionaries.
    Automatically detects the delimiter from common options (|, ,, ;, \t)."""
    feed_io = StringIO(feed_text)
    # Get first line to detect delimiter
    first_line = feed_io.readline().strip()
    # Common delimiters to check
    delimiters = ["|", ",", ";", "\t"]
    delimiter = "|"  # default
    max_count = 0
    # Find the delimiter that splits the line into the most fields
    for d in delimiters:
        count = len(first_line.split(d))
        if count > max_count:
            max_count = count
            delimiter = d
    # Reset the StringIO buffer to start
    feed_io.seek(0)
    reader = csv.reader(feed_io, delimiter=delimiter)
    headers = next(reader)  # Get header row
    return [dict(zip(headers, row)) for row in reader]
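# Example (illustrative feed only): with "id|title|image_link\n1|Blue Shoe|https://example.com/shoe.jpg"
# the delimiter "|" is detected and the result is
#   [{"id": "1", "title": "Blue Shoe", "image_link": "https://example.com/shoe.jpg"}].
# Note that zip() pairs only as many fields as header and row share, so short rows
# simply omit their trailing keys.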
def overlay_text_on_image(
    image_url: str,
    text: str,
    position: Tuple[int, int],
    font_size: int,
    font_color: str,
    font_family: str,
) -> Image.Image:
    """Add text overlay to image with specified properties."""
    # Download image
    response = requests.get(image_url, timeout=30)
    img = Image.open(BytesIO(response.content))
    # Create draw object
    draw = ImageDraw.Draw(img)
    try:
        # Try to use cached font first
        if font_family in font_cache:
            font = ImageFont.truetype(font_cache[font_family], font_size)
        else:
            # Fallback to system font or default
            font = ImageFont.truetype(font_family, font_size)
    except OSError:
        # Ultimate fallback to default font
        font = ImageFont.load_default()
        logging.warning(f"Failed to load font {font_family}, using default")
    # Convert RGBA color format to hex if needed
    if font_color.startswith("rgba"):
        try:
            # Parse RGBA values
            rgba = font_color.strip("rgba()").split(",")
            r = int(float(rgba[0]))
            g = int(float(rgba[1]))
            b = int(float(rgba[2]))
            a = int(float(rgba[3]) * 255)  # Alpha scaled to 0-255 (not applied; text is drawn opaque)
            font_color = f"#{r:02x}{g:02x}{b:02x}"
        except (ValueError, IndexError):
            logging.warning(
                f"Invalid RGBA color format: {font_color}, falling back to white"
            )
            font_color = "#FFFFFF"
    # Add text to image
    draw.text(position, text, font=font, fill=font_color)
    return img
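# Minimal usage sketch (hypothetical URL and values):
#   img = overlay_text_on_image(
#       image_url="https://example.com/product.jpg",
#       text="20% off today",
#       position=(10, 10),
#       font_size=24,
#       font_color="#FFFFFF",
#       font_family="Arial",
#   )
#   img.save("annotated.jpg")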
def generate_response(
    prompt: str,
    feed_text: str,
    text_x: int = 10,
    text_y: int = 10,
    font_size: int = 24,
    font_color: str = "#FFFFFF",
    font_family: str = "Arial",
    max_new_tokens: int = 256,
    temperature: float = 0.7,
) -> List[Image.Image]:
    # Read feed data
    feed_data_list = read_feed_data(feed_text)
    images = []
    for feed_data in feed_data_list:
        # Fill the {field_name} placeholders in the prompt with this row's values
        formatted_prompt = prompt.format(**feed_data)
        system_prompt = "You are a helpful assistant that processes Meta Product Feeds."
        print(formatted_prompt)  # Debug: show the rendered prompt
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": formatted_prompt},
        ]
        # Generate response
        outputs = pipe(
            messages,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )
        # For chat-style input, generated_text holds the full conversation;
        # the last message is the assistant's reply.
        response = outputs[0]["generated_text"]
        generated_text = str(response[-1]["content"]) if response else ""
        # Get image with text overlay
        image_with_text = overlay_text_on_image(
            image_url=feed_data.get("image_link", ""),
            text=generated_text,
            position=(text_x, text_y),
            font_size=font_size,
            font_color=font_color,
            font_family=font_family,
        )
        images.append(image_with_text)
    return images
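# Example wiring (illustrative feed columns; use whatever headers your feed provides):
#   prompt    = "Write a short ad headline for {title} priced at {price}."
#   feed_text = "title|price|image_link\nBlue Shoe|49.99|https://example.com/shoe.jpg"
#   images    = generate_response(prompt, feed_text)
# Each feed row yields one annotated PIL image in the returned list.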
# Create Gradio interface
demo = gr.Interface(
    title="Meta Product Feed Chat",
    description="Chat with the Llama 3.2 model using feed data. Use {field_name} in your prompt to include feed data. The feed should be in CSV format with headers in the first row.",
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Enter your prompt (use {field_name} for feed data)", lines=3),
        gr.Textbox(
            label="Feed data (CSV with auto-detected delimiter)", lines=10, value=""
        ),
        gr.Number(label="Text X Position", value=10),
        gr.Number(label="Text Y Position", value=10),
        gr.Number(label="Font Size", value=24),
        gr.ColorPicker(label="Font Color", value="#FFFFFF"),
        gr.Dropdown(
            label="Font Family",
            choices=list(FONT_SOURCES.keys()),
            value="Arial",
        ),
        gr.Slider(minimum=1, maximum=512, value=256, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
    ],
    outputs=[
        gr.Gallery(label="Product Images with Text", columns=2),
    ],
)
if __name__ == "__main__":
    demo.launch(share=True)