import torch
import gradio as gr
from transformers import pipeline
from typing import List, Dict, Any, Tuple
import csv
from io import BytesIO, StringIO
from PIL import Image, ImageDraw, ImageFont
import requests
import os
from pathlib import Path
import logging

# Create a font cache directory
FONT_CACHE_DIR = Path("./font_cache")
FONT_CACHE_DIR.mkdir(exist_ok=True)

# Define common font URLs and their corresponding filenames
FONT_SOURCES = {
    "Arial": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial.ttf",
    "Arial Bold": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial_Bold.ttf",
    "Arial Bold Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial_Bold_Italic.ttf",
    "Arial Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial_Italic.ttf",
    "Courier New": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Courier_New.ttf",
    "Verdana": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana.ttf",
    "Verdana Bold": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana_Bold.ttf",
    "Verdana Bold Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana_Bold_Italic.ttf",
    "Verdana Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana_Italic.ttf",
}

# Font cache dictionary: maps font name -> local file path
font_cache = {}


def load_and_cache_fonts():
    """Load and cache fonts from known sources."""
    for font_name, url in FONT_SOURCES.items():
        font_path = FONT_CACHE_DIR / f"{font_name}.ttf"

        # Check if the font is already cached
        if font_path.exists():
            try:
                font_cache[font_name] = str(font_path)
                logging.info(f"Loaded cached font: {font_name}")
            except Exception as e:
                logging.error(f"Error loading cached font {font_name}: {e}")
            continue

        # Download and cache the font
        try:
            response = requests.get(url)
            response.raise_for_status()
            with open(font_path, "wb") as f:
                f.write(response.content)
            font_cache[font_name] = str(font_path)
            logging.info(f"Downloaded and cached font: {font_name}")
        except Exception as e:
            logging.error(f"Error downloading font {font_name}: {e}")


# Initialize font cache at startup
load_and_cache_fonts()

# Initialize the pipeline (doing it here means it is loaded only once when the script starts)
pipe = pipeline(
    "text-generation",
    model="alpindale/Llama-3.2-3B-Instruct",
    torch_dtype=torch.bfloat16,
    device="cuda",
)


def read_feed_data(feed_text: str) -> List[Dict[str, str]]:
    """Read all rows of feed data and return them as a list of dictionaries.

    Automatically detects the delimiter from common options: "|", ",", ";", or tab.
    """
    feed_io = StringIO(feed_text)

    # Get the first line to detect the delimiter
    first_line = feed_io.readline().strip()

    # Common delimiters to check
    delimiters = ["|", ",", ";", "\t"]
    delimiter = "|"  # default
    max_count = 0

    # Pick the delimiter that splits the header line into the most fields
    for d in delimiters:
        count = len(first_line.split(d))
        if count > max_count:
            max_count = count
            delimiter = d

    # Rewind the buffer and parse the full feed
    feed_io.seek(0)
    reader = csv.reader(feed_io, delimiter=delimiter)
    headers = next(reader)  # Header row
    return [dict(zip(headers, row)) for row in reader]
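
# Illustrative example (hypothetical feed values, not from a real catalog):
# given a pipe-delimited feed such as
#
#   title|price|image_link
#   Trail Runner 2|89.99|https://example.com/img/trail-runner-2.jpg
#
# read_feed_data detects "|" as the delimiter and returns one dict per row,
# keyed by the header names:
#
#   [{"title": "Trail Runner 2", "price": "89.99",
#     "image_link": "https://example.com/img/trail-runner-2.jpg"}]
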
def overlay_text_on_image(
    image_url: str,
    text: str,
    position: Tuple[int, int],
    font_size: int,
    font_color: str,
    font_family: str,
) -> Image.Image:
    """Add a text overlay to the image with the specified properties."""
    # Download the product image
    response = requests.get(image_url)
    img = Image.open(BytesIO(response.content))

    # Create draw object
    draw = ImageDraw.Draw(img)

    try:
        # Try the cached font first
        if font_family in font_cache:
            font = ImageFont.truetype(font_cache[font_family], font_size)
        else:
            # Fall back to a system font with the same name
            font = ImageFont.truetype(font_family, font_size)
    except OSError:
        # Ultimate fallback to the default bitmap font
        font = ImageFont.load_default()
        logging.warning(f"Failed to load font {font_family}, using default")

    # Convert an rgba(...) color string to hex if needed
    if font_color.startswith("rgba"):
        try:
            # Parse the RGBA components
            rgba = font_color.strip("rgba()").split(",")
            r = int(float(rgba[0]))
            g = int(float(rgba[1]))
            b = int(float(rgba[2]))
            a = int(float(rgba[3]) * 255)  # Convert alpha from 0-1 to 0-255
            font_color = f"#{r:02x}{g:02x}{b:02x}"
        except (ValueError, IndexError):
            logging.warning(
                f"Invalid RGBA color format: {font_color}, falling back to white"
            )
            font_color = "#FFFFFF"

    # Draw the text onto the image
    draw.text(position, text, font=font, fill=font_color)

    return img


def generate_response(
    prompt: str,
    feed_text: str,
    text_x: int = 10,
    text_y: int = 10,
    font_size: int = 24,
    font_color: str = "#FFFFFF",
    font_family: str = "Arial",
    max_new_tokens: int = 256,
    temperature: float = 0.7,
) -> List[Image.Image]:
    # Read feed data
    feed_data_list = read_feed_data(feed_text)
    images = []

    for feed_data in feed_data_list:
        # Fill the prompt template with this row's feed values
        formatted_prompt = prompt.format(**feed_data)
        system_prompt = "You are a helpful assistant that processes Meta Product Feeds."

        print(formatted_prompt)

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": formatted_prompt},
        ]

        # Generate response
        outputs = pipe(
            messages,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )
        response = outputs[0]["generated_text"]

        # When called with chat messages, the pipeline returns the full conversation
        # as a list of message dicts; the assistant's reply is the last entry.
        generated_text = str(response[-1]["content"]) if response else ""

        # Render the generated text onto the product image
        image_with_text = overlay_text_on_image(
            image_url=feed_data.get("image_link", ""),
            text=generated_text,
            position=(text_x, text_y),
            font_size=font_size,
            font_color=font_color,
            font_family=font_family,
        )
        images.append(image_with_text)

    return images
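
# Illustrative prompt template (the {title} and {price} placeholders are hypothetical —
# they must match the header names in the supplied feed):
#
#   "Write a short, punchy ad caption for {title}, currently priced at {price}."
#
# generate_response substitutes each feed row into the template via str.format(), so
# every row produces its own caption and its own overlaid image.
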
# Create Gradio interface
demo = gr.Interface(
    title="Meta Product Feed Chat",
    description=(
        "Chat with the Llama 3.2 model using feed data. Use {field_name} in your prompt "
        "to include feed data. The feed should be in CSV format with headers in the first row."
    ),
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Enter your prompt (use {field_name} for feed data)", lines=3),
        gr.Textbox(
            label="Feed data (CSV with auto-detected delimiter)", lines=10, value=""
        ),
        gr.Number(label="Text X Position", value=10),
        gr.Number(label="Text Y Position", value=10),
        gr.Number(label="Font Size", value=24),
        gr.ColorPicker(label="Font Color", value="#FFFFFF"),
        gr.Dropdown(
            label="Font Family",
            choices=list(FONT_SOURCES.keys()),
            value="Arial",
        ),
        gr.Slider(minimum=1, maximum=512, value=256, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
    ],
    outputs=[
        gr.Gallery(label="Product Images with Text", columns=2),
    ],
)

if __name__ == "__main__":
    demo.launch(share=True)