import torch
import gradio as gr
from transformers import pipeline
from typing import List, Dict, Tuple
import csv
from io import StringIO
from PIL import Image, ImageDraw, ImageFont
import requests
from io import BytesIO
import os
from pathlib import Path
import logging

# Configure logging so the font cache/download messages below are visible
logging.basicConfig(level=logging.INFO)

# Create a font cache directory
FONT_CACHE_DIR = Path("./font_cache")
FONT_CACHE_DIR.mkdir(exist_ok=True)

# Define common font URLs and their corresponding filenames
FONT_SOURCES = {
    "Arial": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial.ttf",
    "Arial Bold": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial_Bold.ttf",
    "Arial Bold Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial_Bold_Italic.ttf",
    "Arial Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Arial_Italic.ttf",
    "Courier New": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Courier_New.ttf",
    "Verdana": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana.ttf",
    "Verdana Bold": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana_Bold.ttf",
    "Verdana Bold Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana_Bold_Italic.ttf",
    "Verdana Italic": "https://github.com/matomo-org/travis-scripts/raw/master/fonts/Verdana_Italic.ttf",
}

# Font cache dictionary
font_cache = {}


def load_and_cache_fonts():
    """Load and cache fonts from known sources."""
    for font_name, url in FONT_SOURCES.items():
        font_path = FONT_CACHE_DIR / f"{font_name}.ttf"

        # Reuse the font if it is already cached on disk
        if font_path.exists():
            font_cache[font_name] = str(font_path)
            logging.info(f"Loaded cached font: {font_name}")
            continue

        # Download and cache the font; the timeout keeps startup from hanging
        # on an unreachable host
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()

            with open(font_path, "wb") as f:
                f.write(response.content)

            font_cache[font_name] = str(font_path)
            logging.info(f"Downloaded and cached font: {font_name}")
        except Exception as e:
            logging.error(f"Error downloading font {font_name}: {e}")


# Initialize font cache at startup
load_and_cache_fonts()

# Initialize the pipeline (doing it here means it will be loaded only once when the script starts)
pipe = pipeline(
    "text-generation",
    model="alpindale/Llama-3.2-3B-Instruct",
    torch_dtype=torch.bfloat16,
    device="cuda",
)
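
# Note: device="cuda" assumes a GPU is available; on a CPU-only machine this
# call would need device="cpu" (or device_map="auto" with accelerate installed).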


def read_feed_data(feed_text: str) -> List[Dict[str, str]]:
    """Read all rows of feed data and return as list of dictionaries.
    Automatically detects the delimiter from common options (|, ,, ;, \t)."""
    feed_io = StringIO(feed_text)
    # Get first line to detect delimiter
    first_line = feed_io.readline().strip()

    # Common delimiters to check
    delimiters = ["|", ",", ";", "\t"]
    delimiter = "|"  # default
    max_count = 0

    # Find the delimiter that splits the line into the most fields
    for d in delimiters:
        count = len(first_line.split(d))
        if count > max_count:
            max_count = count
            delimiter = d

    # Reset the StringIO buffer to start
    feed_io.seek(0)
    reader = csv.reader(feed_io, delimiter=delimiter)
    headers = next(reader)  # Get header row
    return [dict(zip(headers, row)) for row in reader]
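
# Illustrative feed for read_feed_data (made-up values, not from a real
# catalog). The pipe delimiter would be auto-detected from the header row:
#
#   id|title|image_link
#   1|Trail Running Shoes|https://example.com/shoes.jpg
#   2|Packable Rain Jacket|https://example.com/jacket.jpg
#
# which parses to:
#   [{"id": "1", "title": "Trail Running Shoes", "image_link": "https://example.com/shoes.jpg"},
#    {"id": "2", "title": "Packable Rain Jacket", "image_link": "https://example.com/jacket.jpg"}]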


def overlay_text_on_image(
    image_url: str,
    text: str,
    position: Tuple[int, int],
    font_size: int,
    font_color: str,
    font_family: str,
) -> Image.Image:
    """Add text overlay to image with specified properties."""
    # Download the source image; fail fast on HTTP errors and hung connections
    response = requests.get(image_url, timeout=30)
    response.raise_for_status()
    img = Image.open(BytesIO(response.content))

    # Create draw object
    draw = ImageDraw.Draw(img)

    try:
        # Try to use cached font first
        if font_family in font_cache:
            font = ImageFont.truetype(font_cache[font_family], font_size)
        else:
            # Fallback to system font or default
            font = ImageFont.truetype(font_family, font_size)
    except OSError:
        # Ultimate fallback to default font
        font = ImageFont.load_default()
        logging.warning(f"Failed to load font {font_family}, using default")

    # Convert RGBA color format to hex if needed
    if font_color.startswith("rgba"):
        try:
            # Parse the RGB components; the alpha channel is ignored because
            # the opaque hex string below is what gets passed to draw.text
            rgba = font_color.strip("rgba()").split(",")
            r = int(float(rgba[0]))
            g = int(float(rgba[1]))
            b = int(float(rgba[2]))
            font_color = f"#{r:02x}{g:02x}{b:02x}"
        except (ValueError, IndexError):
            logging.warning(
                f"Invalid RGBA color format: {font_color}, falling back to white"
            )
            font_color = "#FFFFFF"

    # Add text to image
    draw.text(position, text, font=font, fill=font_color)

    return img
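
# Sketch of a standalone call to overlay_text_on_image (hypothetical URL and
# copy, shown only to illustrate the expected argument shapes):
#
#   img = overlay_text_on_image(
#       image_url="https://example.com/product.jpg",
#       text="20% off this week",
#       position=(10, 10),
#       font_size=24,
#       font_color="#FFFFFF",
#       font_family="Arial",
#   )
#   img.save("preview.png")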


def generate_response(
    prompt: str,
    feed_text: str,
    text_x: int = 10,
    text_y: int = 10,
    font_size: int = 24,
    font_color: str = "#FFFFFF",
    font_family: str = "Arial",
    max_new_tokens: int = 256,
    temperature: float = 0.7,
) -> List[Image.Image]:
    # Read feed data
    feed_data_list = read_feed_data(feed_text)
    images = []

    for feed_data in feed_data_list:
        # Format the prompt using the chat template and feed data
        formatted_prompt = prompt.format(**feed_data)
        system_prompt = "You are a helpful assistant that processes Meta Product Feeds."

        logging.info(f"Formatted prompt: {formatted_prompt}")

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": formatted_prompt},
        ]

        # Generate response
        outputs = pipe(
            messages,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )

        # With chat-style input, "generated_text" holds the full message list;
        # the last message is the assistant's reply
        response = outputs[0]["generated_text"]
        generated_text = str(response[-1]["content"]) if response else ""

        # Get image with text overlay
        image_with_text = overlay_text_on_image(
            image_url=feed_data.get("image_link", ""),
            text=generated_text,
            position=(text_x, text_y),
            font_size=font_size,
            font_color=font_color,
            font_family=font_family,
        )
        images.append(image_with_text)

    return images
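
# Example prompt for the text box below (illustrative; it assumes the feed has
# "title" and "description" columns, which this script does not require):
#
#   Write a one-line ad caption for {title}. Product details: {description}
#
# Each {field_name} placeholder is filled from the matching column of the
# current feed row before the prompt is sent to the model.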


# Create Gradio interface
demo = gr.Interface(
    title="Meta Product Feed Chat",
    description="Chat with Llama 3.2 model using feed data. Use {field_name} in your prompt to include feed data. The feed should be in CSV format with headers in the first row.",
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Enter your prompt (use {field_name} for feed data)", lines=3),
        gr.Textbox(
            label="Feed data (CSV with auto-detected delimiter)", lines=10, value=""
        ),
        gr.Number(label="Text X Position", value=10),
        gr.Number(label="Text Y Position", value=10),
        gr.Number(label="Font Size", value=24),
        gr.ColorPicker(label="Font Color", value="#FFFFFF"),
        gr.Dropdown(
            label="Font Family",
            choices=list(FONT_SOURCES.keys()),
            value="Arial",
        ),
        gr.Slider(minimum=1, maximum=512, value=256, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
    ],
    outputs=[
        gr.Gallery(label="Product Images with Text", columns=2),
    ],
)

if __name__ == "__main__":
    demo.launch(share=True)