# File size: 6,604 Bytes
# 6fbeeae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import logging
import os
import tempfile
from math import atan2, degrees

import cv2
import gradio as gr
import numpy as np
import torch
from diffusers import AutoPipelineForText2Image
from PIL import Image, ImageDraw, ImageFont, ImageFilter
from roboflow import Roboflow

# Configure logging: every record from DEBUG upwards is written both to the
# file "debug.log" and to a stream handler (stderr by default).
_log_handlers = [
    logging.FileHandler("debug.log"),
    logging.StreamHandler(),
]
logging.basicConfig(
    handlers=_log_handlers,
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

# Roboflow and model configuration.
# Every value can be overridden through an environment variable so that
# credentials do not have to live in the source tree; the literals below are
# only fallbacks that keep the previous behaviour when nothing is set.
# NOTE(review): the fallback API key is committed in source control. It should
# be rotated and supplied exclusively via the ROBOFLOW_API_KEY variable.
ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY", "KUP9w62eUcD5PrrRMJsV")
PROJECT_NAME = os.environ.get("ROBOFLOW_PROJECT_NAME", "model_verification_project")
VERSION_NUMBER = int(os.environ.get("ROBOFLOW_VERSION_NUMBER", "2"))

# Initialize the FLUX handwriting model
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is only requested on GPU: many CPU kernels are missing or
# very slow for float16, so fall back to float32 when CUDA is unavailable.
_pipeline_dtype = torch.float16 if device == "cuda" else torch.float32
pipeline = AutoPipelineForText2Image.from_pretrained(
    'black-forest-labs/FLUX.1-dev',
    torch_dtype=_pipeline_dtype,
).to(device)
# The LoRA adds the handwriting style on top of the base FLUX weights.
pipeline.load_lora_weights('fofr/flux-handwriting', weight_name='lora.safetensors')

# Function to detect paper angle within bounding box
def detect_paper_angle(image, bounding_box):
    """Estimate the rotation of the paper inside ``bounding_box``.

    The region of interest is cropped from ``image``, edges are extracted
    with Canny and line segments are searched with the probabilistic Hough
    transform. The direction of the longest segment is taken as the angle.

    Args:
        image: Image data accepted by ``np.asarray`` (PIL image or ndarray),
            either 2-D grayscale or 3-D with 3 or 4 channels.
        bounding_box: ``(x1, y1, x2, y2)`` pixel coordinates of the region.
            Coordinates lying outside the image are clamped to its borders.

    Returns:
        ``degrees(atan2(dy, dx))`` of the longest detected segment, or ``0``
        when the region is empty or no segment is found.
    """
    pixels = np.asarray(image)
    if pixels.ndim < 2:
        return 0

    image_height, image_width = pixels.shape[:2]
    left, top, right, bottom = (int(value) for value in bounding_box)
    # Clamp to the image: a negative start index would otherwise make the
    # NumPy slice wrap around and silently select the wrong region.
    left, right = max(left, 0), min(right, image_width)
    top, bottom = max(top, 0), min(bottom, image_height)
    if right <= left or bottom <= top:
        # Nothing to analyse; OpenCV would raise on an empty array.
        return 0

    roi = pixels[top:bottom, left:right]
    # A 2-D region is already single-channel and needs no colour conversion.
    gray = roi if roi.ndim == 2 else cv2.cvtColor(roi, cv2.COLOR_RGBA2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100, minLineLength=50, maxLineGap=10)
    if lines is None or len(lines) == 0:
        return 0

    # Each row is (start_x, start_y, end_x, end_y); pick the longest segment.
    segments = np.asarray(lines).reshape(-1, 4)
    lengths = np.hypot(segments[:, 2] - segments[:, 0], segments[:, 3] - segments[:, 1])
    start_x, start_y, end_x, end_y = segments[int(np.argmax(lengths))]
    return degrees(atan2(end_y - start_y, end_x - start_x))

# Function to process image and overlay text
def process_image(image, text):
    """Overlay generated handwriting of ``text`` on each detected paper.

    The image is sent to the Roboflow model (confidence 70, overlap 50). For
    every prediction the bounding box is shrunk by a 10% padding on each
    side, the paper angle is estimated with ``detect_paper_angle`` and the
    handwriting image is resized, rotated and pasted into that box.

    Args:
        image: Input PIL image. ``None`` is rejected.
        text: Text the handwriting model is prompted to write.

    Returns:
        Path of the saved PNG result, or ``None`` if anything failed (the
        error and its traceback are logged).
    """
    if image is None:
        logging.error("Error during image processing: no input image was provided.")
        return None

    input_image_path = None
    try:
        # Initialize Roboflow
        rf = Roboflow(api_key=ROBOFLOW_API_KEY)
        logging.debug("Initialized Roboflow API.")
        project = rf.workspace().project(PROJECT_NAME)
        logging.debug("Accessed project in Roboflow.")
        model = project.version(VERSION_NUMBER).model
        logging.debug("Loaded model from Roboflow.")

        # Save the input to a unique temporary file so that concurrent
        # requests cannot overwrite each other's upload. JPEG cannot store
        # alpha or palette modes, hence the explicit RGB conversion.
        input_image_path = _new_temp_path(".jpg")
        image.convert("RGB").save(input_image_path)
        logging.debug(f"Input image saved to {input_image_path}.")

        # Perform inference
        logging.debug("Performing inference on the image...")
        prediction = model.predict(input_image_path, confidence=70, overlap=50).json()
        logging.debug(f"Inference result: {prediction}")

        # Open the image for processing
        pil_image = image.convert("RGBA")
        logging.debug("Converted image to RGBA mode.")

        # The angle is always measured on the untouched input, so the pixel
        # array only has to be built once.
        source_pixels = np.array(image)
        # The prompt does not depend on the detection, so the (expensive)
        # handwriting image is generated lazily once and reused for all boxes.
        handwriting = None

        # Iterate over detected objects
        for obj in prediction['predictions']:
            white_paper_width = obj['width']
            white_paper_height = obj['height']
            padding_x = int(white_paper_width * 0.1)
            padding_y = int(white_paper_height * 0.1)
            # resize() needs positive integer dimensions.
            box_width = int(white_paper_width - 2 * padding_x)
            box_height = int(white_paper_height - 2 * padding_y)
            logging.debug(f"Padded white paper dimensions: width={box_width}, height={box_height}.")
            if box_width <= 0 or box_height <= 0:
                logging.warning("Skipping detection that is too small to write on: %s", obj)
                continue

            x1_padded = int(obj['x'] - white_paper_width / 2 + padding_x)
            y1_padded = int(obj['y'] - white_paper_height / 2 + padding_y)
            x2_padded = int(obj['x'] + white_paper_width / 2 - padding_x)
            y2_padded = int(obj['y'] + white_paper_height / 2 - padding_y)

            # Detect paper angle
            angle = detect_paper_angle(source_pixels, (x1_padded, y1_padded, x2_padded, y2_padded))
            logging.debug(f"Detected paper angle: {angle} degrees.")

            if handwriting is None:
                prompt = f'HWRIT handwriting saying "{text}", neat style, black ink on transparent background'
                handwriting = pipeline(prompt).images[0].convert("RGBA")
                logging.debug("Generated handwriting image.")

            # Resize generated handwriting to fit the detected area.
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            fitted = handwriting.resize((box_width, box_height), Image.LANCZOS)

            # Converting the model output to RGBA yields a fully opaque alpha
            # channel, so derive the paste mask from the ink darkness instead.
            mask = _ink_alpha_mask(fitted)

            # Rotate the generated handwriting to match the detected paper angle
            pivot = (box_width // 2, box_height // 2)
            rotated_handwriting = fitted.rotate(-angle, resample=Image.BICUBIC, center=pivot)
            mask = mask.rotate(-angle, resample=Image.BICUBIC, center=pivot)

            # Paste the rotated handwriting onto the original image
            pil_image.paste(rotated_handwriting, (x1_padded, y1_padded), mask)
            logging.debug("Pasted generated handwriting onto the original image.")

        # Save and return output image path. The file is intentionally kept
        # on disk because the caller hands the path on for preview/download.
        output_image_path = _new_temp_path(".png")
        pil_image.convert("RGB").save(output_image_path)
        logging.debug(f"Output image saved to {output_image_path}.")
        return output_image_path

    except Exception as e:
        # Top-level boundary for the UI callback: log with traceback and
        # report failure through the return value.
        logging.exception(f"Error during image processing: {e}")
        return None

    finally:
        # The uploaded copy is only needed for the inference call.
        if input_image_path is not None:
            try:
                os.remove(input_image_path)
            except OSError:
                logging.debug(f"Could not remove temporary file {input_image_path}.")


def _ink_alpha_mask(handwriting):
    """Return an ``L`` paste mask that is opaque where ``handwriting`` is dark."""
    return handwriting.convert("L").point(lambda level: 255 - level)


def _new_temp_path(suffix):
    """Reserve a unique temporary file with ``suffix`` and return its path."""
    descriptor, path = tempfile.mkstemp(suffix=suffix)
    os.close(descriptor)
    return path

# Gradio interface function
def gradio_inference(image, text):
    """Gradio callback wrapping ``process_image``.

    Args:
        image: Image received from the upload component.
        text: Text received from the textbox component.

    Returns:
        A 3-tuple ``(preview, download, status)``. On success the first two
        items are both the path returned by ``process_image``; on failure
        they are ``None`` and the status carries an error message.
    """
    logging.debug("Starting Gradio inference.")
    output_path = process_image(image, text)

    # Guard clause: any falsy result from process_image means failure.
    if not output_path:
        logging.error("Gradio inference failed.")
        failure_message = "An error occurred while processing the image. Please check the logs."
        return None, None, failure_message

    logging.debug("Gradio inference successful.")
    success_message = "Processing complete! Download the image below."
    return output_path, output_path, success_message

# Gradio interface
# Components are created in the same order as they appear in the UI:
# two inputs (image upload, text) followed by three outputs.
_input_components = [
    gr.Image(type="pil", label="Upload an Image"),  # passed to the callback as a PIL image
    gr.Textbox(label="Enter Text to Overlay"),
]
_output_components = [
    gr.Image(label="Processed Image Preview"),
    gr.File(label="Download Processed Image"),
    gr.Textbox(label="Status"),
]
_interface_description = (
    "Upload an image with white paper detected, and enter the text to overlay. "
    "This app will generate handwriting using the FLUX handwriting model and overlay it on the detected white paper. "
    "Preview or download the output image below."
)

interface = gr.Interface(
    fn=gradio_inference,
    inputs=_input_components,
    outputs=_output_components,
    title="Handwriting Overlay on White Paper",
    description=_interface_description,
    allow_flagging="never",  # Disables flagging
)

# Launch the Gradio app
if __name__ == "__main__":
    # Only start the app when the file is executed as a script, not on import.
    logging.debug("Launching Gradio interface.")
    launch_options = {"share": True}
    interface.launch(**launch_options)