import gradio as gr
import logging
from roboflow import Roboflow
from PIL import Image, ImageDraw
import cv2
import numpy as np
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver.support.ui import Select
import time
import os
from math import atan2, degrees

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.FileHandler("debug.log"), logging.StreamHandler()],
)

# Roboflow and model configuration
ROBOFLOW_API_KEY = os.getenv("ROBOFLOW_API_KEY", "KUP9w62eUcD5PrrRMJsV")  # Prefer supplying the key via environment variable
PROJECT_NAME = "model_verification_project"
VERSION_NUMBER = 2

# Paths to Chrome and ChromeDriver downloaded by Selenium Manager
CHROME_PATH = "/home/user/.cache/selenium/chrome/linux64/132.0.6834.83/chrome"
CHROMEDRIVER_PATH = "/home/user/.cache/selenium/chromedriver/linux64/132.0.6834.83/chromedriver"
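# Note: both paths are pinned to Chrome 132.0.6834.83; update them if Selenium Manager
# downloads a different browser/driver version.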

# Selenium configuration for Calligrapher
def get_calligrapher():
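    """Launch a headless Chrome session on calligrapher.ai, nudge the speed, bias and
    width sliders, select handwriting style '9', and return the live driver."""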
    calli_url = "https://www.calligrapher.ai"

    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # Run Chrome in headless mode
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--window-size=1920,1080")  # maximize_window() is unreliable in headless mode
    options.binary_location = CHROME_PATH

    service = Service(CHROMEDRIVER_PATH)
    driver = webdriver.Chrome(service=service, options=options)
    driver.maximize_window()
    driver.get(calli_url)

    # Adjust sliders for customization
    speed_slider = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'speed-slider')))
    ActionChains(driver).drag_and_drop_by_offset(speed_slider, 40, 0).perform()

    bias_slider = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'bias-slider')))
    ActionChains(driver).drag_and_drop_by_offset(bias_slider, 20, 0).perform()

    width_slider = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'width-slider')))
    ActionChains(driver).drag_and_drop_by_offset(width_slider, 20, 0).perform()

    # Select handwriting style
    select = Select(driver.find_element(By.ID, 'select-style'))
    select.select_by_visible_text('9')  # Adjust to the desired style
    return driver

def get_calligrapher_text(driver, text, save_path):
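    """Type `text` into the Calligrapher input box, click the draw button, wait for
    rendering, and save a screenshot of the drawing canvas to `save_path`."""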
    text_input = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'text-input')))
    text_input.clear()
    text_input.send_keys(text)

    draw_button = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'draw-button')))
    draw_button.click()
    time.sleep(3)

    # Save the generated handwriting as an image
    canvas = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.ID, 'draw-area')))
    canvas.screenshot(save_path)
    print(f"Handwriting saved to: {save_path}")

# Function to detect paper angle within bounding box
def detect_paper_angle(image, bounding_box):
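    """Estimate the rotation angle (in degrees) of the paper inside `bounding_box` by
    finding the longest Hough line in the cropped region; returns 0 if no line is found.
    Expects an RGBA image. Defined as a helper but not currently called from process_image."""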
    x1, y1, x2, y2 = bounding_box
    roi = np.array(image)[y1:y2, x1:x2]
    gray = cv2.cvtColor(roi, cv2.COLOR_RGBA2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100, minLineLength=50, maxLineGap=10)
    if lines is not None:
        longest_line = max(lines, key=lambda line: np.linalg.norm((line[0][2] - line[0][0], line[0][3] - line[0][1])))
        x1, y1, x2, y2 = longest_line[0]
        dx = x2 - x1
        dy = y2 - y1
        angle = degrees(atan2(dy, dx))
        return angle
    else:
        return 0

# Function to process image and overlay handwriting
def process_image(image, text):
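    """Generate handwriting for `text` via Calligrapher, detect white-paper regions in
    `image` with the Roboflow model, and paste the resized handwriting (inset by 10%
    padding) onto each detected region. Returns the output image path, or None on error."""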
    try:
        # Initialize Selenium and generate handwriting
        save_path = "/tmp/handwriting.png"
        driver = get_calligrapher()
        get_calligrapher_text(driver, text, save_path)
        driver.quit()

        # Open generated handwriting image
        handwriting_image = Image.open(save_path).convert("RGBA")

        # Initialize Roboflow
        rf = Roboflow(api_key=ROBOFLOW_API_KEY)
        logging.debug("Initialized Roboflow API.")
        project = rf.workspace().project(PROJECT_NAME)
        logging.debug("Accessed project in Roboflow.")
        model = project.version(VERSION_NUMBER).model
        logging.debug("Loaded model from Roboflow.")

        # Save input image temporarily (convert to RGB so uploads with an alpha channel can be written as JPEG)
        input_image_path = "/tmp/input_image.jpg"
        image.convert("RGB").save(input_image_path)
        logging.debug(f"Input image saved to {input_image_path}.")

        # Perform inference
        logging.debug("Performing inference on the image...")
        prediction = model.predict(input_image_path, confidence=70, overlap=50).json()
        logging.debug(f"Inference result: {prediction}")

        # Open the image for processing
        pil_image = image.convert("RGBA")
        logging.debug("Converted image to RGBA mode.")

        # Iterate over detected objects
        for obj in prediction['predictions']:
            white_paper_width = obj['width']
            white_paper_height = obj['height']

            padding_x = int(white_paper_width * 0.1)  # 10% padding horizontally
            padding_y = int(white_paper_height * 0.1)  # 10% padding vertically

            box_width = white_paper_width - 2 * padding_x
            box_height = white_paper_height - 2 * padding_y

            x1_padded = int(obj['x'] - white_paper_width / 2 + padding_x)
            y1_padded = int(obj['y'] - white_paper_height / 2 + padding_y)

            # Resize handwriting image to fit the detected area
            # (Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the equivalent filter)
            resized_handwriting = handwriting_image.resize((box_width, box_height), Image.LANCZOS)

            # Paste handwriting onto detected area
            pil_image.paste(resized_handwriting, (x1_padded, y1_padded), resized_handwriting)

        # Save and return output image path
        output_image_path = "/tmp/output_image.png"
        pil_image.convert("RGB").save(output_image_path)
        logging.debug(f"Output image saved to {output_image_path}.")
        return output_image_path

    except Exception as e:
        logging.error(f"Error during image processing: {e}")
        return None

# Gradio interface function
def gradio_inference(image, text):
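    """Gradio callback: run process_image and map its result to the three outputs
    (preview image, downloadable file, status message)."""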
    logging.debug("Starting Gradio inference.")
    result_path = process_image(image, text)
    if result_path:
        logging.debug("Gradio inference successful.")
        return result_path, result_path, "Processing complete! Download the image below."
    logging.error("Gradio inference failed.")
    return None, None, "An error occurred while processing the image. Please check the logs."

# Gradio interface
interface = gr.Interface(
    fn=gradio_inference,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(label="Enter Text to Overlay"),
    ],
    outputs=[
        gr.Image(label="Processed Image Preview"),  # Preview processed image
        gr.File(label="Download Processed Image"),  # Download the image
        gr.Textbox(label="Status"),  # Status message
    ],
    title="Roboflow Detection with Calligrapher Text Overlay",
    description=(
        "Upload an image, enter text to overlay, and let the Roboflow model process the image. "
        "Handwritten text is generated using Calligrapher.ai and overlaid on the detected white paper areas. "
        "Preview or download the output image below."
    ),
    allow_flagging="never",
)

# Launch the Gradio app
if __name__ == "__main__":
    logging.debug("Launching Gradio interface.")
    interface.launch(share=True)