File size: 7,704 Bytes
6fbeeae e5c1793 6fbeeae e5c1793 f2b8513 e5c1793 6fbeeae e5c1793 6fbeeae f2b8513 e5c1793 f2b8513 e5c1793 6fbeeae e5c1793 6fbeeae e5c1793 6fbeeae e5c1793 6fbeeae e5c1793 6fbeeae e5c1793 6fbeeae e5c1793 6fbeeae e5c1793 6fbeeae e5c1793 6fbeeae e5c1793 6fbeeae e5c1793 6fbeeae e5c1793 f2b8513 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
import gradio as gr
import logging
from roboflow import Roboflow
from PIL import Image, ImageDraw
import cv2
import numpy as np
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver.support.ui import Select
import time
import os
from math import atan2, degrees
# Configure logging: DEBUG and above go to both debug.log and the console.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.FileHandler("debug.log"), logging.StreamHandler()],
)
# Roboflow and model configuration.
# SECURITY: an API key should not live in source control. Prefer the
# ROBOFLOW_API_KEY environment variable; the literal is kept only as a
# backward-compatible fallback and should be rotated/removed.
ROBOFLOW_API_KEY = os.getenv("ROBOFLOW_API_KEY", "KUP9w62eUcD5PrrRMJsV")
PROJECT_NAME = "model_verification_project"
VERSION_NUMBER = 2
# Paths to Chrome and ChromeDriver downloaded by Selenium Manager
CHROME_PATH = "/home/user/.cache/selenium/chrome/linux64/132.0.6834.83/chrome"
CHROMEDRIVER_PATH = "/home/user/.cache/selenium/chromedriver/linux64/132.0.6834.83/chromedriver"
# Selenium configuration for Calligrapher
def get_calligrapher():
    """Start a headless Chrome session on calligrapher.ai and configure it.

    Tunes the speed/bias/width sliders by dragging them a fixed pixel offset
    and selects a handwriting style from the style dropdown.

    Returns:
        A ready-to-use Selenium WebDriver pointed at calligrapher.ai.

    Raises:
        selenium.common.exceptions.WebDriverException (or a subclass) if the
        browser cannot start or the page widgets never become interactable.
        The browser is quit before the exception propagates.
    """
    calli_url = "https://www.calligrapher.ai"
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # Run Chrome in headless mode
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    # maximize_window() is a no-op/unreliable in headless mode; fix the window
    # size up front instead so the slider drag offsets are reproducible.
    options.add_argument("--window-size=1920,1080")
    options.binary_location = CHROME_PATH
    service = Service(CHROMEDRIVER_PATH)
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get(calli_url)
        # Adjust sliders for customization (x-offsets are in pixels).
        for slider_id, x_offset in (
            ("speed-slider", 40),
            ("bias-slider", 20),
            ("width-slider", 20),
        ):
            slider = WebDriverWait(driver, 20).until(
                EC.element_to_be_clickable((By.ID, slider_id))
            )
            ActionChains(driver).drag_and_drop_by_offset(slider, x_offset, 0).perform()
        # Select handwriting style
        select = Select(driver.find_element(By.ID, 'select-style'))
        select.select_by_visible_text('9')  # Adjust to the desired style
        return driver
    except Exception:
        # Don't leak a browser process if page setup fails partway through.
        driver.quit()
        raise
def get_calligrapher_text(driver, text, save_path):
    """Render *text* as handwriting on calligrapher.ai and screenshot it.

    Args:
        driver: WebDriver already on calligrapher.ai (see get_calligrapher).
        text: Text to type into the site's input box.
        save_path: Filesystem path where the canvas screenshot (PNG) is saved.
    """
    text_input = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.ID, 'text-input'))
    )
    text_input.clear()
    text_input.send_keys(text)
    draw_button = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.ID, 'draw-button'))
    )
    draw_button.click()
    # The site animates the strokes; wait for the drawing to finish.
    # NOTE(review): a fixed sleep is brittle for long inputs — confirm whether
    # the page exposes a completion signal that could be waited on instead.
    time.sleep(3)
    # Save the generated handwriting as an image
    canvas = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.ID, 'draw-area'))
    )
    canvas.screenshot(save_path)
    # Use the module's logging setup instead of print (consistent with the
    # rest of the file); lazy %-formatting avoids building the string eagerly.
    logging.debug("Handwriting saved to: %s", save_path)
# Function to detect paper angle within bounding box
def detect_paper_angle(image, bounding_box):
    """Estimate the dominant edge angle of the paper inside a bounding box.

    Args:
        image: PIL image (grayscale, RGB, or RGBA).
        bounding_box: (x1, y1, x2, y2) pixel coordinates of the region.

    Returns:
        Angle in degrees of the longest Hough line segment found in the
        region, or 0 when no line is detected.
    """
    x1, y1, x2, y2 = bounding_box
    roi = np.array(image)[y1:y2, x1:x2]
    # The original code always used COLOR_RGBA2GRAY, which raises on 3-channel
    # (RGB) input; pick the conversion matching the actual channel count.
    if roi.ndim == 2:
        gray = roi
    elif roi.shape[2] == 4:
        gray = cv2.cvtColor(roi, cv2.COLOR_RGBA2GRAY)
    else:
        gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100, minLineLength=50, maxLineGap=10)
    if lines is None:
        return 0
    # The longest segment is taken as the paper's dominant orientation.
    longest_line = max(
        lines,
        key=lambda line: np.hypot(line[0][2] - line[0][0], line[0][3] - line[0][1]),
    )
    # Use distinct names so the bounding-box coordinates are not shadowed.
    lx1, ly1, lx2, ly2 = longest_line[0]
    return degrees(atan2(ly2 - ly1, lx2 - lx1))
# Function to process image and overlay handwriting
def process_image(image, text):
    """Overlay Calligrapher handwriting of *text* onto detected paper areas.

    Pipeline: generate a handwriting PNG via Selenium, run the Roboflow
    detector on *image*, then paste the (resized) handwriting into each
    detected region with a 10% inner margin.

    Args:
        image: PIL image uploaded by the user.
        text: Text to render as handwriting.

    Returns:
        Path to the composited output PNG, or None on any failure (the error
        is logged rather than raised — callers rely on the None sentinel).
    """
    try:
        # Initialize Selenium and generate handwriting
        save_path = "/tmp/handwriting.png"
        driver = get_calligrapher()
        try:
            get_calligrapher_text(driver, text, save_path)
        finally:
            # Always release the browser, even if handwriting generation fails.
            driver.quit()
        # Open generated handwriting image
        handwriting_image = Image.open(save_path).convert("RGBA")
        # Initialize Roboflow
        rf = Roboflow(api_key=ROBOFLOW_API_KEY)
        logging.debug("Initialized Roboflow API.")
        project = rf.workspace().project(PROJECT_NAME)
        logging.debug("Accessed project in Roboflow.")
        model = project.version(VERSION_NUMBER).model
        logging.debug("Loaded model from Roboflow.")
        # Save input image temporarily
        input_image_path = "/tmp/input_image.jpg"
        image.save(input_image_path)
        logging.debug(f"Input image saved to {input_image_path}.")
        # Perform inference
        logging.debug("Performing inference on the image...")
        prediction = model.predict(input_image_path, confidence=70, overlap=50).json()
        logging.debug(f"Inference result: {prediction}")
        # Open the image for processing
        pil_image = image.convert("RGBA")
        logging.debug("Converted image to RGBA mode.")
        # Iterate over detected objects
        for obj in prediction['predictions']:
            white_paper_width = obj['width']
            white_paper_height = obj['height']
            # Keep a 10% margin inside the detected sheet on each side.
            padding_x = int(white_paper_width * 0.1)
            padding_y = int(white_paper_height * 0.1)
            # Cast to int: Roboflow reports sizes as floats, and PIL's
            # resize() requires integer dimensions.
            box_width = int(white_paper_width - 2 * padding_x)
            box_height = int(white_paper_height - 2 * padding_y)
            if box_width <= 0 or box_height <= 0:
                logging.debug("Skipping detection too small to hold handwriting.")
                continue
            # Roboflow reports box centers; convert to the top-left corner.
            x1_padded = int(obj['x'] - white_paper_width / 2 + padding_x)
            y1_padded = int(obj['y'] - white_paper_height / 2 + padding_y)
            # Resize handwriting image to fit the detected area.
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            resized_handwriting = handwriting_image.resize((box_width, box_height), Image.LANCZOS)
            # Paste using the handwriting's own alpha channel as the mask.
            pil_image.paste(resized_handwriting, (x1_padded, y1_padded), resized_handwriting)
        # Save and return output image path
        output_image_path = "/tmp/output_image.png"
        pil_image.convert("RGB").save(output_image_path)
        logging.debug(f"Output image saved to {output_image_path}.")
        return output_image_path
    except Exception as e:
        logging.error(f"Error during image processing: {e}")
        return None
# Gradio interface function
def gradio_inference(image, text):
    """Gradio callback: run the overlay pipeline and shape its three outputs
    (preview image, downloadable file, status message)."""
    logging.debug("Starting Gradio inference.")
    result_path = process_image(image, text)
    if not result_path:
        # process_image already logged the underlying error.
        logging.error("Gradio inference failed.")
        return None, None, "An error occurred while processing the image. Please check the logs."
    logging.debug("Gradio inference successful.")
    return result_path, result_path, "Processing complete! Download the image below."
# Gradio interface: two inputs (image + overlay text), three outputs
# (preview, download, status). Widget lists are named for readability.
_interface_inputs = [
    gr.Image(type="pil", label="Upload an Image"),
    gr.Textbox(label="Enter Text to Overlay"),
]
_interface_outputs = [
    gr.Image(label="Processed Image Preview"),  # Preview processed image
    gr.File(label="Download Processed Image"),  # Download the image
    gr.Textbox(label="Status"),                 # Status message
]
interface = gr.Interface(
    fn=gradio_inference,
    inputs=_interface_inputs,
    outputs=_interface_outputs,
    title="Roboflow Detection with Calligrapher Text Overlay",
    description=(
        "Upload an image, enter text to overlay, and let the Roboflow model process the image. "
        "Handwritten text is generated using Calligrapher.ai and overlaid on the detected white paper areas. "
        "Preview or download the output image below."
    ),
    allow_flagging="never",
)
# Launch the Gradio app
if __name__ == "__main__":
    logging.debug("Launching Gradio interface.")
    # share=True requests a temporary public gradio.live URL for the app.
    interface.launch(share=True)
|