# Hugging Face Space: overlays Calligrapher.ai handwriting onto Roboflow-detected paper regions.
import logging
import os
import time
from math import atan2, degrees

import cv2
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw
from roboflow import Roboflow
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait
# Configure logging: DEBUG level, mirrored to debug.log and the console.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.FileHandler("debug.log"), logging.StreamHandler()],
)

# Roboflow and model configuration.
# SECURITY NOTE(review): a live API key was hard-coded here; it is kept only as
# a fallback for backward compatibility. Set the ROBOFLOW_API_KEY environment
# variable and rotate the leaked key.
ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY", "KUP9w62eUcD5PrrRMJsV")
PROJECT_NAME = "model_verification_project"
VERSION_NUMBER = 2

# Paths to Chrome and ChromeDriver downloaded by Selenium Manager.
CHROME_PATH = "/home/user/.cache/selenium/chrome/linux64/132.0.6834.83/chrome"
CHROMEDRIVER_PATH = "/home/user/.cache/selenium/chromedriver/linux64/132.0.6834.83/chromedriver"
# Selenium configuration for Calligrapher
def get_calligrapher():
    """Launch a headless Chrome session on calligrapher.ai, tuned and ready.

    Opens the site, drags the speed/bias/width sliders by preset pixel
    offsets, and selects handwriting style '9'.

    Returns:
        selenium.webdriver.Chrome: a live driver; the caller must quit() it.
    """
    calli_url = "https://www.calligrapher.ai"
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # Run Chrome in headless mode
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.binary_location = CHROME_PATH
    service = Service(CHROMEDRIVER_PATH)
    driver = webdriver.Chrome(service=service, options=options)
    driver.maximize_window()
    driver.get(calli_url)

    # Adjust sliders for customization; offsets are horizontal pixels from
    # each slider handle's current position.
    for slider_id, offset in (
        ("speed-slider", 40),
        ("bias-slider", 20),
        ("width-slider", 20),
    ):
        slider = WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.ID, slider_id))
        )
        ActionChains(driver).drag_and_drop_by_offset(slider, offset, 0).perform()

    # Select handwriting style
    select = Select(driver.find_element(By.ID, "select-style"))
    select.select_by_visible_text("9")  # Adjust to the desired style
    return driver
def get_calligrapher_text(driver, text, save_path):
    """Type *text* into Calligrapher, render it, and screenshot the canvas.

    Args:
        driver: Chrome driver prepared by get_calligrapher().
        text: the string to render as handwriting.
        save_path: filesystem path the canvas screenshot is written to (PNG).
    """
    text_input = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.ID, 'text-input'))
    )
    text_input.clear()
    text_input.send_keys(text)
    draw_button = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.ID, 'draw-button'))
    )
    draw_button.click()
    # Crude fixed wait for the drawing animation to finish; there is no DOM
    # signal to wait on.
    time.sleep(3)
    # Save the generated handwriting as an image
    canvas = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.ID, 'draw-area'))
    )
    canvas.screenshot(save_path)
    # Use the module logger instead of print, consistent with the rest of the file.
    logging.debug(f"Handwriting saved to: {save_path}")
# Function to detect paper angle within bounding box
def detect_paper_angle(image, bounding_box):
    """Estimate the rotation of the paper inside *bounding_box*.

    Crops the box out of *image*, edge-detects it, and takes the direction of
    the longest Hough line segment as the paper's dominant orientation.

    Args:
        image: PIL image (RGBA, RGB, or grayscale).
        bounding_box: (x1, y1, x2, y2) pixel coordinates of the region.

    Returns:
        float | int: angle in degrees (atan2 convention), or 0 when no line
        segment is detected.
    """
    x1, y1, x2, y2 = bounding_box
    roi = np.array(image)[y1:y2, x1:x2]
    # Original assumed RGBA input and crashed on RGB/grayscale images;
    # convert according to the actual channel count.
    if roi.ndim == 2:
        gray = roi
    elif roi.shape[2] == 4:
        gray = cv2.cvtColor(roi, cv2.COLOR_RGBA2GRAY)
    else:
        gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    lines = cv2.HoughLinesP(
        edges, 1, np.pi / 180, threshold=100, minLineLength=50, maxLineGap=10
    )
    if lines is None:
        return 0
    # The longest detected segment is taken as the paper edge. Distinct names
    # avoid shadowing the bounding-box coordinates above.
    longest_line = max(
        lines,
        key=lambda line: np.hypot(line[0][2] - line[0][0], line[0][3] - line[0][1]),
    )
    lx1, ly1, lx2, ly2 = longest_line[0]
    return degrees(atan2(ly2 - ly1, lx2 - lx1))
# Function to process image and overlay handwriting
def process_image(image, text):
    """Overlay Calligrapher handwriting onto detected paper regions of *image*.

    Pipeline: render *text* via calligrapher.ai, run the Roboflow model to
    find paper regions, paste the resized handwriting (with 10% inner
    padding) into each detection, and save the composite.

    Args:
        image: PIL image uploaded by the user.
        text: text to render as handwriting.

    Returns:
        str | None: path of the saved output image, or None on any failure
        (the error is logged).
    """
    try:
        # Initialize Selenium and generate handwriting
        save_path = "/tmp/handwriting.png"
        driver = get_calligrapher()
        try:
            get_calligrapher_text(driver, text, save_path)
        finally:
            # Always release the browser, even if rendering fails.
            driver.quit()

        # Open generated handwriting image
        handwriting_image = Image.open(save_path).convert("RGBA")

        # Initialize Roboflow
        rf = Roboflow(api_key=ROBOFLOW_API_KEY)
        logging.debug("Initialized Roboflow API.")
        project = rf.workspace().project(PROJECT_NAME)
        logging.debug("Accessed project in Roboflow.")
        model = project.version(VERSION_NUMBER).model
        logging.debug("Loaded model from Roboflow.")

        # Save input image temporarily
        input_image_path = "/tmp/input_image.jpg"
        image.save(input_image_path)
        logging.debug(f"Input image saved to {input_image_path}.")

        # Perform inference
        logging.debug("Performing inference on the image...")
        prediction = model.predict(input_image_path, confidence=70, overlap=50).json()
        logging.debug(f"Inference result: {prediction}")

        # Open the image for processing
        pil_image = image.convert("RGBA")
        logging.debug("Converted image to RGBA mode.")

        # Iterate over detected objects
        for obj in prediction['predictions']:
            # Roboflow may report float sizes; resize() requires ints.
            white_paper_width = int(obj['width'])
            white_paper_height = int(obj['height'])
            padding_x = int(white_paper_width * 0.1)  # 10% padding horizontally
            padding_y = int(white_paper_height * 0.1)  # 10% padding vertically
            box_width = white_paper_width - 2 * padding_x
            box_height = white_paper_height - 2 * padding_y
            if box_width <= 0 or box_height <= 0:
                continue  # detection too small to hold any handwriting
            # Roboflow reports the box center; convert to the top-left corner.
            x1_padded = int(obj['x'] - white_paper_width / 2 + padding_x)
            y1_padded = int(obj['y'] - white_paper_height / 2 + padding_y)
            # Resize handwriting to fit the padded area. Image.ANTIALIAS was
            # removed in Pillow 10; LANCZOS is the equivalent filter.
            resized_handwriting = handwriting_image.resize(
                (box_width, box_height), Image.LANCZOS
            )
            # Paste with the handwriting itself as mask so transparency holds.
            pil_image.paste(resized_handwriting, (x1_padded, y1_padded), resized_handwriting)

        # Save and return output image path
        output_image_path = "/tmp/output_image.png"
        pil_image.convert("RGB").save(output_image_path)
        logging.debug(f"Output image saved to {output_image_path}.")
        return output_image_path
    except Exception as e:
        # Top-level boundary for the Gradio handler: log and signal failure.
        logging.error(f"Error during image processing: {e}")
        return None
# Gradio interface function
def gradio_inference(image, text):
    """Gradio callback: run process_image and fan its result out to the
    three outputs (preview image, download file, status text)."""
    logging.debug("Starting Gradio inference.")
    result_path = process_image(image, text)
    if not result_path:
        logging.error("Gradio inference failed.")
        return None, None, "An error occurred while processing the image. Please check the logs."
    logging.debug("Gradio inference successful.")
    return result_path, result_path, "Processing complete! Download the image below."
# Gradio interface: image + text in; preview, download, and status out.
interface = gr.Interface(
    fn=gradio_inference,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(label="Enter Text to Overlay"),
    ],
    outputs=[
        gr.Image(label="Processed Image Preview"),  # Preview processed image
        gr.File(label="Download Processed Image"),  # Download the image
        gr.Textbox(label="Status"),  # Status message
    ],
    title="Roboflow Detection with Calligrapher Text Overlay",
    description=(
        "Upload an image, enter text to overlay, and let the Roboflow model process the image. "
        "Handwritten text is generated using Calligrapher.ai and overlaid on the detected white paper areas. "
        "Preview or download the output image below."
    ),
    allow_flagging="never",
)
# Launch the Gradio app (share=True exposes a public tunnel URL).
if __name__ == "__main__":
    logging.debug("Launching Gradio interface.")
    interface.launch(share=True)