import gradio as gr
import logging
from roboflow import Roboflow
from PIL import Image, ImageDraw
import cv2
import numpy as np
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
import time
import os
from math import atan2, degrees

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.FileHandler("debug.log"), logging.StreamHandler()],
)

# Roboflow and model configuration
ROBOFLOW_API_KEY = "KUP9w62eUcD5PrrRMJsV"  # Replace with your API key
PROJECT_NAME = "model_verification_project"
VERSION_NUMBER = 2

# Paths to Chrome and ChromeDriver downloaded by Selenium Manager
CHROME_PATH = "/home/user/.cache/selenium/chrome/linux64/132.0.6834.83/chrome"
CHROMEDRIVER_PATH = "/home/user/.cache/selenium/chromedriver/linux64/132.0.6834.83/chromedriver"


# Selenium configuration for Calligrapher
def get_calligrapher():
    calli_url = "https://www.calligrapher.ai"
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # Run Chrome in headless mode
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.binary_location = CHROME_PATH
    service = Service(CHROMEDRIVER_PATH)
    driver = webdriver.Chrome(service=service, options=options)
    driver.maximize_window()
    driver.get(calli_url)

    # Adjust sliders for customization
    speed_slider = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'speed-slider')))
    ActionChains(driver).drag_and_drop_by_offset(speed_slider, 40, 0).perform()
    bias_slider = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'bias-slider')))
    ActionChains(driver).drag_and_drop_by_offset(bias_slider, 20, 0).perform()
    width_slider = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'width-slider')))
    ActionChains(driver).drag_and_drop_by_offset(width_slider, 20, 0).perform()

    # Select handwriting style
    select = Select(driver.find_element(By.ID, 'select-style'))
    select.select_by_visible_text('9')  # Adjust to the desired style
    return driver


def get_calligrapher_text(driver, text, save_path):
    text_input = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'text-input')))
    text_input.clear()
    text_input.send_keys(text)
    draw_button = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'draw-button')))
    draw_button.click()
    time.sleep(3)

    # Save the generated handwriting as an image
    canvas = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.ID, 'draw-area')))
    canvas.screenshot(save_path)
    print(f"Handwriting saved to: {save_path}")


# Function to detect paper angle within bounding box
def detect_paper_angle(image, bounding_box):
    x1, y1, x2, y2 = bounding_box
    roi = np.array(image)[y1:y2, x1:x2]
    gray = cv2.cvtColor(roi, cv2.COLOR_RGBA2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100, minLineLength=50, maxLineGap=10)
    if lines is not None:
        longest_line = max(
            lines,
            key=lambda line: np.linalg.norm((line[0][2] - line[0][0], line[0][3] - line[0][1])),
        )
        x1, y1, x2, y2 = longest_line[0]
        dx = x2 - x1
        dy = y2 - y1
        angle = degrees(atan2(dy, dx))
        return angle
    else:
        return 0


# Function to process image and overlay handwriting
def process_image(image, text):
    try:
        # Initialize Selenium and generate handwriting
        save_path = "/tmp/handwriting.png"
        driver = get_calligrapher()
        get_calligrapher_text(driver, text, save_path)
        driver.quit()

        # Open generated handwriting image
        handwriting_image = Image.open(save_path).convert("RGBA")

        # Initialize Roboflow
        rf = Roboflow(api_key=ROBOFLOW_API_KEY)
        logging.debug("Initialized Roboflow API.")
        project = rf.workspace().project(PROJECT_NAME)
        logging.debug("Accessed project in Roboflow.")
        model = project.version(VERSION_NUMBER).model
        logging.debug("Loaded model from Roboflow.")

        # Save input image temporarily (convert to RGB so it can be written as JPEG)
        input_image_path = "/tmp/input_image.jpg"
        image.convert("RGB").save(input_image_path)
        logging.debug(f"Input image saved to {input_image_path}.")

        # Perform inference
        logging.debug("Performing inference on the image...")
        prediction = model.predict(input_image_path, confidence=70, overlap=50).json()
        logging.debug(f"Inference result: {prediction}")

        # Open the image for processing
        pil_image = image.convert("RGBA")
        logging.debug("Converted image to RGBA mode.")

        # Iterate over detected objects
        for obj in prediction['predictions']:
            white_paper_width = obj['width']
            white_paper_height = obj['height']
            padding_x = int(white_paper_width * 0.1)  # 10% padding horizontally
            padding_y = int(white_paper_height * 0.1)  # 10% padding vertically
            box_width = int(white_paper_width - 2 * padding_x)
            box_height = int(white_paper_height - 2 * padding_y)
            x1_padded = int(obj['x'] - white_paper_width / 2 + padding_x)
            y1_padded = int(obj['y'] - white_paper_height / 2 + padding_y)

            # Resize handwriting image to fit the detected area
            # (Image.LANCZOS replaces Image.ANTIALIAS, which was removed in Pillow 10)
            resized_handwriting = handwriting_image.resize((box_width, box_height), Image.LANCZOS)

            # Paste handwriting onto detected area
            pil_image.paste(resized_handwriting, (x1_padded, y1_padded), resized_handwriting)

        # Save and return output image path
        output_image_path = "/tmp/output_image.png"
        pil_image.convert("RGB").save(output_image_path)
        logging.debug(f"Output image saved to {output_image_path}.")
        return output_image_path

    except Exception as e:
        logging.error(f"Error during image processing: {e}")
        return None


# Gradio interface function
def gradio_inference(image, text):
    logging.debug("Starting Gradio inference.")
    result_path = process_image(image, text)
    if result_path:
        logging.debug("Gradio inference successful.")
        return result_path, result_path, "Processing complete! Download the image below."
    logging.error("Gradio inference failed.")
    return None, None, "An error occurred while processing the image. Please check the logs."


# Gradio interface
interface = gr.Interface(
    fn=gradio_inference,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(label="Enter Text to Overlay"),
    ],
    outputs=[
        gr.Image(label="Processed Image Preview"),  # Preview processed image
        gr.File(label="Download Processed Image"),  # Download the image
        gr.Textbox(label="Status"),  # Status message
    ],
    title="Roboflow Detection with Calligrapher Text Overlay",
    description=(
        "Upload an image, enter text to overlay, and let the Roboflow model process the image. "
        "Handwritten text is generated using Calligrapher.ai and overlaid on the detected white paper areas. "
        "Preview or download the output image below."
    ),
    allow_flagging="never",
)

# Launch the Gradio app
if __name__ == "__main__":
    logging.debug("Launching Gradio interface.")
    interface.launch(share=True)