Spaces:

atlury
/

document-layout-comparison

Sleeping

File size: 1,564 Bytes

492a9fd
 
 
 
73cd058
fea7704
db520f8
73cd058
 
 
 
 
 
 
 
 
492a9fd
db520f8
 
 
492a9fd
 
 
 
db520f8
492a9fd
 
 
db520f8
492a9fd
db520f8
 
 
 
 
 
492a9fd
 
 
 
fea7704
 
 
492a9fd

import gradio as gr
from ultralytics import YOLO
import cv2
import numpy as np
import os
import requests
import torch

# Ensure the model weights are available locally, downloading them on first run.
model_path = "yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
if not os.path.exists(model_path):
    model_url = "https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
    # Stream into a temporary file and rename it only after a complete,
    # successful download. Otherwise an HTTP error page or an interrupted
    # transfer would be left behind under model_path, and the existence check
    # above would skip the download on every later run.
    partial_path = model_path + ".part"
    try:
        # (connect timeout, read timeout) so a stalled server cannot hang startup.
        with requests.get(model_url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                # The checkpoint is large; write it in 1 MiB chunks instead of
                # holding the whole body in memory.
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial_path, model_path)
    finally:
        # Only present here if the download failed part-way through.
        if os.path.exists(partial_path):
            os.remove(partial_path)

# Load the document segmentation model on CPU
device = torch.device('cpu')
docseg_model = YOLO(model_path).to(device)

def process_image(image):
    """Run document-layout detection on an image and summarize the detections.

    Args:
        image: The picture to analyze, as delivered by the Gradio image input
            (configured with ``type="pil"``, so an RGB PIL image), or ``None``
            when the user submitted without uploading anything.

    Returns:
        A ``(annotated_img, detected_areas_labels)`` tuple. ``annotated_img``
        is the RGB array of the input with the detections drawn on it, and
        ``detected_areas_labels`` is a string with one ``"<class>: <conf>"``
        line per detected box (empty when nothing was detected). When
        ``image`` is ``None`` the tuple is ``(None, "No image provided.")``.
    """
    if image is None:
        return None, "No image provided."

    # OpenCV-style BGR channel order is used for the model input; the plotted
    # result comes back in BGR as well and is swapped to RGB for display.
    bgr_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    result = docseg_model(bgr_image)[0]

    annotated_img = cv2.cvtColor(result.plot(), cv2.COLOR_BGR2RGB)

    # Boxes carry class indices and confidences as tensors; there is no
    # per-box ``label`` attribute. Convert both to plain Python numbers and
    # look the class name up in the result's index -> name mapping.
    boxes = result.boxes
    class_ids = boxes.cls.tolist()
    confidences = boxes.conf.tolist()
    detected_areas_labels = "\n".join(
        f"{result.names[int(class_id)]}: {confidence:.2f}"
        for class_id, confidence in zip(class_ids, confidences)
    )

    return annotated_img, detected_areas_labels

# Build the Gradio UI: one uploaded image in, the annotated image plus a
# text summary of the detections out.
interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="pil", label="Annotated Image"),
        gr.Textbox(label="Detected Areas and Labels"),
    ],
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()