# Source listing captured from a Hugging Face Space (status at capture time: Runtime error).
# File size: 3,140 bytes.
import streamlit as st
from PIL import Image
import cv2
import numpy as np
import pytesseract
import torch
from torchvision import models, transforms
from transformers import DetrImageProcessor, DetrForObjectDetection
# Pre-trained DETR object-detection checkpoint; the image processor and the
# detector are both loaded from the same identifier.
DETR_CHECKPOINT = "facebook/detr-resnet-50"
processor = DetrImageProcessor.from_pretrained(DETR_CHECKPOINT)
model = DetrForObjectDetection.from_pretrained(DETR_CHECKPOINT)

# Image transformations: a pipeline consisting of a single ToTensor step.
transform = transforms.Compose([transforms.ToTensor()])
def detect_panels(image, threshold):
    """Find candidate panel rectangles on a page from its edge contours.

    Args:
        image: Page pixels as a NumPy array, either a colour array in RGB
            channel order (what ``np.array`` yields for a PIL RGB image) or
            an already-grayscale 2-D array.
        threshold: Minimum size in pixels; a rectangle is kept only when both
            its width and its height are strictly greater than this value.

    Returns:
        A list of ``{"coords": (x, y, w, h)}`` dicts, one per kept contour,
        where the tuple is the bounding rectangle reported by
        ``cv2.boundingRect``.
    """
    # The page comes from PIL, so channels are RGB; converting with the BGR
    # code would swap the red and blue luminance weights.
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, 100, 200)
    # RETR_EXTERNAL: only outermost contours, so nested shapes are ignored.
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    rects = (cv2.boundingRect(contour) for contour in contours)
    return [
        {"coords": (x, y, w, h)}
        for x, y, w, h in rects
        if w > threshold and h > threshold
    ]
def detect_characters(image, threshold, label_id=0):
    """Run the DETR detector on a page and keep sufficiently large boxes.

    Args:
        image: PIL image of the page; its ``width`` and ``height`` are used to
            scale the normalized model boxes to pixels.
        threshold: Minimum size in pixels; a box is kept only when both its
            width and its height are strictly greater than this value.
        label_id: Class index a query must predict (by arg-max over the
            logits) to count as a character. Defaults to 0, the value the
            original code assumed.
            NOTE(review): for a COCO-trained DETR checkpoint index 0 is
            presumably not a real object class - confirm the intended id
            against ``model.config.id2label``.

    Returns:
        A list of ``{"coords": (x, y, w, h)}`` dicts of Python floats, where
        ``(x, y)`` is the top-left corner in pixels, matching the layout used
        for panel boxes.
    """
    inputs = processor(images=image, return_tensors="pt")
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    predicted_classes = outputs.logits[0].argmax(dim=-1)
    # pred_boxes are normalized (center_x, center_y, width, height); build the
    # pixel scale once, on the same device/dtype as the boxes.
    pixel_scale = outputs.pred_boxes.new_tensor(
        [image.width, image.height, image.width, image.height]
    )
    pixel_boxes = outputs.pred_boxes[0] * pixel_scale

    characters = []
    for class_id, (cx, cy, w, h) in zip(predicted_classes.tolist(), pixel_boxes.tolist()):
        if class_id != label_id:
            continue
        if w > threshold and h > threshold:
            # Convert the box centre to a top-left corner.
            characters.append({"coords": (cx - w / 2, cy - h / 2, w, h)})
    return characters
def match_text_to_characters(image, panels):
    """OCR every panel region of the page and pair the text with its panel.

    Args:
        image: PIL image of the full page; each panel is cut out with ``crop``.
        panels: Iterable of dicts whose ``'coords'`` entry is ``(x, y, w, h)``;
            the values are truncated to ``int`` before cropping.

    Returns:
        A list of ``{"panel": panel, "dialog": text}`` dicts in input order,
        where ``text`` is the string returned by
        ``pytesseract.image_to_string`` for that panel's crop.
    """

    def _read_panel(panel):
        # Crop box is (left, upper, right, lower), hence the additions.
        left, top, width, height = (int(value) for value in panel['coords'])
        region = image.crop((left, top, left + width, top + height))
        return pytesseract.image_to_string(region)

    return [{"panel": panel, "dialog": _read_panel(panel)} for panel in panels]
def match_characters(characters, eps=20):
    """Group detected characters whose reference points lie close together.

    The original code called ``DBSCAN(eps=20, min_samples=1)``, a name that is
    never imported in this file. With ``min_samples=1`` every point is a core
    point, so the clusters are exactly the connected components of the graph
    linking points at distance <= ``eps``; that grouping is computed here with
    NumPy only. Labels are numbered from 0 in order of the first member of
    each group.

    Args:
        characters: Sequence of dicts with a 4-element ``'coords'`` entry.
        eps: Maximum distance between two reference points for them to be
            linked directly. Defaults to 20, the original hard-coded value.

    Returns:
        A list of ``{"character": c, "cluster": label}`` dicts in input order;
        an empty list when ``characters`` is empty.
    """
    if not characters:
        return []

    # NOTE(review): this averages coords[0] with coords[2] and coords[1] with
    # coords[3], which is a box centre only for (x1, y1, x2, y2) coords -
    # confirm the intended format against whatever produces 'coords'.
    points = np.array(
        [
            ((c['coords'][0] + c['coords'][2]) / 2, (c['coords'][1] + c['coords'][3]) / 2)
            for c in characters
        ],
        dtype=float,
    )
    offsets = points[:, None, :] - points[None, :, :]
    linked = np.hypot(offsets[..., 0], offsets[..., 1]) <= eps

    labels = [-1] * len(characters)
    next_label = 0
    for seed in range(len(characters)):
        if labels[seed] != -1:
            continue
        # Flood-fill everything reachable from this seed through eps-links.
        labels[seed] = next_label
        pending = [seed]
        while pending:
            current = pending.pop()
            for neighbor in np.flatnonzero(linked[current]).tolist():
                if labels[neighbor] == -1:
                    labels[neighbor] = next_label
                    pending.append(neighbor)
        next_label += 1

    return [
        {"character": c, "cluster": label}
        for c, label in zip(characters, labels)
    ]
# Streamlit UI: upload a page, tune the two size thresholds, show the results.
st.title("Advanced Manga Reader")
uploaded_file = st.file_uploader("Upload a manga page", type=["jpg", "png"])

if uploaded_file is not None:
    # Force three RGB channels regardless of the uploaded file's mode.
    page = Image.open(uploaded_file).convert('RGB')
    st.image(page, caption='Uploaded Manga Page', use_column_width=True)

    # Minimum width/height a detection needs in order to be kept.
    min_panel_size = st.slider("Panel Detection Threshold", 0, 500, 100)
    min_character_size = st.slider("Character Detection Threshold", 0.0, 50.0, 10.0)

    # Panel detection works on a NumPy array; the other steps take the PIL image.
    found_panels = detect_panels(np.array(page), min_panel_size)
    found_characters = detect_characters(page, min_character_size)
    panel_dialogues = match_text_to_characters(page, found_panels)

    # Raw result dumps, in the same order as the detection steps.
    for heading, payload in (
        ("Detected Panels:", found_panels),
        ("Detected Characters:", found_characters),
        ("Dialogues:", panel_dialogues),
    ):
        st.write(heading, payload)

    # One line of recognized text per panel.
    for entry in panel_dialogues:
        recognized = entry['dialog']
        st.write(f"Panel: {recognized}")