Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Manga Narration AI — Streamlit app.

Pipeline per uploaded manga page:
  1. YOLO object detection splits the page into panel and character crops.
  2. Tesseract OCR extracts text from the panel crops.
  3. A BART summarization model condenses the OCR text into a narration line.
"""

import os  # kept: may be used by deployment environment (unused here)

import numpy as np
import pytesseract
import streamlit as st
from PIL import Image
from torchvision import transforms  # kept: imported by original file (unused here)
from transformers import pipeline
from ultralytics import YOLO

# Initialize models at module level. NOTE(review): Streamlit re-executes the
# whole script on every interaction; wrapping these in st.cache_resource would
# avoid reloading — left as-is to keep behavior identical on older Streamlit.
yolo_model = YOLO('yolov5n')  # YOLO model for panel and character detection
summarizer = pipeline('summarization', model="facebook/bart-large-cnn")  # Text summarizer model

# Hyperparameters
st.sidebar.title("Adjust Hyperparameters")
detection_threshold = st.sidebar.slider("Detection Confidence Threshold", 0.1, 1.0, 0.4)
text_summary_length = st.sidebar.slider("Text Summary Length (Words)", 30, 150, 50)

# Upload section
st.title("Manga Narration AI")
uploaded_files = st.file_uploader(
    "Upload up to 60 Manga Images",
    accept_multiple_files=True,
    type=["jpg", "jpeg", "png"],
    key="images",
)

# Ensure there are uploaded files
if uploaded_files:
    st.write(f"Processing {len(uploaded_files)} images...")
    progress = st.progress(0)

    narration_script = ""
    num_images = len(uploaded_files)

    for i, uploaded_file in enumerate(uploaded_files):
        # Update progress bar
        progress.progress((i + 1) / num_images)

        # Open image and display. Force RGB: PNG uploads may be RGBA or
        # grayscale, which YOLO's numpy input and OCR handle inconsistently.
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption=f"Processing {uploaded_file.name}", use_column_width=True)

        # Convert image to numpy array for YOLO
        img_np = np.array(image)

        # Panel and character detection using YOLO
        results = yolo_model(img_np)
        panels = []
        characters = []

        for res in results:
            # BUGFIX: iterate the Boxes container itself, not res.boxes.xyxy.
            # Iterating .xyxy yields bare coordinate tensors that have no
            # .conf/.cls/.xyxy attributes, which crashed at runtime.
            for box in res.boxes:
                # box.conf / box.cls / box.xyxy are 1-element-per-box tensors.
                if float(box.conf[0]) >= detection_threshold:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    crop = image.crop((x1, y1, x2, y2))
                    label = res.names[int(box.cls[0])]
                    if label == "person":
                        characters.append(crop)
                    else:
                        panels.append(crop)

        # Display detected characters and panels
        st.write(f"Detected {len(panels)} panels and {len(characters)} characters in {uploaded_file.name}.")
        for panel in panels:
            st.image(panel, caption="Detected Panel", use_column_width=True)
        for character in characters:
            st.image(character, caption="Detected Character", use_column_width=True)

        # Text extraction using OCR (Tesseract)
        panel_text = ""
        for panel in panels:
            panel_text += pytesseract.image_to_string(panel) + " "

        # BUGFIX: strip before testing — the OCR loop appends a space per
        # panel, so the raw string was truthy even when no text was found.
        if panel_text.strip():
            # Summarize extracted text for clear narration. truncation=True
            # keeps long OCR dumps within the model's input limit instead of
            # raising. NOTE(review): max_length counts tokens, not words,
            # despite the slider label — confirm intended semantics.
            summary = summarizer(
                panel_text,
                max_length=text_summary_length,
                min_length=int(text_summary_length / 2),
                do_sample=False,
                truncation=True,
            )[0]['summary_text']
            narration_script += f"{summary}\n"
            st.write(f"Summary: {summary}")
        else:
            st.write(f"No text detected in panels of {uploaded_file.name}.")

    # Final narration script
    st.success("Narration generation completed.")
    st.write("Generated Narration Script:")
    st.text(narration_script)

    # Add download option for generated narration
    if narration_script:
        st.download_button("Download Narration", narration_script, "narration.txt")
|