Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Manga Narration AI — Streamlit app.

Pipeline per uploaded manga page:
  1. YOLO object detection splits the page into panel and character crops.
  2. Tesseract OCR extracts text from the panel crops.
  3. A BART summarization model condenses the OCR text into a narration line.
"""

import os  # kept: may be used by deployment environment (unused here)

import numpy as np
import pytesseract
import streamlit as st
from PIL import Image
from torchvision import transforms  # kept: imported by original file (unused here)
from transformers import pipeline
from ultralytics import YOLO

# Initialize models at module level. NOTE(review): Streamlit re-executes the
# whole script on every interaction; wrapping these in st.cache_resource would
# avoid reloading — left as-is to keep behavior identical on older Streamlit.
yolo_model = YOLO('yolov5n')  # YOLO model for panel and character detection
summarizer = pipeline('summarization', model="facebook/bart-large-cnn")  # Text summarizer model

# Hyperparameters
st.sidebar.title("Adjust Hyperparameters")
detection_threshold = st.sidebar.slider("Detection Confidence Threshold", 0.1, 1.0, 0.4)
text_summary_length = st.sidebar.slider("Text Summary Length (Words)", 30, 150, 50)

# Upload section
st.title("Manga Narration AI")
uploaded_files = st.file_uploader(
    "Upload up to 60 Manga Images",
    accept_multiple_files=True,
    type=["jpg", "jpeg", "png"],
    key="images",
)

# Ensure there are uploaded files
if uploaded_files:
    st.write(f"Processing {len(uploaded_files)} images...")
    progress = st.progress(0)

    narration_script = ""
    num_images = len(uploaded_files)

    for i, uploaded_file in enumerate(uploaded_files):
        # Update progress bar
        progress.progress((i + 1) / num_images)

        # Open image and display. Force RGB: PNG uploads may be RGBA or
        # grayscale, which YOLO's numpy input and OCR handle inconsistently.
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption=f"Processing {uploaded_file.name}", use_column_width=True)

        # Convert image to numpy array for YOLO
        img_np = np.array(image)

        # Panel and character detection using YOLO
        results = yolo_model(img_np)
        panels = []
        characters = []

        for res in results:
            # BUGFIX: iterate the Boxes container itself, not res.boxes.xyxy.
            # Iterating .xyxy yields bare coordinate tensors that have no
            # .conf/.cls/.xyxy attributes, which crashed at runtime.
            for box in res.boxes:
                # box.conf / box.cls / box.xyxy are 1-element-per-box tensors.
                if float(box.conf[0]) >= detection_threshold:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    crop = image.crop((x1, y1, x2, y2))
                    label = res.names[int(box.cls[0])]
                    if label == "person":
                        characters.append(crop)
                    else:
                        panels.append(crop)

        # Display detected characters and panels
        st.write(f"Detected {len(panels)} panels and {len(characters)} characters in {uploaded_file.name}.")
        for panel in panels:
            st.image(panel, caption="Detected Panel", use_column_width=True)
        for character in characters:
            st.image(character, caption="Detected Character", use_column_width=True)

        # Text extraction using OCR (Tesseract)
        panel_text = ""
        for panel in panels:
            panel_text += pytesseract.image_to_string(panel) + " "

        # BUGFIX: strip before testing — the OCR loop appends a space per
        # panel, so the raw string was truthy even when no text was found.
        if panel_text.strip():
            # Summarize extracted text for clear narration. truncation=True
            # keeps long OCR dumps within the model's input limit instead of
            # raising. NOTE(review): max_length counts tokens, not words,
            # despite the slider label — confirm intended semantics.
            summary = summarizer(
                panel_text,
                max_length=text_summary_length,
                min_length=int(text_summary_length / 2),
                do_sample=False,
                truncation=True,
            )[0]['summary_text']
            narration_script += f"{summary}\n"
            st.write(f"Summary: {summary}")
        else:
            st.write(f"No text detected in panels of {uploaded_file.name}.")

    # Final narration script
    st.success("Narration generation completed.")
    st.write("Generated Narration Script:")
    st.text(narration_script)

    # Add download option for generated narration
    if narration_script:
        st.download_button("Download Narration", narration_script, "narration.txt")
|