daranaka committed on
Commit db28818 · verified · 1 Parent(s): 51d7502

Create app.py

Files changed (1)
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
+ import streamlit as st
+ from PIL import Image
+ import pytesseract
+ from ultralytics import YOLO
+ from transformers import pipeline
+ import numpy as np
+
+ # Initialize models
+ yolo_model = YOLO('yolov5n')  # General-purpose YOLO detector; "person" boxes are treated as characters, all other boxes as panels
+ summarizer = pipeline('summarization', model="facebook/bart-large-cnn")  # Text summarizer model
+
+ # Hyperparameters
+ st.sidebar.title("Adjust Hyperparameters")
+ detection_threshold = st.sidebar.slider("Detection Confidence Threshold", 0.1, 1.0, 0.4)
+ text_summary_length = st.sidebar.slider("Text Summary Length (Words)", 30, 150, 50)
+
+ # Upload section
+ st.title("Manga Narration AI")
+ uploaded_files = st.file_uploader("Upload up to 60 Manga Images", accept_multiple_files=True, type=["jpg", "jpeg", "png"], key="images")
+
+ # Ensure there are uploaded files
+ if uploaded_files:
+     st.write(f"Processing {len(uploaded_files)} images...")
+     progress = st.progress(0)
+
+     narration_script = ""
+     num_images = len(uploaded_files)
+
+     for i, uploaded_file in enumerate(uploaded_files):
+         # Update progress bar
+         progress.progress((i + 1) / num_images)
+
+         # Open image (converted to RGB so YOLO always receives a 3-channel array) and display it
+         image = Image.open(uploaded_file).convert("RGB")
+         st.image(image, caption=f"Processing {uploaded_file.name}", use_column_width=True)
+
+         # Convert image to numpy array for YOLO
+         img_np = np.array(image)
+
+         # Panel and character detection using YOLO
+         results = yolo_model(img_np)
+         panels = []
+         characters = []
+
+         for res in results:
+             for box in res.boxes:
+                 # Filter detections based on confidence
+                 if float(box.conf[0]) >= detection_threshold:
+                     x1, y1, x2, y2 = map(int, box.xyxy[0])
+                     crop = image.crop((x1, y1, x2, y2))
+                     label = res.names[int(box.cls[0])]
+                     if label == "person":
+                         characters.append(crop)
+                     else:
+                         panels.append(crop)
+
+         # Display detected characters and panels
+         st.write(f"Detected {len(panels)} panels and {len(characters)} characters in {uploaded_file.name}.")
+         for panel in panels:
+             st.image(panel, caption="Detected Panel", use_column_width=True)
+         for character in characters:
+             st.image(character, caption="Detected Character", use_column_width=True)
+
+         # Text extraction using OCR (Tesseract)
+         panel_text = ""
+         for panel in panels:
+             panel_text += pytesseract.image_to_string(panel) + " "
+
+         if panel_text.strip():
+             # Summarize extracted text for clear narration
+             # (max_length / min_length are BART token counts driven by the sidebar slider)
+             summary = summarizer(panel_text, max_length=text_summary_length, min_length=int(text_summary_length / 2), do_sample=False)[0]['summary_text']
+             narration_script += f"{summary}\n"
+             st.write(f"Summary: {summary}")
+         else:
+             st.write(f"No text detected in panels of {uploaded_file.name}.")
+
+     # Final narration script
+     st.success("Narration generation completed.")
+     st.write("Generated Narration Script:")
+     st.text(narration_script)
+
+     # Add download option for generated narration
+     if narration_script:
+         st.download_button("Download Narration", narration_script, "narration.txt")
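
Side note: beyond the Python packages imported above, app.py assumes the Tesseract OCR binary is available on the host. A minimal local smoke test along the lines below (a sketch; the module list is inferred from app.py's imports, and smoke_test.py is a hypothetical helper, not part of this commit) can confirm the environment before launching the app with streamlit run app.py.

# smoke_test.py -- hypothetical helper, not part of this commit.
# Confirms that the packages app.py imports resolve and that the Tesseract
# binary is reachable before running the Streamlit app.
import importlib

# Module names inferred from app.py's imports (an assumption, not a pinned requirements file).
for module in ("streamlit", "PIL", "pytesseract", "ultralytics", "transformers", "numpy"):
    importlib.import_module(module)  # raises ModuleNotFoundError if a package is missing

import pytesseract  # safe to import directly once the loop above has passed

# get_tesseract_version() raises TesseractNotFoundError if the tesseract binary is not on PATH.
print("Tesseract version:", pytesseract.get_tesseract_version())
print("Environment looks OK; start the app with: streamlit run app.py")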