Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,9 @@ from moviepy.editor import VideoFileClip
|
|
12 |
from sklearn.cluster import DBSCAN
|
13 |
from sklearn.decomposition import PCA
|
14 |
import plotly.graph_objs as go
|
|
|
|
|
|
|
15 |
|
16 |
# Load models
|
17 |
@st.cache_resource
|
@@ -53,48 +56,75 @@ def load_indexes(video_id, output_dir):
|
|
53 |
return text_index, image_index
|
54 |
|
55 |
text_index, image_index = load_indexes(video_id, output_dir)
|
56 |
-
|
57 |
-
#
|
58 |
-
def create_comprehensive_face_summary(face_index, face_metadata, eps=0.5, min_samples=3):
|
59 |
face_embeddings = face_index.reconstruct_n(0, face_index.ntotal)
|
60 |
|
|
|
|
|
|
|
|
|
61 |
clustering = DBSCAN(eps=eps, min_samples=min_samples, metric='cosine').fit(face_embeddings)
|
62 |
|
63 |
-
|
|
|
64 |
for i, label in enumerate(clustering.labels_):
|
65 |
-
|
66 |
-
|
67 |
-
|
|
|
|
|
|
|
|
|
68 |
|
69 |
-
|
70 |
-
for label, indices in face_clusters.items():
|
71 |
if label != -1: # Ignore noise points
|
72 |
-
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
"cluster_id": f"cluster_{label}",
|
75 |
-
"
|
76 |
-
"
|
77 |
-
"
|
78 |
-
|
79 |
-
|
80 |
-
],
|
81 |
-
"
|
82 |
-
"
|
83 |
-
"last_appearance": max(app['end'] for app in cluster_appearances)
|
84 |
}
|
85 |
-
|
|
|
|
|
|
|
|
|
86 |
|
87 |
-
return
|
88 |
|
89 |
-
#
|
90 |
-
|
91 |
|
92 |
# Face cluster visualization
|
93 |
-
|
|
|
94 |
pca = PCA(n_components=3)
|
95 |
embeddings_3d = pca.fit_transform(face_embeddings)
|
96 |
|
97 |
-
unique_labels = set(
|
98 |
colors = [f'rgb({int(r*255)},{int(g*255)},{int(b*255)})'
|
99 |
for r, g, b, _ in plt.cm.rainbow(np.linspace(0, 1, len(unique_labels)))]
|
100 |
|
@@ -102,20 +132,16 @@ def plot_face_clusters_interactive(face_embeddings, labels, face_summary):
|
|
102 |
for label, color in zip(unique_labels, colors):
|
103 |
if label == -1:
|
104 |
continue # Skip noise points
|
105 |
-
cluster_points = embeddings_3d[labels == label]
|
106 |
-
cluster_info = next((c for c in face_summary if c['cluster_id'] == f'cluster_{label}'), None)
|
107 |
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
else:
|
118 |
-
hover_text = [f"Cluster {label}" for _ in cluster_points]
|
119 |
|
120 |
trace = go.Scatter3d(
|
121 |
x=cluster_points[:, 0],
|
@@ -129,6 +155,20 @@ def plot_face_clusters_interactive(face_embeddings, labels, face_summary):
|
|
129 |
)
|
130 |
traces.append(trace)
|
131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
layout = go.Layout(
|
133 |
title='Face Clusters Visualization',
|
134 |
scene=dict(xaxis_title='PCA 1', yaxis_title='PCA 2', zaxis_title='PCA 3'),
|
@@ -190,22 +230,25 @@ st.sidebar.text_area("Full Transcript", transcript_text, height=300)
|
|
190 |
st.header("Video Summary")
|
191 |
|
192 |
# Face Clusters
|
193 |
-
st.subheader("Face Clusters")
|
194 |
-
for
|
195 |
-
st.write(f"Cluster {
|
196 |
-
st.write(f"
|
197 |
-
st.write(f" Total screen time: {
|
198 |
-
st.write(f" First appearance: {
|
199 |
-
st.write(f" Last appearance: {
|
200 |
-
st.write(f" Timeline: {len(
|
201 |
st.write(" First 5 appearances:")
|
202 |
-
for app in
|
203 |
st.write(f" {app['start']:.2f}s - {app['end']:.2f}s")
|
|
|
|
|
|
|
204 |
st.write("---")
|
205 |
|
206 |
# Face Cluster Visualization
|
207 |
st.subheader("Face Cluster Visualization")
|
208 |
-
fig = plot_face_clusters_interactive(face_embeddings, face_labels,
|
209 |
st.plotly_chart(fig)
|
210 |
|
211 |
# Themes
|
|
|
12 |
from sklearn.cluster import DBSCAN
|
13 |
from sklearn.decomposition import PCA
|
14 |
import plotly.graph_objs as go
|
15 |
+
from collections import defaultdict
|
16 |
+
|
17 |
+
|
18 |
|
19 |
# Load models
|
20 |
@st.cache_resource
|
|
|
56 |
return text_index, image_index
|
57 |
|
58 |
text_index, image_index = load_indexes(video_id, output_dir)
|
59 |
+
def create_comprehensive_face_summary(face_index, face_metadata, eps=0.5, min_samples=3, top_k=5):
    """Cluster face embeddings with DBSCAN and build a summary per cluster.

    Args:
        face_index: Index object exposing ``ntotal`` and
            ``reconstruct_n(start, count)``; presumably a FAISS index --
            TODO confirm against the code that loads it.
        face_metadata: Sequence of per-face dicts, indexed in the same order
            as the rows returned by ``face_index``. Each dict must provide
            ``'start'`` and ``'end'``; ``'size_ratio'``, ``'bbox'``,
            ``'embedding'`` and ``'thumbnail'`` are optional.
        eps: DBSCAN neighbourhood radius, measured in cosine distance.
        min_samples: DBSCAN ``min_samples`` parameter.
        top_k: Maximum number of the largest non-noise clusters to return
            as prominent faces.

    Returns:
        A 4-tuple ``(all_faces_summary, prominent_faces, face_embeddings,
        labels)``: the summary dicts of every non-noise cluster (largest
        first), the first ``top_k`` of those, the L2-normalised embeddings,
        and the DBSCAN label of each embedding (``-1`` marks noise).
    """
    # Extract face embeddings
    face_embeddings = face_index.reconstruct_n(0, face_index.ntotal)

    # Nothing to cluster: DBSCAN rejects an empty sample matrix, so return
    # empty results instead of crashing.
    if len(face_embeddings) == 0:
        return [], [], face_embeddings, np.empty(0, dtype=np.intp)

    # Normalize embeddings. Zero-length vectors are left untouched: dividing
    # them by their norm would produce NaN rows, which DBSCAN refuses.
    norms = np.linalg.norm(face_embeddings, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    face_embeddings = face_embeddings / norms

    # Perform DBSCAN clustering
    clustering = DBSCAN(eps=eps, min_samples=min_samples, metric='cosine').fit(face_embeddings)

    # Group faces by cluster
    face_clusters = defaultdict(list)
    for i, label in enumerate(clustering.labels_):
        face_clusters[label].append(face_metadata[i])

    # Sort clusters by size, largest first
    sorted_clusters = sorted(face_clusters.items(), key=lambda x: len(x[1]), reverse=True)

    all_faces_summary = []
    prominent_faces = []

    for i, (label, cluster) in enumerate(sorted_clusters):
        if label == -1:  # Ignore noise points
            continue

        # Collect all appearances, ordered by start time
        appearances = sorted(
            (
                {
                    'start': face['start'],
                    'end': face['end'],
                    # Use 1.0 as default if size_ratio is not present
                    'size_ratio': face.get('size_ratio', 1.0),
                }
                for face in cluster
            ),
            key=lambda app: app['start'],
        )

        # Select representative face (the largest face in the cluster)
        representative_face = max(cluster, key=lambda f: f.get('size_ratio', 1.0))

        face_summary = {
            # ``i`` is the rank among all groups including noise, kept so
            # that ids stay the same as before for existing consumers.
            "id": f"face_{i}",
            "cluster_id": f"cluster_{label}",
            "bbox": representative_face.get('bbox', []),
            "embedding": representative_face.get('embedding', []),
            "appearances": appearances,
            "total_appearances": len(appearances),
            "total_screen_time": sum(app['end'] - app['start'] for app in appearances),
            "first_appearance": appearances[0]['start'],
            # Appearances are sorted by start, so the last one does not
            # necessarily end last when intervals overlap; take the max.
            "last_appearance": max(app['end'] for app in appearances),
            "thumbnail": representative_face.get('thumbnail', ''),
        }

        all_faces_summary.append(face_summary)

        # Count only real clusters towards top_k; using the enumerate index
        # would let the noise group consume one of the slots.
        if len(prominent_faces) < top_k:
            prominent_faces.append(face_summary)

    return all_faces_summary, prominent_faces, face_embeddings, clustering.labels_
|
117 |
|
118 |
+
# Usage in the main Streamlit app:
|
119 |
+
all_faces_summary, prominent_faces, face_embeddings, face_labels = create_comprehensive_face_summary(face_index, face_metadata)
|
120 |
|
121 |
# Face cluster visualization
|
122 |
+
# Update the face cluster visualization function
|
123 |
+
def plot_face_clusters_interactive(face_embeddings, face_labels, all_faces_summary, prominent_faces):
|
124 |
pca = PCA(n_components=3)
|
125 |
embeddings_3d = pca.fit_transform(face_embeddings)
|
126 |
|
127 |
+
unique_labels = set(face_labels)
|
128 |
colors = [f'rgb({int(r*255)},{int(g*255)},{int(b*255)})'
|
129 |
for r, g, b, _ in plt.cm.rainbow(np.linspace(0, 1, len(unique_labels)))]
|
130 |
|
|
|
132 |
for label, color in zip(unique_labels, colors):
|
133 |
if label == -1:
|
134 |
continue # Skip noise points
|
|
|
|
|
135 |
|
136 |
+
cluster_points = embeddings_3d[face_labels == label]
|
137 |
+
cluster_faces = [face for face in all_faces_summary if face['cluster_id'] == f'cluster_{label}']
|
138 |
+
|
139 |
+
hover_text = [
|
140 |
+
f"Cluster {label}<br>"
|
141 |
+
f"Time: {face['appearances'][0]['start']:.2f}s - {face['appearances'][-1]['end']:.2f}s<br>"
|
142 |
+
f"Appearances: {face['total_appearances']}"
|
143 |
+
for face in cluster_faces
|
144 |
+
]
|
|
|
|
|
145 |
|
146 |
trace = go.Scatter3d(
|
147 |
x=cluster_points[:, 0],
|
|
|
155 |
)
|
156 |
traces.append(trace)
|
157 |
|
158 |
+
# Add markers for prominent faces
|
159 |
+
prominent_points = [embeddings_3d[face_labels == int(face['cluster_id'].split('_')[1])][0] for face in prominent_faces]
|
160 |
+
prominent_trace = go.Scatter3d(
|
161 |
+
x=[p[0] for p in prominent_points],
|
162 |
+
y=[p[1] for p in prominent_points],
|
163 |
+
z=[p[2] for p in prominent_points],
|
164 |
+
mode='markers',
|
165 |
+
name='Prominent Faces',
|
166 |
+
marker=dict(size=10, color='red', symbol='star'),
|
167 |
+
text=[f"Prominent Face<br>Cluster {face['cluster_id']}" for face in prominent_faces],
|
168 |
+
hoverinfo='text'
|
169 |
+
)
|
170 |
+
traces.append(prominent_trace)
|
171 |
+
|
172 |
layout = go.Layout(
|
173 |
title='Face Clusters Visualization',
|
174 |
scene=dict(xaxis_title='PCA 1', yaxis_title='PCA 2', zaxis_title='PCA 3'),
|
|
|
230 |
st.header("Video Summary")
|
231 |
|
232 |
# Face Clusters
|
233 |
+
st.subheader("Prominent Face Clusters")
|
234 |
+
for face in prominent_faces: # Use prominent_faces instead of face_summary
|
235 |
+
st.write(f"Face Cluster {face['cluster_id']}:")
|
236 |
+
st.write(f" Total appearances: {face['total_appearances']}")
|
237 |
+
st.write(f" Total screen time: {face['total_screen_time']:.2f} seconds")
|
238 |
+
st.write(f" First appearance: {face['first_appearance']:.2f} seconds")
|
239 |
+
st.write(f" Last appearance: {face['last_appearance']:.2f} seconds")
|
240 |
+
st.write(f" Timeline: {len(face['appearances'])} appearances")
|
241 |
st.write(" First 5 appearances:")
|
242 |
+
for app in face['appearances'][:5]:
|
243 |
st.write(f" {app['start']:.2f}s - {app['end']:.2f}s")
|
244 |
+
if face['thumbnail']:
|
245 |
+
image = Image.open(io.BytesIO(base64.b64decode(face['thumbnail'])))
|
246 |
+
st.image(image, caption=f"Representative face for {face['cluster_id']}", width=100)
|
247 |
st.write("---")
|
248 |
|
249 |
# Face Cluster Visualization
|
250 |
st.subheader("Face Cluster Visualization")
|
251 |
+
fig = plot_face_clusters_interactive(face_embeddings, face_labels, all_faces_summary, prominent_faces)
|
252 |
st.plotly_chart(fig)
|
253 |
|
254 |
# Themes
|