whitphx HF staff committed on
Commit
7d45ce2
·
1 Parent(s): faac748

Update app.py and requirements.txt

Browse files
Files changed (2) hide show
  1. app.py +216 -32
  2. requirements.txt +6 -4
app.py CHANGED
@@ -1,6 +1,8 @@
 
1
  import logging
2
  import logging.handlers
3
  import queue
 
4
  import urllib.request
5
  from pathlib import Path
6
  from typing import List, NamedTuple
@@ -12,13 +14,16 @@ except ImportError:
12
 
13
  import av
14
  import cv2
 
15
  import numpy as np
 
16
  import streamlit as st
17
  from aiortc.contrib.media import MediaPlayer
18
 
19
  from streamlit_webrtc import (
 
20
  ClientSettings,
21
- VideoTransformerBase,
22
  WebRtcMode,
23
  webrtc_streamer,
24
  )
@@ -87,18 +92,28 @@ def main():
87
  video_filters_page = (
88
  "Real time video transform with simple OpenCV filters (sendrecv)"
89
  )
 
 
90
  streaming_page = (
91
  "Consuming media files on server-side and streaming it to browser (recvonly)"
92
  )
93
- sendonly_page = "WebRTC is sendonly and images are shown via st.image() (sendonly)"
94
- loopback_page = "Simple video loopback (sendrecv)"
 
 
 
 
 
95
  app_mode = st.sidebar.selectbox(
96
  "Choose the app mode",
97
  [
98
  object_detection_page,
99
  video_filters_page,
 
 
100
  streaming_page,
101
- sendonly_page,
 
102
  loopback_page,
103
  ],
104
  )
@@ -108,13 +123,24 @@ def main():
108
  app_video_filters()
109
  elif app_mode == object_detection_page:
110
  app_object_detection()
 
 
 
 
111
  elif app_mode == streaming_page:
112
  app_streaming()
113
- elif app_mode == sendonly_page:
114
- app_sendonly()
 
 
115
  elif app_mode == loopback_page:
116
  app_loopback()
117
 
 
 
 
 
 
118
 
119
  def app_loopback():
120
  """ Simple video loopback """
@@ -122,20 +148,20 @@ def app_loopback():
122
  key="loopback",
123
  mode=WebRtcMode.SENDRECV,
124
  client_settings=WEBRTC_CLIENT_SETTINGS,
125
- video_transformer_factory=None, # NoOp
126
  )
127
 
128
 
129
  def app_video_filters():
130
  """ Video transforms with OpenCV """
131
 
132
- class OpenCVVideoTransformer(VideoTransformerBase):
133
  type: Literal["noop", "cartoon", "edges", "rotate"]
134
 
135
  def __init__(self) -> None:
136
  self.type = "noop"
137
 
138
- def transform(self, frame: av.VideoFrame) -> av.VideoFrame:
139
  img = frame.to_ndarray(format="bgr24")
140
 
141
  if self.type == "noop":
@@ -170,18 +196,18 @@ def app_video_filters():
170
  M = cv2.getRotationMatrix2D((cols / 2, rows / 2), frame.time * 45, 1)
171
  img = cv2.warpAffine(img, M, (cols, rows))
172
 
173
- return img
174
 
175
  webrtc_ctx = webrtc_streamer(
176
  key="opencv-filter",
177
  mode=WebRtcMode.SENDRECV,
178
  client_settings=WEBRTC_CLIENT_SETTINGS,
179
- video_transformer_factory=OpenCVVideoTransformer,
180
- async_transform=True,
181
  )
182
 
183
- if webrtc_ctx.video_transformer:
184
- webrtc_ctx.video_transformer.type = st.radio(
185
  "Select transform type", ("noop", "cartoon", "edges", "rotate")
186
  )
187
 
@@ -192,6 +218,82 @@ def app_video_filters():
192
  )
193
 
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  def app_object_detection():
196
  """Object detection demo with MobileNet SSD.
197
  This model and code are based on
@@ -236,7 +338,7 @@ def app_object_detection():
236
  name: str
237
  prob: float
238
 
239
- class MobileNetSSDVideoTransformer(VideoTransformerBase):
240
  confidence_threshold: float
241
  result_queue: "queue.Queue[List[Detection]]"
242
 
@@ -280,7 +382,7 @@ def app_object_detection():
280
  )
281
  return image, result
282
 
283
- def transform(self, frame: av.VideoFrame) -> np.ndarray:
284
  image = frame.to_ndarray(format="bgr24")
285
  blob = cv2.dnn.blobFromImage(
286
  cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
@@ -289,25 +391,25 @@ def app_object_detection():
289
  detections = self._net.forward()
290
  annotated_image, result = self._annotate_image(image, detections)
291
 
292
- # NOTE: This `transform` method is called in another thread,
293
  # so it must be thread-safe.
294
  self.result_queue.put(result)
295
 
296
- return annotated_image
297
 
298
  webrtc_ctx = webrtc_streamer(
299
  key="object-detection",
300
  mode=WebRtcMode.SENDRECV,
301
  client_settings=WEBRTC_CLIENT_SETTINGS,
302
- video_transformer_factory=MobileNetSSDVideoTransformer,
303
- async_transform=True,
304
  )
305
 
306
  confidence_threshold = st.slider(
307
  "Confidence threshold", 0.0, 1.0, DEFAULT_CONFIDENCE_THRESHOLD, 0.05
308
  )
309
- if webrtc_ctx.video_transformer:
310
- webrtc_ctx.video_transformer.confidence_threshold = confidence_threshold
311
 
312
  if st.checkbox("Show the detected labels", value=True):
313
  if webrtc_ctx.state.playing:
@@ -318,9 +420,9 @@ def app_object_detection():
318
  # Then the rendered video frames and the labels displayed here
319
  # are not strictly synchronized.
320
  while True:
321
- if webrtc_ctx.video_transformer:
322
  try:
323
- result = webrtc_ctx.video_transformer.result_queue.get(
324
  timeout=1.0
325
  )
326
  except queue.Empty:
@@ -393,7 +495,7 @@ def app_streaming():
393
  )
394
 
395
 
396
- def app_sendonly():
397
  """A sample to use WebRTC in sendonly mode to transfer frames
398
  from the browser to the server and to render frames via `st.image`."""
399
  webrtc_ctx = webrtc_streamer(
@@ -402,28 +504,110 @@ def app_sendonly():
402
  client_settings=WEBRTC_CLIENT_SETTINGS,
403
  )
404
 
 
 
405
  if webrtc_ctx.video_receiver:
406
- image_loc = st.empty()
407
  while True:
408
  try:
409
- frame = webrtc_ctx.video_receiver.get_frame(timeout=1)
410
  except queue.Empty:
411
- print("Queue is empty. Stop the loop.")
412
- webrtc_ctx.video_receiver.stop()
413
  break
414
 
415
- img_rgb = frame.to_ndarray(format="rgb24")
416
- image_loc.image(img_rgb)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
 
418
 
419
  if __name__ == "__main__":
 
 
 
 
420
  logging.basicConfig(
421
  format="[%(asctime)s] %(levelname)7s from %(name)s in %(pathname)s:%(lineno)d: "
422
  "%(message)s",
423
  force=True,
424
  )
425
 
426
- logger.setLevel(level=logging.DEBUG)
427
 
428
  st_webrtc_logger = logging.getLogger("streamlit_webrtc")
429
  st_webrtc_logger.setLevel(logging.DEBUG)
 
1
+ import asyncio
2
  import logging
3
  import logging.handlers
4
  import queue
5
+ import threading
6
  import urllib.request
7
  from pathlib import Path
8
  from typing import List, NamedTuple
 
14
 
15
  import av
16
  import cv2
17
+ import matplotlib.pyplot as plt
18
  import numpy as np
19
+ import pydub
20
  import streamlit as st
21
  from aiortc.contrib.media import MediaPlayer
22
 
23
  from streamlit_webrtc import (
24
+ AudioProcessorBase,
25
  ClientSettings,
26
+ VideoProcessorBase,
27
  WebRtcMode,
28
  webrtc_streamer,
29
  )
 
92
  video_filters_page = (
93
  "Real time video transform with simple OpenCV filters (sendrecv)"
94
  )
95
+ audio_filter_page = "Real time audio filter (sendrecv)"
96
+ delayed_echo_page = "Delayed echo (sendrecv)"
97
  streaming_page = (
98
  "Consuming media files on server-side and streaming it to browser (recvonly)"
99
  )
100
+ video_sendonly_page = (
101
+ "WebRTC is sendonly and images are shown via st.image() (sendonly)"
102
+ )
103
+ audio_sendonly_page = (
104
+ "WebRTC is sendonly and audio frames are visualized with matplotlib (sendonly)"
105
+ )
106
+ loopback_page = "Simple video and audio loopback (sendrecv)"
107
  app_mode = st.sidebar.selectbox(
108
  "Choose the app mode",
109
  [
110
  object_detection_page,
111
  video_filters_page,
112
+ audio_filter_page,
113
+ delayed_echo_page,
114
  streaming_page,
115
+ video_sendonly_page,
116
+ audio_sendonly_page,
117
  loopback_page,
118
  ],
119
  )
 
123
  app_video_filters()
124
  elif app_mode == object_detection_page:
125
  app_object_detection()
126
+ elif app_mode == audio_filter_page:
127
+ app_audio_filter()
128
+ elif app_mode == delayed_echo_page:
129
+ app_delayed_echo()
130
  elif app_mode == streaming_page:
131
  app_streaming()
132
+ elif app_mode == video_sendonly_page:
133
+ app_sendonly_video()
134
+ elif app_mode == audio_sendonly_page:
135
+ app_sendonly_audio()
136
  elif app_mode == loopback_page:
137
  app_loopback()
138
 
139
+ logger.debug("=== Alive threads ===")
140
+ for thread in threading.enumerate():
141
+ if thread.is_alive():
142
+ logger.debug(f" {thread.name} ({thread.ident})")
143
+
144
 
145
  def app_loopback():
146
  """ Simple video loopback """
 
148
  key="loopback",
149
  mode=WebRtcMode.SENDRECV,
150
  client_settings=WEBRTC_CLIENT_SETTINGS,
151
+ video_processor_factory=None, # NoOp
152
  )
153
 
154
 
155
  def app_video_filters():
156
  """ Video transforms with OpenCV """
157
 
158
+ class OpenCVVideoProcessor(VideoProcessorBase):
159
  type: Literal["noop", "cartoon", "edges", "rotate"]
160
 
161
  def __init__(self) -> None:
162
  self.type = "noop"
163
 
164
+ def recv(self, frame: av.VideoFrame) -> av.VideoFrame:
165
  img = frame.to_ndarray(format="bgr24")
166
 
167
  if self.type == "noop":
 
196
  M = cv2.getRotationMatrix2D((cols / 2, rows / 2), frame.time * 45, 1)
197
  img = cv2.warpAffine(img, M, (cols, rows))
198
 
199
+ return av.VideoFrame.from_ndarray(img, format="bgr24")
200
 
201
  webrtc_ctx = webrtc_streamer(
202
  key="opencv-filter",
203
  mode=WebRtcMode.SENDRECV,
204
  client_settings=WEBRTC_CLIENT_SETTINGS,
205
+ video_processor_factory=OpenCVVideoProcessor,
206
+ async_processing=True,
207
  )
208
 
209
+ if webrtc_ctx.video_processor:
210
+ webrtc_ctx.video_processor.type = st.radio(
211
  "Select transform type", ("noop", "cartoon", "edges", "rotate")
212
  )
213
 
 
218
  )
219
 
220
 
221
def app_audio_filter():
    """Demo page that filters the incoming audio in real time.

    Every received audio frame is wrapped in a ``pydub.AudioSegment``, passed
    through ``apply_gain`` with the value chosen on the "Gain" slider, and sent
    back to the browser as a new ``av.AudioFrame``.
    """
    DEFAULT_GAIN = 1.0

    class AudioProcessor(AudioProcessorBase):
        # Value handed to ``AudioSegment.apply_gain``; the slider at the bottom
        # of this page overwrites it while a processor instance exists.
        gain = DEFAULT_GAIN

        def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
            """Return a copy of ``frame`` with the current gain applied."""
            original = frame.to_ndarray()
            segment = pydub.AudioSegment(
                data=original.tobytes(),
                sample_width=frame.format.bytes,
                frame_rate=frame.sample_rate,
                channels=len(frame.layout.channels),
            ).apply_gain(self.gain)

            # Ref: https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentget_array_of_samples # noqa
            per_channel = [
                mono.get_array_of_samples() for mono in segment.split_to_mono()
            ]
            # Stack the channels, transpose, then restore the layout that
            # ``to_ndarray`` produced for the input frame.
            amplified: np.ndarray = np.array(per_channel).T
            amplified = amplified.reshape(original.shape)

            out_frame = av.AudioFrame.from_ndarray(
                amplified, layout=frame.layout.name
            )
            out_frame.sample_rate = frame.sample_rate
            return out_frame

    webrtc_ctx = webrtc_streamer(
        key="audio-filter",
        mode=WebRtcMode.SENDRECV,
        client_settings=WEBRTC_CLIENT_SETTINGS,
        audio_processor_factory=AudioProcessor,
        async_processing=True,
    )

    # The slider is only rendered while an audio processor exists.
    if webrtc_ctx.audio_processor:
        chosen_gain = st.slider("Gain", -10.0, +20.0, DEFAULT_GAIN, 0.05)
        webrtc_ctx.audio_processor.gain = chosen_gain
262
+
263
+
264
def app_delayed_echo():
    """Demo page that echoes the client's video and audio back after a delay.

    Both processors hand the queued frames back unchanged after sleeping for
    ``delay`` seconds; the value comes from the "Delay" slider and is applied
    to the video and the audio processor together.
    """
    DEFAULT_DELAY = 1.0

    class VideoProcessor(VideoProcessorBase):
        # Seconds to wait before returning the queued frames.
        delay = DEFAULT_DELAY

        async def recv_queued(
            self, frames: List[av.VideoFrame]
        ) -> List[av.VideoFrame]:
            """Return ``frames`` untouched after waiting ``self.delay`` seconds."""
            # The value must go through a %-placeholder: passing it as a bare
            # extra argument makes logging fail while formatting the record.
            logger.debug("Delay: %s", self.delay)
            await asyncio.sleep(self.delay)
            return frames

    class AudioProcessor(AudioProcessorBase):
        # Seconds to wait before returning the queued frames.
        delay = DEFAULT_DELAY

        async def recv_queued(
            self, frames: List[av.AudioFrame]
        ) -> List[av.AudioFrame]:
            """Return ``frames`` untouched after waiting ``self.delay`` seconds."""
            await asyncio.sleep(self.delay)
            return frames

    webrtc_ctx = webrtc_streamer(
        key="delay",
        mode=WebRtcMode.SENDRECV,
        client_settings=WEBRTC_CLIENT_SETTINGS,
        video_processor_factory=VideoProcessor,
        audio_processor_factory=AudioProcessor,
        async_processing=True,
    )

    # Show the slider only once both processors exist, so that video and
    # audio always receive the same delay.
    if webrtc_ctx.video_processor and webrtc_ctx.audio_processor:
        delay = st.slider("Delay", 0.0, 5.0, DEFAULT_DELAY, 0.05)
        webrtc_ctx.video_processor.delay = delay
        webrtc_ctx.audio_processor.delay = delay
295
+
296
+
297
  def app_object_detection():
298
  """Object detection demo with MobileNet SSD.
299
  This model and code are based on
 
338
  name: str
339
  prob: float
340
 
341
+ class MobileNetSSDVideoProcessor(VideoProcessorBase):
342
  confidence_threshold: float
343
  result_queue: "queue.Queue[List[Detection]]"
344
 
 
382
  )
383
  return image, result
384
 
385
+ def recv(self, frame: av.VideoFrame) -> av.VideoFrame:
386
  image = frame.to_ndarray(format="bgr24")
387
  blob = cv2.dnn.blobFromImage(
388
  cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
 
391
  detections = self._net.forward()
392
  annotated_image, result = self._annotate_image(image, detections)
393
 
394
+ # NOTE: This `recv` method is called in another thread,
395
  # so it must be thread-safe.
396
  self.result_queue.put(result)
397
 
398
+ return av.VideoFrame.from_ndarray(annotated_image, format="bgr24")
399
 
400
  webrtc_ctx = webrtc_streamer(
401
  key="object-detection",
402
  mode=WebRtcMode.SENDRECV,
403
  client_settings=WEBRTC_CLIENT_SETTINGS,
404
+ video_processor_factory=MobileNetSSDVideoProcessor,
405
+ async_processing=True,
406
  )
407
 
408
  confidence_threshold = st.slider(
409
  "Confidence threshold", 0.0, 1.0, DEFAULT_CONFIDENCE_THRESHOLD, 0.05
410
  )
411
+ if webrtc_ctx.video_processor:
412
+ webrtc_ctx.video_processor.confidence_threshold = confidence_threshold
413
 
414
  if st.checkbox("Show the detected labels", value=True):
415
  if webrtc_ctx.state.playing:
 
420
  # Then the rendered video frames and the labels displayed here
421
  # are not strictly synchronized.
422
  while True:
423
+ if webrtc_ctx.video_processor:
424
  try:
425
+ result = webrtc_ctx.video_processor.result_queue.get(
426
  timeout=1.0
427
  )
428
  except queue.Empty:
 
495
  )
496
 
497
 
498
+ def app_sendonly_video():
499
  """A sample to use WebRTC in sendonly mode to transfer frames
500
  from the browser to the server and to render frames via `st.image`."""
501
  webrtc_ctx = webrtc_streamer(
 
504
  client_settings=WEBRTC_CLIENT_SETTINGS,
505
  )
506
 
507
+ image_place = st.empty()
508
+
509
  if webrtc_ctx.video_receiver:
 
510
  while True:
511
  try:
512
+ video_frame = webrtc_ctx.video_receiver.get_frame(timeout=1)
513
  except queue.Empty:
514
+ logger.warning("Queue is empty. Abort.")
 
515
  break
516
 
517
+ img_rgb = video_frame.to_ndarray(format="rgb24")
518
+ image_place.image(img_rgb)
519
+
520
+
521
def app_sendonly_audio():
    """A sample to use WebRTC in sendonly mode to transfer audio frames
    from the browser to the server and visualize them with matplotlib
    and `st.pyplot`.

    The most recent 5 seconds of audio are kept in a sliding window and drawn
    as a waveform and as a magnitude spectrum. The loop ends when no audio
    receiver is available or when no frame arrives within one second.
    """
    webrtc_ctx = webrtc_streamer(
        # Use a key of its own: "loopback" is already the key of the
        # loopback page's streamer.
        key="sendonly-audio",
        mode=WebRtcMode.SENDONLY,
        audio_receiver_size=64,
        client_settings=WEBRTC_CLIENT_SETTINGS,
    )

    fig_place = st.empty()

    fig, [ax_time, ax_freq] = plt.subplots(
        2, 1, gridspec_kw={"top": 1.5, "bottom": 0.2}
    )

    sound_window_len = 5000  # 5s
    sound_window_buffer = None
    try:
        while True:
            if not webrtc_ctx.audio_receiver:
                logger.warning("AudioReciver is not set. Abort.")
                break

            try:
                audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
            except queue.Empty:
                logger.warning("Queue is empty. Abort.")
                break

            # Concatenate all frames received in this round into one segment.
            sound_chunk = pydub.AudioSegment.empty()
            for audio_frame in audio_frames:
                sound = pydub.AudioSegment(
                    data=audio_frame.to_ndarray().tobytes(),
                    sample_width=audio_frame.format.bytes,
                    frame_rate=audio_frame.sample_rate,
                    channels=len(audio_frame.layout.channels),
                )
                sound_chunk += sound

            if len(sound_chunk) > 0:
                if sound_window_buffer is None:
                    # Start from silence so the window has its full length
                    # from the first plot on.
                    sound_window_buffer = pydub.AudioSegment.silent(
                        duration=sound_window_len
                    )

                sound_window_buffer += sound_chunk
                if len(sound_window_buffer) > sound_window_len:
                    # Keep only the newest part of the window.
                    sound_window_buffer = sound_window_buffer[-sound_window_len:]

            if sound_window_buffer:
                # Ref: https://own-search-and-study.xyz/2017/10/27/python%E3%82%92%E4%BD%BF%E3%81%A3%E3%81%A6%E9%9F%B3%E5%A3%B0%E3%83%87%E3%83%BC%E3%82%BF%E3%81%8B%E3%82%89%E3%82%B9%E3%83%9A%E3%82%AF%E3%83%88%E3%83%AD%E3%82%B0%E3%83%A9%E3%83%A0%E3%82%92%E4%BD%9C/ # noqa
                sound_window_buffer = sound_window_buffer.set_channels(
                    1
                )  # Stereo to mono
                sample = np.array(sound_window_buffer.get_array_of_samples())
                # The plotted samples come from the window buffer, so both the
                # time axis and the frequency axis use its frame rate.
                frame_rate = sound_window_buffer.frame_rate

                ax_time.cla()
                times = (np.arange(-len(sample), 0)) / frame_rate
                ax_time.plot(times, sample)
                ax_time.set_xlabel("Time")
                ax_time.set_ylabel("Magnitude")

                spec = np.fft.fft(sample)
                freq = np.fft.fftfreq(sample.shape[0], 1.0 / frame_rate)
                # Keep the first half of the FFT output only.
                freq = freq[: int(freq.shape[0] / 2)]
                spec = spec[: int(spec.shape[0] / 2)]
                spec[0] = spec[0] / 2

                ax_freq.cla()
                ax_freq.plot(freq, np.abs(spec))
                ax_freq.set_xlabel("Frequency")
                ax_freq.set_yscale("log")
                ax_freq.set_ylabel("Magnitude")

                fig_place.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates until it is closed explicitly;
        # without this each run of the page would leave one more figure open.
        plt.close(fig)
597
 
598
 
599
  if __name__ == "__main__":
600
+ import os
601
+
602
+ DEBUG = os.environ.get("DEBUG", "false").lower() not in ["false", "no", "0"]
603
+
604
  logging.basicConfig(
605
  format="[%(asctime)s] %(levelname)7s from %(name)s in %(pathname)s:%(lineno)d: "
606
  "%(message)s",
607
  force=True,
608
  )
609
 
610
+ logger.setLevel(level=logging.DEBUG if DEBUG else logging.INFO)
611
 
612
  st_webrtc_logger = logging.getLogger("streamlit_webrtc")
613
  st_webrtc_logger.setLevel(logging.DEBUG)
requirements.txt CHANGED
@@ -1,7 +1,9 @@
1
- aiortc==1.2.0
2
  av==8.0.3
3
- numpy==1.20.2
 
4
  opencv_python==4.5.1.48
5
- streamlit==0.80.0
6
- streamlit_webrtc==0.11.0
 
7
  typing_extensions==3.7.4.3
 
1
+ aiortc==1.1.2
2
  av==8.0.3
3
+ matplotlib==3.4.2
4
+ numpy==1.19.5
5
  opencv_python==4.5.1.48
6
+ pydub==0.25.1
7
+ streamlit==0.75.0
8
+ streamlit_webrtc==0.20.0
9
  typing_extensions==3.7.4.3