"""A sample to use WebRTC in sendonly mode to transfer audio frames
from the browser to the server and visualize them with matplotlib
and `st.pyplot`."""

import logging
import queue

import matplotlib.pyplot as plt
import numpy as np
import pydub
import streamlit as st

from streamlit_webrtc import WebRtcMode, webrtc_streamer

from sample_utils.turn import get_ice_servers

logger = logging.getLogger(__name__)
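
# Open a sendonly WebRTC connection: the browser captures microphone audio and
# streams it to the server, which only receives. Incoming frames are queued on
# webrtc_ctx.audio_receiver (queue size 256); the STUN/TURN (ICE) servers come
# from sample_utils.turn.get_ice_servers().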
webrtc_ctx = webrtc_streamer(
key="sendonly-audio",
mode=WebRtcMode.SENDONLY,
audio_receiver_size=256,
rtc_configuration={"iceServers": get_ice_servers()},
media_stream_constraints={"audio": True},
)
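
# A placeholder that is updated with the latest plot inside the loop, and a
# figure with two axes: the time-domain waveform on top, the spectrum below.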
fig_place = st.empty()
fig, [ax_time, ax_freq] = plt.subplots(2, 1, gridspec_kw={"top": 1.5, "bottom": 0.2})

sound_window_len = 5000  # Length of the rolling window in milliseconds (5 s)
sound_window_buffer = None
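
# Poll the receiver in a loop, keep the most recent 5 seconds of audio in a
# rolling pydub buffer, and redraw the waveform and spectrum on each iteration.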
while True:
    if webrtc_ctx.audio_receiver:
        try:
            audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
        except queue.Empty:
            logger.warning("Queue is empty. Abort.")
            break
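
        # Convert the received av.AudioFrame objects to pydub segments and
        # concatenate them into a single chunk.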
        sound_chunk = pydub.AudioSegment.empty()
        for audio_frame in audio_frames:
            sound = pydub.AudioSegment(
                data=audio_frame.to_ndarray().tobytes(),
                sample_width=audio_frame.format.bytes,
                frame_rate=audio_frame.sample_rate,
                channels=len(audio_frame.layout.channels),
            )
            sound_chunk += sound
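
        # Append the new chunk to the rolling window and trim it to the last 5 s.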
        if len(sound_chunk) > 0:
            if sound_window_buffer is None:
                sound_window_buffer = pydub.AudioSegment.silent(
                    duration=sound_window_len
                )

            sound_window_buffer += sound_chunk
            if len(sound_window_buffer) > sound_window_len:
                sound_window_buffer = sound_window_buffer[-sound_window_len:]
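
        # Plot the current window: time-domain waveform on top, magnitude
        # spectrum below.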
        if sound_window_buffer:
            # Ref: https://own-search-and-study.xyz/2017/10/27/python%E3%82%92%E4%BD%BF%E3%81%A3%E3%81%A6%E9%9F%B3%E5%A3%B0%E3%83%87%E3%83%BC%E3%82%BF%E3%81%8B%E3%82%89%E3%82%B9%E3%83%9A%E3%82%AF%E3%83%88%E3%83%AD%E3%82%B0%E3%83%A9%E3%83%A0%E3%82%92%E4%BD%9C/  # noqa
            sound_window_buffer = sound_window_buffer.set_channels(1)  # Stereo to mono
            sample = np.array(sound_window_buffer.get_array_of_samples())

            ax_time.cla()
            times = (np.arange(-len(sample), 0)) / sound_window_buffer.frame_rate
            ax_time.plot(times, sample)
            ax_time.set_xlabel("Time")
            ax_time.set_ylabel("Magnitude")
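
            # One-sided spectrum: keep only the positive frequencies and halve
            # the DC component.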
            spec = np.fft.fft(sample)
            freq = np.fft.fftfreq(sample.shape[0], 1.0 / sound_chunk.frame_rate)
            freq = freq[: int(freq.shape[0] / 2)]
            spec = spec[: int(spec.shape[0] / 2)]
            spec[0] = spec[0] / 2

            ax_freq.cla()
            ax_freq.plot(freq, np.abs(spec))
            ax_freq.set_xlabel("Frequency")
            ax_freq.set_yscale("log")
            ax_freq.set_ylabel("Magnitude")

            fig_place.pyplot(fig)
    else:
        logger.warning("AudioReceiver is not set. Abort.")
        break