// translator/streaming-react-app/src/StreamingInterface.tsx
import {useCallback, useEffect, useLayoutEffect, useRef, useState} from 'react';
import Button from '@mui/material/Button';
import Typography from '@mui/material/Typography';
import InputLabel from '@mui/material/InputLabel';
import FormControl from '@mui/material/FormControl';
import Select, {SelectChangeEvent} from '@mui/material/Select';
import MenuItem from '@mui/material/MenuItem';
import Stack from '@mui/material/Stack';
import seamlessLogoUrl from './assets/DSC_4281.svg';
import {
AgentCapabilities,
BaseResponse,
BrowserAudioStreamConfig,
DynamicConfig,
PartialDynamicConfig,
SUPPORTED_INPUT_SOURCES,
SUPPORTED_OUTPUT_MODES,
ServerExceptionData,
ServerSpeechData,
ServerState,
ServerTextData,
StartStreamEventConfig,
StreamingStatus,
SupportedInputSource,
SupportedOutputMode,
TranslationSentences,
} from './types/StreamingTypes';
import FormLabel from '@mui/material/FormLabel';
import RadioGroup from '@mui/material/RadioGroup';
import FormControlLabel from '@mui/material/FormControlLabel';
import Radio from '@mui/material/Radio';
import './StreamingInterface.css';
import RoomConfig from './RoomConfig';
import Divider from '@mui/material/Divider';
import {useSocket} from './useSocket';
import {RoomState} from './types/RoomState';
import useStable from './useStable';
import float32To16BitPCM from './float32To16BitPCM';
import createBufferedSpeechPlayer from './createBufferedSpeechPlayer';
import Checkbox from '@mui/material/Checkbox';
import Alert from '@mui/material/Alert';
import isScrolledToDocumentBottom from './isScrolledToDocumentBottom';
import Box from '@mui/material/Box';
import Slider from '@mui/material/Slider';
import VolumeDown from '@mui/icons-material/VolumeDown';
import VolumeUp from '@mui/icons-material/VolumeUp';
import Mic from '@mui/icons-material/Mic';
import MicOff from '@mui/icons-material/MicOff';
import XRDialog from './react-xr/XRDialog';
import getTranslationSentencesFromReceivedData from './getTranslationSentencesFromReceivedData';
import {
sliceTranslationSentencesUpToIndex,
getTotalSentencesLength,
} from './sliceTranslationSentencesUtils';
import Blink from './Blink';
import {CURSOR_BLINK_INTERVAL_MS} from './cursorBlinkInterval';
import {getURLParams} from './URLParams';
import debug from './debug';
import DebugSection from './DebugSection';
import Switch from '@mui/material/Switch';
import Grid from '@mui/material/Grid';
import {getLanguageFromThreeLetterCode} from './languageLookup';
import HeadphonesIcon from '@mui/icons-material/Headphones';
const AUDIO_STREAM_DEFAULTS = {
userMedia: {
echoCancellation: false,
noiseSuppression: true,
},
displayMedia: {
echoCancellation: false,
noiseSuppression: false,
},
} as const;
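// Echo cancellation is disabled by default because it can distort the input
// audio (see the warning rendered in the UI below); noise suppression is
// enabled by default only for microphone (userMedia) input.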
async function requestUserMediaAudioStream(
config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['userMedia'],
) {
const stream = await navigator.mediaDevices.getUserMedia({
audio: {...config, channelCount: 1},
});
console.debug(
'[requestUserMediaAudioStream] stream created with settings:',
stream.getAudioTracks()?.[0]?.getSettings(),
);
return stream;
}
async function requestDisplayMediaAudioStream(
config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['displayMedia'],
) {
const stream = await navigator.mediaDevices.getDisplayMedia({
audio: {...config, channelCount: 1},
});
console.debug(
'[requestDisplayMediaAudioStream] stream created with settings:',
stream.getAudioTracks()?.[0]?.getSettings(),
);
return stream;
}
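// Note: audio capture via getDisplayMedia is not available in every browser;
// at the time of writing it generally requires a Chromium-based browser and
// sharing a tab or window with audio, so this path can fail even when the
// microphone path works.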
const buttonLabelMap: {[key in StreamingStatus]: string} = {
stopped: 'Start Streaming',
running: 'Stop Streaming',
starting: 'Starting...',
};
const BUFFER_LIMIT = 1;
const SCROLLED_TO_BOTTOM_THRESHOLD_PX = 36;
const GAIN_MULTIPLIER_OVER_1 = 3;
const getGainScaledValue = (value: number) =>
  value > 1 ? (value - 1) * GAIN_MULTIPLIER_OVER_1 + 1 : value;
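// Worked example: slider values in [0, 1] pass through unchanged; values above
// 1 are stretched by GAIN_MULTIPLIER_OVER_1, so slider 2 -> (2 - 1) * 3 + 1 = 4
// (400%) and slider 3 -> 7 (700%), matching the slider marks rendered below.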
const TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD = 2;
const MAX_SERVER_EXCEPTIONS_TRACKED = 500;
export const TYPING_ANIMATION_DELAY_MS = 6;
export default function StreamingInterface() {
const urlParams = getURLParams();
const debugParam = urlParams.debug;
const [animateTextDisplay, setAnimateTextDisplay] = useState<boolean>(
urlParams.animateTextDisplay,
);
const socketObject = useSocket();
const {socket, clientID} = socketObject;
const [serverState, setServerState] = useState<ServerState | null>(null);
const [agent, setAgent] = useState<AgentCapabilities | null>(null);
const model = agent?.name ?? null;
const agentsCapabilities: Array<AgentCapabilities> =
serverState?.agentsCapabilities ?? [];
const currentAgent: AgentCapabilities | null =
agentsCapabilities.find((agent) => agent.name === model) ?? null;
const [serverExceptions, setServerExceptions] = useState<
Array<ServerExceptionData>
>([]);
const [roomState, setRoomState] = useState<RoomState | null>(null);
const roomID = roomState?.room_id ?? null;
const isSpeaker =
(clientID != null && roomState?.speakers.includes(clientID)) ?? false;
const isListener =
(clientID != null && roomState?.listeners.includes(clientID)) ?? false;
const [streamingStatus, setStreamingStatus] =
useState<StreamingStatus>('stopped');
const isStreamConfiguredRef = useRef<boolean>(false);
const [hasMaxSpeakers, setHasMaxSpeakers] = useState<boolean>(false);
const [outputMode, setOutputMode] = useState<SupportedOutputMode>('s2s&t');
const [inputSource, setInputSource] =
useState<SupportedInputSource>('userMedia');
const [enableNoiseSuppression, setEnableNoiseSuppression] = useState<
boolean | null
>(null);
const [enableEchoCancellation, setEnableEchoCancellation] = useState<
boolean | null
>(null);
// Dynamic Params:
const [targetLang, setTargetLang] = useState<string | null>(null);
const [enableExpressive, setEnableExpressive] = useState<boolean | null>(
null,
);
const [serverDebugFlag, setServerDebugFlag] = useState<boolean>(
debugParam ?? false,
);
const [receivedData, setReceivedData] = useState<Array<ServerTextData>>([]);
const [
translationSentencesAnimatedIndex,
setTranslationSentencesAnimatedIndex,
] = useState<number>(0);
const lastTranslationResultRef = useRef<HTMLDivElement | null>(null);
const [inputStream, setInputStream] = useState<MediaStream | null>(null);
const [inputStreamSource, setInputStreamSource] =
useState<MediaStreamAudioSourceNode | null>(null);
const audioContext = useStable<AudioContext>(() => new AudioContext());
const [scriptNodeProcessor, setScriptNodeProcessor] =
useState<ScriptProcessorNode | null>(null);
const [muted, setMuted] = useState<boolean>(false);
// The onaudioprocess script needs an up-to-date reference to the muted state, so
// we use a ref here and keep it in sync via useEffect
const mutedRef = useRef<boolean>(muted);
useEffect(() => {
mutedRef.current = muted;
}, [muted]);
const [gain, setGain] = useState<number>(1);
const isScrolledToBottomRef = useRef<boolean>(isScrolledToDocumentBottom());
  // Some config options must be set when starting streaming and cannot be changed dynamically.
// This controls whether they are disabled or not
const streamFixedConfigOptionsDisabled =
streamingStatus !== 'stopped' || roomID == null;
const bufferedSpeechPlayer = useStable(() => {
const player = createBufferedSpeechPlayer({
onStarted: () => {
console.debug('📢 PLAYBACK STARTED 📢');
},
onEnded: () => {
console.debug('🛑 PLAYBACK ENDED 🛑');
},
});
// Start the player now so it eagerly plays audio when it arrives
player.start();
return player;
});
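  // (useStable appears to create the value lazily on first render and return
  // the same instance on every re-render, much like useRef with a lazy
  // initializer, so the player is constructed exactly once.)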
const translationSentencesBase: TranslationSentences =
getTranslationSentencesFromReceivedData(receivedData);
const translationSentencesBaseTotalLength = getTotalSentencesLength(
translationSentencesBase,
);
const translationSentences: TranslationSentences = animateTextDisplay
? sliceTranslationSentencesUpToIndex(
translationSentencesBase,
translationSentencesAnimatedIndex,
)
: translationSentencesBase;
  // We want the blinking cursor to show before any text has arrived, so add an
  // empty string placeholder for the cursor to render against
const translationSentencesWithEmptyStartingString =
streamingStatus === 'running' && translationSentences.length === 0
? ['']
: translationSentences;
/******************************************
* Event Handlers
******************************************/
const setAgentAndUpdateParams = useCallback(
(newAgent: AgentCapabilities | null) => {
setAgent((prevAgent) => {
if (prevAgent?.name !== newAgent?.name) {
setTargetLang(newAgent?.targetLangs[0] ?? null);
setEnableExpressive(null);
}
return newAgent;
});
},
[],
);
const onSetDynamicConfig = useCallback(
async (partialConfig: PartialDynamicConfig) => {
return new Promise<void>((resolve, reject) => {
if (socket == null) {
reject(new Error('[onSetDynamicConfig] socket is null '));
return;
}
socket.emit(
'set_dynamic_config',
partialConfig,
(result: BaseResponse) => {
console.log('[emit result: set_dynamic_config]', result);
if (result.status === 'ok') {
resolve();
} else {
            reject(
              new Error(
                `[onSetDynamicConfig] set_dynamic_config returned status: ${result.status}`,
              ),
            );
}
},
);
});
},
[socket],
);
const configureStreamAsync = ({sampleRate}: {sampleRate: number}) => {
return new Promise<void>((resolve, reject) => {
if (socket == null) {
reject(new Error('[configureStreamAsync] socket is null '));
return;
}
const modelName = agent?.name ?? null;
if (modelName == null) {
reject(new Error('[configureStreamAsync] modelName is null '));
return;
}
const config: StartStreamEventConfig = {
event: 'config',
rate: sampleRate,
model_name: modelName,
debug: serverDebugFlag,
// synchronous processing isn't implemented on the v2 pubsub server, so hardcode this to true
async_processing: true,
buffer_limit: BUFFER_LIMIT,
model_type: outputMode,
};
console.log('[configureStreamAsync] sending config', config);
      socket.emit('configure_stream', config, (statusObject: {status: string; message?: string}) => {
        setHasMaxSpeakers(statusObject.message === 'max_speakers');
if (statusObject.status === 'ok') {
isStreamConfiguredRef.current = true;
console.debug(
'[configureStreamAsync] stream configured!',
statusObject,
);
resolve();
} else {
isStreamConfiguredRef.current = false;
reject(
new Error(
`[configureStreamAsync] configure_stream returned status: ${statusObject.status}`,
),
);
return;
}
});
});
};
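  // Both helpers above wrap Socket.IO acknowledgement callbacks in Promises:
  // the server's ack payload resolves or rejects them, so the config
  // round-trips can simply be awaited in startStreaming below.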
const startStreaming = async () => {
if (streamingStatus !== 'stopped') {
console.warn(
`Attempting to start stream when status is ${streamingStatus}`,
);
return;
}
setStreamingStatus('starting');
if (audioContext.state === 'suspended') {
console.warn('audioContext was suspended! resuming...');
await audioContext.resume();
}
let stream: MediaStream | null = null;
try {
if (inputSource === 'userMedia') {
stream = await requestUserMediaAudioStream({
noiseSuppression:
enableNoiseSuppression ??
AUDIO_STREAM_DEFAULTS['userMedia'].noiseSuppression,
echoCancellation:
enableEchoCancellation ??
AUDIO_STREAM_DEFAULTS['userMedia'].echoCancellation,
});
} else if (inputSource === 'displayMedia') {
stream = await requestDisplayMediaAudioStream({
noiseSuppression:
enableNoiseSuppression ??
AUDIO_STREAM_DEFAULTS['displayMedia'].noiseSuppression,
echoCancellation:
enableEchoCancellation ??
AUDIO_STREAM_DEFAULTS['displayMedia'].echoCancellation,
});
} else {
throw new Error(`Unsupported input source requested: ${inputSource}`);
}
setInputStream(stream);
} catch (e) {
console.error('[startStreaming] media stream request failed:', e);
setStreamingStatus('stopped');
return;
}
const mediaStreamSource = audioContext.createMediaStreamSource(stream);
setInputStreamSource(mediaStreamSource);
/**
* NOTE: This currently uses a deprecated way of processing the audio (createScriptProcessor), but
* which is easy and convenient for our purposes.
*
* Documentation for the deprecated way of doing it is here: https://developer.mozilla.org/en-US/docs/Web/API/BaseAudioContext/createScriptProcessor
*
* In an ideal world this would be migrated to something like this SO answer: https://stackoverflow.com/a/65448287
*/
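    // A minimal sketch of that migration (hypothetical: assumes an
    // AudioWorkletProcessor registered as 'pcm-forwarder' in a separate module
    // that posts each mono Float32Array block back to the main thread):
    //
    //   await audioContext.audioWorklet.addModule('pcmForwarderWorklet.js');
    //   const workletNode = new AudioWorkletNode(audioContext, 'pcm-forwarder');
    //   workletNode.port.onmessage = (e) =>
    //     socket?.emit('incoming_audio', float32To16BitPCM(e.data));
    //   mediaStreamSource.connect(workletNode);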
const scriptProcessor = audioContext.createScriptProcessor(16384, 1, 1);
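    // 16384 samples per onaudioprocess callback, mono in and out; at a typical
    // 48 kHz AudioContext that is roughly 340 ms of audio per chunk.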
setScriptNodeProcessor(scriptProcessor);
scriptProcessor.onaudioprocess = (event) => {
if (isStreamConfiguredRef.current === false) {
console.debug('[onaudioprocess] stream is not configured yet!');
return;
}
if (socket == null) {
console.warn('[onaudioprocess] socket is null in onaudioprocess');
return;
}
if (mutedRef.current) {
        // We still want to send audio to the server while muted so that we get
        // any remaining audio back from the server, so send a single
        // zero-valued sample (Int16Array entries are zero-initialized)
const mostlyEmptyInt16Array = new Int16Array(1);
socket.emit('incoming_audio', mostlyEmptyInt16Array);
} else {
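        // getChannelData returns Float32 samples in [-1, 1]; convert them to
        // signed 16-bit PCM to match the Int16Array format used in the muted
        // branch above.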
const float32Audio = event.inputBuffer.getChannelData(0);
const pcm16Audio = float32To16BitPCM(float32Audio);
socket.emit('incoming_audio', pcm16Audio);
}
debug()?.sentAudio(event);
};
mediaStreamSource.connect(scriptProcessor);
scriptProcessor.connect(audioContext.destination);
bufferedSpeechPlayer.start();
try {
if (targetLang == null) {
throw new Error('[startStreaming] targetLang cannot be nullish');
}
// When we are starting the stream we want to pass all the dynamic config values
// available before actually configuring and starting the stream
const fullDynamicConfig: DynamicConfig = {
targetLanguage: targetLang,
expressive: enableExpressive,
};
await onSetDynamicConfig(fullDynamicConfig);
// NOTE: this needs to be the *audioContext* sample rate, not the sample rate of the input stream. Not entirely sure why.
await configureStreamAsync({
sampleRate: audioContext.sampleRate,
});
} catch (e) {
console.error('configureStreamAsync failed', e);
setStreamingStatus('stopped');
return;
}
setStreamingStatus('running');
};
const stopStreaming = useCallback(async () => {
if (streamingStatus === 'stopped') {
console.warn(
`Attempting to stop stream when status is ${streamingStatus}`,
);
return;
}
// Stop the speech playback right away
bufferedSpeechPlayer.stop();
if (inputStreamSource == null || scriptNodeProcessor == null) {
console.error(
'inputStreamSource || scriptNodeProcessor is null in stopStreaming',
);
} else {
inputStreamSource.disconnect(scriptNodeProcessor);
scriptNodeProcessor.disconnect(audioContext.destination);
// Release the mic input so we stop showing the red recording icon in the browser
inputStream?.getTracks().forEach((track) => track.stop());
}
if (socket == null) {
console.warn('Unable to emit stop_stream because socket is null');
} else {
socket.emit('stop_stream', (result) => {
console.debug('[emit result: stop_stream]', result);
});
}
setStreamingStatus('stopped');
}, [
audioContext.destination,
bufferedSpeechPlayer,
inputStream,
inputStreamSource,
scriptNodeProcessor,
socket,
streamingStatus,
]);
const onClearTranscriptForAll = useCallback(() => {
if (socket != null) {
socket.emit('clear_transcript_for_all');
}
}, [socket]);
/******************************************
* Effects
******************************************/
useEffect(() => {
if (socket == null) {
return;
}
const onRoomStateUpdate = (roomState: RoomState) => {
setRoomState(roomState);
};
socket.on('room_state_update', onRoomStateUpdate);
return () => {
socket.off('room_state_update', onRoomStateUpdate);
};
}, [socket]);
useEffect(() => {
if (socket != null) {
const onTranslationText = (data: ServerTextData) => {
setReceivedData((prev) => [...prev, data]);
debug()?.receivedText(data.payload);
};
const onTranslationSpeech = (data: ServerSpeechData) => {
bufferedSpeechPlayer.addAudioToBuffer(data.payload, data.sample_rate);
};
socket.on('translation_text', onTranslationText);
socket.on('translation_speech', onTranslationSpeech);
return () => {
socket.off('translation_text', onTranslationText);
socket.off('translation_speech', onTranslationSpeech);
};
}
}, [bufferedSpeechPlayer, socket]);
useEffect(() => {
if (socket != null) {
const onServerStateUpdate = (newServerState: ServerState) => {
setServerState(newServerState);
// If a client creates a server lock, we want to stop streaming if we're not them
if (
newServerState.serverLock?.isActive === true &&
newServerState.serverLock?.clientID !== clientID &&
streamingStatus === 'running'
) {
stopStreaming();
}
const firstAgentNullable = newServerState.agentsCapabilities[0];
if (agent == null && firstAgentNullable != null) {
setAgentAndUpdateParams(firstAgentNullable);
}
};
socket.on('server_state_update', onServerStateUpdate);
return () => {
socket.off('server_state_update', onServerStateUpdate);
};
}
}, [
agent,
clientID,
setAgentAndUpdateParams,
socket,
stopStreaming,
streamingStatus,
]);
useEffect(() => {
if (socket != null) {
const onServerException = (
exceptionDataWithoutClientTime: ServerExceptionData,
) => {
const exceptionData = {
...exceptionDataWithoutClientTime,
timeStringClient: new Date(
exceptionDataWithoutClientTime['timeEpochMs'],
).toLocaleString(),
};
setServerExceptions((prev) =>
[exceptionData, ...prev].slice(0, MAX_SERVER_EXCEPTIONS_TRACKED),
);
console.error(
`[server_exception] The server encountered an exception: ${exceptionData['message']}`,
exceptionData,
);
};
socket.on('server_exception', onServerException);
return () => {
socket.off('server_exception', onServerException);
};
}
}, [socket]);
useEffect(() => {
if (socket != null) {
const onClearTranscript = () => {
setReceivedData([]);
setTranslationSentencesAnimatedIndex(0);
};
socket.on('clear_transcript', onClearTranscript);
return () => {
socket.off('clear_transcript', onClearTranscript);
};
}
}, [socket]);
useEffect(() => {
const onScroll = () => {
if (isScrolledToDocumentBottom(SCROLLED_TO_BOTTOM_THRESHOLD_PX)) {
isScrolledToBottomRef.current = true;
return;
}
isScrolledToBottomRef.current = false;
return;
};
document.addEventListener('scroll', onScroll);
return () => {
document.removeEventListener('scroll', onScroll);
};
}, []);
useLayoutEffect(() => {
if (
lastTranslationResultRef.current != null &&
isScrolledToBottomRef.current
) {
// Scroll the div to the most recent entry
lastTranslationResultRef.current.scrollIntoView();
}
// Run the effect every time data is received, so that
// we scroll to the bottom even if we're just adding text to
// a pre-existing chunk
}, [receivedData]);
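  // Typing animation: while there are unrevealed sentence chunks, reveal one
  // more every TYPING_ANIMATION_DELAY_MS. Each state update re-runs this
  // effect, scheduling the next timeout until the display catches up.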
useEffect(() => {
if (!animateTextDisplay) {
return;
}
if (
translationSentencesAnimatedIndex < translationSentencesBaseTotalLength
) {
const timeout = setTimeout(() => {
setTranslationSentencesAnimatedIndex((prev) => prev + 1);
debug()?.startRenderText();
}, TYPING_ANIMATION_DELAY_MS);
return () => clearTimeout(timeout);
} else {
debug()?.endRenderText();
}
}, [
animateTextDisplay,
translationSentencesAnimatedIndex,
translationSentencesBaseTotalLength,
]);
/******************************************
* Sub-components
******************************************/
const volumeSliderNode = (
<Stack
spacing={2}
direction="row"
sx={{mb: 1, width: '100%'}}
alignItems="center">
<VolumeDown color="primary" />
<Slider
aria-label="Volume"
defaultValue={1}
scale={getGainScaledValue}
min={0}
max={3}
step={0.1}
marks={[
{value: 0, label: '0%'},
{value: 1, label: '100%'},
{value: 2, label: '400%'},
{value: 3, label: '700%'},
]}
valueLabelFormat={(value) => `${(value * 100).toFixed(0)}%`}
valueLabelDisplay="auto"
value={gain}
onChange={(_event: Event, newValue: number | number[]) => {
if (typeof newValue === 'number') {
const scaledGain = getGainScaledValue(newValue);
// We want the actual gain node to use the scaled value
bufferedSpeechPlayer.setGain(scaledGain);
// But we want react state to keep track of the non-scaled value
setGain(newValue);
} else {
console.error(
`[volume slider] Unexpected non-number value: ${newValue}`,
);
}
}}
/>
<VolumeUp color="primary" />
</Stack>
);
const xrDialogComponent = (
<XRDialog
animateTextDisplay={
animateTextDisplay &&
        translationSentencesAnimatedIndex === translationSentencesBaseTotalLength
}
bufferedSpeechPlayer={bufferedSpeechPlayer}
translationSentences={translationSentences}
roomState={roomState}
roomID={roomID}
startStreaming={startStreaming}
stopStreaming={stopStreaming}
debugParam={debugParam}
onARHidden={() => {
setAnimateTextDisplay(urlParams.animateTextDisplay);
}}
onARVisible={() => setAnimateTextDisplay(false)}
/>
);
return (
<div className="app-wrapper-sra">
<Box
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore Not sure why it's complaining about complexity here
sx={{width: '100%', maxWidth: '660px', minWidth: '320px'}}>
<div className="main-container-sra">
<div className="top-section-sra horizontal-padding-sra">
<div className="header-container-sra">
<img
src={seamlessLogoUrl}
className="header-icon-sra"
alt="Seamless Translation Logo"
height={150}
width={225}
/>
<div>
<Typography variant="h1" sx={{color: '#800020'}}>
Pietro's translator
</Typography>
<Typography variant="body2" sx={{color: '#800020'}}>
<span style={{ fontStyle: 'italic' }}>
Making communication easier
</span>
</Typography>
</div>
</div>
<div className="header-container-sra">
<div>
<Typography variant="body2" sx={{color: '#65676B'}}>
                    Hey <strong>Pietro</strong>, <strong>it's good to see you!</strong>
<br/>
                    You can use this platform to translate from/to Italian and many other languages.
<br/>
Use headphones if you are both speaker and listener to prevent feedback.
<br/>
<br/>
<a target="_blank" rel="noopener noreferrer" href="https://ai.meta.com/research/seamless-communication/">SeamlessStreaming</a> is
a research model and streaming quality works best if you pause
every couple of sentences. The real-time performance will degrade
if you try streaming multiple speakers at the same time.
<br/>
<br/>
Let's try!
</Typography>
</div>
</div>
<Stack spacing="22px" direction="column">
<Box>
                <RoomConfig
                  roomState={roomState}
                  serverState={serverState}
                  streamingStatus={streamingStatus}
                  onJoinRoomOrUpdateRoles={() => {
                    // If the user has switched from speaker to listener we need to tell the
                    // player to play eagerly, since currently the listener doesn't have any stop/start controls
                    bufferedSpeechPlayer.start();
                  }}
                />
{isListener && !isSpeaker && (
<Box
sx={{
paddingX: 6,
paddingBottom: 2,
marginY: 2,
display: 'flex',
flexDirection: 'column',
alignItems: 'center',
}}>
{volumeSliderNode}
</Box>
)}
</Box>
{isSpeaker && (
<>
<Divider />
<Stack spacing="12px" direction="column">
{/* <FormLabel id="output-modes-radio-group-label">
Model
</FormLabel> */}
<FormControl
disabled={
streamFixedConfigOptionsDisabled ||
agentsCapabilities.length === 0
}
fullWidth
sx={{minWidth: '14em'}}>
{/* <InputLabel id="model-selector-input-label">
Model
</InputLabel> */}
{/* <Select
labelId="model-selector-input-label"
label="Model"
onChange={(e: SelectChangeEvent) => {
const newAgent =
agentsCapabilities.find(
(agent) => e.target.value === agent.name,
) ?? null;
if (newAgent == null) {
console.error(
'Unable to find agent with name',
e.target.value,
);
}
setAgentAndUpdateParams(newAgent);
}}
value={model ?? ''}>
{agentsCapabilities.map((agent) => (
<MenuItem value={agent.name} key={agent.name}>
{agent.name}
</MenuItem>
))}
</Select> */}
</FormControl>
</Stack>
<Stack spacing={0.5}>
<FormLabel id="output-modes-radio-group-label">
Pietro, can you please select the target language?
</FormLabel>
<Box sx={{paddingTop: 2, paddingBottom: 1}}>
<FormControl fullWidth sx={{minWidth: '14em'}}>
<InputLabel id="target-selector-input-label">
Target Language
</InputLabel>
<Select
labelId="target-selector-input-label"
label="Target Language"
onChange={(e: SelectChangeEvent) => {
setTargetLang(e.target.value);
onSetDynamicConfig({
targetLanguage: e.target.value,
});
}}
value={targetLang ?? ''}>
{currentAgent?.targetLangs.map((langCode) => (
<MenuItem value={langCode} key={langCode}>
{getLanguageFromThreeLetterCode(langCode) != null
? `${getLanguageFromThreeLetterCode(
langCode,
)} (${langCode})`
: langCode}
</MenuItem>
))}
</Select>
</FormControl>
</Box>
<Grid container>
<Grid item xs={12} sm={4}>
<FormControl
disabled={streamFixedConfigOptionsDisabled}>
<RadioGroup
aria-labelledby="output-modes-radio-group-label"
value={outputMode}
onChange={(e) =>
setOutputMode(
e.target.value as SupportedOutputMode,
)
}
name="output-modes-radio-buttons-group">
{
// TODO: Use supported modalities from agentCapabilities
SUPPORTED_OUTPUT_MODES.map(({value, label}) => (
<FormControlLabel
key={value}
value={value}
control={<Radio />}
label={label}
/>
))
}
</RadioGroup>
</FormControl>
</Grid>
<Grid item xs={12} sm={8}>
<Stack
direction="column"
spacing={1}
alignItems="flex-start"
sx={{flexGrow: 1}}>
{/* {currentAgent?.dynamicParams?.includes(
'expressive',
) && (
<FormControlLabel
control={
<Switch
checked={enableExpressive ?? false}
onChange={(
event: React.ChangeEvent<HTMLInputElement>,
) => {
const newValue = event.target.checked;
setEnableExpressive(newValue);
onSetDynamicConfig({
expressive: newValue,
});
}}
/>
}
label="Expressive"
/>
)} */}
{isListener && (
<Box
sx={{
flexGrow: 1,
paddingX: 1.5,
paddingY: 1.5,
width: '100%',
}}>
{volumeSliderNode}
</Box>
)}
</Stack>
</Grid>
</Grid>
</Stack>
<Stack
direction="row"
spacing={2}
justifyContent="space-between">
<Box sx={{flex: 1}}>
<FormControl disabled={streamFixedConfigOptionsDisabled}>
{/* <FormLabel id="input-source-radio-group-label">
Input Source
</FormLabel> */}
{/* <RadioGroup
aria-labelledby="input-source-radio-group-label"
value={inputSource}
onChange={(e: React.ChangeEvent<HTMLInputElement>) =>
setInputSource(
e.target.value as SupportedInputSource,
)
}
name="input-source-radio-buttons-group">
{SUPPORTED_INPUT_SOURCES.map(({label, value}) => (
<FormControlLabel
key={value}
value={value}
control={<Radio />}
label={label}
/>
                            ))}
                          </RadioGroup> */}
</FormControl>
</Box>
<Box sx={{flex: 1, flexGrow: 2}}>
<FormControl disabled={streamFixedConfigOptionsDisabled}>
<FormLabel>Options</FormLabel>
<FormControlLabel
control={
<Checkbox
checked={
enableNoiseSuppression ??
AUDIO_STREAM_DEFAULTS[inputSource]
.noiseSuppression
}
onChange={(
event: React.ChangeEvent<HTMLInputElement>,
) =>
setEnableNoiseSuppression(event.target.checked)
}
/>
}
label="Noise Suppression"
/>
<FormControlLabel
control={
<Checkbox
checked={
enableEchoCancellation ??
AUDIO_STREAM_DEFAULTS[inputSource]
.echoCancellation
}
onChange={(
event: React.ChangeEvent<HTMLInputElement>,
) =>
setEnableEchoCancellation(event.target.checked)
}
/>
}
label="Echo Cancellation (not recommended)"
/>
<FormControlLabel
control={
<Checkbox
checked={serverDebugFlag}
onChange={(
event: React.ChangeEvent<HTMLInputElement>,
) => setServerDebugFlag(event.target.checked)}
/>
}
label="Enable Server Debugging"
/>
</FormControl>
</Box>
</Stack>
{isSpeaker &&
isListener &&
inputSource === 'userMedia' &&
!enableEchoCancellation &&
gain !== 0 && (
<div>
<Alert severity="warning" icon={<HeadphonesIcon />}>
Headphones required to prevent feedback.
</Alert>
</div>
)}
{isSpeaker && enableEchoCancellation && (
<div>
<Alert severity="warning">
We don't recommend using echo cancellation as it may
distort the input audio. If possible, use headphones and
disable echo cancellation instead.
</Alert>
</div>
)}
<Stack direction="row" spacing={2}>
{streamingStatus === 'stopped' ? (
<Button
variant="contained"
onClick={startStreaming}
disabled={
roomID == null ||
// Prevent users from starting streaming if there is a server lock with an active session
(serverState?.serverLock?.isActive === true &&
serverState.serverLock.clientID !== clientID)
}>
{buttonLabelMap[streamingStatus]}
</Button>
) : (
<Button
variant="contained"
color={
streamingStatus === 'running' ? 'error' : 'primary'
}
disabled={
streamingStatus === 'starting' || roomID == null
}
onClick={stopStreaming}>
{buttonLabelMap[streamingStatus]}
</Button>
)}
<Box>
<Button
variant="contained"
aria-label={muted ? 'Unmute' : 'Mute'}
color={muted ? 'info' : 'primary'}
onClick={() => setMuted((prev) => !prev)}
sx={{
borderRadius: 100,
paddingX: 0,
minWidth: '36px',
}}>
{muted ? <MicOff /> : <Mic />}
</Button>
</Box>
{roomID == null ? null : (
<Box
sx={{
flexGrow: 1,
display: 'flex',
justifyContent: 'flex-end',
}}>
{xrDialogComponent}
</Box>
)}
</Stack>
{serverExceptions.length > 0 && (
<div>
<Alert severity="error">
{`The server encountered an exception. See the browser console for details. You may need to refresh the page to continue using the app.`}
</Alert>
</div>
)}
{serverState != null && hasMaxSpeakers && (
<div>
<Alert severity="error">
{`Maximum number of speakers reached. Please try again at a later time.`}
</Alert>
</div>
)}
{serverState != null &&
serverState.totalActiveTranscoders >=
TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD && (
<div>
<Alert severity="warning">
{`The server currently has ${serverState?.totalActiveTranscoders} active streaming sessions. Performance may be degraded.`}
</Alert>
</div>
)}
{serverState?.serverLock != null &&
serverState.serverLock.clientID !== clientID && (
<div>
<Alert severity="warning">
{`The server is currently locked. Priority will be given to that client when they are streaming, and your streaming session may be halted abruptly.`}
</Alert>
</div>
)}
</>
)}
</Stack>
{isListener && !isSpeaker && (
<Box sx={{marginBottom: 1, marginTop: 2}}>
{xrDialogComponent}
</Box>
)}
</div>
{debugParam && roomID != null && <DebugSection />}
<div className="translation-text-container-sra horizontal-padding-sra">
<Stack
direction="row"
spacing={2}
sx={{mb: '16px', alignItems: 'center'}}>
<Typography variant="h1" sx={{fontWeight: 700, flexGrow: 1}}>
Transcript
</Typography>
{isSpeaker && (
<Button
variant="text"
size="small"
onClick={onClearTranscriptForAll}>
Clear Transcript for All
</Button>
)}
</Stack>
<Stack direction="row">
<div className="translation-text-sra">
{translationSentencesWithEmptyStartingString.map(
(sentence, index, arr) => {
const isLast = index === arr.length - 1;
const maybeRef = isLast
? {ref: lastTranslationResultRef}
: {};
return (
<div className="text-chunk-sra" key={index} {...maybeRef}>
<Typography variant="body1">
{sentence}
{animateTextDisplay && isLast && (
<Blink
intervalMs={CURSOR_BLINK_INTERVAL_MS}
shouldBlink={
(roomState?.activeTranscoders ?? 0) > 0
}>
<Typography
component="span"
variant="body1"
sx={{
display: 'inline-block',
transform: 'scaleY(1.25) translateY(-1px)',
}}>
{'|'}
</Typography>
</Blink>
)}
</Typography>
</div>
);
},
)}
</div>
</Stack>
</div>
</div>
</Box>
</div>
);
}