import {useCallback, useEffect, useLayoutEffect, useRef, useState} from 'react';
import Button from '@mui/material/Button';
import Typography from '@mui/material/Typography';
import InputLabel from '@mui/material/InputLabel';
import FormControl from '@mui/material/FormControl';
import Select, {SelectChangeEvent} from '@mui/material/Select';
import MenuItem from '@mui/material/MenuItem';
import Stack from '@mui/material/Stack';
import seamlessLogoUrl from './assets/DSC_4281.svg';
import {
  AgentCapabilities,
  BaseResponse,
  BrowserAudioStreamConfig,
  DynamicConfig,
  PartialDynamicConfig,
  SUPPORTED_INPUT_SOURCES,
  SUPPORTED_OUTPUT_MODES,
  ServerExceptionData,
  ServerSpeechData,
  ServerState,
  ServerTextData,
  StartStreamEventConfig,
  StreamingStatus,
  SupportedInputSource,
  SupportedOutputMode,
  TranslationSentences,
} from './types/StreamingTypes';
import FormLabel from '@mui/material/FormLabel';
import RadioGroup from '@mui/material/RadioGroup';
import FormControlLabel from '@mui/material/FormControlLabel';
import Radio from '@mui/material/Radio';
import './StreamingInterface.css';
import RoomConfig from './RoomConfig';
import Divider from '@mui/material/Divider';
import {useSocket} from './useSocket';
import {RoomState} from './types/RoomState';
import useStable from './useStable';
import float32To16BitPCM from './float32To16BitPCM';
import createBufferedSpeechPlayer from './createBufferedSpeechPlayer';
import Checkbox from '@mui/material/Checkbox';
import Alert from '@mui/material/Alert';
import isScrolledToDocumentBottom from './isScrolledToDocumentBottom';
import Box from '@mui/material/Box';
import Slider from '@mui/material/Slider';
import VolumeDown from '@mui/icons-material/VolumeDown';
import VolumeUp from '@mui/icons-material/VolumeUp';
import Mic from '@mui/icons-material/Mic';
import MicOff from '@mui/icons-material/MicOff';
import XRDialog from './react-xr/XRDialog';
import getTranslationSentencesFromReceivedData from './getTranslationSentencesFromReceivedData';
import {
  sliceTranslationSentencesUpToIndex,
  getTotalSentencesLength,
} from './sliceTranslationSentencesUtils';
import Blink from './Blink';
import {CURSOR_BLINK_INTERVAL_MS} from './cursorBlinkInterval';
import {getURLParams} from './URLParams';
import debug from './debug';
import DebugSection from './DebugSection';
import Switch from '@mui/material/Switch';
import Grid from '@mui/material/Grid';
import {getLanguageFromThreeLetterCode} from './languageLookup';
import HeadphonesIcon from '@mui/icons-material/Headphones';

const AUDIO_STREAM_DEFAULTS = {
  userMedia: {
    echoCancellation: false,
    noiseSuppression: true,
  },
  displayMedia: {
    echoCancellation: false,
    noiseSuppression: false,
  },
} as const;

async function requestUserMediaAudioStream(
  config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['userMedia'],
) {
  const stream = await navigator.mediaDevices.getUserMedia({
    audio: {...config, channelCount: 1},
  });
  console.debug(
    '[requestUserMediaAudioStream] stream created with settings:',
    stream.getAudioTracks()?.[0]?.getSettings(),
  );
  return stream;
}

async function requestDisplayMediaAudioStream(
  config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['displayMedia'],
) {
  const stream = await navigator.mediaDevices.getDisplayMedia({
    audio: {...config, channelCount: 1},
  });
  console.debug(
    '[requestDisplayMediaAudioStream] stream created with settings:',
    stream.getAudioTracks()?.[0]?.getSettings(),
  );
  return stream;
}

const buttonLabelMap: {[key in StreamingStatus]: string} = {
  stopped: 'Start Streaming',
  running: 'Stop Streaming',
  starting: 'Starting...',
};

const BUFFER_LIMIT = 1;

const SCROLLED_TO_BOTTOM_THRESHOLD_PX = 36;

const GAIN_MULTIPLIER_OVER_1 = 3;
// Slider values above 1 are mapped onto a steeper curve so the upper half of the slider
// gives a useful boost (e.g. a slider value of 1.5 becomes a gain of 2.5).
const getGainScaledValue = (value: number): number =>
  value > 1 ? (value - 1) * GAIN_MULTIPLIER_OVER_1 + 1 : value;

const TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD = 2;

const MAX_SERVER_EXCEPTIONS_TRACKED = 500;

export const TYPING_ANIMATION_DELAY_MS = 6;

export default function StreamingInterface() {
  const urlParams = getURLParams();
  const debugParam = urlParams.debug;

  const [animateTextDisplay, setAnimateTextDisplay] = useState(
    urlParams.animateTextDisplay,
  );

  const socketObject = useSocket();
  const {socket, clientID} = socketObject;

  const [serverState, setServerState] = useState<ServerState | null>(null);
  const [agent, setAgent] = useState<AgentCapabilities | null>(null);
  const model = agent?.name ?? null;
  const agentsCapabilities: Array<AgentCapabilities> =
    serverState?.agentsCapabilities ?? [];
  const currentAgent: AgentCapabilities | null =
    agentsCapabilities.find((agent) => agent.name === model) ?? null;

  const [serverExceptions, setServerExceptions] = useState<
    Array<ServerExceptionData>
  >([]);
  const [roomState, setRoomState] = useState<RoomState | null>(null);
  const roomID = roomState?.room_id ?? null;
  const isSpeaker =
    (clientID != null && roomState?.speakers.includes(clientID)) ?? false;
  const isListener =
    (clientID != null && roomState?.listeners.includes(clientID)) ?? false;

  const [streamingStatus, setStreamingStatus] =
    useState<StreamingStatus>('stopped');

  const isStreamConfiguredRef = useRef(false);

  const [hasMaxSpeakers, setHasMaxSpeakers] = useState(false);

  const [outputMode, setOutputMode] = useState<SupportedOutputMode>('s2s&t');
  const [inputSource, setInputSource] =
    useState<SupportedInputSource>('userMedia');
  const [enableNoiseSuppression, setEnableNoiseSuppression] = useState<
    boolean | null
  >(null);
  const [enableEchoCancellation, setEnableEchoCancellation] = useState<
    boolean | null
  >(null);

  // Dynamic Params:
  const [targetLang, setTargetLang] = useState<string | null>(null);
  const [enableExpressive, setEnableExpressive] = useState<boolean | null>(
    null,
  );

  const [serverDebugFlag, setServerDebugFlag] = useState(debugParam ?? false);

  const [receivedData, setReceivedData] = useState<Array<ServerTextData>>([]);
  const [
    translationSentencesAnimatedIndex,
    setTranslationSentencesAnimatedIndex,
  ] = useState(0);

  const lastTranslationResultRef = useRef<HTMLDivElement | null>(null);

  const [inputStream, setInputStream] = useState<MediaStream | null>(null);
  const [inputStreamSource, setInputStreamSource] =
    useState<MediaStreamAudioSourceNode | null>(null);
  const audioContext = useStable(() => new AudioContext());
  const [scriptNodeProcessor, setScriptNodeProcessor] =
    useState<ScriptProcessorNode | null>(null);

  const [muted, setMuted] = useState(false);
  // The onaudioprocess script needs an up-to-date reference to the muted state, so
  // we use a ref here and keep it in sync via useEffect
  const mutedRef = useRef(muted);
  useEffect(() => {
    mutedRef.current = muted;
  }, [muted]);

  const [gain, setGain] = useState(1);

  const isScrolledToBottomRef = useRef(isScrolledToDocumentBottom());

  // Some config options must be set when starting streaming and cannot be changed dynamically.
// This controls whether they are disabled or not const streamFixedConfigOptionsDisabled = streamingStatus !== 'stopped' || roomID == null; const bufferedSpeechPlayer = useStable(() => { const player = createBufferedSpeechPlayer({ onStarted: () => { console.debug('📢 PLAYBACK STARTED 📢'); }, onEnded: () => { console.debug('🛑 PLAYBACK ENDED 🛑'); }, }); // Start the player now so it eagerly plays audio when it arrives player.start(); return player; }); const translationSentencesBase: TranslationSentences = getTranslationSentencesFromReceivedData(receivedData); const translationSentencesBaseTotalLength = getTotalSentencesLength( translationSentencesBase, ); const translationSentences: TranslationSentences = animateTextDisplay ? sliceTranslationSentencesUpToIndex( translationSentencesBase, translationSentencesAnimatedIndex, ) : translationSentencesBase; // We want the blinking cursor to show before any text has arrived, so let's add an empty string so that the cursor shows up const translationSentencesWithEmptyStartingString = streamingStatus === 'running' && translationSentences.length === 0 ? [''] : translationSentences; /****************************************** * Event Handlers ******************************************/ const setAgentAndUpdateParams = useCallback( (newAgent: AgentCapabilities | null) => { setAgent((prevAgent) => { if (prevAgent?.name !== newAgent?.name) { setTargetLang(newAgent?.targetLangs[0] ?? null); setEnableExpressive(null); } return newAgent; }); }, [], ); const onSetDynamicConfig = useCallback( async (partialConfig: PartialDynamicConfig) => { return new Promise((resolve, reject) => { if (socket == null) { reject(new Error('[onSetDynamicConfig] socket is null ')); return; } socket.emit( 'set_dynamic_config', partialConfig, (result: BaseResponse) => { console.log('[emit result: set_dynamic_config]', result); if (result.status === 'ok') { resolve(); } else { reject(); } }, ); }); }, [socket], ); const configureStreamAsync = ({sampleRate}: {sampleRate: number}) => { return new Promise((resolve, reject) => { if (socket == null) { reject(new Error('[configureStreamAsync] socket is null ')); return; } const modelName = agent?.name ?? null; if (modelName == null) { reject(new Error('[configureStreamAsync] modelName is null ')); return; } const config: StartStreamEventConfig = { event: 'config', rate: sampleRate, model_name: modelName, debug: serverDebugFlag, // synchronous processing isn't implemented on the v2 pubsub server, so hardcode this to true async_processing: true, buffer_limit: BUFFER_LIMIT, model_type: outputMode, }; console.log('[configureStreamAsync] sending config', config); socket.emit('configure_stream', config, (statusObject) => { setHasMaxSpeakers(statusObject.message === 'max_speakers') if (statusObject.status === 'ok') { isStreamConfiguredRef.current = true; console.debug( '[configureStreamAsync] stream configured!', statusObject, ); resolve(); } else { isStreamConfiguredRef.current = false; reject( new Error( `[configureStreamAsync] configure_stream returned status: ${statusObject.status}`, ), ); return; } }); }); }; const startStreaming = async () => { if (streamingStatus !== 'stopped') { console.warn( `Attempting to start stream when status is ${streamingStatus}`, ); return; } setStreamingStatus('starting'); if (audioContext.state === 'suspended') { console.warn('audioContext was suspended! 
resuming...'); await audioContext.resume(); } let stream: MediaStream | null = null; try { if (inputSource === 'userMedia') { stream = await requestUserMediaAudioStream({ noiseSuppression: enableNoiseSuppression ?? AUDIO_STREAM_DEFAULTS['userMedia'].noiseSuppression, echoCancellation: enableEchoCancellation ?? AUDIO_STREAM_DEFAULTS['userMedia'].echoCancellation, }); } else if (inputSource === 'displayMedia') { stream = await requestDisplayMediaAudioStream({ noiseSuppression: enableNoiseSuppression ?? AUDIO_STREAM_DEFAULTS['displayMedia'].noiseSuppression, echoCancellation: enableEchoCancellation ?? AUDIO_STREAM_DEFAULTS['displayMedia'].echoCancellation, }); } else { throw new Error(`Unsupported input source requested: ${inputSource}`); } setInputStream(stream); } catch (e) { console.error('[startStreaming] media stream request failed:', e); setStreamingStatus('stopped'); return; } const mediaStreamSource = audioContext.createMediaStreamSource(stream); setInputStreamSource(mediaStreamSource); /** * NOTE: This currently uses a deprecated way of processing the audio (createScriptProcessor), but * which is easy and convenient for our purposes. * * Documentation for the deprecated way of doing it is here: https://developer.mozilla.org/en-US/docs/Web/API/BaseAudioContext/createScriptProcessor * * In an ideal world this would be migrated to something like this SO answer: https://stackoverflow.com/a/65448287 */ const scriptProcessor = audioContext.createScriptProcessor(16384, 1, 1); setScriptNodeProcessor(scriptProcessor); scriptProcessor.onaudioprocess = (event) => { if (isStreamConfiguredRef.current === false) { console.debug('[onaudioprocess] stream is not configured yet!'); return; } if (socket == null) { console.warn('[onaudioprocess] socket is null in onaudioprocess'); return; } if (mutedRef.current) { // We still want to send audio to the server when we're muted to ensure we // get any remaining audio back from the server, so let's pass an array length 1 with a value of 0 const mostlyEmptyInt16Array = new Int16Array(1); socket.emit('incoming_audio', mostlyEmptyInt16Array); } else { const float32Audio = event.inputBuffer.getChannelData(0); const pcm16Audio = float32To16BitPCM(float32Audio); socket.emit('incoming_audio', pcm16Audio); } debug()?.sentAudio(event); }; mediaStreamSource.connect(scriptProcessor); scriptProcessor.connect(audioContext.destination); bufferedSpeechPlayer.start(); try { if (targetLang == null) { throw new Error('[startStreaming] targetLang cannot be nullish'); } // When we are starting the stream we want to pass all the dynamic config values // available before actually configuring and starting the stream const fullDynamicConfig: DynamicConfig = { targetLanguage: targetLang, expressive: enableExpressive, }; await onSetDynamicConfig(fullDynamicConfig); // NOTE: this needs to be the *audioContext* sample rate, not the sample rate of the input stream. Not entirely sure why. 
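      // (Why the context's rate: the MediaStreamAudioSourceNode resamples the captured track to
      // the AudioContext's sample rate, so the buffers the script processor above hands us are at
      // audioContext.sampleRate regardless of what the input device itself reports.)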
await configureStreamAsync({ sampleRate: audioContext.sampleRate, }); } catch (e) { console.error('configureStreamAsync failed', e); setStreamingStatus('stopped'); return; } setStreamingStatus('running'); }; const stopStreaming = useCallback(async () => { if (streamingStatus === 'stopped') { console.warn( `Attempting to stop stream when status is ${streamingStatus}`, ); return; } // Stop the speech playback right away bufferedSpeechPlayer.stop(); if (inputStreamSource == null || scriptNodeProcessor == null) { console.error( 'inputStreamSource || scriptNodeProcessor is null in stopStreaming', ); } else { inputStreamSource.disconnect(scriptNodeProcessor); scriptNodeProcessor.disconnect(audioContext.destination); // Release the mic input so we stop showing the red recording icon in the browser inputStream?.getTracks().forEach((track) => track.stop()); } if (socket == null) { console.warn('Unable to emit stop_stream because socket is null'); } else { socket.emit('stop_stream', (result) => { console.debug('[emit result: stop_stream]', result); }); } setStreamingStatus('stopped'); }, [ audioContext.destination, bufferedSpeechPlayer, inputStream, inputStreamSource, scriptNodeProcessor, socket, streamingStatus, ]); const onClearTranscriptForAll = useCallback(() => { if (socket != null) { socket.emit('clear_transcript_for_all'); } }, [socket]); /****************************************** * Effects ******************************************/ useEffect(() => { if (socket == null) { return; } const onRoomStateUpdate = (roomState: RoomState) => { setRoomState(roomState); }; socket.on('room_state_update', onRoomStateUpdate); return () => { socket.off('room_state_update', onRoomStateUpdate); }; }, [socket]); useEffect(() => { if (socket != null) { const onTranslationText = (data: ServerTextData) => { setReceivedData((prev) => [...prev, data]); debug()?.receivedText(data.payload); }; const onTranslationSpeech = (data: ServerSpeechData) => { bufferedSpeechPlayer.addAudioToBuffer(data.payload, data.sample_rate); }; socket.on('translation_text', onTranslationText); socket.on('translation_speech', onTranslationSpeech); return () => { socket.off('translation_text', onTranslationText); socket.off('translation_speech', onTranslationSpeech); }; } }, [bufferedSpeechPlayer, socket]); useEffect(() => { if (socket != null) { const onServerStateUpdate = (newServerState: ServerState) => { setServerState(newServerState); // If a client creates a server lock, we want to stop streaming if we're not them if ( newServerState.serverLock?.isActive === true && newServerState.serverLock?.clientID !== clientID && streamingStatus === 'running' ) { stopStreaming(); } const firstAgentNullable = newServerState.agentsCapabilities[0]; if (agent == null && firstAgentNullable != null) { setAgentAndUpdateParams(firstAgentNullable); } }; socket.on('server_state_update', onServerStateUpdate); return () => { socket.off('server_state_update', onServerStateUpdate); }; } }, [ agent, clientID, setAgentAndUpdateParams, socket, stopStreaming, streamingStatus, ]); useEffect(() => { if (socket != null) { const onServerException = ( exceptionDataWithoutClientTime: ServerExceptionData, ) => { const exceptionData = { ...exceptionDataWithoutClientTime, timeStringClient: new Date( exceptionDataWithoutClientTime['timeEpochMs'], ).toLocaleString(), }; setServerExceptions((prev) => [exceptionData, ...prev].slice(0, MAX_SERVER_EXCEPTIONS_TRACKED), ); console.error( `[server_exception] The server encountered an exception: ${exceptionData['message']}`, 
exceptionData, ); }; socket.on('server_exception', onServerException); return () => { socket.off('server_exception', onServerException); }; } }, [socket]); useEffect(() => { if (socket != null) { const onClearTranscript = () => { setReceivedData([]); setTranslationSentencesAnimatedIndex(0); }; socket.on('clear_transcript', onClearTranscript); return () => { socket.off('clear_transcript', onClearTranscript); }; } }, [socket]); useEffect(() => { const onScroll = () => { if (isScrolledToDocumentBottom(SCROLLED_TO_BOTTOM_THRESHOLD_PX)) { isScrolledToBottomRef.current = true; return; } isScrolledToBottomRef.current = false; return; }; document.addEventListener('scroll', onScroll); return () => { document.removeEventListener('scroll', onScroll); }; }, []); useLayoutEffect(() => { if ( lastTranslationResultRef.current != null && isScrolledToBottomRef.current ) { // Scroll the div to the most recent entry lastTranslationResultRef.current.scrollIntoView(); } // Run the effect every time data is received, so that // we scroll to the bottom even if we're just adding text to // a pre-existing chunk }, [receivedData]); useEffect(() => { if (!animateTextDisplay) { return; } if ( translationSentencesAnimatedIndex < translationSentencesBaseTotalLength ) { const timeout = setTimeout(() => { setTranslationSentencesAnimatedIndex((prev) => prev + 1); debug()?.startRenderText(); }, TYPING_ANIMATION_DELAY_MS); return () => clearTimeout(timeout); } else { debug()?.endRenderText(); } }, [ animateTextDisplay, translationSentencesAnimatedIndex, translationSentencesBaseTotalLength, ]); /****************************************** * Sub-components ******************************************/ const volumeSliderNode = ( `${(value * 100).toFixed(0)}%`} valueLabelDisplay="auto" value={gain} onChange={(_event: Event, newValue: number | number[]) => { if (typeof newValue === 'number') { const scaledGain = getGainScaledValue(newValue); // We want the actual gain node to use the scaled value bufferedSpeechPlayer.setGain(scaledGain); // But we want react state to keep track of the non-scaled value setGain(newValue); } else { console.error( `[volume slider] Unexpected non-number value: ${newValue}`, ); } }} /> ); const xrDialogComponent = ( { setAnimateTextDisplay(urlParams.animateTextDisplay); }} onARVisible={() => setAnimateTextDisplay(false)} /> ); return (
Seamless Translation Logo
Pietro's translator Making communication easier
Hey Pietro, it's good to see you!
You can use this platform to translate from/to Italian and many other languages.
Use headphones if you are both speaker and listener to prevent feedback.

SeamlessStreaming is a research model, and it works best if you pause every couple of sentences. Real-time performance will degrade if you try to stream multiple speakers at the same time.

Let's try!
{ { // If the user has switched from speaker to listener we need to tell the // player to play eagerly, since currently the listener doesn't have any stop/start controls bufferedSpeechPlayer.start(); }} /> } {isListener && !isSpeaker && ( {volumeSliderNode} )} {isSpeaker && ( <> {/* Model */} {/* Model */} {/* */} Pietro, can you please select the target language? Target Language setOutputMode( e.target.value as SupportedOutputMode, ) } name="output-modes-radio-buttons-group"> { // TODO: Use supported modalities from agentCapabilities SUPPORTED_OUTPUT_MODES.map(({value, label}) => ( } label={label} /> )) } {/* {currentAgent?.dynamicParams?.includes( 'expressive', ) && ( , ) => { const newValue = event.target.checked; setEnableExpressive(newValue); onSetDynamicConfig({ expressive: newValue, }); }} /> } label="Expressive" /> )} */} {isListener && ( {volumeSliderNode} )} {/* Input Source */} {/* ) => setInputSource( e.target.value as SupportedInputSource, ) } name="input-source-radio-buttons-group"> {SUPPORTED_INPUT_SOURCES.map(({label, value}) => ( } label={label} /> ))} */} {/* */} Options , ) => setEnableNoiseSuppression(event.target.checked) } /> } label="Noise Suppression" /> , ) => setEnableEchoCancellation(event.target.checked) } /> } label="Echo Cancellation (not recommended)" /> , ) => setServerDebugFlag(event.target.checked)} /> } label="Enable Server Debugging" /> {isSpeaker && isListener && inputSource === 'userMedia' && !enableEchoCancellation && gain !== 0 && (
}> Headphones required to prevent feedback.
)} {isSpeaker && enableEchoCancellation && (
We don't recommend using echo cancellation as it may distort the input audio. If possible, use headphones and disable echo cancellation instead.
)} {streamingStatus === 'stopped' ? ( ) : ( )} {roomID == null ? null : ( {xrDialogComponent} )} {serverExceptions.length > 0 && (
{`The server encountered an exception. See the browser console for details. You may need to refresh the page to continue using the app.`}
)} {serverState != null && hasMaxSpeakers && (
{`Maximum number of speakers reached. Please try again at a later time.`}
)} {serverState != null && serverState.totalActiveTranscoders >= TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD && (
{`The server currently has ${serverState?.totalActiveTranscoders} active streaming sessions. Performance may be degraded.`}
)} {serverState?.serverLock != null && serverState.serverLock.clientID !== clientID && (
{`The server is currently locked. Priority will be given to that client when they are streaming, and your streaming session may be halted abruptly.`}
)} )}
{isListener && !isSpeaker && ( {xrDialogComponent} )}
{debugParam && roomID != null && }
Transcript {isSpeaker && ( )}
{translationSentencesWithEmptyStartingString.map( (sentence, index, arr) => { const isLast = index === arr.length - 1; const maybeRef = isLast ? {ref: lastTranslationResultRef} : {}; return (
{sentence} {animateTextDisplay && isLast && ( 0 }> {'|'} )}
); }, )}
); }
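
// `useStable` is imported from './useStable' and is used above to hold a single AudioContext
// across renders. Its real implementation lives in that module; the sketch below is only an
// assumption about the usual shape of such a hook (run the factory once, then keep returning
// the same instance), kept here for reference and named with a "Sketch" suffix so it does not
// collide with the actual import.
export function useStableSketch<T>(factory: () => T): T {
  const valueRef = useRef<T | null>(null);
  if (valueRef.current == null) {
    // Only call the factory on the first render; subsequent renders reuse the cached value
    valueRef.current = factory();
  }
  return valueRef.current;
}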
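
// The onaudioprocess handler above converts the Float32 samples produced by the Web Audio graph
// into 16-bit PCM via float32To16BitPCM (imported from './float32To16BitPCM') before emitting
// them as 'incoming_audio'. The sketch below is only an illustration of the conversion such a
// helper typically performs (clamp to [-1, 1], then scale to the signed 16-bit range); it is not
// the project's implementation, and is named with a "Sketch" suffix to avoid shadowing the import.
export function float32To16BitPCMSketch(float32Audio: Float32Array): Int16Array {
  const pcm16 = new Int16Array(float32Audio.length);
  for (let i = 0; i < float32Audio.length; i++) {
    // Clamp to the valid [-1, 1] range, then scale to the signed 16-bit integer range
    const sample = Math.max(-1, Math.min(1, float32Audio[i]));
    pcm16[i] = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
  }
  return pcm16;
}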
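
// The buffered speech player used above comes from './createBufferedSpeechPlayer'. Judging only
// from how the component calls it (start/stop around the stream lifecycle, addAudioToBuffer for
// every 'translation_speech' event, setGain from the volume slider), the object returned by
// createBufferedSpeechPlayer({onStarted, onEnded}) presumably has roughly the surface sketched
// below. This is an assumption for readability, not the module's actual type declaration.
export interface BufferedSpeechPlayerSketch {
  start: () => void;
  stop: () => void;
  // payload and sampleRate mirror the fields of the ServerSpeechData events received over the socket
  addAudioToBuffer: (
    payload: ServerSpeechData['payload'],
    sampleRate: number,
  ) => void;
  setGain: (gain: number) => void;
}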
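
// The startStreaming code above captures audio with createScriptProcessor, which the NOTE there
// acknowledges is deprecated. Below is a minimal sketch (an assumption, not this project's
// implementation) of how the same capture step could be moved onto an AudioWorkletNode, in the
// spirit of the Stack Overflow answer linked in that NOTE. The processor name and chunking
// behavior are illustrative only.
export async function createPcmCaptureNodeSketch(
  context: AudioContext,
  onChunk: (chunk: Float32Array) => void,
): Promise<AudioWorkletNode> {
  // Inline the worklet processor so the sketch stays self-contained; in a real app this would
  // normally live in its own file served alongside the bundle.
  const processorSource = `
    class PcmCaptureProcessor extends AudioWorkletProcessor {
      process(inputs) {
        const channel = inputs[0] && inputs[0][0];
        if (channel != null) {
          // Copy, because the underlying buffer is reused between process() calls
          this.port.postMessage(new Float32Array(channel));
        }
        return true; // keep the processor alive
      }
    }
    registerProcessor('pcm-capture-processor', PcmCaptureProcessor);
  `;
  const moduleUrl = URL.createObjectURL(
    new Blob([processorSource], {type: 'application/javascript'}),
  );
  await context.audioWorklet.addModule(moduleUrl);
  const node = new AudioWorkletNode(context, 'pcm-capture-processor');
  // Note: process() delivers 128-frame render quanta, far smaller than the 16384-sample buffers
  // used above, so a real implementation would likely batch chunks before emitting to the server.
  node.port.onmessage = (event: MessageEvent<Float32Array>) => onChunk(event.data);
  return node;
}
// Hypothetical usage, mirroring the onaudioprocess handler above:
//   const captureNode = await createPcmCaptureNodeSketch(audioContext, (chunk) => {
//     socket?.emit('incoming_audio', float32To16BitPCM(chunk));
//   });
//   mediaStreamSource.connect(captureNode);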