import {useCallback, useEffect, useLayoutEffect, useRef, useState} from 'react';

import Button from '@mui/material/Button';
import Typography from '@mui/material/Typography';
import InputLabel from '@mui/material/InputLabel';
import FormControl from '@mui/material/FormControl';
import Select, {SelectChangeEvent} from '@mui/material/Select';
import MenuItem from '@mui/material/MenuItem';
import Stack from '@mui/material/Stack';
import seamlessLogoUrl from './assets/seamless.svg';
import {
  AgentCapabilities,
  BaseResponse,
  BrowserAudioStreamConfig,
  DynamicConfig,
  PartialDynamicConfig,
  SUPPORTED_INPUT_SOURCES,
  SUPPORTED_OUTPUT_MODES,
  ServerExceptionData,
  ServerSpeechData,
  ServerState,
  ServerTextData,
  StartStreamEventConfig,
  StreamingStatus,
  SupportedInputSource,
  SupportedOutputMode,
  TranslationSentences,
} from './types/StreamingTypes';
import FormLabel from '@mui/material/FormLabel';
import RadioGroup from '@mui/material/RadioGroup';
import FormControlLabel from '@mui/material/FormControlLabel';
import Radio from '@mui/material/Radio';
import './StreamingInterface.css';
import RoomConfig from './RoomConfig';
import Divider from '@mui/material/Divider';
import {useSocket} from './useSocket';
import {RoomState} from './types/RoomState';
import useStable from './useStable';
import float32To16BitPCM from './float32To16BitPCM';
import createBufferedSpeechPlayer from './createBufferedSpeechPlayer';
import Checkbox from '@mui/material/Checkbox';
import Alert from '@mui/material/Alert';
import isScrolledToDocumentBottom from './isScrolledToDocumentBottom';
import Box from '@mui/material/Box';
import Slider from '@mui/material/Slider';
import VolumeDown from '@mui/icons-material/VolumeDown';
import VolumeUp from '@mui/icons-material/VolumeUp';
import Mic from '@mui/icons-material/Mic';
import MicOff from '@mui/icons-material/MicOff';
import XRDialog from './react-xr/XRDialog';
import getTranslationSentencesFromReceivedData from './getTranslationSentencesFromReceivedData';
import {
  sliceTranslationSentencesUpToIndex,
  getTotalSentencesLength,
} from './sliceTranslationSentencesUtils';
import Blink from './Blink';
import {CURSOR_BLINK_INTERVAL_MS} from './cursorBlinkInterval';
import {getURLParams} from './URLParams';
import debug from './debug';
import DebugSection from './DebugSection';
import {Grid} from '@mui/material';
import {getLanguageFromThreeLetterCode} from './languageLookup';
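
// Default browser audio constraints per input source: microphone capture keeps
// noise suppression enabled (echo cancellation off), while display/tab capture
// disables both so the shared audio passes through unprocessed.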
const AUDIO_STREAM_DEFAULTS: {
  [key in SupportedInputSource]: BrowserAudioStreamConfig;
} = {
  userMedia: {
    echoCancellation: false,
    noiseSuppression: true,
  },
  displayMedia: {
    echoCancellation: false,
    noiseSuppression: false,
  },
};

async function requestUserMediaAudioStream(
  config: BrowserAudioStreamConfig = {
    echoCancellation: false,
    noiseSuppression: true,
  },
) {
  const stream = await navigator.mediaDevices.getUserMedia({
    audio: {...config, channelCount: 1},
  });
  console.debug(
    '[requestUserMediaAudioStream] stream created with settings:',
    stream.getAudioTracks()?.[0]?.getSettings(),
  );
  return stream;
}

async function requestDisplayMediaAudioStream(
  config: BrowserAudioStreamConfig = {
    echoCancellation: false,
    noiseSuppression: false,
  },
) {
  const stream = await navigator.mediaDevices.getDisplayMedia({
    audio: {...config, channelCount: 1},
  });
  console.debug(
    '[requestDisplayMediaAudioStream] stream created with settings:',
    stream.getAudioTracks()?.[0]?.getSettings(),
  );
  return stream;
}

const buttonLabelMap: {[key in StreamingStatus]: string} = {
  stopped: 'Start Streaming',
  running: 'Stop Streaming',
  starting: 'Starting...',
};

const BUFFER_LIMIT = 1;

const SCROLLED_TO_BOTTOM_THRESHOLD_PX = 36;

const GAIN_MULTIPLIER_OVER_1 = 3;
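
// The volume slider ranges from 0 to 3, but positions above 1 are stretched by
// GAIN_MULTIPLIER_OVER_1 before being applied to the gain node, so slider
// values 2 and 3 map to 400% and 700% amplification (matching the slider marks).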
const getGainScaledValue = (value: number): number =>
  value > 1 ? (value - 1) * GAIN_MULTIPLIER_OVER_1 + 1 : value;

const TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD = 2;

const MAX_SERVER_EXCEPTIONS_TRACKED = 500;

export const TYPING_ANIMATION_DELAY_MS = 6;

export default function StreamingInterface() {
  const urlParams = getURLParams();
  const debugParam = urlParams.debug;
  const [animateTextDisplay, setAnimateTextDisplay] = useState<boolean>(
    urlParams.animateTextDisplay,
  );

  const socketObject = useSocket();
  const {socket, clientID} = socketObject;

  const [serverState, setServerState] = useState<ServerState | null>(null);
  const [agent, setAgent] = useState<AgentCapabilities | null>(null);
  const model = agent?.name ?? null;
  const agentsCapabilities: Array<AgentCapabilities> =
    serverState?.agentsCapabilities ?? [];
  const currentAgent: AgentCapabilities | null =
    agentsCapabilities.find((agent) => agent.name === model) ?? null;

  const [serverExceptions, setServerExceptions] = useState<
    Array<ServerExceptionData>
  >([]);
  const [roomState, setRoomState] = useState<RoomState | null>(null);
  const roomID = roomState?.room_id ?? null;
  const isSpeaker =
    (clientID != null && roomState?.speakers.includes(clientID)) ?? false;
  const isListener =
    (clientID != null && roomState?.listeners.includes(clientID)) ?? false;

  const [streamingStatus, setStreamingStatus] =
    useState<StreamingStatus>('stopped');

  const isStreamConfiguredRef = useRef<boolean>(false);

  const [outputMode, setOutputMode] = useState<SupportedOutputMode>('s2s&t');
  const [inputSource, setInputSource] =
    useState<SupportedInputSource>('userMedia');
  const [enableNoiseSuppression, setEnableNoiseSuppression] = useState<
    boolean | null
  >(null);
  const [enableEchoCancellation, setEnableEchoCancellation] = useState<
    boolean | null
  >(null);

  const [targetLang, setTargetLang] = useState<string | null>(null);

  const [serverDebugFlag, setServerDebugFlag] = useState<boolean>(
    debugParam ?? false,
  );

  const [receivedData, setReceivedData] = useState<Array<ServerTextData>>([]);
  const [
    translationSentencesAnimatedIndex,
    setTranslationSentencesAnimatedIndex,
  ] = useState<number>(0);

  const lastTranslationResultRef = useRef<HTMLDivElement | null>(null);

  const [inputStream, setInputStream] = useState<MediaStream | null>(null);
  const [inputStreamSource, setInputStreamSource] =
    useState<MediaStreamAudioSourceNode | null>(null);
  const audioContext = useStable<AudioContext>(() => new AudioContext());
  const [scriptNodeProcessor, setScriptNodeProcessor] =
    useState<ScriptProcessorNode | null>(null);

  const [muted, setMuted] = useState<boolean>(false);

  // Keep a ref in sync with the muted state so the audio-processing callback
  // always sees the latest value without needing to be re-created.
  const mutedRef = useRef<boolean>(muted);
  useEffect(() => {
    mutedRef.current = muted;
  }, [muted]);

  const [gain, setGain] = useState<number>(1);

  const isScrolledToBottomRef = useRef<boolean>(isScrolledToDocumentBottom());

  const streamFixedConfigOptionsDisabled =
    streamingStatus !== 'stopped' || roomID == null;

  const bufferedSpeechPlayer = useStable(() => {
    const player = createBufferedSpeechPlayer({
      onStarted: () => {
        console.debug('📢 PLAYBACK STARTED 📢');
      },
      onEnded: () => {
        console.debug('🛑 PLAYBACK ENDED 🛑');
      },
    });

    // Start the player right away so any received audio is played eagerly.
    player.start();
    return player;
  });

  const translationSentencesBase: TranslationSentences =
    getTranslationSentencesFromReceivedData(receivedData);

  const translationSentencesBaseTotalLength = getTotalSentencesLength(
    translationSentencesBase,
  );
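
  // Typing animation: when enabled, the transcript is rendered only up to
  // translationSentencesAnimatedIndex; an effect further down advances that
  // index every TYPING_ANIMATION_DELAY_MS until it catches up with the full
  // transcript, producing a typewriter effect.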

  const translationSentences: TranslationSentences = animateTextDisplay
    ? sliceTranslationSentencesUpToIndex(
        translationSentencesBase,
        translationSentencesAnimatedIndex,
      )
    : translationSentencesBase;

  // While streaming is running but no text has arrived yet, render a single
  // empty sentence so the blinking cursor has somewhere to appear.
  const translationSentencesWithEmptyStartingString =
    streamingStatus === 'running' && translationSentences.length === 0
      ? ['']
      : translationSentences;

  const setAgentAndUpdateParams = useCallback(
    (newAgent: AgentCapabilities | null) => {
      setAgent((prevAgent) => {
        if (prevAgent?.name !== newAgent?.name) {
          setTargetLang(newAgent?.targetLangs[0] ?? null);
        }
        return newAgent;
      });
    },
    [],
  );

  const onSetDynamicConfig = useCallback(
    async (partialConfig: PartialDynamicConfig) => {
      return new Promise<void>((resolve, reject) => {
        if (socket == null) {
          reject(new Error('[onSetDynamicConfig] socket is null'));
          return;
        }

        socket.emit(
          'set_dynamic_config',
          partialConfig,
          (result: BaseResponse) => {
            console.log('[emit result: set_dynamic_config]', result);
            if (result.status === 'ok') {
              resolve();
            } else {
              reject(
                new Error(
                  `[onSetDynamicConfig] set_dynamic_config returned status: ${result.status}`,
                ),
              );
            }
          },
        );
      });
    },
    [socket],
  );

  const configureStreamAsync = ({sampleRate}: {sampleRate: number}) => {
    return new Promise<void>((resolve, reject) => {
      if (socket == null) {
        reject(new Error('[configureStreamAsync] socket is null'));
        return;
      }
      const modelName = agent?.name ?? null;
      if (modelName == null) {
        reject(new Error('[configureStreamAsync] modelName is null'));
        return;
      }

      const config: StartStreamEventConfig = {
        event: 'config',
        rate: sampleRate,
        model_name: modelName,
        debug: serverDebugFlag,
        async_processing: true,
        buffer_limit: BUFFER_LIMIT,
        model_type: outputMode,
      };

      console.log('[configureStreamAsync] sending config', config);

      socket.emit('configure_stream', config, (statusObject) => {
        if (statusObject.status === 'ok') {
          isStreamConfiguredRef.current = true;
          console.debug(
            '[configureStreamAsync] stream configured!',
            statusObject,
          );
          resolve();
        } else {
          isStreamConfiguredRef.current = false;
          reject(
            new Error(
              `[configureStreamAsync] configure_stream returned status: ${statusObject.status}`,
            ),
          );
        }
      });
    });
  };
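
  // startStreaming: acquire the selected audio input, route it through the Web
  // Audio graph, and forward 16-bit PCM chunks to the server over the socket
  // once the stream has been configured.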

  const startStreaming = async () => {
    if (streamingStatus !== 'stopped') {
      console.warn(
        `Attempting to start stream when status is ${streamingStatus}`,
      );
      return;
    }

    setStreamingStatus('starting');

    if (audioContext.state === 'suspended') {
      console.warn('audioContext was suspended! resuming...');
      await audioContext.resume();
    }

    let stream: MediaStream | null = null;

    try {
      if (inputSource === 'userMedia') {
        stream = await requestUserMediaAudioStream({
          noiseSuppression:
            enableNoiseSuppression ??
            AUDIO_STREAM_DEFAULTS['userMedia'].noiseSuppression,
          echoCancellation:
            enableEchoCancellation ??
            AUDIO_STREAM_DEFAULTS['userMedia'].echoCancellation,
        });
      } else if (inputSource === 'displayMedia') {
        stream = await requestDisplayMediaAudioStream({
          noiseSuppression:
            enableNoiseSuppression ??
            AUDIO_STREAM_DEFAULTS['displayMedia'].noiseSuppression,
          echoCancellation:
            enableEchoCancellation ??
            AUDIO_STREAM_DEFAULTS['displayMedia'].echoCancellation,
        });
      } else {
        throw new Error(`Unsupported input source requested: ${inputSource}`);
      }
      setInputStream(stream);
    } catch (e) {
      console.error('[startStreaming] media stream request failed:', e);
      setStreamingStatus('stopped');
      return;
    }

    const mediaStreamSource = audioContext.createMediaStreamSource(stream);
    setInputStreamSource(mediaStreamSource);
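
    // NOTE: ScriptProcessorNode is deprecated in favor of AudioWorklet, but is
    // used here for simplicity. With a 16384-sample buffer, each
    // onaudioprocess callback delivers roughly a third of a second of audio at
    // a typical 48 kHz AudioContext.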
    const scriptProcessor = audioContext.createScriptProcessor(16384, 1, 1);
    setScriptNodeProcessor(scriptProcessor);

    scriptProcessor.onaudioprocess = (event) => {
      if (isStreamConfiguredRef.current === false) {
        console.debug('[onaudioprocess] stream is not configured yet!');
        return;
      }
      if (socket == null) {
        console.warn('[onaudioprocess] socket is null in onaudioprocess');
        return;
      }

      if (mutedRef.current) {
        // While muted, still emit a single-sample (near-silent) buffer so the
        // server keeps receiving audio events for this stream.
        const mostlyEmptyInt16Array = new Int16Array(1);
        socket.emit('incoming_audio', mostlyEmptyInt16Array);
      } else {
        const float32Audio = event.inputBuffer.getChannelData(0);
        const pcm16Audio = float32To16BitPCM(float32Audio);
        socket.emit('incoming_audio', pcm16Audio);
      }

      debug()?.sentAudio(event);
    };

    mediaStreamSource.connect(scriptProcessor);
    scriptProcessor.connect(audioContext.destination);

    bufferedSpeechPlayer.start();

    try {
      if (targetLang == null) {
        throw new Error('[startStreaming] targetLang cannot be nullish');
      }

      const fullDynamicConfig: DynamicConfig = {
        targetLanguage: targetLang,
      };

      await onSetDynamicConfig(fullDynamicConfig);

      await configureStreamAsync({
        sampleRate: audioContext.sampleRate,
      });
    } catch (e) {
      console.error('configureStreamAsync failed', e);
      setStreamingStatus('stopped');
      return;
    }

    setStreamingStatus('running');
  };
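
  // stopStreaming: tear down the audio graph, stop the capture tracks, notify
  // the server, and reset the streaming status.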

  const stopStreaming = useCallback(async () => {
    if (streamingStatus === 'stopped') {
      console.warn(
        `Attempting to stop stream when status is ${streamingStatus}`,
      );
      return;
    }

    // Stop the speech player so any queued audio doesn't keep playing.
    bufferedSpeechPlayer.stop();

    if (inputStreamSource == null || scriptNodeProcessor == null) {
      console.error(
        'inputStreamSource || scriptNodeProcessor is null in stopStreaming',
      );
    } else {
      inputStreamSource.disconnect(scriptNodeProcessor);
      scriptNodeProcessor.disconnect(audioContext.destination);

      // Stop the input tracks so the browser releases the mic/tab capture.
      inputStream?.getTracks().forEach((track) => track.stop());
    }

    if (socket == null) {
      console.warn('Unable to emit stop_stream because socket is null');
    } else {
      socket.emit('stop_stream', (result) => {
        console.debug('[emit result: stop_stream]', result);
      });
    }

    setStreamingStatus('stopped');
  }, [
    audioContext.destination,
    bufferedSpeechPlayer,
    inputStream,
    inputStreamSource,
    scriptNodeProcessor,
    socket,
    streamingStatus,
  ]);

  const onClearTranscriptForAll = useCallback(() => {
    if (socket != null) {
      socket.emit('clear_transcript_for_all');
    }
  }, [socket]);
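
  // Socket event subscriptions: each effect below registers a handler for one
  // server-sent event and removes it on cleanup so handlers aren't duplicated
  // when the socket instance changes.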

  useEffect(() => {
    if (socket == null) {
      return;
    }

    const onRoomStateUpdate = (roomState: RoomState) => {
      setRoomState(roomState);
    };

    socket.on('room_state_update', onRoomStateUpdate);

    return () => {
      socket.off('room_state_update', onRoomStateUpdate);
    };
  }, [socket]);

  useEffect(() => {
    if (socket != null) {
      const onTranslationText = (data: ServerTextData) => {
        setReceivedData((prev) => [...prev, data]);
        debug()?.receivedText(data.payload);
      };

      const onTranslationSpeech = (data: ServerSpeechData) => {
        bufferedSpeechPlayer.addAudioToBuffer(data.payload, data.sample_rate);
      };

      socket.on('translation_text', onTranslationText);
      socket.on('translation_speech', onTranslationSpeech);

      return () => {
        socket.off('translation_text', onTranslationText);
        socket.off('translation_speech', onTranslationSpeech);
      };
    }
  }, [bufferedSpeechPlayer, socket]);

  useEffect(() => {
    if (socket != null) {
      const onServerStateUpdate = (newServerState: ServerState) => {
        setServerState(newServerState);

        // Stop streaming if another client holds an active server lock.
        if (
          newServerState.serverLock?.isActive === true &&
          newServerState.serverLock?.clientID !== clientID &&
          streamingStatus === 'running'
        ) {
          stopStreaming();
        }

        const firstAgentNullable = newServerState.agentsCapabilities[0];
        if (agent == null && firstAgentNullable != null) {
          setAgentAndUpdateParams(firstAgentNullable);
        }
      };

      socket.on('server_state_update', onServerStateUpdate);

      return () => {
        socket.off('server_state_update', onServerStateUpdate);
      };
    }
  }, [
    agent,
    clientID,
    setAgentAndUpdateParams,
    socket,
    stopStreaming,
    streamingStatus,
  ]);

  useEffect(() => {
    if (socket != null) {
      const onServerException = (
        exceptionDataWithoutClientTime: ServerExceptionData,
      ) => {
        const exceptionData = {
          ...exceptionDataWithoutClientTime,
          timeStringClient: new Date(
            exceptionDataWithoutClientTime['timeEpochMs'],
          ).toLocaleString(),
        };

        setServerExceptions((prev) =>
          [exceptionData, ...prev].slice(0, MAX_SERVER_EXCEPTIONS_TRACKED),
        );
        console.error(
          `[server_exception] The server encountered an exception: ${exceptionData['message']}`,
          exceptionData,
        );
      };

      socket.on('server_exception', onServerException);

      return () => {
        socket.off('server_exception', onServerException);
      };
    }
  }, [socket]);

  useEffect(() => {
    if (socket != null) {
      const onClearTranscript = () => {
        setReceivedData([]);
        setTranslationSentencesAnimatedIndex(0);
      };

      socket.on('clear_transcript', onClearTranscript);

      return () => {
        socket.off('clear_transcript', onClearTranscript);
      };
    }
  }, [socket]);

  useEffect(() => {
    const onScroll = () => {
      if (isScrolledToDocumentBottom(SCROLLED_TO_BOTTOM_THRESHOLD_PX)) {
        isScrolledToBottomRef.current = true;
        return;
      }
      isScrolledToBottomRef.current = false;
    };

    document.addEventListener('scroll', onScroll);

    return () => {
      document.removeEventListener('scroll', onScroll);
    };
  }, []);
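
  // Keep the newest transcript entry in view, but only when the user was
  // already scrolled to the bottom, so manual scrolling isn't hijacked.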

  useLayoutEffect(() => {
    if (
      lastTranslationResultRef.current != null &&
      isScrolledToBottomRef.current
    ) {
      lastTranslationResultRef.current.scrollIntoView();
    }
  }, [receivedData]);

  useEffect(() => {
    if (!animateTextDisplay) {
      return;
    }

    if (
      translationSentencesAnimatedIndex < translationSentencesBaseTotalLength
    ) {
      const timeout = setTimeout(() => {
        setTranslationSentencesAnimatedIndex((prev) => prev + 1);
        debug()?.startRenderText();
      }, TYPING_ANIMATION_DELAY_MS);

      return () => clearTimeout(timeout);
    } else {
      debug()?.endRenderText();
    }
  }, [
    animateTextDisplay,
    translationSentencesAnimatedIndex,
    translationSentencesBaseTotalLength,
  ]);
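
  // Volume control: React state keeps the raw slider position (0 to 3) while
  // the buffered speech player receives the scaled gain (see
  // getGainScaledValue).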

  const volumeSliderNode = (
    <Stack
      spacing={2}
      direction="row"
      sx={{mb: 1, width: '100%'}}
      alignItems="center">
      <VolumeDown color="primary" />
      <Slider
        aria-label="Volume"
        defaultValue={1}
        scale={getGainScaledValue}
        min={0}
        max={3}
        step={0.1}
        marks={[
          {value: 0, label: '0%'},
          {value: 1, label: '100%'},
          {value: 2, label: '400%'},
          {value: 3, label: '700%'},
        ]}
        valueLabelFormat={(value) => `${(value * 100).toFixed(0)}%`}
        valueLabelDisplay="auto"
        value={gain}
        onChange={(_event: Event, newValue: number | number[]) => {
          if (typeof newValue === 'number') {
            const scaledGain = getGainScaledValue(newValue);
            // We want the actual gain node to use the scaled value
            bufferedSpeechPlayer.setGain(scaledGain);
            // But we want react state to keep track of the non-scaled value
            setGain(newValue);
          } else {
            console.error(
              `[volume slider] Unexpected non-number value: ${newValue}`,
            );
          }
        }}
      />
      <VolumeUp color="primary" />
    </Stack>
  );
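
  // The XR dialog shows the transcript in an immersive (AR) view. Text
  // animation is disabled while the AR view is visible and restored from the
  // URL params once it is hidden.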

  const xrDialogComponent = (
    <XRDialog
      animateTextDisplay={
        animateTextDisplay &&
        translationSentencesAnimatedIndex ===
          translationSentencesBaseTotalLength
      }
      bufferedSpeechPlayer={bufferedSpeechPlayer}
      translationSentences={translationSentences}
      roomState={roomState}
      roomID={roomID}
      startStreaming={startStreaming}
      stopStreaming={stopStreaming}
      debugParam={debugParam}
      onARHidden={() => {
        setAnimateTextDisplay(urlParams.animateTextDisplay);
      }}
      onARVisible={() => setAnimateTextDisplay(false)}
    />
  );

  return (
    <div className="app-wrapper-sra">
      <Box
        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
        // @ts-ignore Not sure why it's complaining about complexity here
        sx={{width: '100%', maxWidth: '660px', minWidth: '320px'}}>
        <div className="main-container-sra">
          <div className="top-section-sra horizontal-padding-sra">
            <div className="header-container-sra">
              <img
                src={seamlessLogoUrl}
                className="header-icon-sra"
                alt="Seamless Translation Logo"
                height={24}
                width={24}
              />
              <div>
                <Typography variant="h1" sx={{color: '#65676B'}}>
                  Seamless Translation
                </Typography>
              </div>
            </div>
            <div className="header-container-sra">
              <div>
                <Typography variant="body2" sx={{color: '#65676B'}}>
                  Welcome! Join a room as a speaker or listener (or both), and
                  share the room code to invite listeners.
                  <br />
                  The SeamlessStreaming model is a research model and is not
                  released for production deployment. Streaming quality depends
                  heavily on proper VAD segmentation: it works best if you
                  pause every couple of sentences, or you may wish to adjust
                  the VAD threshold in the model config.
                </Typography>
              </div>
            </div>
            <Stack spacing="22px" direction="column">
              <Box>
                <RoomConfig
                  roomState={roomState}
                  serverState={serverState}
                  streamingStatus={streamingStatus}
                  onJoinRoomOrUpdateRoles={() => {
                    // If the user has switched from speaker to listener we need to tell the
                    // player to play eagerly, since currently the listener doesn't have any stop/start controls
                    bufferedSpeechPlayer.start();
                  }}
                />

                {isListener && !isSpeaker && (
                  <Box
                    sx={{
                      paddingX: 6,
                      paddingBottom: 2,
                      marginY: 2,
                      display: 'flex',
                      flexDirection: 'column',
                      alignItems: 'center',
                    }}>
                    {volumeSliderNode}
                  </Box>
                )}
              </Box>

              {isSpeaker && (
                <>
                  <Divider />

                  <Stack spacing="12px" direction="column">
                    <FormLabel id="model-selector-form-label">Model</FormLabel>
                    <FormControl
                      disabled={
                        streamFixedConfigOptionsDisabled ||
                        agentsCapabilities.length === 0
                      }
                      fullWidth
                      sx={{minWidth: '14em'}}>
                      <InputLabel id="model-selector-input-label">
                        Model
                      </InputLabel>
                      <Select
                        labelId="model-selector-input-label"
                        label="Model"
                        onChange={(e: SelectChangeEvent) => {
                          const newAgent =
                            agentsCapabilities.find(
                              (agent) => e.target.value === agent.name,
                            ) ?? null;
                          if (newAgent == null) {
                            console.error(
                              'Unable to find agent with name',
                              e.target.value,
                            );
                          }
                          setAgentAndUpdateParams(newAgent);
                        }}
                        value={model ?? ''}>
                        {agentsCapabilities.map((agent) => (
                          <MenuItem value={agent.name} key={agent.name}>
                            {agent.name}
                          </MenuItem>
                        ))}
                      </Select>
                    </FormControl>
                  </Stack>

                  <Stack spacing={0.5}>
                    <FormLabel id="output-modes-radio-group-label">
                      Output
                    </FormLabel>

                    <Box sx={{paddingTop: 2, paddingBottom: 1}}>
                      <FormControl fullWidth sx={{minWidth: '14em'}}>
                        <InputLabel id="target-selector-input-label">
                          Target Language
                        </InputLabel>
                        <Select
                          labelId="target-selector-input-label"
                          label="Target Language"
                          onChange={(e: SelectChangeEvent) => {
                            setTargetLang(e.target.value);
                            onSetDynamicConfig({
                              targetLanguage: e.target.value,
                            });
                          }}
                          value={targetLang ?? ''}>
                          {currentAgent?.targetLangs.map((langCode) => (
                            <MenuItem value={langCode} key={langCode}>
                              {getLanguageFromThreeLetterCode(langCode) != null
                                ? `${getLanguageFromThreeLetterCode(
                                    langCode,
                                  )} (${langCode})`
                                : langCode}
                            </MenuItem>
                          ))}
                        </Select>
                      </FormControl>
                    </Box>

                    <Grid container>
                      <Grid item xs={12} sm={4}>
                        <FormControl
                          disabled={streamFixedConfigOptionsDisabled}>
                          <RadioGroup
                            aria-labelledby="output-modes-radio-group-label"
                            value={outputMode}
                            onChange={(e) =>
                              setOutputMode(
                                e.target.value as SupportedOutputMode,
                              )
                            }
                            name="output-modes-radio-buttons-group">
                            {
                              // TODO: Use supported modalities from agentCapabilities
                              SUPPORTED_OUTPUT_MODES.map(({value, label}) => (
                                <FormControlLabel
                                  key={value}
                                  value={value}
                                  control={<Radio />}
                                  label={label}
                                />
                              ))
                            }
                          </RadioGroup>
                        </FormControl>
                      </Grid>

                      <Grid item xs={12} sm={8}>
                        <Stack
                          direction="column"
                          spacing={1}
                          alignItems="flex-start"
                          sx={{flexGrow: 1}}>
                          {isListener && (
                            <Box
                              sx={{
                                flexGrow: 1,
                                paddingX: 1.5,
                                paddingY: 1.5,
                                width: '100%',
                              }}>
                              {volumeSliderNode}
                            </Box>
                          )}
                        </Stack>
                      </Grid>
                    </Grid>
                  </Stack>

                  <Typography variant="body2" sx={{color: '#65676B'}}>
                    Note: we don't recommend echo cancellation, as it may
                    distort the input audio (dropping words or sentences) when
                    output audio is playing. Instead, use headphones if you'd
                    like to listen to the output audio while speaking.
                  </Typography>

                  <Stack
                    direction="row"
                    spacing={2}
                    justifyContent="space-between">
                    <Box sx={{flex: 1}}>
                      <FormControl disabled={streamFixedConfigOptionsDisabled}>
                        <FormLabel id="input-source-radio-group-label">
                          Input Source
                        </FormLabel>
                        <RadioGroup
                          aria-labelledby="input-source-radio-group-label"
                          value={inputSource}
                          onChange={(e: React.ChangeEvent<HTMLInputElement>) =>
                            setInputSource(
                              e.target.value as SupportedInputSource,
                            )
                          }
                          name="input-source-radio-buttons-group">
                          {SUPPORTED_INPUT_SOURCES.map(({label, value}) => (
                            <FormControlLabel
                              key={value}
                              value={value}
                              control={<Radio />}
                              label={label}
                            />
                          ))}
                        </RadioGroup>
                      </FormControl>
                    </Box>
                    <Box sx={{flex: 1}}>
                      <FormControl disabled={streamFixedConfigOptionsDisabled}>
                        <FormLabel>Options</FormLabel>
                        <FormControlLabel
                          control={
                            <Checkbox
                              checked={
                                enableNoiseSuppression ??
                                AUDIO_STREAM_DEFAULTS[inputSource]
                                  .noiseSuppression
                              }
                              onChange={(
                                event: React.ChangeEvent<HTMLInputElement>,
                              ) =>
                                setEnableNoiseSuppression(event.target.checked)
                              }
                            />
                          }
                          label="Noise Suppression (Browser)"
                        />
                        <FormControlLabel
                          control={
                            <Checkbox
                              checked={
                                enableEchoCancellation ??
                                AUDIO_STREAM_DEFAULTS[inputSource]
                                  .echoCancellation
                              }
                              onChange={(
                                event: React.ChangeEvent<HTMLInputElement>,
                              ) =>
                                setEnableEchoCancellation(event.target.checked)
                              }
                            />
                          }
                          label="Echo Cancellation (Browser)"
                        />
                        <FormControlLabel
                          control={
                            <Checkbox
                              checked={serverDebugFlag}
                              onChange={(
                                event: React.ChangeEvent<HTMLInputElement>,
                              ) => setServerDebugFlag(event.target.checked)}
                            />
                          }
                          label="Server Debug Flag"
                        />
                      </FormControl>
                    </Box>
                  </Stack>

                  <Stack direction="row" spacing={2}>
                    {streamingStatus === 'stopped' ? (
                      <Button
                        variant="contained"
                        onClick={startStreaming}
                        disabled={
                          roomID == null ||
                          // Prevent users from starting streaming if there is a server lock with an active session
                          (serverState?.serverLock?.isActive === true &&
                            serverState.serverLock.clientID !== clientID)
                        }>
                        {buttonLabelMap[streamingStatus]}
                      </Button>
                    ) : (
                      <Button
                        variant="contained"
                        color={
                          streamingStatus === 'running' ? 'error' : 'primary'
                        }
                        disabled={
                          streamingStatus === 'starting' || roomID == null
                        }
                        onClick={stopStreaming}>
                        {buttonLabelMap[streamingStatus]}
                      </Button>
                    )}

                    <Box>
                      <Button
                        variant="contained"
                        aria-label={muted ? 'Unmute' : 'Mute'}
                        color={muted ? 'info' : 'primary'}
                        onClick={() => setMuted((prev) => !prev)}
                        sx={{
                          borderRadius: 100,
                          paddingX: 0,
                          minWidth: '36px',
                        }}>
                        {muted ? <MicOff /> : <Mic />}
                      </Button>
                    </Box>

                    {roomID == null ? null : (
                      <Box
                        sx={{
                          flexGrow: 1,
                          display: 'flex',
                          justifyContent: 'flex-end',
                        }}>
                        {xrDialogComponent}
                      </Box>
                    )}
                  </Stack>

                  {serverExceptions.length > 0 && (
                    <div>
                      <Alert severity="error">
                        {`The server encountered an exception. See the browser console for details. You may need to refresh the page to continue using the app.`}
                      </Alert>
                    </div>
                  )}

                  {serverState != null &&
                    serverState.totalActiveTranscoders >=
                      TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD && (
                      <div>
                        <Alert severity="warning">
                          {`The server currently has ${serverState?.totalActiveTranscoders} active streaming sessions. Performance may be degraded.`}
                        </Alert>
                      </div>
                    )}

                  {serverState?.serverLock != null &&
                    serverState.serverLock.clientID !== clientID && (
                      <div>
                        <Alert severity="warning">
                          {`The server is currently locked by "${serverState.serverLock.name}". Priority will be given to that client when they are streaming, and your streaming session may be halted abruptly.`}
                        </Alert>
                      </div>
                    )}
                </>
              )}
            </Stack>

            {isListener && !isSpeaker && (
              <Box sx={{marginBottom: 1, marginTop: 2}}>
                {xrDialogComponent}
              </Box>
            )}
          </div>

          {debugParam && roomID != null && <DebugSection />}

          <div className="translation-text-container-sra horizontal-padding-sra">
            <Stack
              direction="row"
              spacing={2}
              sx={{mb: '16px', alignItems: 'center'}}>
              <Typography variant="h1" sx={{fontWeight: 700, flexGrow: 1}}>
                Transcript
              </Typography>
              {isSpeaker && (
                <Button
                  variant="text"
                  size="small"
                  onClick={onClearTranscriptForAll}>
                  Clear Transcript for All
                </Button>
              )}
            </Stack>
            <Stack direction="row">
              <div className="translation-text-sra">
                {translationSentencesWithEmptyStartingString.map(
                  (sentence, index, arr) => {
                    const isLast = index === arr.length - 1;
                    const maybeRef = isLast
                      ? {ref: lastTranslationResultRef}
                      : {};
                    return (
                      <div className="text-chunk-sra" key={index} {...maybeRef}>
                        <Typography variant="body1">
                          {sentence}
                          {animateTextDisplay && isLast && (
                            <Blink
                              intervalMs={CURSOR_BLINK_INTERVAL_MS}
                              shouldBlink={
                                (roomState?.activeTranscoders ?? 0) > 0
                              }>
                              <Typography
                                component="span"
                                variant="body1"
                                sx={{
                                  display: 'inline-block',
                                  transform: 'scaleY(1.25) translateY(-1px)',
                                }}>
                                {'|'}
                              </Typography>
                            </Blink>
                          )}
                        </Typography>
                      </div>
                    );
                  },
                )}
              </div>
            </Stack>
          </div>
        </div>
      </Box>
    </div>
  );
}