import {useCallback, useEffect, useRef, useState} from 'react';
import {
  Canvas,
  createPortal,
  extend,
  useFrame,
  useThree,
} from '@react-three/fiber';
import ThreeMeshUI from 'three-mesh-ui';
import {ARButton, XR, Hands, XREvent} from '@react-three/xr';
import {TextGeometry} from 'three/examples/jsm/geometries/TextGeometry.js';
import {TranslationSentences} from '../types/StreamingTypes';
import Button from './Button';
import {RoomState} from '../types/RoomState';
import ThreeMeshUIText, {ThreeMeshUITextType} from './ThreeMeshUIText';
import {BLACK, WHITE} from './Colors';
/**
 * Using `?url` at the end of this import tells vite this is a static asset, and
 * provides us a URL to the hashed version of the file when the project is built.
 * See: https://vitejs.dev/guide/assets.html#explicit-url-imports
 */
import robotoFontFamilyJson from '../assets/RobotoMono-Regular-msdf.json?url';
import robotoFontTexture from '../assets/RobotoMono-Regular.png';
import {getURLParams} from '../URLParams';
import TextBlocks, {CHARS_PER_LINE} from './TextBlocks';
import {BufferedSpeechPlayer} from '../createBufferedSpeechPlayer';
import {CURSOR_BLINK_INTERVAL_MS} from '../cursorBlinkInterval';
// Registers add-on library classes as JSX elements for react-three-fiber
extend(ThreeMeshUI);
extend({TextGeometry});
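// After extend(), three-mesh-ui classes are available as lowercase JSX
// elements (e.g. the <block> tags used below map to ThreeMeshUI.Block).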
async function fetchSupportedCharSet(): Promise<Set<string>> {
  try {
    const response = await fetch(robotoFontFamilyJson);
    const fontFamily = await response.json();
    return new Set(fontFamily.info.charset);
  } catch (e) {
    console.error('Failed to fetch supported XR charset', e);
    return new Set();
  }
}
let supportedCharSet = new Set<string>();
fetchSupportedCharSet().then((result) => (supportedCharSet = result));
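// Note: until the fetch above resolves, supportedCharSet is empty, so any text
// rendered in the meantime is filtered down to nothing (and logs the
// "Unsupported char" errors below).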
// Wraps its children so they are positioned relative to the camera rather than the scene origin
function CameraLinkedObject({children}) {
  const camera = useThree((state) => state.camera);
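  // createPortal re-parents the children into the camera's subtree, so child
  // positions used below (e.g. [0, -0.4, -1.3]) are offsets in camera space.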
  return createPortal(<>{children}</>, camera);
}
function ThreeMeshUIComponents({
  translationSentences,
  skipARIntro,
  roomState,
  animateTextDisplay,
}: XRConfigProps & {skipARIntro: boolean}) {
// The "loop" for re-rendering required for threemeshUI | |
useFrame(() => { | |
ThreeMeshUI.update(); | |
}); | |
  const [started, setStarted] = useState<boolean>(skipARIntro);
  return (
    <>
      <CameraLinkedObject>
        {getURLParams().ARTranscriptionType === 'single_block' ? (
          <TranscriptPanelSingleBlock
            started={started}
            animateTextDisplay={animateTextDisplay}
            roomState={roomState}
            translationSentences={translationSentences}
          />
        ) : (
          <TranscriptPanelBlocks
            animateTextDisplay={animateTextDisplay}
            translationSentences={translationSentences}
          />
        )}
        {skipARIntro ? null : (
          <IntroPanel started={started} setStarted={setStarted} />
        )}
      </CameraLinkedObject>
    </>
  );
}
// Original UI that just uses a single block to render 6 lines in a panel
function TranscriptPanelSingleBlock({
  animateTextDisplay,
  started,
  translationSentences,
  roomState,
}: {
  animateTextDisplay: boolean;
  started: boolean;
  translationSentences: TranslationSentences;
  roomState: RoomState | null;
}) {
  const textRef = useRef<ThreeMeshUITextType>();
  const [didReceiveTranslationSentences, setDidReceiveTranslationSentences] =
    useState(false);
  const hasActiveTranscoders = (roomState?.activeTranscoders ?? 0) > 0;
  const [cursorBlinkOn, setCursorBlinkOn] = useState(false);
  // Normally we don't setState during render, but here we need it for derived
  // state, and the condition ensures it won't loop infinitely.
  if (!didReceiveTranslationSentences && translationSentences.length > 0) {
    setDidReceiveTranslationSentences(true);
  }
  const width = 1;
  const height = 0.3;
  const fontSize = 0.03;
  useEffect(() => {
    if (animateTextDisplay && hasActiveTranscoders) {
      const interval = setInterval(() => {
        setCursorBlinkOn((prev) => !prev);
      }, CURSOR_BLINK_INTERVAL_MS);
      return () => clearInterval(interval);
    } else {
      setCursorBlinkOn(false);
    }
  }, [animateTextDisplay, hasActiveTranscoders]);
  useEffect(() => {
    if (textRef.current != null) {
      const initialPrompt =
        'Welcome to the presentation. We are excited to share with you the work we have been doing... Our model can now translate languages with less than 2 seconds of latency.';
      // These are rough ratios based on spot checking
      const maxLines = 6;
      const charsPerLine = 55;
      const transcriptSentences: string[] = didReceiveTranslationSentences
        ? translationSentences
        : [initialPrompt];
      // The transcript is an array of sentences. We break each sentence down
      // into an array of lines so we can "scroll" through the text without
      // changing the order of words in the transcript.
      const linesToDisplay = transcriptSentences.flatMap((sentence, idx) => {
        const blinkingCursor =
          cursorBlinkOn && idx === transcriptSentences.length - 1 ? '|' : ' ';
        const words = sentence.concat(blinkingCursor).split(/\s+/);
        // Wrap each sentence onto new lines so every line fits within the panel
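        // For example, with charsPerLine = 10, "hello world again" wraps to
        // [' hello', 'world', 'again']. (The reduce below seeds with [''],
        // so the first wrapped line keeps a leading space.)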
        return words.reduce(
          (wordChunks, currentWord) => {
            const filteredWord = [...currentWord]
              .filter((c) => {
                if (supportedCharSet.has(c)) {
                  return true;
                }
                console.error(
                  `Unsupported char ${c} - make sure this is supported in the font family msdf file`,
                );
                return false;
              })
              .join('');
            const lastLineSoFar = wordChunks[wordChunks.length - 1];
            const charCount = lastLineSoFar.length + filteredWord.length + 1;
            if (charCount <= charsPerLine) {
              wordChunks[wordChunks.length - 1] =
                lastLineSoFar + ' ' + filteredWord;
            } else {
              wordChunks.push(filteredWord);
            }
            return wordChunks;
          },
          [''],
        );
      });
      // Only keep the last maxLines so new text keeps scrolling up from the bottom
      linesToDisplay.splice(0, linesToDisplay.length - maxLines);
      textRef.current.set({content: linesToDisplay.join('\n')});
    }
  }, [
    translationSentences,
    textRef,
    didReceiveTranslationSentences,
    cursorBlinkOn,
  ]);
  const opacity = started ? 1 : 0;
  return (
    <block
      args={[{padding: 0.05, backgroundOpacity: opacity}]}
      position={[0, -0.4, -1.3]}>
      <block
        args={[
          {
            width,
            height,
            fontSize,
            textAlign: 'left',
            backgroundOpacity: opacity,
            // TODO: support more language charsets
            // Text renders using the MSDF format supported in WebGL. Renderable
            // characters are defined in the "charset" field of the font JSON.
            // This currently covers most default keyboard input, but it excludes
            // many languages that use non-Latin charsets.
            // You can use https://msdf-bmfont.donmccurdy.com/ to easily generate these files
            // fontFamily: '/src/assets/Roboto-msdf.json',
            // fontTexture: '/src/assets/Roboto-msdf.png'
            fontFamily: robotoFontFamilyJson,
            fontTexture: robotoFontTexture,
          },
        ]}>
        <ThreeMeshUIText
          ref={textRef}
          content={'Transcript'}
          fontOpacity={opacity}
        />
      </block>
    </block>
  );
}
// Splits the transcript into separate blocks so each line is treated separately.
// This allows per-line control: changing opacity, animating, adjusting height / width, etc.
function TranscriptPanelBlocks({
  animateTextDisplay,
  translationSentences,
}: {
  animateTextDisplay: boolean;
  translationSentences: TranslationSentences;
}) {
  // Dummy text is currently causing display issues here, so we skip over it.
  const [didReceiveTranslationSentences, setDidReceiveTranslationSentences] =
    useState(false);
  // Normally we don't setState during render, but here we need it for derived
  // state, and the condition ensures it won't loop infinitely.
  if (!didReceiveTranslationSentences && translationSentences.length > 0) {
    setDidReceiveTranslationSentences(true);
  }
  const initialPrompt = 'Listening...';
  const transcriptSentences: string[] = didReceiveTranslationSentences
    ? translationSentences
    : [initialPrompt];
  // The transcript is an array of sentences. We break each sentence down into
  // an array of lines so we can "scroll" through the text without changing the
  // order of words in the transcript.
  const sentenceLines = transcriptSentences.map((sentence) => {
    const words = sentence.split(/\s+/);
    // Wrap each sentence onto new lines so every line fits within the panel
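    // Same word-wrapping strategy as in TranscriptPanelSingleBlock above, but
    // using the shared CHARS_PER_LINE constant from TextBlocks.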
    return words.reduce(
      (wordChunks, currentWord) => {
        const filteredWord = [...currentWord]
          .filter((c) => {
            if (supportedCharSet.has(c)) {
              return true;
            }
            console.error(
              `Unsupported char ${c} - make sure this is supported in the font family msdf file`,
            );
            return false;
          })
          .join('');
        const lastLineSoFar = wordChunks[wordChunks.length - 1];
        const charCount = lastLineSoFar.length + filteredWord.length + 1;
        if (charCount <= CHARS_PER_LINE) {
          wordChunks[wordChunks.length - 1] =
            lastLineSoFar + ' ' + filteredWord;
        } else {
          wordChunks.push(filteredWord);
        }
        return wordChunks;
      },
      [''],
    );
  });
  return (
    <TextBlocks sentences={sentenceLines} blinkCursor={animateTextDisplay} />
  );
}
function IntroPanel({started, setStarted}) {
  const width = 0.5;
  const height = 0.4;
  const padding = 0.03;
  // Kind of hacky, but we make the panel disappear by moving it completely out
  // of the camera's view. If we try to remove the elements instead, we end up
  // throwing and stopping the experience; opacity=0 also runs into weird bugs
  // where not everything becomes invisible.
  const xCoordinate = started ? 1000000 : 0;
  const commonArgs = {
    backgroundColor: WHITE,
    width,
    height,
    padding,
    backgroundOpacity: 1,
    textAlign: 'center',
    fontFamily: robotoFontFamilyJson,
    fontTexture: robotoFontTexture,
  };
  return (
    <>
      <block
        args={[
          {
            ...commonArgs,
            fontSize: 0.02,
          },
        ]}
        position={[xCoordinate, -0.1, -0.5]}>
        <ThreeMeshUIText
          content="FAIR Seamless Streaming Demo"
          fontColor={BLACK}
        />
      </block>
      <block
        args={[
          {
            ...commonArgs,
            fontSize: 0.016,
            backgroundOpacity: 0,
          },
        ]}
        position={[xCoordinate, -0.15, -0.5001]}>
        <ThreeMeshUIText
          fontColor={BLACK}
content="Welcome to the Seamless team streaming demo experience! In this demo, you would experience AI powered text and audio translation in real time." | |
/> | |
</block> | |
      <block
        args={[
          {
            width: 0.1,
            height: 0.1,
            backgroundOpacity: 1,
            backgroundColor: BLACK,
          },
        ]}
        position={[xCoordinate, -0.23, -0.5002]}>
        <Button
          onClick={() => setStarted(true)}
          content={'Start Experience'}
          width={0.2}
          height={0.035}
          fontSize={0.015}
          padding={0.01}
          borderRadius={0.01}
        />
      </block>
    </>
  );
}
export type XRConfigProps = {
  animateTextDisplay: boolean;
  bufferedSpeechPlayer: BufferedSpeechPlayer;
  translationSentences: TranslationSentences;
  roomState: RoomState | null;
  roomID: string | null;
  startStreaming: () => Promise<void>;
  stopStreaming: () => Promise<void>;
  debugParam: boolean | null;
};
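// Illustrative usage sketch (not part of this file): a parent component might
// mount XRConfig roughly like this, where the variable names are hypothetical
// placeholders for wherever the app keeps its streaming state:
//
// <XRConfig
//   animateTextDisplay={true}
//   bufferedSpeechPlayer={bufferedSpeechPlayer}
//   translationSentences={translationSentences}
//   roomState={roomState}
//   roomID={roomID}
//   startStreaming={startStreaming}
//   stopStreaming={stopStreaming}
//   debugParam={debugParam}
// />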
export default function XRConfig(props: XRConfigProps) {
  const {bufferedSpeechPlayer, debugParam} = props;
  const skipARIntro = getURLParams().skipARIntro;
  const defaultDimensions = {width: 500, height: 500};
  const [dimensions, setDimensions] = useState(
    debugParam ? defaultDimensions : {width: 0, height: 0},
  );
  const {width, height} = dimensions;
  // Make sure to reset the buffers when the headset is taken off / put on so we
  // don't get an endless stream of audio: the Oculus actually keeps running for
  // some time after the headset is taken off.
  const resetBuffers = useCallback(
    (event: XREvent<XRSessionEvent>) => {
      const session = event.target;
      if (!(session instanceof XRSession)) {
        return;
      }
      switch (session.visibilityState) {
        case 'visible':
          bufferedSpeechPlayer.start();
          break;
        case 'hidden':
          bufferedSpeechPlayer.stop();
          break;
      }
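      // Note: WebXR also defines a third visibility state, 'visible-blurred',
      // which this switch does not handle (the player keeps whatever state it
      // already had).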
    },
    [bufferedSpeechPlayer],
  );
  return (
    <div style={{height, width, margin: '0 auto', border: '1px solid #ccc'}}>
      {/* This button triggers the AR flow when AR is available */}
      <ARButton
        onError={(e) => console.error(e)}
        onClick={() => setDimensions(defaultDimensions)}
        style={{
          position: 'absolute',
          bottom: '24px',
          left: '50%',
          transform: 'translateX(-50%)',
          padding: '12px 24px',
          border: '1px solid white',
          borderRadius: '4px',
          backgroundColor: '#465a69',
          color: 'white',
          font: 'normal 0.8125rem sans-serif',
          outline: 'none',
          zIndex: 99999,
          cursor: 'pointer',
        }}
      />
      {/* In the browser this canvas draws the scene; in AR mode it displays in pass-through mode */}
      {/* The camera here only applies in 2D mode. In AR mode the camera starts at the origin */}
      {/* <Canvas camera={{position: [0, 0, 1], fov: 60}}> */}
      <Canvas camera={{position: [0, 0, 0.001], fov: 60}}>
        <color attach="background" args={['grey']} />
        <XR referenceSpace="local" onVisibilityChange={resetBuffers}>
          {/*
            Uncomment this for controllers to show up
            <Controllers />
          */}
          <Hands />
          {/*
            Uncomment this for moving with controllers
            <MovementController />
          */}
          {/*
            Uncomment this for turning the view in non-vr mode
            <OrbitControls
              autoRotateSpeed={0.85}
              zoomSpeed={1}
              minPolarAngle={Math.PI / 2.5}
              maxPolarAngle={Math.PI / 2.55}
            />
          */}
          <ThreeMeshUIComponents {...props} skipARIntro={skipARIntro} />
          {/* Just for testing */}
          {/* <RandomComponents /> */}
        </XR>
      </Canvas>
    </div>
  );
}