import {useCallback, useEffect, useRef, useState} from 'react';
import {
Canvas,
createPortal,
extend,
useFrame,
useThree,
} from '@react-three/fiber';
import ThreeMeshUI from 'three-mesh-ui';
import {ARButton, XR, Hands, XREvent} from '@react-three/xr';
import {TextGeometry} from 'three/examples/jsm/geometries/TextGeometry.js';
import {TranslationSentences} from '../types/StreamingTypes';
import Button from './Button';
import {RoomState} from '../types/RoomState';
import ThreeMeshUIText, {ThreeMeshUITextType} from './ThreeMeshUIText';
import {BLACK, WHITE} from './Colors';
/**
* Using `?url` at the end of this import tells vite this is a static asset, and
* provides us a URL to the hashed version of the file when the project is built.
* See: https://vitejs.dev/guide/assets.html#explicit-url-imports
*/
import robotoFontFamilyJson from '../assets/RobotoMono-Regular-msdf.json?url';
import robotoFontTexture from '../assets/RobotoMono-Regular.png';
import {getURLParams} from '../URLParams';
import TextBlocks, {CHARS_PER_LINE} from './TextBlocks';
import {BufferedSpeechPlayer} from '../createBufferedSpeechPlayer';
import {CURSOR_BLINK_INTERVAL_MS} from '../cursorBlinkInterval';
// Registers add-on library classes as JSX elements for react-three-fiber
extend(ThreeMeshUI);
extend({TextGeometry});
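// After extend(), these classes are available as lowercase JSX tags below
// (e.g. ThreeMeshUI.Block renders as <block>).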
async function fetchSupportedCharSet(): Promise<Set<string>> {
try {
const response = await fetch(robotoFontFamilyJson);
const fontFamily = await response.json();
return new Set(fontFamily.info.charset);
} catch (e) {
console.error('Failed to fetch supported XR charset', e);
return new Set();
}
}
let supportedCharSet = new Set<string>();
fetchSupportedCharSet().then((result) => (supportedCharSet = result));
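// Note: the charset is fetched once at module load. Any text rendered before the
// fetch resolves is checked against an empty set, so its characters are filtered out.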
// This component wraps its children so they are positioned relative to the camera rather than the world origin
function CameraLinkedObject({children}) {
const camera = useThree((state) => state.camera);
return createPortal(<>{children}</>, camera);
}
function ThreeMeshUIComponents({
translationSentences,
skipARIntro,
roomState,
animateTextDisplay,
}: XRConfigProps & {skipARIntro: boolean}) {
// The "loop" for re-rendering required for threemeshUI
useFrame(() => {
ThreeMeshUI.update();
});
const [started, setStarted] = useState<boolean>(skipARIntro);
return (
<>
<CameraLinkedObject>
{getURLParams().ARTranscriptionType === 'single_block' ? (
<TranscriptPanelSingleBlock
started={started}
animateTextDisplay={animateTextDisplay}
roomState={roomState}
translationSentences={translationSentences}
/>
) : (
<TranscriptPanelBlocks
animateTextDisplay={animateTextDisplay}
translationSentences={translationSentences}
/>
)}
{skipARIntro ? null : (
<IntroPanel started={started} setStarted={setStarted} />
)}
</CameraLinkedObject>
</>
);
}
// Original UI that just uses a single block to render 6 lines in a panel
function TranscriptPanelSingleBlock({
animateTextDisplay,
started,
translationSentences,
roomState,
}: {
animateTextDisplay: boolean;
started: boolean;
translationSentences: TranslationSentences;
roomState: RoomState | null;
}) {
const textRef = useRef<ThreeMeshUITextType>();
const [didReceiveTranslationSentences, setDidReceiveTranslationSentences] =
useState(false);
const hasActiveTranscoders = (roomState?.activeTranscoders ?? 0) > 0;
const [cursorBlinkOn, setCursorBlinkOn] = useState(false);
// Normally we don't setState during render, but we need it here for derived state; this if condition ensures it won't loop infinitely
if (!didReceiveTranslationSentences && translationSentences.length > 0) {
setDidReceiveTranslationSentences(true);
}
const width = 1;
const height = 0.3;
const fontSize = 0.03;
useEffect(() => {
if (animateTextDisplay && hasActiveTranscoders) {
const interval = setInterval(() => {
setCursorBlinkOn((prev) => !prev);
}, CURSOR_BLINK_INTERVAL_MS);
return () => clearInterval(interval);
} else {
setCursorBlinkOn(false);
}
}, [animateTextDisplay, hasActiveTranscoders]);
useEffect(() => {
if (textRef.current != null) {
const initialPrompt =
'Welcome to the presentation. We are excited to share with you the work we have been doing... Our model can now translate languages with less than 2 seconds of latency.';
// These are rough values based on spot-checking
const maxLines = 6;
const charsPerLine = 55;
const transcriptSentences: string[] = didReceiveTranslationSentences
? translationSentences
: [initialPrompt];
// The transcript is an array of sentences. We break each sentence down into an array of lines,
// so we can "scroll" through without changing the order of words in the transcript.
const linesToDisplay = transcriptSentences.flatMap((sentence, idx) => {
const blinkingCursor =
cursorBlinkOn && idx === transcriptSentences.length - 1 ? '|' : ' ';
const words = sentence.concat(blinkingCursor).split(/\s+/);
// Here we break each sentence up with newlines so all words per line fit within the panel
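// For example (ignoring the appended cursor char, and assuming charsPerLine were 12):
// "the quick brown fox" -> [' the quick', 'brown fox']. The [''] seed plus
// space-joining gives the first line a leading space.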
return words.reduce(
(wordChunks, currentWord) => {
const filteredWord = [...currentWord]
.filter((c) => {
if (supportedCharSet.has(c)) {
return true;
}
console.error(
`Unsupported char ${c} - make sure this is supported in the font family msdf file`,
);
return false;
})
.join('');
const lastLineSoFar = wordChunks[wordChunks.length - 1];
const charCount = lastLineSoFar.length + filteredWord.length + 1;
if (charCount <= charsPerLine) {
wordChunks[wordChunks.length - 1] =
lastLineSoFar + ' ' + filteredWord;
} else {
wordChunks.push(filteredWord);
}
return wordChunks;
},
[''],
);
});
// Only keep the last maxLines so new text keeps scrolling up from the bottom
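// (If there are at most maxLines lines, the computed delete count is <= 0 and splice removes nothing.)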
linesToDisplay.splice(0, linesToDisplay.length - maxLines);
textRef.current.set({content: linesToDisplay.join('\n')});
}
}, [
translationSentences,
textRef,
didReceiveTranslationSentences,
cursorBlinkOn,
]);
const opacity = started ? 1 : 0;
return (
<block
args={[{padding: 0.05, backgroundOpacity: opacity}]}
position={[0, -0.4, -1.3]}>
<block
args={[
{
width,
height,
fontSize,
textAlign: 'left',
backgroundOpacity: opacity,
// TODO: support more language charsets
// This renders using MSDF format supported in WebGL. Renderable characters are defined in the "charset" json
// Currently supports most default keyboard inputs, but this excludes many languages based on non-Latin charsets.
// You can use https://msdf-bmfont.donmccurdy.com/ for easily generating these files
// fontFamily: '/src/assets/Roboto-msdf.json',
// fontTexture: '/src/assets/Roboto-msdf.png'
fontFamily: robotoFontFamilyJson,
fontTexture: robotoFontTexture,
},
]}>
<ThreeMeshUIText
ref={textRef}
content={'Transcript'}
fontOpacity={opacity}
/>
</block>
</block>
);
}
// Splits up the lines into separate blocks to treat each one separately.
// This allows changing of opacity, animating per line, changing height / width per line etc
function TranscriptPanelBlocks({
animateTextDisplay,
translationSentences,
}: {
animateTextDisplay: boolean;
translationSentences: TranslationSentences;
}) {
// Dummy intro text is currently causing display issues, so we skip it here
const [didReceiveTranslationSentences, setDidReceiveTranslationSentences] =
useState(false);
// Normally we don't setState during render, but we need it here for derived state; this if condition ensures it won't loop infinitely
if (!didReceiveTranslationSentences && translationSentences.length > 0) {
setDidReceiveTranslationSentences(true);
}
const initialPrompt = 'Listening...';
const transcriptSentences: string[] = didReceiveTranslationSentences
? translationSentences
: [initialPrompt];
// The transcript is an array of sentences. We break each sentence down into an array of lines,
// so we can "scroll" through without changing the order of words in the transcript.
const sentenceLines = transcriptSentences.map((sentence) => {
const words = sentence.split(/\s+/);
// Here we break each sentence into chunks so the words on each line fit within the panel
return words.reduce(
(wordChunks, currentWord) => {
const filteredWord = [...currentWord]
.filter((c) => {
if (supportedCharSet.has(c)) {
return true;
}
console.error(
`Unsupported char ${c} - make sure this is supported in the font family msdf file`,
);
return false;
})
.join('');
const lastLineSoFar = wordChunks[wordChunks.length - 1];
const charCount = lastLineSoFar.length + filteredWord.length + 1;
if (charCount <= CHARS_PER_LINE) {
wordChunks[wordChunks.length - 1] =
lastLineSoFar + ' ' + filteredWord;
} else {
wordChunks.push(filteredWord);
}
return wordChunks;
},
[''],
);
});
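// Note: this wrapping logic mirrors TranscriptPanelSingleBlock above, but uses the
// shared CHARS_PER_LINE constant from TextBlocks so it matches the per-line block rendering.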
return (
<TextBlocks sentences={sentenceLines} blinkCursor={animateTextDisplay} />
);
}
function IntroPanel({started, setStarted}) {
const width = 0.5;
const height = 0.4;
const padding = 0.03;
// Kind of hacky, but we make the panel disappear by moving it completely out of the camera's view.
// If we try to remove the elements we end up throwing and stopping the experience,
// and opacity=0 also runs into weird bugs where not everything becomes invisible.
const xCoordinate = started ? 1000000 : 0;
const commonArgs = {
backgroundColor: WHITE,
width,
height,
padding,
backgroundOpacity: 1,
textAlign: 'center',
fontFamily: robotoFontFamilyJson,
fontTexture: robotoFontTexture,
};
return (
<>
<block
args={[
{
...commonArgs,
fontSize: 0.02,
},
]}
position={[xCoordinate, -0.1, -0.5]}>
<ThreeMeshUIText
content="FAIR Seamless Streaming Demo"
fontColor={BLACK}
/>
</block>
<block
args={[
{
...commonArgs,
fontSize: 0.016,
backgroundOpacity: 0,
},
]}
position={[xCoordinate, -0.15, -0.5001]}>
<ThreeMeshUIText
fontColor={BLACK}
content="Welcome to the Seamless team streaming demo experience! In this demo, you would experience AI powered text and audio translation in real time."
/>
</block>
<block
args={[
{
width: 0.1,
height: 0.1,
backgroundOpacity: 1,
backgroundColor: BLACK,
},
]}
position={[xCoordinate, -0.23, -0.5002]}>
<Button
onClick={() => setStarted(true)}
content={'Start Experience'}
width={0.2}
height={0.035}
fontSize={0.015}
padding={0.01}
borderRadius={0.01}
/>
</block>
</>
);
}
export type XRConfigProps = {
animateTextDisplay: boolean;
bufferedSpeechPlayer: BufferedSpeechPlayer;
translationSentences: TranslationSentences;
roomState: RoomState | null;
roomID: string | null;
startStreaming: () => Promise<void>;
stopStreaming: () => Promise<void>;
debugParam: boolean | null;
};
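// A minimal usage sketch (hypothetical parent wiring; the prop values shown here
// are assumptions, not part of this file):
//
// <XRConfig
//   animateTextDisplay={true}
//   bufferedSpeechPlayer={bufferedSpeechPlayer}
//   translationSentences={translationSentences}
//   roomState={roomState}
//   roomID={roomID}
//   startStreaming={startStreaming}
//   stopStreaming={stopStreaming}
//   debugParam={debugParam}
// />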
export default function XRConfig(props: XRConfigProps) {
const {bufferedSpeechPlayer, debugParam} = props;
const skipARIntro = getURLParams().skipARIntro;
const defaultDimensions = {width: 500, height: 500};
const [dimensions, setDimensions] = useState(
debugParam ? defaultDimensions : {width: 0, height: 0},
);
const {width, height} = dimensions;
// Make sure to reset the buffer when the headset is taken off / put back on so we don't get an
// endless stream of audio. The Oculus actually keeps running for some time after the headset is taken off.
const resetBuffers = useCallback(
(event: XREvent<XRSessionEvent>) => {
const session = event.target;
if (!(session instanceof XRSession)) {
return;
}
switch (session.visibilityState) {
case 'visible':
bufferedSpeechPlayer.start();
break;
case 'hidden':
bufferedSpeechPlayer.stop();
break;
}
},
[bufferedSpeechPlayer],
);
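// resetBuffers is memoized on bufferedSpeechPlayer so the onVisibilityChange
// handler passed to <XR> below keeps a stable identity across re-renders.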
return (
<div style={{height, width, margin: '0 auto', border: '1px solid #ccc'}}>
{/* This button triggers the AR flow when AR is available */}
<ARButton
onError={(e) => console.error(e)}
onClick={() => setDimensions(defaultDimensions)}
style={{
position: 'absolute',
bottom: '24px',
left: '50%',
transform: 'translateX(-50%)',
padding: '12px 24px',
border: '1px solid white',
borderRadius: '4px',
backgroundColor: '#465a69',
color: 'white',
font: 'normal 0.8125rem sans-serif',
outline: 'none',
zIndex: 99999,
cursor: 'pointer',
}}
/>
{/* Canvas to draw in the browser; in AR mode it displays in pass-through mode */}
{/* The camera here just works in 2D mode. In AR mode it starts at the origin */}
{/* <Canvas camera={{position: [0, 0, 1], fov: 60}}> */}
<Canvas camera={{position: [0, 0, 0.001], fov: 60}}>
<color attach="background" args={['grey']} />
<XR referenceSpace="local" onVisibilityChange={resetBuffers}>
{/*
Uncomment this for controllers to show up
<Controllers />
*/}
<Hands />
{/*
Uncomment this for moving with controllers
<MovementController />
*/}
{/*
Uncomment this for turning the view in non-vr mode
<OrbitControls
autoRotateSpeed={0.85}
zoomSpeed={1}
minPolarAngle={Math.PI / 2.5}
maxPolarAngle={Math.PI / 2.55}
/>
*/}
<ThreeMeshUIComponents {...props} skipARIntro={skipARIntro} />
{/* Just for testing */}
{/* <RandomComponents /> */}
</XR>
</Canvas>
</div>
);
}