"""A simple web interactive chat demo based on gradio.""" import os import time import gradio as gr import numpy as np import spaces import torch from inference import OmniInference device = "cuda" if torch.cuda.is_available() else "cpu" omni_client = OmniInference('./checkpoint', device) OUT_CHUNK = 4096 OUT_RATE = 24000 OUT_CHANNELS = 1 @spaces.GPU def warmup(): omni_client.warm_up() warmup() @spaces.GPU def process_audio(audio): filepath = audio print(f"filepath: {filepath}") if filepath is None: return cnt = 0 tik = time.time() for chunk in omni_client.run_AT_batch_stream(filepath): # Convert chunk to numpy array if cnt == 0: print(f"first chunk time cost: {time.time() - tik:.3f}") cnt += 1 audio_data = np.frombuffer(chunk, dtype=np.int16) audio_data = audio_data.reshape(-1, OUT_CHANNELS) yield OUT_RATE, audio_data.astype(np.int16) demo = gr.Interface( process_audio, inputs=gr.Audio(type="filepath", label="Microphone"), outputs=[gr.Audio(label="Response", streaming=True, autoplay=True)], title="Chat Mini-Omni Demo", live=True, ) demo.queue() demo.launch()