File size: 1,151 Bytes
5e4b316
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6683bb4
5e4b316
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e281db3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
"""A simple web interactive chat demo based on gradio."""

import os
import time
import gradio as gr
import numpy as np
import spaces
import torch


from inference import OmniInference


# Pick the GPU when one is available; OmniInference loads the model from the
# local ./checkpoint directory, and warm_up() runs an initial pass so the
# first real user request does not pay lazy-initialization cost.
device = "cuda" if torch.cuda.is_available() else "cpu"
omni_client = OmniInference('./checkpoint', device)
omni_client.warm_up()

# Output-stream parameters: 24 kHz, mono.  NOTE(review): OUT_CHUNK is not
# referenced anywhere in this file — presumably a chunk size in bytes used
# elsewhere or left over; confirm before removing.
OUT_CHUNK = 4096
OUT_RATE = 24000
OUT_CHANNELS = 1


def process_audio(audio):
    """Stream synthesized response audio for a recorded input file.

    Args:
        audio: Filesystem path handed over by the Gradio
            ``Audio(type="filepath")`` input component, or ``None`` when no
            recording is available yet (live mode fires the handler eagerly).

    Yields:
        ``(sample_rate, samples)`` tuples, where ``samples`` is an int16
        numpy array shaped ``(frames, OUT_CHANNELS)`` — the format Gradio's
        streaming audio output expects.
    """
    filepath = audio
    print(f"filepath: {filepath}")
    if filepath is None:
        # Nothing recorded yet; yield nothing so the output stays silent.
        return

    cnt = 0
    tik = time.time()
    for chunk in omni_client.run_AT_batch_stream(filepath):
        # Time-to-first-chunk is the latency the user actually perceives.
        if cnt == 0:
            print(f"first chunk time cost: {time.time() - tik:.3f}")
        cnt += 1
        # Each chunk is assumed to be raw 16-bit PCM bytes; reinterpret it
        # as int16 samples and shape to (frames, channels).  frombuffer
        # already produces int16, so the original trailing astype(np.int16)
        # was a redundant per-chunk copy and has been dropped.
        audio_data = np.frombuffer(chunk, dtype=np.int16)
        yield OUT_RATE, audio_data.reshape(-1, OUT_CHANNELS)


# Wire the handler into a minimal Gradio UI: microphone recording in,
# streamed audio response out.
demo = gr.Interface(
    process_audio,
    inputs=gr.Audio(type="filepath", label="Microphone"),
    outputs=[gr.Audio(label="Response", streaming=True, autoplay=True)],
    title="Chat Mini-Omni Demo",
    live=True,  # re-run the handler automatically as new input arrives
)
# Queueing is enabled before launch — presumably required for the streaming
# generator output to work; confirm against the Gradio version in use.
demo.queue()
demo.launch()