"""Gradio entry point for the mini-omni demo.

On import this module:

1. downloads the ``gpt-omni/mini-omni`` repository snapshot into
   ``./checkpoint``;
2. starts ``server.serve`` in a daemon thread;
3. launches a Gradio interface.

NOTE(review): the launched interface is wired to the placeholder ``greet``
function; ``process_audio`` is defined here but is not connected to the UI.
"""

import base64
import os
import time
from threading import Thread

import gradio as gr
import numpy as np
import requests
from huggingface_hub import snapshot_download

from server import serve

repo_id = "gpt-omni/mini-omni"
snapshot_download(repo_id, local_dir="./checkpoint", revision="main")

IP = "0.0.0.0"
PORT = 60808

# ``serve`` is started with its default arguments; presumably it listens on
# IP:PORT above -- confirm against server.py.
thread = Thread(target=serve, daemon=True)
thread.start()

# Same value as the previous hard-coded "http://0.0.0.0:60808/chat", but
# derived from IP/PORT so the two cannot drift apart.
API_URL = f"http://{IP}:{PORT}/chat"

OUT_CHUNK = 4096  # bytes requested per streamed read
OUT_RATE = 24000  # sample rate reported alongside each yielded chunk
OUT_CHANNELS = 1  # number of columns each chunk is reshaped into


def process_audio(audio):
    """Send an audio file to the chat endpoint and stream back the reply.

    The file is read, base64-encoded and POSTed as JSON (``{"audio": ...}``)
    to ``API_URL``. The streamed response body is interpreted as int16
    samples and yielded chunk by chunk.

    Args:
        audio: Path of the audio file to send, or ``None``.

    Yields:
        ``(OUT_RATE, samples)`` tuples, where ``samples`` is an ``int16``
        array of shape ``(n, OUT_CHANNELS)``.

    Nothing is yielded when ``audio`` is ``None``. Errors raised while
    reading the response (including a non-2xx HTTP status) are printed and
    end the stream; errors raised while opening the file or establishing the
    connection propagate to the caller.
    """
    filepath = audio
    print(f"filepath: {filepath}")
    if filepath is None:
        return

    with open(filepath, "rb") as f:
        data = f.read()
    base64_encoded = str(base64.b64encode(data), encoding="utf-8")
    files = {"audio": base64_encoded}

    # Bytes needed for one complete frame (one int16 sample per channel).
    frame_bytes = np.dtype(np.int16).itemsize * OUT_CHANNELS
    # Trailing bytes of the previous chunk that did not complete a frame.
    pending = b""
    first_chunk = True

    tik = time.time()
    with requests.post(API_URL, json=files, stream=True) as response:
        try:
            # Do not try to decode an HTTP error body as audio samples.
            response.raise_for_status()
            for chunk in response.iter_content(chunk_size=OUT_CHUNK):
                if not chunk:
                    continue
                if first_chunk:
                    print(f"first chunk time cost: {time.time() - tik:.3f}")
                    first_chunk = False

                # A chunk may end in the middle of a frame; np.frombuffer
                # rejects such buffers, so keep the remainder for the next
                # iteration instead of failing the whole stream.
                buffer = pending + chunk
                usable = len(buffer) - len(buffer) % frame_bytes
                pending = buffer[usable:]
                if usable == 0:
                    continue

                audio_data = np.frombuffer(buffer[:usable], dtype=np.int16)
                audio_data = audio_data.reshape(-1, OUT_CHANNELS)
                # astype returns a fresh, writable copy of the read-only view.
                yield OUT_RATE, audio_data.astype(np.int16)
        except Exception as e:
            print(f"error: {e}")


def greet(name):
    """Return a greeting for ``name``."""
    return "Hello " + name + "!!"


demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()