import base64
import io
import os
import re

import gradio as gr
import numpy as np
import openai
from dotenv import load_dotenv
from gradio_webrtc import AdditionalOutputs, ReplyOnPause, WebRTC
from huggingface_hub import InferenceClient
from pydub import AudioSegment
from twilio.rest import Client

load_dotenv()

hf_client = InferenceClient()

spinner_html = open("spinner.html").read()

# Twilio credentials are optional: with them, WebRTC traffic is relayed through
# Twilio's TURN servers; without them, the default configuration is used.
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

if account_sid and auth_token:
    client = Client(account_sid, auth_token)
    token = client.tokens.create()
    rtc_configuration = {
        "iceServers": token.ice_servers,
        "iceTransportPolicy": "relay",
    }
else:
    rtc_configuration = None

# Note: this rebinds `client` from the Twilio client above to the LLM client.
client = openai.OpenAI(
    api_key=os.environ.get("SAMBANOVA_API_KEY"),
    base_url="https://api.sambanova.ai/v1",
)

system_prompt = (
    "You are an AI coding assistant. Your task is to write single-file HTML "
    "applications based on a user's request. Only return the necessary code. "
    "Include all necessary imports and styles. You may also be asked to edit "
    "your original response."
)

user_prompt = (
    "Please write a single-file HTML application to fulfill the following "
    "request.\nThe message:{user_message}\nCurrent code you have written:{code}"
)


def extract_html_content(text):
    """
    Extract content including HTML tags.
    """
    match = re.search(r"<!DOCTYPE html>.*?</html>", text, re.DOTALL)
    return match.group(0) if match else None


def audio_to_bytes(audio: tuple[int, np.ndarray]):
    audio_segment = AudioSegment(
        audio[1].squeeze().tobytes(),
        frame_rate=audio[0],
        sample_width=audio[1].dtype.itemsize,
        channels=1,
    )

    # Export the audio segment to MP3 bytes - use a high bitrate to maximise quality
    mp3_io = io.BytesIO()
    audio_segment.export(mp3_io, format="mp3", bitrate="320k")

    # Get the MP3 bytes
    mp3_bytes = mp3_io.getvalue()
    mp3_io.close()
    return mp3_bytes


def display_in_sandbox(code):
    # Embed the generated page as a base64 data URI so it runs in an isolated iframe
    encoded_html = base64.b64encode(code.encode("utf-8")).decode("utf-8")
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    return f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'


def generate(user_message: tuple[int, np.ndarray], history: list[dict], code: str):
    # Show the spinner while the request is in flight
    yield AdditionalOutputs(history, spinner_html)

    # Transcribe the spoken request, then ask the LLM for an updated app
    text = hf_client.automatic_speech_recognition(audio_to_bytes(user_message)).text

    user_msg_formatted = user_prompt.format(user_message=text, code=code)
    history.append({"role": "user", "content": user_msg_formatted})

    response = client.chat.completions.create(
        model="Meta-Llama-3.1-70B-Instruct",
        messages=history,
        temperature=0.1,
        top_p=0.1,
    )
    output = response.choices[0].message.content
    html_code = extract_html_content(output)
    history.append({"role": "assistant", "content": output})
    yield AdditionalOutputs(history, html_code)


with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
    history = gr.State([{"role": "system", "content": system_prompt}])
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML(
                """

                <div style="text-align: center">
                    <h1>Llama Code Editor</h1>
                    <p>Powered by SambaNova and Gradio-WebRTC ⚡️</p>
                    <p>Create and edit single-file HTML applications with just your voice!</p>
                    <p>
                        Each conversation is limited to 90 seconds.
                        Once the time limit is up you can rejoin the conversation.
                    </p>
                </div>
                """
            )
            webrtc = WebRTC(
                rtc_configuration=rtc_configuration,
                mode="send",
                modality="audio",
            )
        with gr.Column(scale=10):
            with gr.Tabs():
                with gr.Tab("Sandbox"):
                    sandbox = gr.HTML(value=open("sandbox.html").read())
                with gr.Tab("Code"):
                    code = gr.Code(
                        language="html",
                        max_lines=50,
                        interactive=False,
                        elem_classes="code-component",
                    )
                with gr.Tab("Chat"):
                    cb = gr.Chatbot(type="messages")

    webrtc.stream(
        ReplyOnPause(generate),
        inputs=[webrtc, history, code],
        outputs=[webrtc],
        time_limit=90,
        concurrency_limit=10,
    )
    webrtc.on_additional_outputs(
        lambda history, code: (history, code, history),
        outputs=[history, code, cb],
    )
    code.change(display_in_sandbox, code, sandbox, queue=False)

if __name__ == "__main__":
    demo.launch()