import streamlit as st
from streamlit_webrtc import webrtc_streamer, WebRtcMode
import numpy as np
import io
import wave
import requests
from audio_to_text import audio_to_text
import streamlit.components.v1 as components
# Initialize Streamlit app layout
st.title("Microphone Input in Streamlit")
# Inline HTML/JS audio recorder (embedded below via components.html)
audio_recorder_html = """
<button id="recordButton">Start Recording</button>
<button id="stopButton">Stop Recording</button>
<script>
  // The buttons are declared above so they exist before this script runs
  const recordButton = document.getElementById("recordButton");
  const stopButton = document.getElementById("stopButton");
  let audioChunks = [];
  let mediaRecorder;

  navigator.mediaDevices.getUserMedia({ audio: true })
    .then(function (stream) {
      mediaRecorder = new MediaRecorder(stream);
      recordButton.onclick = function () {
        audioChunks = [];  // discard any previous recording
        mediaRecorder.start();
        console.log("Recording started...");
      };
      stopButton.onclick = function () {
        mediaRecorder.stop();
        console.log("Recording stopped...");
      };
      mediaRecorder.ondataavailable = function (e) {
        audioChunks.push(e.data);
      };
      mediaRecorder.onstop = function (e) {
        // The actual container depends on the browser's MediaRecorder
        // (typically webm/ogg rather than wav)
        const audioBlob = new Blob(audioChunks, { type: "audio/wav" });
        const reader = new FileReader();
        reader.readAsDataURL(audioBlob);
        reader.onloadend = function () {
          const base64data = reader.result.split(',')[1];
          // Streamlit.setComponentValue is only available inside a custom
          // bi-directional component (streamlit-component-lib), not in
          // plain components.html
          Streamlit.setComponentValue(base64data);
        };
      };
    });
</script>
"""
# Embed the JavaScript and HTML in Streamlit
components.html(audio_recorder_html, height=300)
# Retrieve the audio data from the component (if available). components.html is
# display-only, so this stays empty unless the embedded script (or a custom
# bi-directional component) actually writes the recording back.
audio_data = st.query_params.get("value")
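# Hedged sketch: if audio_data ever holds a base64-encoded WAV recording from
# the component, it could be decoded with the io/wave modules imported above and
# handed to audio_to_text. This assumes audio_to_text accepts a mono int16
# NumPy array, matching how it is called in audio_callback below.
import base64

if audio_data:
    wav_bytes = base64.b64decode(audio_data)
    with wave.open(io.BytesIO(wav_bytes), "rb") as wav_file:
        frames = wav_file.readframes(wav_file.getnframes())
    samples = np.frombuffer(frames, dtype=np.int16)
    st.write("Transcription (recorded clip):", audio_to_text(samples))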
API_URL = "https://eaa0-34-74-179-199.ngrok-free.app/generate"

def audio_callback(frame):
    # Get the raw audio data from the incoming frame as 16-bit PCM samples
    audio_data = frame.to_ndarray().astype(np.int16)
    # Convert audio to text
    transcription = audio_to_text(audio_data)
    # Display the transcription (note: this callback runs in a worker thread,
    # so Streamlit calls made here may not render on the page)
    st.write("Transcription:", transcription)
    # Optionally, send the transcription to an API
    headers = {"Content-Type": "application/json"}
    payload = {"prompt": transcription}
    response = requests.post(API_URL, json=payload, headers=headers)
    if response.status_code == 200:
        st.write("Assistant:", response.json())
    else:
        st.write("Error:", response.status_code, response.text)
    # Return the frame so streamlit-webrtc keeps the stream flowing
    return frame
webrtc_ctx = webrtc_streamer(
    key="audio-only",
    mode=WebRtcMode.SENDRECV,
    rtc_configuration={"iceServers": [{"urls": ["stun:stun1.l.google.com:19302"]}]},
    media_stream_constraints={
        "audio": True,
        "video": False,
    },
    audio_frame_callback=audio_callback,
)
# Show the current microphone status
if webrtc_ctx.state.playing:
    st.write("Microphone is active. Speak into the microphone...")
else:
    st.write("Click to start microphone input.")