freddyaboulton committed
Commit 694882d
Parent: 2558f9d

more edits

Files changed (1): app.py (+16 -38)
app.py CHANGED

@@ -24,12 +24,11 @@ def update_or_append_conversation(conversation, id, role, content):
     conversation.append({"id": id, "role": role, "content": content})
 
 
-def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[str], client: OpenAI, output_format):
-    if state.client is None:
+def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[str], client: OpenAI, output_format: str):
+    if client is None:
         raise gr.Error("Please enter a valid API key first.")
 
-    format_ = state.output_format
-    bitrate = 128 if format_ == "mp3" else 32  # Higher bitrate for MP3, lower for OPUS
+    bitrate = 128 if output_format == "mp3" else 32  # Higher bitrate for MP3, lower for OPUS
     audio_data = base64.b64encode(audio_bytes).decode()
 
     try:
@@ -41,7 +40,7 @@ def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[str], client: OpenAI, output_format):
                 "tts_audio_bitrate": bitrate
             },
             model="llama3.1-8b",
-            messages=state.conversation + [{"role": "user", "content": [{"type": "audio", "data": audio_data}]}],
+            messages=lepton_conversation + [{"role": "user", "content": [{"type": "audio", "data": audio_data}]}],
             temperature=0.7,
             max_tokens=256,
             stream=True,
@@ -62,18 +61,18 @@ def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[str], client: OpenAI, output_format):
 
             if asr_results:
                 asr_result += "".join(asr_results)
-                yield id, None, asr_result, None, state
+                yield id, None, asr_result, None
 
             if content:
                 full_response += content
-                yield id, full_response, None, None, state
+                yield id, full_response, None, None
 
             if audio:
                 # Accumulate audio bytes and yield them
                 audio_bytes_accumulated += b''.join([base64.b64decode(a) for a in audio])
-                yield id, None, None, audio_bytes_accumulated, state
+                yield id, None, None, audio_bytes_accumulated
 
-        yield id, full_response, asr_result, audio_bytes_accumulated, state
+        yield id, full_response, asr_result, audio_bytes_accumulated
 
     except Exception as e:
         raise gr.Error(f"Error during audio streaming: {e}")
@@ -81,7 +80,6 @@ def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[str], client: OpenAI, output_format):
 def response(audio: tuple[int, np.ndarray], lepton_conversation: list[dict],
              gradio_conversation: list[dict], client: OpenAI, output_format: str):
 
-
     audio_buffer = io.BytesIO()
     segment = AudioSegment(
         audio[1].tobytes(),
@@ -93,36 +91,16 @@ def response(audio: tuple[int, np.ndarray], lepton_conversation: list[dict],
 
     generator = generate_response_and_audio(audio_buffer.getvalue(), state)
 
-    for id, text, asr, audio, updated_state in generator:
-        state = updated_state
+    for id, text, asr, audio in generator:
         if asr:
-            update_or_append_conversation(state.conversation, id, "user", asr)
+            update_or_append_conversation(lepton_conversation, id, "user", asr)
+            update_or_append_conversation(gradio_conversation, id, "user", asr)
         if text:
-            update_or_append_conversation(state.conversation, id, "assistant", text)
-            chatbot_output = state.conversation
-            yield chatbot_output, audio, state
-
-    # Reset the audio stream for the next interaction
-    state.stream = None
-    state.pause_detected = False
-
-def maybe_call_response(state):
-    if state.pause_detected:
-        return response(state)
-    else:
-        # Do nothing
-        return gr.update(), gr.update(), state
-
-def start_recording_user(state: AppState):
-    if not state.stopped:
-        return gr.update(recording=True)
-    else:
-        return gr.update(recording=False)
-
-
-def update_format(format, state):
-    state.output_format = format
-    return state
+            update_or_append_conversation(lepton_conversation, id, "assistant", text)
+            update_or_append_conversation(gradio_conversation, id, "assistant", text)
+
+        yield (np.frombuffer(audio, dtype=np.int16).reshape(1, -1), ), AdditionalOutputs(lepton_conversation, gradio_conversation)
+
 
 with gr.Blocks() as demo:
     with gr.Row():
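Only the tail of update_or_append_conversation is visible in the hunk context above. As a reading aid, here is a minimal sketch of the upsert behavior its call sites imply; everything except the final append (the one line shown verbatim in the diff) is an assumption:

def update_or_append_conversation(conversation: list[dict], id: str, role: str, content: str):
    # Assumed: replace the content of an existing message with the same id and
    # role, so repeated yields for one utterance update in place rather than
    # appending duplicates as the stream progresses.
    for message in conversation:
        if message.get("id") == id and message.get("role") == role:
            message["content"] = content
            return
    # This append is the line that appears in the diff context.
    conversation.append({"id": id, "role": role, "content": content})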
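The new yield shape in response — an audio-chunk tuple plus AdditionalOutputs(lepton_conversation, gradio_conversation) — matches the contract of gradio_webrtc-style streaming handlers, which is presumably where AdditionalOutputs comes from. A hypothetical wiring sketch under that assumption; the component names and event hookup below are illustrative, not part of this commit:

import gradio as gr
from gradio_webrtc import WebRTC, ReplyOnPause  # assumed dependency, not shown in the diff

with gr.Blocks() as demo:
    with gr.Row():
        audio = WebRTC(mode="send-receive", modality="audio")
        chatbot = gr.Chatbot(type="messages")
    lepton_conversation = gr.State([])  # raw message list sent to the Lepton endpoint
    client = gr.State(None)             # OpenAI client built from the user's API key
    output_format = gr.State("mp3")

    # ReplyOnPause invokes `response` whenever the speaker pauses; the audio
    # frames it yields are streamed back over the same WebRTC connection.
    audio.stream(
        ReplyOnPause(response),
        inputs=[audio, lepton_conversation, chatbot, client, output_format],
        outputs=[audio],
    )
    # The AdditionalOutputs yielded alongside each frame update the two
    # conversation views without interrupting the audio stream.
    audio.on_additional_outputs(
        lambda lepton, chat: (lepton, chat),
        outputs=[lepton_conversation, chatbot],
    )

demo.launch()

Because the handler now receives and mutates lepton_conversation and gradio_conversation directly, each session keeps its own history; that appears to be the point of dropping the module-level state object in this commit.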