archit11 commited on
Commit
05dddc6
1 parent: 62dda31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -5,6 +5,7 @@ import numpy as np
5
  from typing import Dict, List, Tuple
6
  import spaces
7
  import librosa
 
8
 
9
  MODEL_NAME = 'sarvamai/shuka_v1'
10
  SAMPLE_RATE = 16000
@@ -39,9 +40,14 @@ def transcribe_and_respond(audio_input: Tuple[int, np.ndarray]) -> str:
39
  if sample_rate != SAMPLE_RATE:
40
  audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=SAMPLE_RATE)
41
 
42
- turns = create_conversation_turns("<|audio|>")
 
 
 
 
 
43
  inputs = {
44
- 'audio': audio,
45
  'turns': turns,
46
  'sampling_rate': SAMPLE_RATE
47
  }
@@ -63,4 +69,4 @@ iface = gr.Interface(
63
 
64
  # Launch the app
65
  if __name__ == "__main__":
66
- iface.launch()
 
5
  from typing import Dict, List, Tuple
6
  import spaces
7
  import librosa
8
+ import soundfile as sf
9
 
10
  MODEL_NAME = 'sarvamai/shuka_v1'
11
  SAMPLE_RATE = 16000
 
40
  if sample_rate != SAMPLE_RATE:
41
  audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=SAMPLE_RATE)
42
 
43
+ # Convert the audio to WAV format
44
+ wav_data = librosa.util.buf_to_float(audio, n_bytes=2)
45
+ sf.write('temp_audio.wav', wav_data, SAMPLE_RATE)
46
+
47
+ # Prepare the inputs for the model
48
+ turns = create_conversation_turns("")
49
  inputs = {
50
+ 'audio': wav_data,
51
  'turns': turns,
52
  'sampling_rate': SAMPLE_RATE
53
  }
 
69
 
70
  # Launch the app
71
  if __name__ == "__main__":
72
+ iface.launch()