archit11 commited on
Commit
05dddc6
1 parent: 62dda31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -5,6 +5,7 @@ import numpy as np
5
  from typing import Dict, List, Tuple
6
  import spaces
7
  import librosa
 
8
 
9
  MODEL_NAME = 'sarvamai/shuka_v1'
10
  SAMPLE_RATE = 16000
@@ -39,9 +40,14 @@ def transcribe_and_respond(audio_input: Tuple[int, np.ndarray]) -> str:
39
  if sample_rate != SAMPLE_RATE:
40
  audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=SAMPLE_RATE)
41
 
42
- turns = create_conversation_turns("<|audio|>")
 
 
 
 
 
43
  inputs = {
44
- 'audio': audio,
45
  'turns': turns,
46
  'sampling_rate': SAMPLE_RATE
47
  }
@@ -63,4 +69,4 @@ iface = gr.Interface(
63
 
64
  # Launch the app
65
  if __name__ == "__main__":
66
- iface.launch()
 
5
  from typing import Dict, List, Tuple
6
  import spaces
7
  import librosa
8
+ import soundfile as sf
9
 
10
  MODEL_NAME = 'sarvamai/shuka_v1'
11
  SAMPLE_RATE = 16000
 
40
  if sample_rate != SAMPLE_RATE:
41
  audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=SAMPLE_RATE)
42
 
43
+ # Convert the audio to WAV format
44
+ wav_data = librosa.util.buf_to_float(audio, n_bytes=2)
45
+ sf.write('temp_audio.wav', wav_data, SAMPLE_RATE)
46
+
47
+ # Prepare the inputs for the model
48
+ turns = create_conversation_turns("")
49
  inputs = {
50
+ 'audio': wav_data,
51
  'turns': turns,
52
  'sampling_rate': SAMPLE_RATE
53
  }
 
69
 
70
  # Launch the app
71
  if __name__ == "__main__":
72
+ iface.launch()