archit11 committed
Commit 2ed7223
Parent: 3448b20

Create app.py

Files changed (1):
  1. app.py +34 -0
app.py ADDED
@@ -0,0 +1,34 @@
+ import transformers
+ import librosa
+ import gradio as gr
+ import spaces
+
+ pipe = transformers.pipeline(
+     model='sarvamai/shuka_v1',
+     trust_remote_code=True,
+     device=0,
+     torch_dtype='bfloat16'
+ )
+
+ @spaces.GPU(duration=120)
+ def transcribe_and_respond(audio_file):
+     audio, sr = librosa.load(audio_file, sr=16000)  # Resample the recording to 16 kHz, as the model expects
+
+     turns = [
+         {'role': 'system', 'content': 'Respond naturally and informatively.'},
+         {'role': 'user', 'content': '<|audio|>'}  # Placeholder the pipeline replaces with the audio clip
+     ]
+
+     response = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
+
+     return response
+
+ iface = gr.Interface(
+     fn=transcribe_and_respond,
+     inputs=gr.Audio(sources=["microphone"], type="filepath"),  # Use the microphone for audio input (Gradio 4 uses `sources` rather than `source`)
+     outputs="text",  # The output will be a text response
+     title="Voice Input for Transcription and Response",
+     description="Record your voice, and the model will respond naturally and informatively."
+ )
+
+ iface.launch()