Spaces:
Sleeping
Sleeping
divakaivan
committed on
Commit
•
9602bc7
1
Parent(s):
5233ff4
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import numpy as np
|
|
5 |
from datasets import load_dataset, Audio
|
6 |
from transformers import pipeline
|
7 |
import librosa
|
|
|
8 |
|
9 |
# Load ASR model
|
10 |
asr_pipe = pipeline(model="divakaivan/glaswegian-asr")
|
@@ -27,11 +28,14 @@ def transcribe(audio):
|
|
27 |
text = asr_pipe(audio)["text"]
|
28 |
return text
|
29 |
|
30 |
-
def generate_response(text):
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
35 |
|
36 |
def synthesize_speech(text):
|
37 |
inputs = processor(text=text, return_tensors="pt")
|
@@ -61,15 +65,18 @@ def create_speaker_embedding(waveform):
|
|
61 |
speaker_embeddings = speaker_embeddings.squeeze().cpu().numpy()
|
62 |
return speaker_embeddings
|
63 |
|
64 |
-
def voice_assistant(audio):
|
65 |
transcribed_text = transcribe(audio)
|
66 |
-
response_text = generate_response(transcribed_text)
|
67 |
speech_audio = synthesize_speech(response_text)
|
68 |
return speech_audio
|
69 |
|
70 |
iface = gr.Interface(
|
71 |
fn=voice_assistant,
|
72 |
-
inputs=
|
|
|
|
|
|
|
73 |
outputs=gr.Audio(label="Response Speech", type="numpy"),
|
74 |
title="Your Glaswegian Assistant"
|
75 |
)
|
|
|
5 |
from datasets import load_dataset, Audio
|
6 |
from transformers import pipeline
|
7 |
import librosa
|
8 |
+
from openai import OpenAI
|
9 |
|
10 |
# Load ASR model
|
11 |
asr_pipe = pipeline(model="divakaivan/glaswegian-asr")
|
|
|
28 |
text = asr_pipe(audio)["text"]
|
29 |
return text
|
30 |
|
31 |
+
def generate_response(text, api_key):
|
32 |
+
client = OpenAI(api_key=api_key)
|
33 |
+
response = client.chat.completions.create(
|
34 |
+
model='gpt-4o-mini',
|
35 |
+
messages=[{"role": "user", "content": text}]
|
36 |
+
)
|
37 |
+
|
38 |
+
return response.choices[0].message.content
|
39 |
|
40 |
def synthesize_speech(text):
|
41 |
inputs = processor(text=text, return_tensors="pt")
|
|
|
65 |
speaker_embeddings = speaker_embeddings.squeeze().cpu().numpy()
|
66 |
return speaker_embeddings
|
67 |
|
68 |
+
def voice_assistant(audio, api_key):
|
69 |
transcribed_text = transcribe(audio)
|
70 |
+
response_text = generate_response(transcribed_text, api_key)
|
71 |
speech_audio = synthesize_speech(response_text)
|
72 |
return speech_audio
|
73 |
|
74 |
iface = gr.Interface(
|
75 |
fn=voice_assistant,
|
76 |
+
inputs=[
|
77 |
+
gr.Audio(type="filepath"),
|
78 |
+
gr.Textbox(label="OpenAI API Key", type="password")
|
79 |
+
],
|
80 |
outputs=gr.Audio(label="Response Speech", type="numpy"),
|
81 |
title="Your Glaswegian Assistant"
|
82 |
)
|