File size: 1,910 Bytes
763f6d6
c153aa4
 
 
 
 
 
 
 
 
 
7c5e2f5
c153aa4
ecbfc2d
3ddb6ef
c153aa4
 
 
 
93471da
73b9216
2d076c6
7a90588
0256fc1
6185734
c6d9314
7104546
d7511d4
2d076c6
08aba45
 
 
39fde0b
 
1d48696
39fde0b
 
1d48696
39fde0b
3680dfd
c153aa4
94c77bb
9324d2a
0f9bdc0
08aba45
5af6446
08aba45
7bb0efa
 
 
 
 
be4e1da
 
 
 
 
 
6c3fca7
be4e1da
08aba45
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import gradio as gr
import torch
import io
import base64
import numpy as np
import scipy.io.wavfile
from typing import Text
from pyannote.audio import Pipeline
from pyannote.audio import Audio
from pyannote.core import Segment
import gradio as gr
import os


    
import yt_dlp as youtube_dl
from gradio_client import Client
from transformers.pipelines.audio_utils import ffmpeg_read

# Hugging Face access token for the gated pyannote models; expected to be
# set in the environment (e.g. as a Space secret).
HF_TOKEN = os.environ.get("HF_TOKEN")

# set up the diarization pipeline
#diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.0", use_auth_token=HF_TOKEN)
#diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=HF_TOKEN)
diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=HF_TOKEN)

# Move the pipeline to the GPU when one is available (fixes the original
# non-standard 1-space indentation; behavior unchanged).
if torch.cuda.is_available():
    diarization_pipeline.to(torch.device("cuda"))


import gradio as gr

def transcribe(audio_path, num_speakers=2):
    """Run speaker diarization on an audio file.

    Parameters
    ----------
    audio_path : str
        Filesystem path to the audio file (as provided by the Gradio
        ``Audio`` component with ``type="filepath"``).
    num_speakers : int, optional
        Expected number of speakers, forwarded to the pyannote pipeline.
        Defaults to 2, matching the original behavior.

    Returns
    -------
    The diarization result produced by the pyannote pipeline (rendered
    via ``str()`` by the Gradio Textbox output).
    """
    # Bug fix: the original call hard-coded num_speakers=2, silently
    # ignoring the caller-supplied value. Forward the parameter instead.
    diarization = diarization_pipeline(audio_path, num_speakers=num_speakers)

    return diarization

# Static UI copy for the Gradio interface (title/description/article).
title = "SAML Speaker Diarization ⚡️ "

description = """ pyannote speaker diarization running locally"""

article = """SAMLOne Speaker Segmentation or Diarization"""

import gradio as gr

def greet(name):
    """Return a greeting string for *name* (e.g. ``greet("Bob")`` -> ``"Hello Bob!!"``)."""
    pieces = ["Hello ", name, "!!"]
    return "".join(pieces)

# iface = gr.Interface(fn=transcribe, inputs=gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"), outputs="text")
# iface.launch()
# Build the Gradio UI: an audio upload feeding the diarization function,
# with the result shown as text.
with gr.Blocks(theme="rawrsor1/Everforest") as demo:
    # Upload widget; type="filepath" hands transcribe() a local file path.
    audio_input = gr.Audio(type="filepath")
    # str() rendering of the pyannote diarization result.
    text_output = gr.Textbox( label="speaker diarization")
    speaker_diarization_button = gr.Button("Submit")
    # Wire the button to transcribe(); num_speakers uses its default.
    speaker_diarization_button.click(fn=transcribe, inputs=[audio_input], outputs=[text_output])
demo.launch(debug=True)