Spaces:
Runtime error
Runtime error
Ahsen Khaliq
committed on
Commit
•
676bbaa
1
Parent(s):
60750ec
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Imports and one-time global setup for the wav2vec 2.0 Gradio demo.
import gradio as gr  # FIX: `gr` is used below (gr.inputs / gr.Interface) but was never imported — the Space's runtime error
import soundfile as sf
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

# Load the processor (feature extractor + tokenizer) and the CTC model once at
# startup so each request only runs inference, not model loading.
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-robust-ft-libri-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-robust-ft-libri-960h")
def map_to_array(file):
    """Load an audio file from disk and return its raw sample array.

    The sample rate returned by soundfile is discarded; only the waveform
    samples are passed on to the processor.
    """
    samples, _rate = sf.read(file)
    return samples
13 |
+
|
14 |
+
# tokenize
|
15 |
+
def inference(audio):
|
16 |
+
input_values = processor(map_to_array('/content/sample_data/sample2.flac'), return_tensors="pt", padding="longest").input_values # Batch size 1
|
17 |
+
|
18 |
+
# retrieve logits
|
19 |
+
logits = model(input_values).logits
|
20 |
+
|
21 |
+
# take argmax and decode
|
22 |
+
predicted_ids = torch.argmax(logits, dim=-1)
|
23 |
+
transcription = processor.batch_decode(predicted_ids)
|
24 |
+
return transcription[0]
|
25 |
+
|
26 |
+
inputs = gr.inputs.Audio(label="Input Audio", type="file")
|
27 |
+
outputs = gr.outputs.Textbox(label="Output Text")
|
28 |
+
|
29 |
+
title = "wav2vec 2.0"
|
30 |
+
description = "demo for Facebook AI wav2vec 2.0 using Hugging Face transformers. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."
|
31 |
+
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2006.11477'>wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations</a> | <a href='https://github.com/pytorch/fairseq'>Github Repo</a> | <a href='https://huggingface.co/facebook/wav2vec2-base-960h'>Hugging Face model</a></p>"
|
32 |
+
examples = [
|
33 |
+
["poem.wav"]
|
34 |
+
]
|
35 |
+
|
36 |
+
gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=examples).launch()
|