Vinicius Oliveira committed on
Commit 1b4bf5a
1 Parent(s): b01f775

adding requirements and app

Files changed (2)
  1. app.py +40 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,40 @@
+ import gradio as gr
+ import speech_recognition as sr
+ from pydub import AudioSegment
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+ import torch
+
+ # Load the pre-trained Portuguese wav2vec2 model and its processor
+ tokenizer = Wav2Vec2Processor.from_pretrained('jonatasgrosman/wav2vec2-large-xlsr-53-portuguese')
+ model = Wav2Vec2ForCTC.from_pretrained('jonatasgrosman/wav2vec2-large-xlsr-53-portuguese')
+ # Fallback recognizer from the speech_recognition package (unused in the wav2vec2 path)
+ recognizer = sr.Recognizer()
+
+ def recognize_speech(audio_path):
+     # Perform speech recognition on the captured audio
+     try:
+         # Load the clip and resample it to the 16 kHz rate the model expects
+         clip = AudioSegment.from_file(audio_path)
+         clip = clip.set_frame_rate(16000)
+         x = torch.FloatTensor(clip.get_array_of_samples())
+         inputs = tokenizer(x, sampling_rate=16000, return_tensors='pt', padding='longest').input_values
+         with torch.no_grad():
+             logits = model(inputs).logits
+         tokens = torch.argmax(logits, dim=-1)
+         text = tokenizer.batch_decode(tokens)[0]
+         return text.lower()
+     except sr.UnknownValueError:
+         return "Could not understand the audio."
+     except sr.RequestError as e:
+         return f"Error accessing the Google Speech Recognition service: {e}"
+
+ # Create the Gradio interface with microphone input
+ audio_recognizer_interface = gr.Interface(
+     fn=recognize_speech,
+     inputs=gr.Audio(source="microphone", type="filepath", label="Speak into the microphone..."),
+     outputs="text",
+     title="Real-time Speech Recognition"
+ )
+
+ # Run the interface
+ audio_recognizer_interface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ SpeechRecognition
+ pydub
+ transformers
+ torch
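
A minimal standalone usage sketch (not part of the commit): it transcribes a local recording with the same model that app.py loads, which also doubles as a quick check that the packages in requirements.txt import correctly. The file name sample.wav and the mono conversion are assumptions, not something the commit specifies.

# Hypothetical smoke test for the committed dependencies and model.
# 'sample.wav' is an assumed local recording; adjust the path as needed.
import torch
from pydub import AudioSegment
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

processor = Wav2Vec2Processor.from_pretrained('jonatasgrosman/wav2vec2-large-xlsr-53-portuguese')
model = Wav2Vec2ForCTC.from_pretrained('jonatasgrosman/wav2vec2-large-xlsr-53-portuguese')

# Resample to 16 kHz mono, matching what the model expects
clip = AudioSegment.from_file('sample.wav').set_frame_rate(16000).set_channels(1)
samples = torch.FloatTensor(clip.get_array_of_samples())
inputs = processor(samples, sampling_rate=16000, return_tensors='pt', padding='longest').input_values
with torch.no_grad():
    logits = model(inputs).logits
print(processor.batch_decode(torch.argmax(logits, dim=-1))[0].lower())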