muzammil-eds committed on
Commit
869504a
1 Parent(s): 83bef30

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -30
app.py CHANGED
@@ -1,32 +1,37 @@
1
  import gradio as gr
2
  import requests
3
- import Levenshtein
4
- import librosa
5
- import torch
6
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
7
 
8
- def load_model():
9
- MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
10
- processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
11
- model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
12
- return processor, model
13
 
14
- processor, model = load_model()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def transcribe_audio_hf(audio_path):
17
  """
18
- Transcribes speech from an audio file using a pretrained Wav2Vec2 model.
19
  Args:
20
  audio_path (str): Path to the audio file.
21
  Returns:
22
- str: The transcription of the speech in the audio file.
23
  """
24
- speech_array, sampling_rate = librosa.load(audio_path, sr=16000)
25
- input_values = processor(speech_array, sampling_rate=sampling_rate, return_tensors="pt", padding=True).input_values
26
- with torch.no_grad():
27
- logits = model(input_values).logits
28
- predicted_ids = torch.argmax(logits, dim=-1)
29
- transcription = processor.batch_decode(predicted_ids)[0].strip()
30
  return transcription
31
 
32
  def levenshtein_similarity(transcription1, transcription2):
@@ -38,6 +43,7 @@ def levenshtein_similarity(transcription1, transcription2):
38
  Returns:
39
  float: A normalized similarity score between 0 and 1, where 1 indicates identical transcriptions.
40
  """
 
41
  distance = Levenshtein.distance(transcription1, transcription2)
42
  max_len = max(len(transcription1), len(transcription2))
43
  return 1 - distance / max_len # Normalize to get similarity score
@@ -67,19 +73,20 @@ def perform_testing(original_audio, user_audio):
67
 
68
  # Gradio Interface
69
  with gr.Blocks() as app:
70
- gr.Markdown("# Audio Transcription and Similarity Checker")
71
 
72
- original_audio_upload = gr.Audio(label="Upload Original Audio", type="filepath")
73
- user_audio_upload = gr.Audio(label="Upload User Audio", type="filepath")
74
- upload_button = gr.Button("Perform Testing")
75
- output_original_transcription = gr.Markdown()
76
- output_user_transcription = gr.Markdown()
77
- output_similarity_score = gr.Markdown()
 
78
 
79
- upload_button.click(
80
- perform_testing,
81
- inputs=[original_audio_upload, user_audio_upload],
82
- outputs=[output_original_transcription, output_user_transcription, output_similarity_score]
83
- )
84
 
85
  app.launch()
 
1
  import gradio as gr
2
  import requests
3
+ import os
 
 
 
4
 
5
# Hugging Face Inference API endpoint for the Arabic Wav2Vec2 speech-recognition model.
API_URL = "https://api-inference.huggingface.co/models/jonatasgrosman/wav2vec2-large-xlsr-53-arabic"

# The API token is read from the HF_API_TOKEN environment variable
# (presumably provided as a Hugging Face Space secret -- confirm in deployment).
hf_api_token = os.getenv("HF_API_TOKEN")

# Only attach an Authorization header when a token is actually configured;
# otherwise the literal string "Bearer None" would be sent with every request.
headers = {"Authorization": f"Bearer {hf_api_token}"} if hf_api_token else {}
11
+
12
def query(filename, timeout=60):
    """
    Queries the Hugging Face API to transcribe audio from a file.

    Args:
        filename (str): Path to the audio file.
        timeout (float): Seconds to wait for the API before giving up.

    Returns:
        dict: The decoded JSON body of the API response. If the body is not
            valid JSON, a dict with "error" (raw response text) and
            "status_code" keys is returned instead.

    Raises:
        OSError: If the audio file cannot be opened or read.
        requests.exceptions.RequestException: If the request fails or times out.
    """
    with open(filename, "rb") as f:
        data = f.read()

    # requests has no default timeout; without one a stalled connection would
    # block the caller indefinitely.
    response = requests.post(API_URL, headers=headers, data=data, timeout=timeout)

    try:
        return response.json()
    except ValueError:
        # The body was not JSON (e.g. an HTML error page); keep the dict
        # contract so callers using .get() continue to work.
        return {"error": response.text, "status_code": response.status_code}
24
 
25
  def transcribe_audio_hf(audio_path):
26
  """
27
+ Transcribes the audio using the Hugging Face Inference API.
28
  Args:
29
  audio_path (str): Path to the audio file.
30
  Returns:
31
+ str: The transcription from the API.
32
  """
33
+ result = query(audio_path)
34
+ transcription = result.get('text', '').strip()
 
 
 
 
35
  return transcription
36
 
37
  def levenshtein_similarity(transcription1, transcription2):
 
43
  Returns:
44
  float: A normalized similarity score between 0 and 1, where 1 indicates identical transcriptions.
45
  """
46
+ import Levenshtein
47
  distance = Levenshtein.distance(transcription1, transcription2)
48
  max_len = max(len(transcription1), len(transcription2))
49
  return 1 - distance / max_len # Normalize to get similarity score
 
73
 
74
  # Gradio Interface
75
  with gr.Blocks() as app:
76
+ gr.Markdown("# Audio Transcription and Similarity Checker using Hugging Face Inference API")
77
 
78
+ with gr.Tab("Upload"):
79
+ original_audio_upload = gr.Audio(label="Upload Original Audio", type="filepath")
80
+ user_audio_upload = gr.Audio(label="Upload User Audio", type="filepath")
81
+ upload_button = gr.Button("Perform Testing")
82
+ output_original_transcription = gr.Markdown()
83
+ output_user_transcription = gr.Markdown()
84
+ output_similarity_score = gr.Markdown()
85
 
86
+ upload_button.click(
87
+ perform_testing,
88
+ inputs=[original_audio_upload, user_audio_upload],
89
+ outputs=[output_original_transcription, output_user_transcription, output_similarity_score]
90
+ )
91
 
92
  app.launch()