tonic committed on
Commit
971bee9
1 Parent(s): 5701b30

adding long audio parsing

Browse files
Files changed (2) hide show
  1. app.py +30 -2
  2. requirements.txt +2 -1
app.py CHANGED
@@ -14,7 +14,9 @@ import cohere
14
  import os
15
  import re
16
  import pandas as pd
17
-
 
 
18
 
19
  title = "# Welcome to AyaTonic"
20
  description = "Learn a New Language With Aya"
@@ -70,6 +72,31 @@ def translate_text(text, instructions=translatetextinst):
70
  )
71
  return response.generations[0].text
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  class TaggedPhraseExtractor:
74
  def __init__(self, text=''):
75
  self.text = text
@@ -184,7 +211,8 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
184
  final_text += "\nUnsupported file type."
185
  print("OCR Text: ", final_text)
186
  if audio is not None:
187
- audio_text = process_audio_to_text(audio)
 
188
  final_text += "\n" + audio_text
189
 
190
  final_text_with_producetext = final_text + producetext
 
14
  import os
15
  import re
16
  import pandas as pd
17
+ import pydub
18
+ from pydub import AudioSegment
19
+ from pydub.utils import make_chunks
20
 
21
  title = "# Welcome to AyaTonic"
22
  description = "Learn a New Language With Aya"
 
72
  )
73
  return response.generations[0].text
74
 
75
class LongAudioProcessor:
    """Transcribe long recordings by splitting them into short chunks.

    Each chunk is exported to a temporary WAV file and transcribed on its
    own; the per-chunk transcripts are joined with single spaces.
    """

    def __init__(self, audio_client, api_key=None):
        """Store the client handle and optional API key for later use.

        Args:
            audio_client: Client object for the speech-to-text backend.
            api_key: Optional credential; stored but not used by this class.
        """
        self.client = audio_client
        self.api_key = api_key

    def process_audio_to_text(self, audio_path):
        """Transcribe a single audio file and return its text.

        Delegates to the module-level ``process_audio_to_text`` helper.
        NOTE(review): that helper is assumed to exist in this module (the
        code this class replaced called it directly) -- confirm.
        """
        # Inside a method body a bare name skips class scope, so this
        # resolves to the module-level function, not to this method.
        return process_audio_to_text(audio_path)

    def process_long_audio(self, audio_path, chunk_length_ms=20000,
                           inputlanguage=None, outputlanguage=None):
        """Transcribe ``audio_path`` chunk by chunk and return the full text.

        Args:
            audio_path: Path to an audio file readable by pydub.
            chunk_length_ms: Length of each chunk in milliseconds
                (default 20 s). Must be positive.
            inputlanguage: Accepted so existing callers that pass it keep
                working. NOTE(review): currently unused -- the transcription
                helper's signature is not visible here; wire through if it
                supports a source language.
            outputlanguage: Same as ``inputlanguage``; currently unused.

        Returns:
            The chunk transcripts joined by single spaces. Chunks whose
            transcription fails are logged and skipped (best effort).

        Raises:
            ValueError: If ``chunk_length_ms`` is not positive.
        """
        if chunk_length_ms <= 0:
            raise ValueError("chunk_length_ms must be a positive number of milliseconds")

        # Local import keeps this block self-contained; only ``os`` is
        # known to be imported at the top of the file.
        import tempfile

        audio = AudioSegment.from_file(audio_path)
        transcripts = []
        # A private temporary directory avoids name clashes between
        # concurrent requests and is removed even when an error escapes.
        with tempfile.TemporaryDirectory() as workdir:
            for i, chunk in enumerate(make_chunks(audio, chunk_length_ms)):
                chunk_name = f"chunk{i}.wav"
                chunk_path = os.path.join(workdir, chunk_name)
                chunk.export(chunk_path, format="wav")
                try:
                    text = self.process_audio_to_text(chunk_path).strip()
                except Exception as e:
                    # Best effort: one bad chunk must not discard the rest.
                    print(f"Error processing {chunk_name}: {e}")
                    continue
                if text:
                    transcripts.append(text)
        return " ".join(transcripts)
100
  class TaggedPhraseExtractor:
101
  def __init__(self, text=''):
102
  self.text = text
 
211
  final_text += "\nUnsupported file type."
212
  print("OCR Text: ", final_text)
213
  if audio is not None:
214
+ long_audio_processor = LongAudioProcessor(audio_client)
215
+ audio_text = long_audio_processor.process_long_audio(audio, inputlanguage=translatefrom, outputlanguage=translateto)
216
  final_text += "\n" + audio_text
217
 
218
  final_text_with_producetext = final_text + producetext
requirements.txt CHANGED
@@ -7,4 +7,5 @@ pillow
7
  torchvision
8
  torch
9
  python-dotenv
10
- pandas
 
 
7
  torchvision
8
  torch
9
  python-dotenv
10
+ pandas
11
+ pydub