tonic committed on
Commit
be7cfd1
1 Parent(s): 6a99d7a
Files changed (1)
  1. app.py +16 -9
app.py CHANGED
@@ -58,7 +58,7 @@ def get_language_code(language_name):
     print(f"Language name '{language_name}' not found.")
     return None
 
-def translate_text(text, instructions=translatetextinst):
+def translate_text(text, instructions=translatetextinst.format(input_language=inputlanguage)):
     """
     translates text.
     """
@@ -77,9 +77,10 @@ def translate_text(text, instructions=translatetextinst):
 class LongAudioProcessor:
     def __init__(self, audio_client, api_key=None):
         self.client = audio_client
+        self.process_audio_to_text = process_audio_to_text
         self.api_key = api_key
 
-    def process_long_audio(self, audio_path, chunk_length_ms=20000):
+    def process_long_audio(self, audio_path, inputlanguage, outputlanguage, chunk_length_ms=20000):
         """
         Process audio files longer than 29 seconds by chunking them into smaller segments.
         """
@@ -91,7 +92,7 @@ class LongAudioProcessor:
             with open(chunk_name, 'wb') as file:
                 chunk.export(file, format="wav")
             try:
-                result = self.process_audio_to_text(chunk_name)
+                result = self.process_audio_to_text(chunk_name, inputlanguage=inputlanguage, outputlanguage=outputlanguage)
                 full_text += " " + result.strip()
             except Exception as e:
                 print(f"Error processing {chunk_name}: {e}")
@@ -161,11 +162,18 @@ def process_text_to_audio(text, translatefrom="English", translateto="English"):
     )
     return result[0]
 
+def initialize_ocr_models():
+    """
+    Load the detection and recognition models along with their processors.
+    """
+    det_processor, det_model = load_det_processor(), load_det_model()
+    rec_model, rec_processor = load_rec_model(), load_rec_processor()
+    return det_processor, det_model, rec_model, rec_processor
+
 class OCRProcessor:
     def __init__(self, lang_code=["en"]):
         self.lang_code = lang_code
-        self.det_processor, self.det_model = load_det_processor(), load_det_model()
-        self.rec_model, self.rec_processor = load_rec_model(), load_rec_processor()
+        self.det_processor, self.det_model, self.rec_model, self.rec_processor = initialize_ocr_models()
 
     def process_image(self, image):
         """
@@ -215,14 +223,14 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "English"):
 
     response = co.generate(
         model='c4ai-aya',
-        prompt=final_text_with_producetext,
+        prompt=final_text_with_producetext.format(target_language=target_language),
         max_tokens=1024,
         temperature=0.5
     )
     # add graceful handling for errors (overflow)
     generated_text = response.generations[0].text
     print("Generated Text: ", generated_text)
-    generated_text_with_format = generated_text + "\n" + formatinputstring
+    generated_text_with_format = generated_text + "\n" + formatinputstring,
     response = co.generate(
         model='command-nightly',
         prompt=generated_text_with_format,
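The "# add graceful handling for errors (overflow)" comment marks work this commit does not do yet. A minimal sketch of one way to wrap the call, using only the generate signature visible above; safe_generate is a hypothetical helper and the exception handling is kept generic because the Cohere client's specific error classes are not shown here:

def safe_generate(co, prompt, model='c4ai-aya', max_tokens=1024, temperature=0.5):
    """Call co.generate and return plain text, or '' if the request fails (hypothetical wrapper)."""
    try:
        response = co.generate(model=model, prompt=prompt, max_tokens=max_tokens, temperature=temperature)
        return response.generations[0].text
    except Exception as e:  # specific client exception types are not assumed here
        print(f"Generation failed for model {model}: {e}")
        return ""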
@@ -283,9 +291,8 @@ outputs = [
 def update_outputs(inputlanguage, target_language, audio, image, text, file):
     final_text, top_phrases, translations, audio_outputs = process_input(
         image=image, file=file, audio=audio, text=text,
-        translatefrom=inputlanguage, translateto=target_language
+        translateto=target_language, translatefrom=inputlanguage
     )
-
     processed_text_output = final_text
     audio_output_native_phrases = [native for _, native in audio_outputs]
     audio_output_target_phrases = [target for target, _ in audio_outputs]
 