tonic committed
Commit: be7cfd1
Parent(s): 6a99d7a
bug fixes

app.py CHANGED
@@ -58,7 +58,7 @@ def get_language_code(language_name):
     print(f"Language name '{language_name}' not found.")
     return None
 
-def translate_text(text, instructions=translatetextinst):
+def translate_text(text, instructions=translatetextinst.format(input_language=inputlanguage)):
     """
     translates text.
     """
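A side note on this hunk: the new default argument is evaluated once, when the def statement runs, so a module-level inputlanguage must already exist at import time, and later reassignments do not change the default. A minimal, self-contained illustration of that Python behaviour (the names here are illustrative, not the Space's actual code):

inputlanguage = "English"

def translate_text(text, instructions="Translate from {input_language}.".format(input_language=inputlanguage)):
    # The default string was built once, when this def statement ran.
    return f"{instructions} {text}"

inputlanguage = "French"           # has no effect on the already-evaluated default
print(translate_text("bonjour"))   # prints: Translate from English. bonjour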
@@ -77,9 +77,10 @@ def translate_text(text, instructions=translatetextinst):
 class LongAudioProcessor:
     def __init__(self, audio_client, api_key=None):
         self.client = audio_client
+        self.process_audio_to_text = process_audio_to_text
         self.api_key = api_key
 
-    def process_long_audio(self, audio_path, chunk_length_ms=20000):
+    def process_long_audio(self, audio_path, inputlanguage, outputlanguage, chunk_length_ms=20000):
         """
         Process audio files longer than 29 seconds by chunking them into smaller segments.
         """
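The added self.process_audio_to_text = process_audio_to_text line stores a module-level function (assumed to be defined elsewhere in app.py) as an instance attribute. Instance attributes found this way are plain functions, not bound methods, so no implicit self is passed when they are called, which is what the call in the next hunk relies on. A small sketch of that mechanism with a stand-in helper:

def process_audio_to_text(path, inputlanguage="English", outputlanguage="English"):
    # Stand-in for the real transcription helper defined elsewhere in app.py.
    return f"transcript of {path} ({inputlanguage} to {outputlanguage})"

class LongAudioProcessor:
    def __init__(self, audio_client, api_key=None):
        self.client = audio_client
        self.process_audio_to_text = process_audio_to_text  # plain function, not a bound method
        self.api_key = api_key

processor = LongAudioProcessor(audio_client=None)
# No implicit self is passed, so the first positional argument is the path:
print(processor.process_audio_to_text("chunk_0.wav", inputlanguage="French", outputlanguage="English"))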
@@ -91,7 +92,7 @@ class LongAudioProcessor:
             with open(chunk_name, 'wb') as file:
                 chunk.export(file, format="wav")
             try:
-                result = self.process_audio_to_text(chunk_name)
+                result = self.process_audio_to_text(chunk_name, inputlanguage=inputlanguage, outputlanguage=outputlanguage)
                 full_text += " " + result.strip()
             except Exception as e:
                 print(f"Error processing {chunk_name}: {e}")
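For context, the loop these lines sit inside presumably slices the recording into chunk_length_ms pieces before exporting each one. A minimal sketch of that splitting step, assuming pydub's AudioSegment (the import is not shown in this diff):

from pydub import AudioSegment  # assumption: app.py already uses pydub for these chunks

def split_into_chunks(audio_path, chunk_length_ms=20000):
    """Slice a recording into 20-second pieces so each stays under the ~29-second limit."""
    audio = AudioSegment.from_file(audio_path)
    return [audio[start:start + chunk_length_ms] for start in range(0, len(audio), chunk_length_ms)]

# Each returned chunk supports chunk.export(file, format="wav"), matching the hunk above.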
@@ -161,11 +162,18 @@ def process_text_to_audio(text, translatefrom="English", translateto="English"):
     )
     return result[0]
 
+def initialize_ocr_models():
+    """
+    Load the detection and recognition models along with their processors.
+    """
+    det_processor, det_model = load_det_processor(), load_det_model()
+    rec_model, rec_processor = load_rec_model(), load_rec_processor()
+    return det_processor, det_model, rec_model, rec_processor
+
 class OCRProcessor:
     def __init__(self, lang_code=["en"]):
         self.lang_code = lang_code
-        self.det_processor, self.det_model =
-        self.rec_model, self.rec_processor = load_rec_model(), load_rec_processor()
+        self.det_processor, self.det_model, self.rec_model, self.rec_processor = initialize_ocr_models()
 
     def process_image(self, image):
         """
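A small sketch of the contract the new helper establishes: callers must unpack its return value in the same det-processor, det-model, rec-model, rec-processor order. The stub loaders below only stand in for the real ones, which are imported elsewhere in app.py and do not appear in this diff:

def load_det_processor(): return "det-processor"   # stub
def load_det_model(): return "det-model"           # stub
def load_rec_model(): return "rec-model"           # stub
def load_rec_processor(): return "rec-processor"   # stub

def initialize_ocr_models():
    det_processor, det_model = load_det_processor(), load_det_model()
    rec_model, rec_processor = load_rec_model(), load_rec_processor()
    return det_processor, det_model, rec_model, rec_processor

det_processor, det_model, rec_model, rec_processor = initialize_ocr_models()
print(det_processor, det_model, rec_model, rec_processor)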
@@ -215,14 +223,14 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
 
     response = co.generate(
         model='c4ai-aya',
-        prompt=final_text_with_producetext,
+        prompt=final_text_with_producetext.format(target_language=target_language),
         max_tokens=1024,
         temperature=0.5
     )
     # add graceful handling for errors (overflow)
     generated_text = response.generations[0].text
     print("Generated Text: ", generated_text)
-    generated_text_with_format = generated_text + "\n" + formatinputstring
+    generated_text_with_format = generated_text + "\n" + formatinputstring,
     response = co.generate(
         model='command-nightly',
         prompt=generated_text_with_format,
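The prompt change above relies on final_text_with_producetext containing a {target_language} placeholder (and no other unescaped braces, which str.format would reject). A hypothetical illustration of the implied template shape; the Space's real prompt text is defined elsewhere in app.py and is not part of this diff:

producetext = "\n\nWrite a short study text in {target_language} based on the input above."
final_text_with_producetext = "Les chats dorment beaucoup." + producetext

prompt = final_text_with_producetext.format(target_language="English")
print(prompt)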
@@ -283,9 +291,8 @@ outputs = [
 def update_outputs(inputlanguage, target_language, audio, image, text, file):
     final_text, top_phrases, translations, audio_outputs = process_input(
         image=image, file=file, audio=audio, text=text,
-        translatefrom=inputlanguage
+        translateto=target_language, translatefrom=inputlanguage
     )
-
     processed_text_output = final_text
     audio_output_native_phrases = [native for _, native in audio_outputs]
     audio_output_target_phrases = [target for target, _ in audio_outputs]
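After the last hunk, update_outputs forwards both translation directions explicitly. A usage sketch of the resulting call, assuming it runs inside app.py where process_input is defined, with illustrative argument values:

final_text, top_phrases, translations, audio_outputs = process_input(
    image=None, file=None, audio=None, text="Bonjour tout le monde",
    translateto="English", translatefrom="French",
)
print(final_text)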