Amelia-James
committed on
Commit
•
4b3604b
1
Parent(s):
cfea1fe
Update app.py
Browse files
app.py
CHANGED
@@ -60,13 +60,20 @@ LANGUAGES = {
|
|
60 |
# Function to get the appropriate translation model and tokenizer
|
61 |
def get_translation_model(source_lang, target_lang):
|
62 |
model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
# Function to translate text
|
68 |
def translate_text(text, source_lang, target_lang):
|
69 |
model, tokenizer = get_translation_model(source_lang, target_lang)
|
|
|
|
|
70 |
inputs = tokenizer([text], return_tensors="pt", truncation=True)
|
71 |
translated_ids = model.generate(inputs['input_ids'], max_length=1024)
|
72 |
translated_text = tokenizer.decode(translated_ids[0], skip_special_tokens=True)
|
@@ -85,12 +92,14 @@ def summarize_text(text, target_language="English"):
|
|
85 |
early_stopping=True
|
86 |
)
|
87 |
summary = multilingual_summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
|
|
88 |
|
89 |
-
target_lang_code = LANGUAGES
|
90 |
|
91 |
# Translate summary to the target language if needed
|
92 |
if target_lang_code != "en_XX":
|
93 |
summary = translate_text(summary, "en_XX", target_lang_code)
|
|
|
94 |
|
95 |
return summary
|
96 |
|
|
|
60 |
# Function to get the appropriate translation model and tokenizer
|
61 |
def get_translation_model(source_lang, target_lang):
    """Load the Helsinki-NLP OPUS-MT model and tokenizer for a language pair.

    Language codes may carry a region suffix such as ``"en_XX"`` (the form
    the summary-translation path in this file passes in). OPUS-MT checkpoint
    names are built from the bare language code (e.g. ``opus-mt-en-fr``), so
    everything from the first underscore onwards is dropped before the
    checkpoint name is assembled. Codes without an underscore are used as-is.

    Args:
        source_lang: Code of the language to translate from.
        target_lang: Code of the language to translate into.

    Returns:
        A ``(model, tokenizer)`` tuple on success, or ``(None, None)`` when
        the checkpoint could not be loaded. Callers must check for ``None``.
    """
    # "en_XX" -> "en"; a plain "en" is left untouched.
    src_code = source_lang.split("_", 1)[0]
    tgt_code = target_lang.split("_", 1)[0]
    model_name = f"Helsinki-NLP/opus-mt-{src_code}-{tgt_code}"

    try:
        model = MarianMTModel.from_pretrained(model_name)
        tokenizer = MarianTokenizer.from_pretrained(model_name)
    except Exception as e:
        # Deliberately broad: loading can fail for many reasons (unknown
        # language pair, network, corrupt cache) and this function reports
        # failure to its caller through the (None, None) return value.
        print(f"Error loading translation model for {source_lang} to {target_lang}: {e}")
        return None, None

    print(f"Loaded translation model for {source_lang} to {target_lang}")
    return model, tokenizer
|
71 |
|
72 |
# Function to translate text
|
73 |
def translate_text(text, source_lang, target_lang):
|
74 |
model, tokenizer = get_translation_model(source_lang, target_lang)
|
75 |
+
if model is None or tokenizer is None:
|
76 |
+
return "Translation model error."
|
77 |
inputs = tokenizer([text], return_tensors="pt", truncation=True)
|
78 |
translated_ids = model.generate(inputs['input_ids'], max_length=1024)
|
79 |
translated_text = tokenizer.decode(translated_ids[0], skip_special_tokens=True)
|
|
|
92 |
early_stopping=True
|
93 |
)
|
94 |
summary = multilingual_summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
95 |
+
print(f"Generated summary in English: {summary}")
|
96 |
|
97 |
+
target_lang_code = LANGUAGES.get(target_language, "en_XX")
|
98 |
|
99 |
# Translate summary to the target language if needed
|
100 |
if target_lang_code != "en_XX":
|
101 |
summary = translate_text(summary, "en_XX", target_lang_code)
|
102 |
+
print(f"Translated summary to {target_language}: {summary}")
|
103 |
|
104 |
return summary
|
105 |
|