Spaces:
Running
on
Zero
Running
on
Zero
cointegrated
committed on
Commit
•
d0ffdbf
1
Parent(s):
5c565ab
adjust the generation parameters to avoid repetitions
Browse files
app.py
CHANGED
@@ -69,6 +69,9 @@ def _translate(text: str, src_lang: str, tgt_lang: str):
|
|
69 |
forced_bos_token_id=tokenizer.convert_tokens_to_ids(code_mapping[tgt_lang]),
|
70 |
max_length=len(input_tokens) + 50,
|
71 |
num_return_sequences=1,
|
|
|
|
|
|
|
72 |
)
|
73 |
translated_chunk = tokenizer.decode(
|
74 |
translated_chunk[0], skip_special_tokens=True
|
|
|
69 |
forced_bos_token_id=tokenizer.convert_tokens_to_ids(code_mapping[tgt_lang]),
|
70 |
max_length=len(input_tokens) + 50,
|
71 |
num_return_sequences=1,
|
72 |
+
num_beams=5,
|
73 |
+
no_repeat_ngram_size=4, # repetition blocking works better if this number is below num_beams
|
74 |
+
renormalize_logits=True, # recompute token probabilities after banning the repetitions
|
75 |
)
|
76 |
translated_chunk = tokenizer.decode(
|
77 |
translated_chunk[0], skip_special_tokens=True
|