cointegrated committed on
Commit
d0ffdbf
1 Parent(s): 5c565ab

adjust the generation parameters to avoid repetitions

Browse files
Files changed (1) hide show
  1. app.py +3 -0
app.py CHANGED
@@ -69,6 +69,9 @@ def _translate(text: str, src_lang: str, tgt_lang: str):
69
  forced_bos_token_id=tokenizer.convert_tokens_to_ids(code_mapping[tgt_lang]),
70
  max_length=len(input_tokens) + 50,
71
  num_return_sequences=1,
 
 
 
72
  )
73
  translated_chunk = tokenizer.decode(
74
  translated_chunk[0], skip_special_tokens=True
 
69
  forced_bos_token_id=tokenizer.convert_tokens_to_ids(code_mapping[tgt_lang]),
70
  max_length=len(input_tokens) + 50,
71
  num_return_sequences=1,
72
+ num_beams=5,
73
+ no_repeat_ngram_size=4, # repetition blocking works better if this number is below num_beams
74
+ renormalize_logits=True, # recompute token probabilities after banning the repetitions
75
  )
76
  translated_chunk = tokenizer.decode(
77
  translated_chunk[0], skip_special_tokens=True