donut-web / special_tokens_map.json
laverdes's picture
feat: new tokenizer and config
29b0e23
{
"additional_special_tokens": [
"<s_Text>",
"</s_Text>",
"<s_Link>",
"</s_Link>",
"<s_Caption>",
"</s_Caption>",
"<s_Subheadline>",
"</s_Subheadline>",
"<s_Misc>",
"</s_Misc>",
"<s_Headline>",
"</s_Headline>",
"<s_Address>",
"</s_Address>",
"<s_List-item>",
"</s_List-item>",
"<s_Footer>",
"</s_Footer>",
"<s_Value>",
"</s_Value>",
"<s_Author>",
"</s_Author>",
"<s_Field-Name>",
"</s_Field-Name>",
"<s_Table>",
"</s_Table>",
"<s_Header>",
"</s_Header>",
"<s_Title>",
"</s_Title>",
"<s_Advertisement>",
"</s_Advertisement>",
"<s_Metadata>",
"</s_Metadata>",
"<s_Chart>",
"</s_Chart>",
"<s_Page number>",
"</s_Page number>",
"<s_Abstract>",
"</s_Abstract>",
"<s>",
"</s>"
],
"bos_token": "<s>",
"cls_token": "<s>",
"eos_token": "</s>",
"mask_token": {
"content": "<mask>",
"lstrip": true,
"normalized": true,
"rstrip": false,
"single_word": false
},
"pad_token": "<pad>",
"sep_token": "</s>",
"unk_token": "<unk>"
}