File size: 550 Bytes
d76004b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
{
  "tokenizer_name": "CSUMLM Tokenizer",
  "model_name": "CSUMLM",
  "description": "Tokenizer for the CognoSphere Unified Multimodal Language Model",
  "author": "Or4cl3 AI Solutions",
  "language": "Multimodal (Text, Image, Audio)",
  "vocab_size": 32000,
  "max_sequence_length": 512,
  "special_tokens": {
    "bos_token": "<BOS>",
    "eos_token": "<EOS>",
    "pad_token": "<PAD>",
    "unk_token": "<UNK>",
    "mask_token": "<MASK>"
  },
  "tokenization_method": "Byte Pair Encoding (BPE)",
  "training_data": "Custom 1500 Example Dataset"
}