veerpareek committed on
Commit
577d9ca
1 Parent(s): 01ddce8

Upload 35 files

Files changed (36)
  1. .gitattributes +2 -0
  2. README.md +13 -0
  3. __pycache__/inference.cpython-311.pyc +0 -0
  4. __pycache__/load_model.cpython-311.pyc +0 -0
  5. __pycache__/processor.cpython-311.pyc +0 -0
  6. app.py +103 -0
  7. fine_tune.py +165 -0
  8. inference.py +132 -0
  9. load_model.py +50 -0
  10. model/language/__pycache__/language_components.cpython-311.pyc +0 -0
  11. model/language/__pycache__/language_config.cpython-311.pyc +0 -0
  12. model/language/__pycache__/language_model.cpython-311.pyc +0 -0
  13. model/language/language_components.py +192 -0
  14. model/language/language_config.py +33 -0
  15. model/language/language_model.py +47 -0
  16. model/multimodal/__pycache__/multimodal_components.cpython-311.pyc +0 -0
  17. model/multimodal/__pycache__/multimodal_config.cpython-311.pyc +0 -0
  18. model/multimodal/__pycache__/multimodal_model.cpython-311.pyc +0 -0
  19. model/multimodal/multimodal_components.py +59 -0
  20. model/multimodal/multimodal_config.py +35 -0
  21. model/multimodal/multimodal_model.py +98 -0
  22. model/utils/__pycache__/kv_cache.cpython-311.pyc +0 -0
  23. model/utils/kv_cache.py +29 -0
  24. model/vision/__pycache__/siglip_components.cpython-311.pyc +0 -0
  25. model/vision/__pycache__/siglip_config.cpython-311.pyc +0 -0
  26. model/vision/__pycache__/siglip_model.cpython-311.pyc +0 -0
  27. model/vision/siglip_components.py +141 -0
  28. model/vision/siglip_config.py +25 -0
  29. model/vision/siglip_model.py +14 -0
  30. processor.py +47 -0
  31. requirements.txt +11 -0
  32. run.sh +20 -0
  33. tokenizer/special_tokens_map.json +33 -0
  34. tokenizer/tokenizer.json +3 -0
  35. tokenizer/tokenizer.model +3 -0
  36. tokenizer/tokenizer_config.json +1764 -0
.gitattributes ADDED
@@ -0,0 +1,2 @@
+ tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ tokenizer/tokenizer.model filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: Vlm O
+ emoji: 💬
+ colorFrom: yellow
+ colorTo: purple
+ sdk: gradio
+ sdk_version: 4.36.1
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ ---
+
+ A [Gradio](https://gradio.app) demo for image captioning and visual question answering with a from-scratch PaliGemma implementation (SigLIP vision tower + Gemma-style language model). `app.py` defines the UI, `inference.py` the generation loop, and `load_model.py` loads the weights with [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index).
__pycache__/inference.cpython-311.pyc ADDED
Binary file (6.25 kB).
 
__pycache__/load_model.cpython-311.pyc ADDED
Binary file (3.17 kB).
 
__pycache__/processor.cpython-311.pyc ADDED
Binary file (4.88 kB).
 
app.py ADDED
@@ -0,0 +1,103 @@
1
+ import gradio as gr
2
+ import torch
3
+ from PIL import Image
4
+ from processor import MultiModalProcessor
5
+ from inference import test_inference
6
+ from load_model import load_hf_model
7
+
8
+ # Load model and processor
9
+ MODEL_PATH = "merve/paligemma_vqav2" # or your local model path
10
+ TOKENIZER_PATH = "./tokenizer" # path to your local tokenizer
11
+ device = "cuda" if torch.cuda.is_available() else "cpu"
12
+
13
+ model, tokenizer = load_hf_model(MODEL_PATH, TOKENIZER_PATH, device)
14
+ model = model.eval()
15
+
16
+ num_image_tokens = model.config.vision_config.num_image_tokens
17
+ image_size = model.config.vision_config.image_size
18
+ max_length = 512
19
+ processor = MultiModalProcessor(tokenizer, num_image_tokens, image_size, max_length)
20
+
21
+ def generate_caption(image, prompt, max_tokens=300, temperature=0.8, top_p=0.9, do_sample=False):
22
+ # Save the input image temporarily
23
+ temp_image_path = "temp_image.jpg"
24
+ Image.fromarray(image).save(temp_image_path)
25
+
26
+ # Use the existing test_inference function
27
+ result = []
28
+ def capture_print(text):
29
+ result.append(text)
30
+
31
+ import builtins
32
+ original_print = builtins.print
33
+ builtins.print = capture_print
34
+
35
+ test_inference(
36
+ model,
37
+ processor,
38
+ device,
39
+ prompt,
40
+ temp_image_path,
41
+ max_tokens,
42
+ temperature,
43
+ top_p,
44
+ do_sample
45
+ )
46
+
47
+ builtins.print = original_print
48
+
49
+ # Return the captured output
50
+ return "".join(result)
51
+
52
+ # Define Gradio demo
53
+ with gr.Blocks(title="Image Captioning with PaliGemma", theme=gr.themes.Monochrome()) as demo:
54
+ gr.Markdown(
55
+ """
56
+ # Image Captioning with PaliGemma
57
+ This demo uses the PaliGemma model to generate captions for images.
58
+ """
59
+ )
60
+
61
+ with gr.Tabs():
62
+ with gr.TabItem("Generate Caption"):
63
+ with gr.Row():
64
+ with gr.Column(scale=1):
65
+ image_input = gr.Image(type="numpy", label="Upload Image")
66
+ prompt_input = gr.Textbox(label="Prompt", placeholder="What is happening in the photo?")
67
+
68
+ with gr.Column(scale=1):
69
+ with gr.Group():
70
+ max_tokens_input = gr.Slider(1, 500, value=300, step=1, label="Max Tokens")
71
+ temperature_input = gr.Slider(0.1, 2.0, value=0.8, step=0.1, label="Temperature")
72
+ top_p_input = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="Top P")
73
+ do_sample_input = gr.Checkbox(label="Do Sample")
74
+
75
+ generate_button = gr.Button("Generate Caption")
76
+
77
+ output = gr.Textbox(label="Generated Caption", lines=5)
78
+
79
+ with gr.TabItem("About"):
80
+ gr.Markdown(
81
+ """
82
+ ## How to use:
83
+ 1. Upload an image in the 'Generate Caption' tab.
84
+ 2. Enter a prompt to guide the caption generation.
85
+ 3. Adjust the generation parameters if desired.
86
+ 4. Click 'Generate Caption' to see the results.
87
+
88
+ ## Model Details:
89
+ - Model: PaliGemma
90
+ - Type: Multimodal (Text + Image)
91
+ - Task: Image Captioning
92
+ """
93
+ )
94
+
95
+ generate_button.click(
96
+ generate_caption,
97
+ inputs=[image_input, prompt_input, max_tokens_input, temperature_input, top_p_input, do_sample_input],
98
+ outputs=output
99
+ )
100
+
101
+ # Launch the demo
102
+ if __name__ == "__main__":
103
+ demo.launch()
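Note on the `builtins.print` swap in `generate_caption`: it works because `test_inference` only prints the decoded caption, but it also swallows anything else printed during generation. A minimal alternative sketch, assuming `test_inference` keeps writing its result to stdout; `generate_caption_captured` is an illustrative helper name, not part of this commit:

# Hypothetical alternative to monkey-patching builtins.print: capture stdout instead.
import io
from contextlib import redirect_stdout

def generate_caption_captured(prompt, image_path, max_tokens=300,
                              temperature=0.8, top_p=0.9, do_sample=False):
    buf = io.StringIO()
    with redirect_stdout(buf):
        # Uses the module-level model/processor/device, as generate_caption does.
        test_inference(model, processor, device, prompt, image_path,
                       max_tokens, temperature, top_p, do_sample)
    return buf.getvalue().strip()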
fine_tune.py ADDED
@@ -0,0 +1,165 @@
1
+ import torch
2
+ from torch.utils.data import Dataset
3
+ from datasets import load_dataset
4
+ from processor import MultiModalProcessor
5
+ from load_model import load_hf_model
6
+ from transformers import Trainer, TrainingArguments
7
+ from dataclasses import dataclass, field
8
+ from typing import List
9
+
10
+ @dataclass
11
+ class LoraConfig:
12
+ r: int = 8
13
+ lora_alpha: int = 16
14
+ target_modules: List[str] = field(default_factory=lambda: ["q_proj", "v_proj"])
15
+ lora_dropout: float = 0.05
16
+ bias: str = "none"
17
+ task_type: str = "CAUSAL_LM"
18
+
+     def __post_init__(self):
+         self.inference_mode = False
+         # The original body replaced r / lora_alpha / lora_dropout with empty dicts
+         # and then indexed and divided them, which fails at construction time.
+         # Keep the scalar hyperparameters and derive one shared scaling factor.
+         self.scaling = self.lora_alpha / self.r
30
+
31
+ class LoraLinear(torch.nn.Module):
32
+ def __init__(self, in_features, out_features, config: LoraConfig):
33
+ super().__init__()
34
+ self.linear = torch.nn.Linear(in_features, out_features, bias=False)
35
+ self.lora_A = torch.nn.Parameter(torch.zeros((config.r, in_features)))
36
+ self.lora_B = torch.nn.Parameter(torch.zeros((out_features, config.r)))
37
+ self.scaling = config.scaling
38
+ self.dropout = torch.nn.Dropout(p=config.lora_dropout)
39
+
40
+ def forward(self, x):
41
+ result = self.linear(x)
42
+ lora_output = (self.dropout(x) @ self.lora_A.t() @ self.lora_B.t()) * self.scaling
43
+ return result + lora_output
44
+
45
+ def apply_lora_to_model(model, config: LoraConfig):
46
+ for name, module in model.named_modules():
47
+ if any(target in name for target in config.target_modules):
48
+ if isinstance(module, torch.nn.Linear):
49
+ lora_module = LoraLinear(module.in_features, module.out_features, config)
50
+ lora_module.linear.weight.data = module.weight.data
51
+ if module.bias is not None:
52
+ lora_module.linear.bias = module.bias
+                 # `name` is dotted (e.g. "language_model.model.layers.0.self_attn.q_proj"),
+                 # so attach the LoRA module to its parent rather than to the root model.
+                 parent_name, _, child_name = name.rpartition(".")
+                 parent = model.get_submodule(parent_name) if parent_name else model
+                 setattr(parent, child_name, lora_module)
54
+ return model
55
+
56
+ # Load the dataset
57
+ ds = load_dataset('HuggingFaceM4/VQAv2', split="train[:10%]")
58
+ cols_remove = ["question_type", "answers", "answer_type", "image_id", "question_id"]
59
+ ds = ds.remove_columns(cols_remove)
60
+
61
+ # Create a small test split
62
+ split_ds = ds.train_test_split(test_size=0.05)
63
+ train_ds = split_ds["train"]
64
+ test_ds = split_ds["test"]
65
+
66
+ print(train_ds)
67
+ print(test_ds)
68
+
69
+ # Load the model and processor
70
+ model_id = "./paligemma-3b-pt-224"
71
+ model, tokenizer = load_hf_model(model_id, "./tokenizer", "cuda")  # (model_path, tokenizer_path, device); ./tokenizer is the tokenizer directory shipped in this repo
72
+ processor = MultiModalProcessor(tokenizer, model.config.vision_config.num_image_tokens, model.config.vision_config.image_size)
73
+
74
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
75
+ model = model.to(device)
76
+
77
+ # Apply LoRA to the model
78
+ lora_config = LoraConfig(r=8, lora_alpha=16, target_modules=["q_proj", "v_proj"], lora_dropout=0.05)
79
+ model = apply_lora_to_model(model, lora_config)
80
+
81
+ # Define a custom dataset
82
+ class PaliGemmaDataset(Dataset):
83
+ def __init__(self, dataset, processor):
84
+ self.dataset = dataset
85
+ self.processor = processor
86
+
87
+ def __len__(self):
88
+ return len(self.dataset)
89
+
90
+ def __getitem__(self, idx):
91
+ item = self.dataset[idx]
92
+ prompt = "answer " + item["question"]
93
+ image = item["image"].convert("RGB")
94
+ answer = item["multiple_choice_answer"]
95
+
96
+ # Process inputs
97
+ inputs = self.processor(text=[prompt], images=[image])
98
+
99
+ # Process labels
100
+ label_inputs = self.processor(text=[answer], images=[image])
101
+ labels = label_inputs['input_ids'][0]
102
+
103
+ # Set the labels to -100 for the input part (we don't want to compute loss on it)
104
+ inputs['labels'] = torch.full_like(inputs['input_ids'][0], -100)
105
+ inputs['labels'][-len(labels):] = torch.tensor(labels)
106
+
107
+ return inputs
108
+
109
+ # Create datasets
110
+ train_dataset = PaliGemmaDataset(train_ds, processor)
111
+ eval_dataset = PaliGemmaDataset(test_ds, processor)
112
+
113
+ # Define a custom data collator
114
+ def custom_data_collator(features):
115
+ batch = {
116
+ 'pixel_values': torch.stack([f['pixel_values'][0] for f in features]),
117
+ 'input_ids': torch.stack([f['input_ids'][0] for f in features]),
118
+ 'attention_mask': torch.stack([f['attention_mask'][0] for f in features]),
119
+ 'labels': torch.stack([f['labels'] for f in features])
120
+ }
121
+ return batch
122
+
123
+ # Define training arguments
124
+ training_args = TrainingArguments(
125
+ output_dir="./results",
126
+ num_train_epochs=3,
127
+ per_device_train_batch_size=4,
128
+ per_device_eval_batch_size=4,
129
+ warmup_steps=500,
130
+ weight_decay=0.01,
131
+ logging_dir='./logs',
132
+ logging_steps=10,
133
+ evaluation_strategy="epoch",
134
+ save_strategy="epoch",
135
+ load_best_model_at_end=True,
136
+ )
137
+
138
+ # Initialize the Trainer
139
+ trainer = Trainer(
140
+ model=model,
141
+ args=training_args,
142
+ train_dataset=train_dataset,
143
+ eval_dataset=eval_dataset,
144
+ data_collator=custom_data_collator,
145
+ )
146
+
147
+ # Fine-tune the model
148
+ trainer.train()
149
+
150
+ # Save the fine-tuned model
151
+ trainer.save_model("lora_paligemma_vqa")
152
+
153
+ # Function to save LoRA weights separately
154
+ def save_lora_weights(model, path):
155
+ lora_state_dict = {}
156
+ for name, module in model.named_modules():
157
+ if isinstance(module, LoraLinear):
158
+ lora_state_dict[f"{name}.lora_A"] = module.lora_A.data
159
+ lora_state_dict[f"{name}.lora_B"] = module.lora_B.data
160
+ torch.save(lora_state_dict, path)
161
+
162
+ # Save LoRA weights
163
+ save_lora_weights(model, "lora_weights.pt")
164
+
165
+ print("Fine-tuning completed and model saved.")
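Note: `save_lora_weights` only stores the A/B matrices, so a matching loader is needed before running inference with the adapted model. A minimal sketch, assuming the model has already been wrapped with `apply_lora_to_model` so module names match the saved keys; `load_lora_weights` is an illustrative helper, not part of this commit:

# Hypothetical counterpart to save_lora_weights: restore the LoRA A/B matrices.
import torch

def load_lora_weights(model, path: str) -> None:
    lora_state_dict = torch.load(path, map_location="cpu")
    for name, module in model.named_modules():
        if isinstance(module, LoraLinear):
            a_key, b_key = f"{name}.lora_A", f"{name}.lora_B"
            if a_key in lora_state_dict and b_key in lora_state_dict:
                module.lora_A.data.copy_(lora_state_dict[a_key])
                module.lora_B.data.copy_(lora_state_dict[b_key])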
inference.py ADDED
@@ -0,0 +1,132 @@
1
+ from PIL import Image
2
+ import torch
3
+ import fire
4
+
5
+ from processor import MultiModalProcessor
6
+ from model.utils.kv_cache import KVCache
7
+ from model.multimodal.multimodal_model import PaliGemmaForConditionalGeneration
8
+ from load_model import load_hf_model
9
+
10
+
11
+ def move_inputs_to_device(model_inputs: dict, device: str):
12
+ model_inputs = {k: v.to(device) for k, v in model_inputs.items()}
13
+ return model_inputs
14
+
15
+
16
+ def get_model_inputs(
17
+ processor: MultiModalProcessor, prompt: str, image_file_path: str, device: str
18
+ ):
19
+ image = Image.open(image_file_path)
20
+ images = [image]
21
+ prompts = [prompt]
22
+ model_inputs = processor(text=prompts, images=images)
23
+ model_inputs = move_inputs_to_device(model_inputs, device)
24
+ return model_inputs
25
+
26
+
27
+ def test_inference(
28
+ model: PaliGemmaForConditionalGeneration,
29
+ processor: MultiModalProcessor,
30
+ device: str,
31
+ prompt: str,
32
+ image_file_path: str,
33
+ max_tokens_to_generate: int,
34
+ temperature: float,
35
+ top_p: float,
36
+ do_sample: bool,
37
+ ):
38
+ model_inputs = get_model_inputs(processor, prompt, image_file_path, device)
39
+ input_ids = model_inputs["input_ids"]
40
+ attention_mask = model_inputs["attention_mask"]
41
+ pixel_values = model_inputs["pixel_values"]
42
+
43
+ kv_cache = KVCache()
44
+
45
+ stop_token = processor.tokenizer.eos_token_id
46
+ generated_tokens = []
47
+
48
+ for _ in range(max_tokens_to_generate):
49
+ outputs = model(
50
+ input_ids=input_ids,
51
+ pixel_values=pixel_values,
52
+ attention_mask=attention_mask,
53
+ kv_cache=kv_cache,
54
+ )
55
+ kv_cache = outputs["kv_cache"]
56
+ next_token_logits = outputs["logits"][:, -1, :]
57
+ if do_sample:
58
+ next_token_logits = torch.softmax(next_token_logits / temperature, dim=-1)
59
+ next_token = _sample_top_p(next_token_logits, top_p)
60
+ else:
61
+ next_token = torch.argmax(next_token_logits, dim=-1, keepdim=True)
62
+ assert next_token.size() == (1, 1)
63
+ next_token = next_token.squeeze(0)
64
+ generated_tokens.append(next_token)
65
+ if next_token.item() == stop_token:
66
+ break
67
+ input_ids = next_token.unsqueeze(-1)
68
+ attention_mask = torch.cat(
69
+ [attention_mask, torch.ones((1, 1), device=input_ids.device)], dim=-1
70
+ )
71
+
72
+ generated_tokens = torch.cat(generated_tokens, dim=-1)
73
+ decoded = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
74
+
75
+ print(decoded)
76
+
77
+
78
+ def _sample_top_p(probs: torch.Tensor, p: float):
79
+ probs_sort, probs_idx = torch.sort(probs, dim=-1, descending=True)
80
+ probs_sum = torch.cumsum(probs_sort, dim=-1)
81
+ mask = probs_sum - probs_sort > p
82
+ probs_sort[mask] = 0.0
83
+ probs_sort.div_(probs_sort.sum(dim=-1, keepdim=True))
84
+ next_token = torch.multinomial(probs_sort, num_samples=1)
85
+ next_token = torch.gather(probs_idx, -1, next_token)
86
+ return next_token
87
+
88
+ def main(
89
+ model_path: str = None,
90
+ prompt: str = None,
91
+ image_file_path: str = None,
92
+ max_tokens_to_generate: int = 100,
93
+ temperature: float = 0.8,
94
+ top_p: float = 0.9,
95
+ do_sample: bool = False,
96
+ only_cpu: bool = False,
97
+ ):
98
+ device = "cpu"
99
+
100
+ if not only_cpu:
101
+ if torch.cuda.is_available():
102
+ device = "cuda"
103
+ elif torch.backends.mps.is_available():
104
+ device = "mps"
105
+
106
+ print("Device in use: ", device)
107
+
108
+     print("Loading model")
+     # load_hf_model expects (model_path, tokenizer_path, device); as in app.py,
+     # the tokenizer shipped under ./tokenizer is used here.
+     model, tokenizer = load_hf_model(model_path, "./tokenizer", device)
110
+ model = model.to(device).eval()
111
+
112
+ num_image_tokens = model.config.vision_config.num_image_tokens
113
+ image_size = model.config.vision_config.image_size
114
+ max_length = 512
115
+ processor = MultiModalProcessor(tokenizer, num_image_tokens, image_size, max_length)
116
+
117
+ print("Running inference")
118
+ with torch.no_grad():
119
+ test_inference(
120
+ model,
121
+ processor,
122
+ device,
123
+ prompt,
124
+ image_file_path,
125
+ max_tokens_to_generate,
126
+ temperature,
127
+ top_p,
128
+ do_sample,
129
+ )
130
+
131
+ if __name__ == "__main__":
132
+ fire.Fire(main)
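Note: a worked example of `_sample_top_p`: for probabilities (0.5, 0.3, 0.15, 0.05) and p = 0.9, the sorted cumulative sums are (0.5, 0.8, 0.95, 1.0), so the mask `cumsum - prob > p` is true only for the last entry; the 0.05 tail is zeroed out, the rest is renormalized, and a token is drawn from the survivors. A toy check (numbers are illustrative):

# Toy check of the nucleus-sampling helper defined above.
import torch

probs = torch.tensor([[0.05, 0.5, 0.3, 0.15]])   # batch of 1, deliberately unsorted
token = _sample_top_p(probs, p=0.9)              # index 0 (prob 0.05) is always filtered out
assert token.item() in (1, 2, 3)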
load_model.py ADDED
@@ -0,0 +1,50 @@
1
+ from model.multimodal.multimodal_model import PaliGemmaForConditionalGeneration
2
+ from model.multimodal.multimodal_config import MultiModalConfig
3
+ from transformers import AutoTokenizer
4
+ import json
5
+ import glob
6
+ from safetensors import safe_open
7
+ from typing import Tuple
8
+ import os
9
+ from huggingface_hub import hf_hub_download
10
+
11
+ def load_hf_model(model_path: str, tokenizer_path: str, device: str) -> Tuple[PaliGemmaForConditionalGeneration, AutoTokenizer]:
12
+ # Load tokenizer from the specified path
13
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, padding_side="right")
14
+ assert tokenizer.padding_side == "right"
15
+
16
+ # Check if model_path is a local directory or a HuggingFace model ID
17
+ is_local = os.path.isdir(model_path)
18
+
19
+ # Load model configuration
20
+ if is_local:
21
+ config_path = os.path.join(model_path, "config.json")
22
+ else:
23
+ config_path = hf_hub_download(repo_id=model_path, filename="config.json")
24
+
25
+ with open(config_path, "r") as f:
26
+ model_config_file = json.load(f)
27
+ config = MultiModalConfig(**model_config_file)
28
+
29
+ # Initialize model
30
+ model = PaliGemmaForConditionalGeneration(config).to(device)
31
+
32
+ # Load model weights
33
+ tensors = {}
34
+ if is_local:
35
+ safetensors_files = glob.glob(os.path.join(model_path, "*.safetensors"))
36
+ else:
37
+ safetensors_files = [
38
+ hf_hub_download(repo_id=model_path, filename=f"model-0000{i}-of-00002.safetensors")
39
+ for i in range(1, 3)
40
+ ]
41
+
42
+ for safetensors_file in safetensors_files:
43
+ with safe_open(safetensors_file, framework="pt", device="cpu") as f:
44
+ for key in f.keys():
45
+ tensors[key] = f.get_tensor(key)
46
+
47
+ model.load_state_dict(tensors, strict=False)
48
+ model.tie_weights()
49
+
50
+ return (model, tokenizer)
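Note: a minimal usage sketch for `load_hf_model`, mirroring how `app.py` calls it (Hub model id or local directory for the weights, the repo's `./tokenizer` directory for the tokenizer):

# Example call mirroring app.py; whether the Hub repo ships config.json and the
# two safetensors shards expected above is an assumption of this sketch.
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
model, tokenizer = load_hf_model("merve/paligemma_vqav2", "./tokenizer", device)
model = model.eval()
print(model.config.vision_config.num_image_tokens, model.config.vision_config.image_size)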
model/language/__pycache__/language_components.cpython-311.pyc ADDED
Binary file (14.8 kB).
 
model/language/__pycache__/language_config.cpython-311.pyc ADDED
Binary file (1.42 kB).
 
model/language/__pycache__/language_model.cpython-311.pyc ADDED
Binary file (3.06 kB).
 
model/language/language_components.py ADDED
@@ -0,0 +1,192 @@
1
+ import torch
2
+ from torch import nn
3
+ from typing import Optional, Tuple
4
+ import math
5
+
6
+ from ..utils.kv_cache import KVCache
7
+
8
+ from .language_config import LanguageModelConfig
9
+
10
+ class RMSNorm(nn.Module):
11
+ def __init__(self, dim: int, eps: float = 1e-6):
12
+ super().__init__()
13
+ self.eps = eps
14
+ self.weight = nn.Parameter(torch.zeros(dim))
15
+
16
+ def _norm(self, x):
17
+ return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)
18
+
19
+ def forward(self, x):
20
+ output = self._norm(x.float())
21
+ output = output * (1.0 + self.weight.float())
22
+ return output.type_as(x)
23
+
24
+ class RotaryEmbedding(nn.Module):
25
+ def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None):
26
+ super().__init__()
27
+
28
+ self.dim = dim
29
+ self.max_position_embeddings = max_position_embeddings
30
+ self.base = base
31
+
32
+ inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2, dtype=torch.int64).float() / self.dim))
33
+ self.register_buffer("inv_freq", tensor=inv_freq, persistent=False)
34
+
35
+ @torch.no_grad()
36
+ def forward(self, x, position_ids, seq_len=None):
37
+ self.inv_freq.to(x.device)
38
+ inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1)
39
+ position_ids_expanded = position_ids[:, None, :].float()
40
+ device_type = x.device.type
41
+ device_type = device_type if isinstance(device_type, str) and device_type != "mps" else "cpu"
42
+ with torch.autocast(device_type=device_type, enabled=False):
43
+ freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
44
+ emb = torch.cat((freqs, freqs), dim=-1)
45
+ cos = emb.cos()
46
+ sin = emb.sin()
47
+ return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)
48
+
49
+
50
+ def rotate_half(x):
51
+ x1 = x[..., : x.shape[-1] // 2]
52
+ x2 = x[..., x.shape[-1] // 2 :]
53
+ return torch.cat((-x2, x1), dim=-1)
54
+
55
+
56
+ def apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1):
57
+ cos = cos.unsqueeze(unsqueeze_dim)
58
+ sin = sin.unsqueeze(unsqueeze_dim)
59
+ q_embed = (q * cos) + (rotate_half(q) * sin)
60
+ k_embed = (k * cos) + (rotate_half(k) * sin)
61
+ return q_embed, k_embed
62
+
63
+
64
+ class MLP(nn.Module):
65
+ def __init__(self, config):
66
+ super().__init__()
67
+ self.config = config
68
+ self.hidden_size = config.hidden_size
69
+ self.intermediate_size = config.intermediate_size
70
+ self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
71
+ self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
72
+ self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)
73
+
74
+ def forward(self, x):
75
+ return self.down_proj(nn.functional.gelu(self.gate_proj(x), approximate="tanh") * self.up_proj(x))
76
+
77
+ def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
78
+ batch, num_key_value_heads, slen, head_dim = hidden_states.shape
79
+ if n_rep == 1:
80
+ return hidden_states
81
+ hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim)
82
+ return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
83
+
84
+ class Attention(nn.Module):
85
+
86
+ def __init__(self, config: LanguageModelConfig, layer_idx: Optional[int] = None):
87
+ super().__init__()
88
+ self.config = config
89
+ self.layer_idx = layer_idx
90
+
91
+ self.attention_dropout = config.attention_dropout
92
+ self.hidden_size = config.hidden_size
93
+ self.num_heads = config.num_attention_heads
94
+ self.head_dim = config.head_dim
95
+ self.num_key_value_heads = config.num_key_value_heads
96
+ self.num_key_value_groups = self.num_heads // self.num_key_value_heads
97
+ self.max_position_embeddings = config.max_position_embeddings
98
+ self.rope_theta = config.rope_theta
99
+ self.is_causal = True
100
+
101
+ assert self.hidden_size % self.num_heads == 0
102
+
103
+ self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=config.attention_bias)
104
+ self.k_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias)
105
+ self.v_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias)
106
+ self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=config.attention_bias)
107
+ self.rotary_emb = RotaryEmbedding(
108
+ self.head_dim,
109
+ max_position_embeddings=self.max_position_embeddings,
110
+ base=self.rope_theta,
111
+ )
112
+
113
+ def forward(
114
+ self,
115
+ hidden_states: torch.Tensor,
116
+ attention_mask: Optional[torch.Tensor] = None,
117
+ position_ids: Optional[torch.LongTensor] = None,
118
+ kv_cache: Optional[KVCache] = None,
119
+ **kwargs,
120
+ ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
121
+ bsz, q_len, _ = hidden_states.size()
122
+ query_states = self.q_proj(hidden_states)
123
+ key_states = self.k_proj(hidden_states)
124
+ value_states = self.v_proj(hidden_states)
125
+ query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
126
+ key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
127
+ value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
128
+
129
+ cos, sin = self.rotary_emb(value_states, position_ids, seq_len=None)
130
+ query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
131
+
132
+ if kv_cache is not None:
133
+ key_states, value_states = kv_cache.update(key_states, value_states, self.layer_idx)
134
+
135
+ key_states = repeat_kv(key_states, self.num_key_value_groups)
136
+ value_states = repeat_kv(value_states, self.num_key_value_groups)
137
+ attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
138
+
139
+ assert attention_mask is not None
140
+ attn_weights = attn_weights + attention_mask
141
+
142
+ attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
143
+ attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training)
144
+ attn_output = torch.matmul(attn_weights, value_states)
145
+
146
+ if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
147
+ raise ValueError(
148
+ f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
149
+ f" {attn_output.size()}"
150
+ )
151
+ attn_output = attn_output.transpose(1, 2).contiguous()
152
+ attn_output = attn_output.view(bsz, q_len, -1)
153
+ attn_output = self.o_proj(attn_output)
154
+
155
+ return attn_output, attn_weights
156
+
157
+ class DecoderLayer(nn.Module):
158
+
159
+ def __init__(self, config: LanguageModelConfig, layer_idx: int):
160
+ super().__init__()
161
+ self.hidden_size = config.hidden_size
162
+
163
+ self.self_attn = Attention(config=config, layer_idx=layer_idx)
164
+
165
+ self.mlp = MLP(config)
166
+ self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
167
+ self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
168
+
169
+ def forward(
170
+ self,
171
+ hidden_states: torch.Tensor,
172
+ attention_mask: Optional[torch.Tensor] = None,
173
+ position_ids: Optional[torch.LongTensor] = None,
174
+ kv_cache: Optional[KVCache] = None,
175
+ ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
176
+ residual = hidden_states
177
+ hidden_states = self.input_layernorm(hidden_states)
178
+
179
+ hidden_states, _, = self.self_attn(
180
+ hidden_states=hidden_states,
181
+ attention_mask=attention_mask,
182
+ position_ids=position_ids,
183
+ kv_cache=kv_cache,
184
+ )
185
+ hidden_states = residual + hidden_states
186
+
187
+ residual = hidden_states
188
+ hidden_states = self.post_attention_layernorm(hidden_states)
189
+ hidden_states = self.mlp(hidden_states)
190
+ hidden_states = residual + hidden_states
191
+
192
+ return hidden_states
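Note: `repeat_kv` implements grouped-query attention by tiling each key/value head `n_rep` times so the KV tensors line up with the query heads. A quick shape check (sizes are illustrative):

# Shape check for repeat_kv: 2 KV heads expanded to match 8 query heads (n_rep = 4).
import torch

kv = torch.randn(1, 2, 5, 64)            # (batch, num_key_value_heads, seq_len, head_dim)
out = repeat_kv(kv, n_rep=4)
assert out.shape == (1, 8, 5, 64)
# Each KV head is repeated contiguously: output heads 0-3 are copies of KV head 0.
assert torch.equal(out[:, 0], kv[:, 0]) and torch.equal(out[:, 3], kv[:, 0])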
model/language/language_config.py ADDED
@@ -0,0 +1,33 @@
1
+ class LanguageModelConfig():
2
+
3
+ def __init__(
4
+ self,
5
+ vocab_size,
6
+ hidden_size,
7
+ intermediate_size,
8
+ num_hidden_layers,
9
+ num_attention_heads,
10
+ num_key_value_heads,
11
+ head_dim=256,
12
+ max_position_embeddings=8192,
13
+ rms_norm_eps=1e-6,
14
+ rope_theta=10000.0,
15
+ attention_bias=False,
16
+ attention_dropout=0.0,
17
+ pad_token_id=None,
18
+ **kwargs,
19
+ ):
20
+ super().__init__()
21
+ self.vocab_size = vocab_size
22
+ self.max_position_embeddings = max_position_embeddings
23
+ self.hidden_size = hidden_size
24
+ self.intermediate_size = intermediate_size
25
+ self.num_hidden_layers = num_hidden_layers
26
+ self.num_attention_heads = num_attention_heads
27
+ self.head_dim = head_dim
28
+ self.num_key_value_heads = num_key_value_heads
29
+ self.rms_norm_eps = rms_norm_eps
30
+ self.rope_theta = rope_theta
31
+ self.attention_bias = attention_bias
32
+ self.attention_dropout = attention_dropout
33
+ self.pad_token_id = pad_token_id
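Note: `LanguageModelConfig` is a plain keyword container, so it can be built directly from the `text_config` dict in the checkpoint's `config.json` (see `load_model.py`). The numbers below are illustrative placeholders, not the actual checkpoint's values:

# Illustrative instantiation; real values come from config.json at load time.
cfg = LanguageModelConfig(
    vocab_size=257152,
    hidden_size=2048,
    intermediate_size=16384,
    num_hidden_layers=18,
    num_attention_heads=8,
    num_key_value_heads=1,
)
print(cfg.head_dim, cfg.max_position_embeddings)  # defaults: 256, 8192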
model/language/language_model.py ADDED
@@ -0,0 +1,47 @@
1
+ import torch
2
+ from torch import nn
3
+ from typing import Optional
4
+
5
+ from .language_config import LanguageModelConfig
6
+ from .language_components import DecoderLayer, RMSNorm, KVCache
7
+
8
+ class LanguageModel(nn.Module):
9
+
10
+ def __init__(self, config: LanguageModelConfig):
11
+ super().__init__()
12
+ self.config = config
13
+ self.padding_idx = config.pad_token_id
14
+ self.vocab_size = config.vocab_size
15
+
16
+ self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
17
+ self.layers = nn.ModuleList(
18
+ [DecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
19
+ )
20
+ self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
21
+
22
+ def get_input_embeddings(self):
23
+ return self.embed_tokens
24
+
25
+ # Ignore copy
26
+ def forward(
27
+ self,
28
+ attention_mask: Optional[torch.Tensor] = None,
29
+ position_ids: Optional[torch.LongTensor] = None,
30
+ inputs_embeds: Optional[torch.FloatTensor] = None,
31
+ kv_cache: Optional[KVCache] = None,
32
+ ) -> torch.FloatTensor:
33
+ hidden_states = inputs_embeds
34
+ normalizer = torch.tensor(self.config.hidden_size**0.5, dtype=hidden_states.dtype)
35
+ hidden_states = hidden_states * normalizer
36
+
37
+ for decoder_layer in self.layers:
38
+ hidden_states = decoder_layer(
39
+ hidden_states,
40
+ attention_mask=attention_mask,
41
+ position_ids=position_ids,
42
+ kv_cache=kv_cache,
43
+ )
44
+
45
+ hidden_states = self.norm(hidden_states)
46
+
47
+ return hidden_states
model/multimodal/__pycache__/multimodal_components.cpython-311.pyc ADDED
Binary file (3.64 kB).
 
model/multimodal/__pycache__/multimodal_config.cpython-311.pyc ADDED
Binary file (1.76 kB).
 
model/multimodal/__pycache__/multimodal_model.cpython-311.pyc ADDED
Binary file (6.37 kB).
 
model/multimodal/multimodal_components.py ADDED
@@ -0,0 +1,59 @@
1
+ import torch
2
+ from torch import nn
3
+ from typing import Optional, Tuple
4
+
5
+ from .multimodal_config import MultiModalConfig
6
+ from ..utils.kv_cache import KVCache
7
+ from ..language.language_model import LanguageModel
8
+
9
+ class CausalLM(nn.Module):
10
+
11
+ def __init__(self, config):
12
+ super().__init__()
13
+ self.config = config
14
+ self.model = LanguageModel(config)
15
+ self.vocab_size = config.vocab_size
16
+ self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
17
+
18
+ def get_input_embeddings(self):
19
+ return self.model.embed_tokens
20
+
21
+ def tie_weights(self):
22
+ self.lm_head.weight = self.model.embed_tokens.weight
23
+
24
+ def forward(
25
+ self,
26
+ attention_mask: Optional[torch.Tensor] = None,
27
+ position_ids: Optional[torch.LongTensor] = None,
28
+ inputs_embeds: Optional[torch.FloatTensor] = None,
29
+ kv_cache: Optional[KVCache] = None,
30
+ ) -> Tuple:
31
+ outputs = self.model(
32
+ attention_mask=attention_mask,
33
+ position_ids=position_ids,
34
+ inputs_embeds=inputs_embeds,
35
+ kv_cache=kv_cache,
36
+ )
37
+
38
+ hidden_states = outputs
39
+ logits = self.lm_head(hidden_states)
40
+ logits = logits.float()
41
+
42
+ return_data = {
43
+ "logits": logits,
44
+ }
45
+
46
+ if kv_cache is not None:
47
+ return_data["kv_cache"] = kv_cache
48
+
49
+ return return_data
50
+
51
+ class MultiModalProjector(nn.Module):
52
+ def __init__(self, config: MultiModalConfig):
53
+ super().__init__()
54
+ self.linear = nn.Linear(config.vision_config.hidden_size, config.vision_config.projection_dim, bias=True)
55
+
56
+ def forward(self, image_features):
57
+ hidden_states = self.linear(image_features)
58
+ return hidden_states
59
+
model/multimodal/multimodal_config.py ADDED
@@ -0,0 +1,35 @@
1
+ from ..vision.siglip_config import SigLipConfig
2
+ from ..language.language_config import LanguageModelConfig
3
+
4
+ class MultiModalConfig():
5
+
6
+ def __init__(
7
+ self,
8
+ vision_config=None,
9
+ text_config=None,
10
+ ignore_index=-100,
11
+ image_token_index=256000,
12
+ vocab_size=257152,
13
+ projection_dim=2048,
14
+ hidden_size=2048,
15
+ pad_token_id=None,
16
+ **kwargs,
17
+ ):
18
+ super().__init__()
19
+ self.ignore_index = ignore_index
20
+ self.image_token_index = image_token_index
21
+ self.vocab_size = vocab_size
22
+ self.projection_dim = projection_dim
23
+ self.hidden_size = hidden_size
24
+ self.vision_config = vision_config
25
+ self.is_encoder_decoder = False
26
+ self.pad_token_id = pad_token_id
27
+
28
+ self.vision_config = SigLipConfig(**vision_config)
29
+ self.text_config = text_config
30
+
31
+ self.text_config = LanguageModelConfig(**text_config, pad_token_id=pad_token_id)
32
+ self.vocab_size = self.text_config.vocab_size
33
+
34
+ self.text_config.num_image_tokens = (self.vision_config.image_size // self.vision_config.patch_size) ** 2
35
+ self.vision_config.projection_dim = projection_dim
model/multimodal/multimodal_model.py ADDED
@@ -0,0 +1,98 @@
1
+ import torch
2
+ from torch import nn
3
+ from typing import Optional, Tuple, List
4
+
5
+ from .multimodal_config import MultiModalConfig
6
+ from .multimodal_components import CausalLM, MultiModalProjector
7
+ from ..vision.siglip_model import SigLip
8
+ from ..utils.kv_cache import KVCache
9
+
10
+ class PaliGemmaForConditionalGeneration(nn.Module):
11
+
12
+ def __init__(self, config: MultiModalConfig):
13
+ super().__init__()
14
+ self.config = config
15
+ self.vision_tower = SigLip(config.vision_config)
16
+ self.multi_modal_projector = MultiModalProjector(config)
17
+ self.vocab_size = config.vocab_size
18
+
19
+ language_model = CausalLM(config.text_config)
20
+ self.language_model = language_model
21
+
22
+ self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1
23
+
24
+ def tie_weights(self):
25
+ return self.language_model.tie_weights()
26
+
27
+ def _merge_input_ids_with_image_features(
28
+ self, image_features: torch.Tensor, inputs_embeds: torch.Tensor, input_ids: torch.Tensor, attention_mask: torch.Tensor, kv_cache: Optional[KVCache] = None
29
+ ):
30
+ _, _, embed_dim = image_features.shape
31
+ batch_size, sequence_length = input_ids.shape
32
+ dtype, device = inputs_embeds.dtype, inputs_embeds.device
33
+ scaled_image_features = image_features / (self.config.hidden_size**0.5)
34
+
35
+ final_embedding = torch.zeros(batch_size, sequence_length, embed_dim, dtype=inputs_embeds.dtype, device=inputs_embeds.device)
36
+ text_mask = (input_ids != self.config.image_token_index) & (input_ids != self.pad_token_id)
37
+ image_mask = input_ids == self.config.image_token_index
38
+ pad_mask = input_ids == self.pad_token_id
39
+
40
+ text_mask_expanded = text_mask.unsqueeze(-1).expand(-1, -1, embed_dim)
41
+ pad_mask_expanded = pad_mask.unsqueeze(-1).expand(-1, -1, embed_dim)
42
+ image_mask_expanded = image_mask.unsqueeze(-1).expand(-1, -1, embed_dim)
43
+
44
+ final_embedding = torch.where(text_mask_expanded, inputs_embeds, final_embedding)
45
+ final_embedding = final_embedding.masked_scatter(image_mask_expanded, scaled_image_features)
46
+ final_embedding = torch.where(pad_mask_expanded, torch.zeros_like(final_embedding), final_embedding)
47
+
48
+
49
+ dtype, device = inputs_embeds.dtype, inputs_embeds.device
50
+ min_dtype = torch.finfo(dtype).min
51
+ q_len = inputs_embeds.shape[1]
52
+
53
+ if kv_cache is None or kv_cache.num_items() == 0:
54
+ causal_mask = torch.full(
55
+ (batch_size, q_len, q_len), fill_value=0, dtype=dtype, device=device
56
+ )
57
+ else:
58
+ assert q_len == 1
59
+ kv_len = kv_cache.num_items() + q_len
60
+ causal_mask = torch.full((batch_size, q_len, kv_len), fill_value=0, dtype=dtype, device=device)
61
+
62
+ causal_mask = causal_mask.unsqueeze(1)
63
+
64
+ if kv_cache is not None and kv_cache.num_items() > 0:
65
+ position_ids = attention_mask.cumsum(-1)[:, -1]
66
+ if position_ids.dim() == 1:
67
+ position_ids = position_ids.unsqueeze(0)
68
+ else:
69
+ position_ids = (attention_mask.cumsum(-1)).masked_fill_((attention_mask == 0), 1).to(device)
70
+
71
+ return final_embedding, causal_mask, position_ids
72
+
73
+ def forward(
74
+ self,
75
+ input_ids: torch.LongTensor = None,
76
+ pixel_values: torch.FloatTensor = None,
77
+ attention_mask: Optional[torch.Tensor] = None,
78
+ kv_cache: Optional[KVCache] = None,
79
+ ) -> Tuple:
80
+
81
+ assert torch.all(attention_mask == 1), "The input cannot be padded"
82
+
83
+ inputs_embeds = self.language_model.get_input_embeddings()(input_ids)
84
+
85
+ selected_image_feature = self.vision_tower(pixel_values.to(inputs_embeds.dtype))
86
+
87
+ image_features = self.multi_modal_projector(selected_image_feature)
88
+
89
+ inputs_embeds, attention_mask, position_ids = self._merge_input_ids_with_image_features(image_features, inputs_embeds, input_ids, attention_mask, kv_cache)
90
+
91
+ outputs = self.language_model(
92
+ attention_mask=attention_mask,
93
+ position_ids=position_ids,
94
+ inputs_embeds=inputs_embeds,
95
+ kv_cache=kv_cache,
96
+ )
97
+
98
+ return outputs
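Note: the heart of `_merge_input_ids_with_image_features` is a `masked_scatter` that drops the projected (and scaled) image features into the `<image>` token positions while text positions keep their token embeddings; padding is asserted absent in `forward`. A toy illustration with made-up sizes and token ids:

# Toy illustration of the image/text merge (all numbers are illustrative).
import torch

image_token_index = 5
input_ids = torch.tensor([[5, 5, 1, 7, 9]])            # two image tokens, then <bos> + text
inputs_embeds = torch.zeros(1, 5, 4)                   # pretend text embeddings (all zeros)
image_features = torch.ones(1, 2, 4)                   # pretend projected image features
image_mask = (input_ids == image_token_index).unsqueeze(-1).expand(-1, -1, 4)
merged = inputs_embeds.masked_scatter(image_mask, image_features)
print(merged[0, :, 0])                                 # tensor([1., 1., 0., 0., 0.])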
model/utils/__pycache__/kv_cache.cpython-311.pyc ADDED
Binary file (1.98 kB).
 
model/utils/kv_cache.py ADDED
@@ -0,0 +1,29 @@
1
+ import torch
2
+ from typing import List, Tuple
3
+
4
+ class KVCache():
5
+
6
+ def __init__(self) -> None:
7
+ self.key_cache: List[torch.Tensor] = []
8
+ self.value_cache: List[torch.Tensor] = []
9
+
10
+ def num_items(self) -> int:
11
+ if len(self.key_cache) == 0:
12
+ return 0
13
+ else:
14
+ return self.key_cache[0].shape[-2]
15
+
16
+ def update(
17
+ self,
18
+ key_states: torch.Tensor,
19
+ value_states: torch.Tensor,
20
+ layer_idx: int,
21
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
22
+ if len(self.key_cache) <= layer_idx:
23
+ self.key_cache.append(key_states)
24
+ self.value_cache.append(value_states)
25
+ else:
26
+ self.key_cache[layer_idx] = torch.cat([self.key_cache[layer_idx], key_states], dim=-2)
27
+ self.value_cache[layer_idx] = torch.cat([self.value_cache[layer_idx], value_states], dim=-2)
28
+
29
+ return self.key_cache[layer_idx], self.value_cache[layer_idx]
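Note: the cache concatenates new keys/values along the sequence dimension per layer, so after prefill `num_items()` grows by one on every decoding step. A quick sketch (shapes are illustrative):

# KVCache growth during prefill + one decode step (illustrative shapes).
import torch

cache = KVCache()
k = v = torch.randn(1, 8, 10, 64)        # prefill: 10 positions, 8 heads, head_dim 64
cache.update(k, v, layer_idx=0)
print(cache.num_items())                 # 10
k1 = v1 = torch.randn(1, 8, 1, 64)       # one decode step
keys, values = cache.update(k1, v1, layer_idx=0)
print(cache.num_items(), keys.shape[-2]) # 11 11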
model/vision/__pycache__/siglip_components.cpython-311.pyc ADDED
Binary file (11.8 kB).
 
model/vision/__pycache__/siglip_config.cpython-311.pyc ADDED
Binary file (1.18 kB).
 
model/vision/__pycache__/siglip_model.cpython-311.pyc ADDED
Binary file (1.36 kB).
 
model/vision/siglip_components.py ADDED
@@ -0,0 +1,141 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from typing import Optional, Tuple
4
+ from .siglip_config import SigLipConfig
5
+
6
+ class SiglipTransformer(nn.Module):
7
+ def __init__(self, config: SigLipConfig):
8
+ super().__init__()
9
+ self.config = config
10
+ embed_dim = config.hidden_size
11
+
12
+ self.embeddings = SigLipEmbeddings(config)
13
+ self.encoder = SiglipEncoder(config)
14
+ self.post_layernorm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps)
15
+
16
+ def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
17
+ hidden_states = self.embeddings(pixel_values)
18
+ last_hidden_state = self.encoder(inputs_embeds=hidden_states)
19
+ last_hidden_state = self.post_layernorm(last_hidden_state)
20
+ return last_hidden_state
21
+
22
+ class SiglipEncoder(nn.Module):
23
+ def __init__(self, config: SigLipConfig):
24
+ super().__init__()
25
+ self.config = config
26
+ self.layers = nn.ModuleList(
27
+ [SigLipEncoderLayer(config) for _ in range(config.num_hidden_layers)]
28
+ )
29
+
30
+ def forward(self, inputs_embeds: torch.Tensor) -> torch.Tensor:
31
+ hidden_states = inputs_embeds
32
+ for encoder_layer in self.layers:
33
+ hidden_states = encoder_layer(hidden_states)
34
+ return hidden_states
35
+
36
+ class SigLipEncoderLayer(nn.Module):
37
+ def __init__(self, config: SigLipConfig):
38
+ super().__init__()
39
+ self.embed_dim = config.hidden_size
40
+ self.self_attn = SigLipAttention(config)
41
+ self.layer_norm1 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps)
42
+ self.mlp = SigLipMLP(config)
43
+ self.layer_norm2 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps)
44
+
45
+ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
46
+ residual = hidden_states
47
+ hidden_states = self.layer_norm1(hidden_states)
48
+ hidden_states, _ = self.self_attn(hidden_states=hidden_states)
49
+ hidden_states = residual + hidden_states
50
+ residual = hidden_states
51
+ hidden_states = self.layer_norm2(hidden_states)
52
+ hidden_states = self.mlp(hidden_states)
53
+ hidden_states = residual + hidden_states
54
+ return hidden_states
55
+
56
+ class SigLipMLP(nn.Module):
57
+ def __init__(self, config):
58
+ super().__init__()
59
+ self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size)
60
+ self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size)
61
+
62
+ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
63
+ hidden_states = self.fc1(hidden_states)
64
+ hidden_states = nn.functional.gelu(hidden_states, approximate="tanh")
65
+ hidden_states = self.fc2(hidden_states)
66
+ return hidden_states
67
+
68
+ class SigLipAttention(nn.Module):
69
+ def __init__(self, config):
70
+ super().__init__()
71
+ self.embed_dim = config.hidden_size
72
+ self.num_heads = config.num_attention_heads
73
+ self.head_dim = self.embed_dim // self.num_heads
74
+ self.scale = self.head_dim**-0.5
75
+ self.dropout = config.attention_dropout
76
+
77
+ self.k_proj = nn.Linear(self.embed_dim, self.embed_dim)
78
+ self.v_proj = nn.Linear(self.embed_dim, self.embed_dim)
79
+ self.q_proj = nn.Linear(self.embed_dim, self.embed_dim)
80
+ self.out_proj = nn.Linear(self.embed_dim, self.embed_dim)
81
+
82
+ def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
83
+ batch_size, seq_len, _ = hidden_states.size()
84
+ query_states = self.q_proj(hidden_states)
85
+ key_states = self.k_proj(hidden_states)
86
+ value_states = self.v_proj(hidden_states)
87
+ query_states = query_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
88
+ key_states = key_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
89
+ value_states = value_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
90
+ attn_weights = (torch.matmul(query_states, key_states.transpose(2, 3)) * self.scale)
91
+
92
+ if attn_weights.size() != (batch_size, self.num_heads, seq_len, seq_len):
93
+ raise ValueError(
94
+ f"Attention weights should be of size {(batch_size, self.num_heads, seq_len, seq_len)}, but is"
95
+ f" {attn_weights.size()}"
96
+ )
97
+
98
+ attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
99
+ attn_weights = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training)
100
+ attn_output = torch.matmul(attn_weights, value_states)
101
+
102
+ if attn_output.size() != (batch_size, self.num_heads, seq_len, self.head_dim):
103
+ raise ValueError(
104
+ f"`attn_output` should be of size {(batch_size, self.num_heads, seq_len, self.head_dim)}, but is"
105
+ f" {attn_output.size()}"
106
+ )
107
+ attn_output = attn_output.transpose(1, 2).contiguous()
108
+ attn_output = attn_output.reshape(batch_size, seq_len, self.embed_dim)
109
+ attn_output = self.out_proj(attn_output)
110
+
111
+ return attn_output, attn_weights
112
+
113
+ class SigLipEmbeddings(nn.Module):
114
+ def __init__(self, config: SigLipConfig):
115
+ super().__init__()
116
+ self.embed_dim = config.hidden_size
117
+ self.image_size = config.image_size
118
+ self.patch_size = config.patch_size
119
+
120
+ self.patch_embedding = nn.Conv2d(
121
+ in_channels=config.num_channels,
122
+ out_channels=self.embed_dim,
123
+ kernel_size=self.patch_size,
124
+ stride=self.patch_size,
125
+ padding="valid"
126
+ )
127
+
128
+ self.num_patches = (self.image_size // self.patch_size) ** 2
129
+ self.num_positions = self.num_patches
130
+ self.position_embedding = nn.Embedding(self.num_positions, self.embed_dim)
131
+ self.register_buffer(
132
+ "position_ids",
133
+ torch.arange(self.num_positions).expand((1, -1)),
134
+ persistent=False,
135
+ )
136
+
137
+ def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
138
+ patch_embeds = self.patch_embedding(pixel_values)
139
+ embeddings = patch_embeds.flatten(2).transpose(1, 2)
140
+ embeddings = embeddings + self.position_embedding(self.position_ids)
141
+ return embeddings
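Note: `SigLipEmbeddings` turns each image into (image_size / patch_size)² patch tokens, which with this file's defaults (224 / 16) gives the 196 image tokens the processor prepends to the prompt. A quick shape check using the classes defined in this module:

# Shape check for SigLipEmbeddings with the defaults from siglip_config.py.
import torch

cfg = SigLipConfig()                     # image_size=224, patch_size=16, hidden_size=768
emb = SigLipEmbeddings(cfg)
pixels = torch.randn(1, cfg.num_channels, cfg.image_size, cfg.image_size)
print(emb(pixels).shape)                 # torch.Size([1, 196, 768])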
model/vision/siglip_config.py ADDED
@@ -0,0 +1,25 @@
1
+ class SigLipConfig:
2
+ def __init__(
3
+ self,
4
+ hidden_size=768,
5
+ intermediate_size=3072,
6
+ num_hidden_layers=12,
7
+ num_attention_heads=12,
8
+ num_channels=3,
9
+ image_size=224,
10
+ patch_size=16,
11
+ layer_norm_eps=1e-6,
12
+ attention_dropout=0.0,
13
+ num_image_tokens: int = None,
14
+ **kwargs
15
+ ):
16
+ self.hidden_size = hidden_size
17
+ self.intermediate_size = intermediate_size
18
+ self.num_hidden_layers = num_hidden_layers
19
+ self.num_attention_heads = num_attention_heads
20
+ self.num_channels = num_channels
21
+ self.patch_size = patch_size
22
+ self.image_size = image_size
23
+ self.attention_dropout = attention_dropout
24
+ self.layer_norm_eps = layer_norm_eps
25
+ self.num_image_tokens = num_image_tokens
model/vision/siglip_model.py ADDED
@@ -0,0 +1,14 @@
1
+ from typing import Tuple
2
+ import torch
3
+ import torch.nn as nn
4
+ from .siglip_config import SigLipConfig
5
+ from .siglip_components import SiglipTransformer
6
+
7
+ class SigLip(nn.Module):
8
+ def __init__(self, config: SigLipConfig):
9
+ super().__init__()
10
+ self.config = config
11
+ self.vision_model = SiglipTransformer(config)
12
+
13
+ def forward(self, pixel_values) -> Tuple:
14
+ return self.vision_model(pixel_values=pixel_values)
processor.py ADDED
@@ -0,0 +1,47 @@
1
+ from typing import Dict, List, Optional, Union, Tuple, Iterable
2
+ import numpy as np
3
+ from PIL import Image
4
+ import torch
5
+ from torchvision import transforms
6
+
7
+ IMAGENET_STANDARD_MEAN = (0.485, 0.456, 0.406)
8
+ IMAGENET_STANDARD_STD = (0.229, 0.224, 0.225)
9
+
10
+ class MultiModalProcessor:
11
+ IMAGE_TOKEN = "<image>"
12
+
13
+ def __init__(self, tokenizer, num_image_tokens: int, image_size: int, max_length: int = 512) -> None:
14
+ super().__init__()
15
+
16
+ self.image_seq_length = num_image_tokens
17
+ self.image_size = image_size
18
+ self.max_length = max_length
19
+
20
+ tokens_to_add = {"additional_special_tokens": [self.IMAGE_TOKEN]}
21
+ tokenizer.add_special_tokens(tokens_to_add)
22
+ EXTRA_TOKENS = [f"<loc{i:04d}>" for i in range(1024)] + [f"<seg{i:03d}>" for i in range(128)]
23
+ tokenizer.add_tokens(EXTRA_TOKENS)
24
+ self.image_token_id = tokenizer.convert_tokens_to_ids(self.IMAGE_TOKEN)
25
+ tokenizer.add_bos_token = False
26
+ tokenizer.add_eos_token = False
27
+
28
+ self.tokenizer = tokenizer
29
+
30
+ self.image_transform = transforms.Compose([
31
+ transforms.Resize((self.image_size, self.image_size), interpolation=transforms.InterpolationMode.BICUBIC),
32
+ transforms.ToTensor(),
33
+ transforms.Normalize(mean=IMAGENET_STANDARD_MEAN, std=IMAGENET_STANDARD_STD)
34
+ ])
35
+
36
+ def __call__(self, text: List[str], images: List[Image.Image], padding: str = "longest", truncation: bool = True) -> dict:
37
+ assert len(images) == len(text) == 1, f"Received {len(images)} images for {len(text)} prompts. Expected 1 each."
38
+
39
+ pixel_values = torch.stack([self.image_transform(img) for img in images])
40
+
41
+ input_strings = [self._add_image_tokens_to_prompt(prompt) for prompt in text]
42
+ inputs = self.tokenizer(input_strings, return_tensors="pt", padding=padding, truncation=truncation, max_length=self.max_length)
43
+
44
+ return {"pixel_values": pixel_values, **inputs}
45
+
46
+ def _add_image_tokens_to_prompt(self, prefix_prompt: str) -> str:
47
+ return f"{self.IMAGE_TOKEN * self.image_seq_length}{self.tokenizer.bos_token}{prefix_prompt}\n"
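Note: a minimal usage sketch for `MultiModalProcessor`; it returns `pixel_values` plus the tokenizer outputs, with `num_image_tokens` copies of `<image>` prepended to the prompt, followed by `<bos>`, the prompt text, and a trailing newline. The value 196 below assumes the default SigLIP patch size in this repo; the real number comes from the model config, as in `app.py`:

# Illustrative call; the tokenizer comes from the ./tokenizer directory in this repo.
from PIL import Image
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./tokenizer", padding_side="right")
processor = MultiModalProcessor(tokenizer, num_image_tokens=196, image_size=224)
batch = processor(text=["What is happening in the photo?"],
                  images=[Image.new("RGB", (640, 480))])
print(batch["pixel_values"].shape)       # torch.Size([1, 3, 224, 224])
print(batch["input_ids"].shape)          # (1, 196 image tokens + prompt tokens)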
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ fire==0.6.0
+ numpy==1.26.4
+ pillow==10.3.0
+ safetensors==0.4.3
+ tokenizers==0.19.1
+ torch==2.3.0
+ torchaudio==2.3.0
+ torchvision==0.18.0
+ tqdm==4.66.4
+ transformers==4.41.2
+ streamlit
run.sh ADDED
@@ -0,0 +1,20 @@
+ #!/bin/bash
+
+ MODEL_PATH="./weights"
+ PROMPT="What is happening in the photo?"
+ IMAGE_FILE_PATH="test_images/image1.jpg"
+ MAX_TOKENS_TO_GENERATE=300
+ TEMPERATURE=0.8
+ TOP_P=0.9
+ DO_SAMPLE="False"
+ ONLY_CPU="False"   # must be "True"/"False" so Fire parses a boolean, not a truthy string
+
+ python3 inference.py \
+     --model_path "$MODEL_PATH" \
+     --prompt "$PROMPT" \
+     --image_file_path "$IMAGE_FILE_PATH" \
+     --max_tokens_to_generate $MAX_TOKENS_TO_GENERATE \
+     --temperature $TEMPERATURE \
+     --top_p $TOP_P \
+     --do_sample $DO_SAMPLE \
+     --only_cpu $ONLY_CPU
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<image>"
4
+ ],
5
+ "bos_token": {
6
+ "content": "<bos>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "content": "<eos>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
tokenizer/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ef6773c135b77b834de1d13c75a4c98ab7a3684ffd602d1831e1f1bf5467c563
+ size 17549604
tokenizer/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8986bb4f423f07f8c7f70d0dbe3526fb2316056c17bae71b1ea975e77a168fc6
+ size 4264023
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,1764 @@
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<pad>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<eos>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "<bos>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "3": {
30
+ "content": "<unk>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "4": {
38
+ "content": "<mask>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": false
44
+ },
45
+ "5": {
46
+ "content": "<2mass>",
47
+ "lstrip": false,
48
+ "normalized": true,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": false
52
+ },
53
+ "6": {
54
+ "content": "[@BOS@]",
55
+ "lstrip": false,
56
+ "normalized": true,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": false
60
+ },
61
+ "7": {
62
+ "content": "<unused0>",
63
+ "lstrip": false,
64
+ "normalized": true,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": false
68
+ },
69
+ "8": {
70
+ "content": "<unused1>",
71
+ "lstrip": false,
72
+ "normalized": true,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": false
76
+ },
77
+ "9": {
78
+ "content": "<unused2>",
79
+ "lstrip": false,
80
+ "normalized": true,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": false
84
+ },
85
+ "10": {
86
+ "content": "<unused3>",
87
+ "lstrip": false,
88
+ "normalized": true,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": false
92
+ },
93
+ "11": {
94
+ "content": "<unused4>",
95
+ "lstrip": false,
96
+ "normalized": true,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": false
100
+ },
101
+ "12": {
102
+ "content": "<unused5>",
103
+ "lstrip": false,
104
+ "normalized": true,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": false
108
+ },
109
+ "13": {
110
+ "content": "<unused6>",
111
+ "lstrip": false,
112
+ "normalized": true,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": false
116
+ },
117
+ "14": {
118
+ "content": "<unused7>",
119
+ "lstrip": false,
120
+ "normalized": true,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "15": {
126
+ "content": "<unused8>",
127
+ "lstrip": false,
128
+ "normalized": true,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "16": {
134
+ "content": "<unused9>",
135
+ "lstrip": false,
136
+ "normalized": true,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "17": {
142
+ "content": "<unused10>",
143
+ "lstrip": false,
144
+ "normalized": true,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "18": {
150
+ "content": "<unused11>",
151
+ "lstrip": false,
152
+ "normalized": true,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "19": {
158
+ "content": "<unused12>",
159
+ "lstrip": false,
160
+ "normalized": true,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "20": {
166
+ "content": "<unused13>",
167
+ "lstrip": false,
168
+ "normalized": true,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "21": {
174
+ "content": "<unused14>",
175
+ "lstrip": false,
176
+ "normalized": true,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "22": {
182
+ "content": "<unused15>",
183
+ "lstrip": false,
184
+ "normalized": true,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "23": {
190
+ "content": "<unused16>",
191
+ "lstrip": false,
192
+ "normalized": true,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "24": {
198
+ "content": "<unused17>",
199
+ "lstrip": false,
200
+ "normalized": true,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "25": {
206
+ "content": "<unused18>",
207
+ "lstrip": false,
208
+ "normalized": true,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ },
213
+ "26": {
214
+ "content": "<unused19>",
215
+ "lstrip": false,
216
+ "normalized": true,
217
+ "rstrip": false,
218
+ "single_word": false,
219
+ "special": false
220
+ },
221
+ "27": {
222
+ "content": "<unused20>",
223
+ "lstrip": false,
224
+ "normalized": true,
225
+ "rstrip": false,
226
+ "single_word": false,
227
+ "special": false
228
+ },
229
+ "28": {
230
+ "content": "<unused21>",
231
+ "lstrip": false,
232
+ "normalized": true,
233
+ "rstrip": false,
234
+ "single_word": false,
235
+ "special": false
236
+ },
237
+ "29": {
238
+ "content": "<unused22>",
239
+ "lstrip": false,
240
+ "normalized": true,
241
+ "rstrip": false,
242
+ "single_word": false,
243
+ "special": false
244
+ },
245
+ "30": {
246
+ "content": "<unused23>",
247
+ "lstrip": false,
248
+ "normalized": true,
249
+ "rstrip": false,
250
+ "single_word": false,
251
+ "special": false
252
+ },
253
+ "31": {
254
+ "content": "<unused24>",
255
+ "lstrip": false,
256
+ "normalized": true,
257
+ "rstrip": false,
258
+ "single_word": false,
259
+ "special": false
260
+ },
261
+ "32": {
262
+ "content": "<unused25>",
263
+ "lstrip": false,
264
+ "normalized": true,
265
+ "rstrip": false,
266
+ "single_word": false,
267
+ "special": false
268
+ },
269
+ "33": {
270
+ "content": "<unused26>",
271
+ "lstrip": false,
272
+ "normalized": true,
273
+ "rstrip": false,
274
+ "single_word": false,
275
+ "special": false
276
+ },
277
+ "34": {
278
+ "content": "<unused27>",
279
+ "lstrip": false,
280
+ "normalized": true,
281
+ "rstrip": false,
282
+ "single_word": false,
283
+ "special": false
284
+ },
285
+ "35": {
286
+ "content": "<unused28>",
287
+ "lstrip": false,
288
+ "normalized": true,
289
+ "rstrip": false,
290
+ "single_word": false,
291
+ "special": false
292
+ },
293
+ "36": {
294
+ "content": "<unused29>",
295
+ "lstrip": false,
296
+ "normalized": true,
297
+ "rstrip": false,
298
+ "single_word": false,
299
+ "special": false
300
+ },
301
+ "37": {
302
+ "content": "<unused30>",
303
+ "lstrip": false,
304
+ "normalized": true,
305
+ "rstrip": false,
306
+ "single_word": false,
307
+ "special": false
308
+ },
309
+ "38": {
310
+ "content": "<unused31>",
311
+ "lstrip": false,
312
+ "normalized": true,
313
+ "rstrip": false,
314
+ "single_word": false,
315
+ "special": false
316
+ },
317
+ "39": {
318
+ "content": "<unused32>",
319
+ "lstrip": false,
320
+ "normalized": true,
321
+ "rstrip": false,
322
+ "single_word": false,
323
+ "special": false
324
+ },
325
+ "40": {
326
+ "content": "<unused33>",
327
+ "lstrip": false,
328
+ "normalized": true,
329
+ "rstrip": false,
330
+ "single_word": false,
331
+ "special": false
332
+ },
333
+ "41": {
334
+ "content": "<unused34>",
335
+ "lstrip": false,
336
+ "normalized": true,
337
+ "rstrip": false,
338
+ "single_word": false,
339
+ "special": false
340
+ },
341
+ "42": {
342
+ "content": "<unused35>",
343
+ "lstrip": false,
344
+ "normalized": true,
345
+ "rstrip": false,
346
+ "single_word": false,
347
+ "special": false
348
+ },
349
+ "43": {
350
+ "content": "<unused36>",
351
+ "lstrip": false,
352
+ "normalized": true,
353
+ "rstrip": false,
354
+ "single_word": false,
355
+ "special": false
356
+ },
357
+ "44": {
358
+ "content": "<unused37>",
359
+ "lstrip": false,
360
+ "normalized": true,
361
+ "rstrip": false,
362
+ "single_word": false,
363
+ "special": false
364
+ },
365
+ "45": {
366
+ "content": "<unused38>",
367
+ "lstrip": false,
368
+ "normalized": true,
369
+ "rstrip": false,
370
+ "single_word": false,
371
+ "special": false
372
+ },
373
+ "46": {
374
+ "content": "<unused39>",
375
+ "lstrip": false,
376
+ "normalized": true,
377
+ "rstrip": false,
378
+ "single_word": false,
379
+ "special": false
380
+ },
381
+ "47": {
382
+ "content": "<unused40>",
383
+ "lstrip": false,
384
+ "normalized": true,
385
+ "rstrip": false,
386
+ "single_word": false,
387
+ "special": false
388
+ },
389
+ "48": {
390
+ "content": "<unused41>",
391
+ "lstrip": false,
392
+ "normalized": true,
393
+ "rstrip": false,
394
+ "single_word": false,
395
+ "special": false
396
+ },
397
+ "49": {
398
+ "content": "<unused42>",
399
+ "lstrip": false,
400
+ "normalized": true,
401
+ "rstrip": false,
402
+ "single_word": false,
403
+ "special": false
404
+ },
405
+ "50": {
406
+ "content": "<unused43>",
407
+ "lstrip": false,
408
+ "normalized": true,
409
+ "rstrip": false,
410
+ "single_word": false,
411
+ "special": false
412
+ },
413
+ "51": {
414
+ "content": "<unused44>",
415
+ "lstrip": false,
416
+ "normalized": true,
417
+ "rstrip": false,
418
+ "single_word": false,
419
+ "special": false
420
+ },
421
+ "52": {
422
+ "content": "<unused45>",
423
+ "lstrip": false,
424
+ "normalized": true,
425
+ "rstrip": false,
426
+ "single_word": false,
427
+ "special": false
428
+ },
429
+ "53": {
430
+ "content": "<unused46>",
431
+ "lstrip": false,
432
+ "normalized": true,
433
+ "rstrip": false,
434
+ "single_word": false,
435
+ "special": false
436
+ },
437
+ "54": {
438
+ "content": "<unused47>",
439
+ "lstrip": false,
440
+ "normalized": true,
441
+ "rstrip": false,
442
+ "single_word": false,
443
+ "special": false
444
+ },
445
+ "55": {
446
+ "content": "<unused48>",
447
+ "lstrip": false,
448
+ "normalized": true,
449
+ "rstrip": false,
450
+ "single_word": false,
451
+ "special": false
452
+ },
453
+ "56": {
454
+ "content": "<unused49>",
455
+ "lstrip": false,
456
+ "normalized": true,
457
+ "rstrip": false,
458
+ "single_word": false,
459
+ "special": false
460
+ },
461
+ "57": {
462
+ "content": "<unused50>",
463
+ "lstrip": false,
464
+ "normalized": true,
465
+ "rstrip": false,
466
+ "single_word": false,
467
+ "special": false
468
+ },
469
+ "58": {
470
+ "content": "<unused51>",
471
+ "lstrip": false,
472
+ "normalized": true,
473
+ "rstrip": false,
474
+ "single_word": false,
475
+ "special": false
476
+ },
477
+ "59": {
478
+ "content": "<unused52>",
479
+ "lstrip": false,
480
+ "normalized": true,
481
+ "rstrip": false,
482
+ "single_word": false,
483
+ "special": false
484
+ },
485
+ "60": {
486
+ "content": "<unused53>",
487
+ "lstrip": false,
488
+ "normalized": true,
489
+ "rstrip": false,
490
+ "single_word": false,
491
+ "special": false
492
+ },
493
+ "61": {
494
+ "content": "<unused54>",
495
+ "lstrip": false,
496
+ "normalized": true,
497
+ "rstrip": false,
498
+ "single_word": false,
499
+ "special": false
500
+ },
501
+ "62": {
502
+ "content": "<unused55>",
503
+ "lstrip": false,
504
+ "normalized": true,
505
+ "rstrip": false,
506
+ "single_word": false,
507
+ "special": false
508
+ },
509
+ "63": {
510
+ "content": "<unused56>",
511
+ "lstrip": false,
512
+ "normalized": true,
513
+ "rstrip": false,
514
+ "single_word": false,
515
+ "special": false
516
+ },
517
+ "64": {
518
+ "content": "<unused57>",
519
+ "lstrip": false,
520
+ "normalized": true,
521
+ "rstrip": false,
522
+ "single_word": false,
523
+ "special": false
524
+ },
525
+ "65": {
526
+ "content": "<unused58>",
527
+ "lstrip": false,
528
+ "normalized": true,
529
+ "rstrip": false,
530
+ "single_word": false,
531
+ "special": false
532
+ },
533
+ "66": {
534
+ "content": "<unused59>",
535
+ "lstrip": false,
536
+ "normalized": true,
537
+ "rstrip": false,
538
+ "single_word": false,
539
+ "special": false
540
+ },
541
+ "67": {
542
+ "content": "<unused60>",
543
+ "lstrip": false,
544
+ "normalized": true,
545
+ "rstrip": false,
546
+ "single_word": false,
547
+ "special": false
548
+ },
549
+ "68": {
550
+ "content": "<unused61>",
551
+ "lstrip": false,
552
+ "normalized": true,
553
+ "rstrip": false,
554
+ "single_word": false,
555
+ "special": false
556
+ },
557
+ "69": {
558
+ "content": "<unused62>",
559
+ "lstrip": false,
560
+ "normalized": true,
561
+ "rstrip": false,
562
+ "single_word": false,
563
+ "special": false
564
+ },
565
+ "70": {
566
+ "content": "<unused63>",
567
+ "lstrip": false,
568
+ "normalized": true,
569
+ "rstrip": false,
570
+ "single_word": false,
571
+ "special": false
572
+ },
573
+ "71": {
574
+ "content": "<unused64>",
575
+ "lstrip": false,
576
+ "normalized": true,
577
+ "rstrip": false,
578
+ "single_word": false,
579
+ "special": false
580
+ },
581
+ "72": {
582
+ "content": "<unused65>",
583
+ "lstrip": false,
584
+ "normalized": true,
585
+ "rstrip": false,
586
+ "single_word": false,
587
+ "special": false
588
+ },
589
+ "73": {
590
+ "content": "<unused66>",
591
+ "lstrip": false,
592
+ "normalized": true,
593
+ "rstrip": false,
594
+ "single_word": false,
595
+ "special": false
596
+ },
597
+ "74": {
598
+ "content": "<unused67>",
599
+ "lstrip": false,
600
+ "normalized": true,
601
+ "rstrip": false,
602
+ "single_word": false,
603
+ "special": false
604
+ },
605
+ "75": {
606
+ "content": "<unused68>",
607
+ "lstrip": false,
608
+ "normalized": true,
609
+ "rstrip": false,
610
+ "single_word": false,
611
+ "special": false
612
+ },
613
+ "76": {
614
+ "content": "<unused69>",
615
+ "lstrip": false,
616
+ "normalized": true,
617
+ "rstrip": false,
618
+ "single_word": false,
619
+ "special": false
620
+ },
621
+ "77": {
622
+ "content": "<unused70>",
623
+ "lstrip": false,
624
+ "normalized": true,
625
+ "rstrip": false,
626
+ "single_word": false,
627
+ "special": false
628
+ },
629
+ "78": {
630
+ "content": "<unused71>",
631
+ "lstrip": false,
632
+ "normalized": true,
633
+ "rstrip": false,
634
+ "single_word": false,
635
+ "special": false
636
+ },
637
+ "79": {
638
+ "content": "<unused72>",
639
+ "lstrip": false,
640
+ "normalized": true,
641
+ "rstrip": false,
642
+ "single_word": false,
643
+ "special": false
644
+ },
645
+ "80": {
646
+ "content": "<unused73>",
647
+ "lstrip": false,
648
+ "normalized": true,
649
+ "rstrip": false,
650
+ "single_word": false,
651
+ "special": false
652
+ },
653
+ "81": {
654
+ "content": "<unused74>",
655
+ "lstrip": false,
656
+ "normalized": true,
657
+ "rstrip": false,
658
+ "single_word": false,
659
+ "special": false
660
+ },
661
+ "82": {
662
+ "content": "<unused75>",
663
+ "lstrip": false,
664
+ "normalized": true,
665
+ "rstrip": false,
666
+ "single_word": false,
667
+ "special": false
668
+ },
669
+ "83": {
670
+ "content": "<unused76>",
671
+ "lstrip": false,
672
+ "normalized": true,
673
+ "rstrip": false,
674
+ "single_word": false,
675
+ "special": false
676
+ },
677
+ "84": {
678
+ "content": "<unused77>",
679
+ "lstrip": false,
680
+ "normalized": true,
681
+ "rstrip": false,
682
+ "single_word": false,
683
+ "special": false
684
+ },
685
+ "85": {
686
+ "content": "<unused78>",
687
+ "lstrip": false,
688
+ "normalized": true,
689
+ "rstrip": false,
690
+ "single_word": false,
691
+ "special": false
692
+ },
693
+ "86": {
694
+ "content": "<unused79>",
695
+ "lstrip": false,
696
+ "normalized": true,
697
+ "rstrip": false,
698
+ "single_word": false,
699
+ "special": false
700
+ },
701
+ "87": {
702
+ "content": "<unused80>",
703
+ "lstrip": false,
704
+ "normalized": true,
705
+ "rstrip": false,
706
+ "single_word": false,
707
+ "special": false
708
+ },
709
+ "88": {
710
+ "content": "<unused81>",
711
+ "lstrip": false,
712
+ "normalized": true,
713
+ "rstrip": false,
714
+ "single_word": false,
715
+ "special": false
716
+ },
717
+ "89": {
718
+ "content": "<unused82>",
719
+ "lstrip": false,
720
+ "normalized": true,
721
+ "rstrip": false,
722
+ "single_word": false,
723
+ "special": false
724
+ },
725
+ "90": {
726
+ "content": "<unused83>",
727
+ "lstrip": false,
728
+ "normalized": true,
729
+ "rstrip": false,
730
+ "single_word": false,
731
+ "special": false
732
+ },
733
+ "91": {
734
+ "content": "<unused84>",
735
+ "lstrip": false,
736
+ "normalized": true,
737
+ "rstrip": false,
738
+ "single_word": false,
739
+ "special": false
740
+ },
741
+ "92": {
742
+ "content": "<unused85>",
743
+ "lstrip": false,
744
+ "normalized": true,
745
+ "rstrip": false,
746
+ "single_word": false,
747
+ "special": false
748
+ },
749
+ "93": {
750
+ "content": "<unused86>",
751
+ "lstrip": false,
752
+ "normalized": true,
753
+ "rstrip": false,
754
+ "single_word": false,
755
+ "special": false
756
+ },
757
+ "94": {
758
+ "content": "<unused87>",
759
+ "lstrip": false,
760
+ "normalized": true,
761
+ "rstrip": false,
762
+ "single_word": false,
763
+ "special": false
764
+ },
765
+ "95": {
766
+ "content": "<unused88>",
767
+ "lstrip": false,
768
+ "normalized": true,
769
+ "rstrip": false,
770
+ "single_word": false,
771
+ "special": false
772
+ },
773
+ "96": {
774
+ "content": "<unused89>",
775
+ "lstrip": false,
776
+ "normalized": true,
777
+ "rstrip": false,
778
+ "single_word": false,
779
+ "special": false
780
+ },
781
+ "97": {
782
+ "content": "<unused90>",
783
+ "lstrip": false,
784
+ "normalized": true,
785
+ "rstrip": false,
786
+ "single_word": false,
787
+ "special": false
788
+ },
789
+ "98": {
790
+ "content": "<unused91>",
791
+ "lstrip": false,
792
+ "normalized": true,
793
+ "rstrip": false,
794
+ "single_word": false,
795
+ "special": false
796
+ },
797
+ "99": {
798
+ "content": "<unused92>",
799
+ "lstrip": false,
800
+ "normalized": true,
801
+ "rstrip": false,
802
+ "single_word": false,
803
+ "special": false
804
+ },
805
+ "100": {
806
+ "content": "<unused93>",
807
+ "lstrip": false,
808
+ "normalized": true,
809
+ "rstrip": false,
810
+ "single_word": false,
811
+ "special": false
812
+ },
813
+ "101": {
814
+ "content": "<unused94>",
815
+ "lstrip": false,
816
+ "normalized": true,
817
+ "rstrip": false,
818
+ "single_word": false,
819
+ "special": false
820
+ },
821
+ "102": {
822
+ "content": "<unused95>",
823
+ "lstrip": false,
824
+ "normalized": true,
825
+ "rstrip": false,
826
+ "single_word": false,
827
+ "special": false
828
+ },
829
+ "103": {
830
+ "content": "<unused96>",
831
+ "lstrip": false,
832
+ "normalized": true,
833
+ "rstrip": false,
834
+ "single_word": false,
835
+ "special": false
836
+ },
837
+ "104": {
838
+ "content": "<unused97>",
839
+ "lstrip": false,
840
+ "normalized": true,
841
+ "rstrip": false,
842
+ "single_word": false,
843
+ "special": false
844
+ },
845
+ "105": {
846
+ "content": "<unused98>",
847
+ "lstrip": false,
848
+ "normalized": true,
849
+ "rstrip": false,
850
+ "single_word": false,
851
+ "special": false
852
+ },
853
+ "106": {
854
+ "content": "<start_of_turn>",
855
+ "lstrip": false,
856
+ "normalized": true,
857
+ "rstrip": false,
858
+ "single_word": false,
859
+ "special": false
860
+ },
861
+ "107": {
862
+ "content": "<end_of_turn>",
863
+ "lstrip": false,
864
+ "normalized": true,
865
+ "rstrip": false,
866
+ "single_word": false,
867
+ "special": false
868
+ },
869
+ "108": {
870
+ "content": "\n",
871
+ "lstrip": false,
872
+ "normalized": true,
873
+ "rstrip": false,
874
+ "single_word": false,
875
+ "special": false
876
+ },
877
+ "109": {
878
+ "content": "\n\n",
879
+ "lstrip": false,
880
+ "normalized": true,
881
+ "rstrip": false,
882
+ "single_word": false,
883
+ "special": false
884
+ },
885
+ "110": {
886
+ "content": "\n\n\n",
887
+ "lstrip": false,
888
+ "normalized": true,
889
+ "rstrip": false,
890
+ "single_word": false,
891
+ "special": false
892
+ },
893
+ "111": {
894
+ "content": "\n\n\n\n",
895
+ "lstrip": false,
896
+ "normalized": true,
897
+ "rstrip": false,
898
+ "single_word": false,
899
+ "special": false
900
+ },
901
+ "112": {
902
+ "content": "\n\n\n\n\n",
903
+ "lstrip": false,
904
+ "normalized": true,
905
+ "rstrip": false,
906
+ "single_word": false,
907
+ "special": false
908
+ },
909
+ "113": {
910
+ "content": "\n\n\n\n\n\n",
911
+ "lstrip": false,
912
+ "normalized": true,
913
+ "rstrip": false,
914
+ "single_word": false,
915
+ "special": false
916
+ },
917
+ "114": {
918
+ "content": "\n\n\n\n\n\n\n",
919
+ "lstrip": false,
920
+ "normalized": true,
921
+ "rstrip": false,
922
+ "single_word": false,
923
+ "special": false
924
+ },
925
+ "115": {
926
+ "content": "\n\n\n\n\n\n\n\n",
927
+ "lstrip": false,
928
+ "normalized": true,
929
+ "rstrip": false,
930
+ "single_word": false,
931
+ "special": false
932
+ },
933
+ "116": {
934
+ "content": "\n\n\n\n\n\n\n\n\n",
935
+ "lstrip": false,
936
+ "normalized": true,
937
+ "rstrip": false,
938
+ "single_word": false,
939
+ "special": false
940
+ },
941
+ "117": {
942
+ "content": "\n\n\n\n\n\n\n\n\n\n",
943
+ "lstrip": false,
944
+ "normalized": true,
945
+ "rstrip": false,
946
+ "single_word": false,
947
+ "special": false
948
+ },
949
+ "118": {
950
+ "content": "\n\n\n\n\n\n\n\n\n\n\n",
951
+ "lstrip": false,
952
+ "normalized": true,
953
+ "rstrip": false,
954
+ "single_word": false,
955
+ "special": false
956
+ },
957
+ "119": {
958
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n",
959
+ "lstrip": false,
960
+ "normalized": true,
961
+ "rstrip": false,
962
+ "single_word": false,
963
+ "special": false
964
+ },
965
+ "120": {
966
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n",
967
+ "lstrip": false,
968
+ "normalized": true,
969
+ "rstrip": false,
970
+ "single_word": false,
971
+ "special": false
972
+ },
973
+ "121": {
974
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
975
+ "lstrip": false,
976
+ "normalized": true,
977
+ "rstrip": false,
978
+ "single_word": false,
979
+ "special": false
980
+ },
981
+ "122": {
982
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
983
+ "lstrip": false,
984
+ "normalized": true,
985
+ "rstrip": false,
986
+ "single_word": false,
987
+ "special": false
988
+ },
989
+ "123": {
990
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
991
+ "lstrip": false,
992
+ "normalized": true,
993
+ "rstrip": false,
994
+ "single_word": false,
995
+ "special": false
996
+ },
997
+ "124": {
998
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
999
+ "lstrip": false,
1000
+ "normalized": true,
1001
+ "rstrip": false,
1002
+ "single_word": false,
1003
+ "special": false
1004
+ },
1005
+ "125": {
1006
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1007
+ "lstrip": false,
1008
+ "normalized": true,
1009
+ "rstrip": false,
1010
+ "single_word": false,
1011
+ "special": false
1012
+ },
1013
+ "126": {
1014
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1015
+ "lstrip": false,
1016
+ "normalized": true,
1017
+ "rstrip": false,
1018
+ "single_word": false,
1019
+ "special": false
1020
+ },
1021
+ "127": {
1022
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1023
+ "lstrip": false,
1024
+ "normalized": true,
1025
+ "rstrip": false,
1026
+ "single_word": false,
1027
+ "special": false
1028
+ },
1029
+ "128": {
1030
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1031
+ "lstrip": false,
1032
+ "normalized": true,
1033
+ "rstrip": false,
1034
+ "single_word": false,
1035
+ "special": false
1036
+ },
1037
+ "129": {
1038
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1039
+ "lstrip": false,
1040
+ "normalized": true,
1041
+ "rstrip": false,
1042
+ "single_word": false,
1043
+ "special": false
1044
+ },
1045
+ "130": {
1046
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1047
+ "lstrip": false,
1048
+ "normalized": true,
1049
+ "rstrip": false,
1050
+ "single_word": false,
1051
+ "special": false
1052
+ },
1053
+ "131": {
1054
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1055
+ "lstrip": false,
1056
+ "normalized": true,
1057
+ "rstrip": false,
1058
+ "single_word": false,
1059
+ "special": false
1060
+ },
1061
+ "132": {
1062
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1063
+ "lstrip": false,
1064
+ "normalized": true,
1065
+ "rstrip": false,
1066
+ "single_word": false,
1067
+ "special": false
1068
+ },
1069
+ "133": {
1070
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1071
+ "lstrip": false,
1072
+ "normalized": true,
1073
+ "rstrip": false,
1074
+ "single_word": false,
1075
+ "special": false
1076
+ },
1077
+ "134": {
1078
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1079
+ "lstrip": false,
1080
+ "normalized": true,
1081
+ "rstrip": false,
1082
+ "single_word": false,
1083
+ "special": false
1084
+ },
1085
+ "135": {
1086
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1087
+ "lstrip": false,
1088
+ "normalized": true,
1089
+ "rstrip": false,
1090
+ "single_word": false,
1091
+ "special": false
1092
+ },
1093
+ "136": {
1094
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1095
+ "lstrip": false,
1096
+ "normalized": true,
1097
+ "rstrip": false,
1098
+ "single_word": false,
1099
+ "special": false
1100
+ },
1101
+ "137": {
1102
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1103
+ "lstrip": false,
1104
+ "normalized": true,
1105
+ "rstrip": false,
1106
+ "single_word": false,
1107
+ "special": false
1108
+ },
1109
+ "138": {
1110
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1111
+ "lstrip": false,
1112
+ "normalized": true,
1113
+ "rstrip": false,
1114
+ "single_word": false,
1115
+ "special": false
1116
+ },
1117
+ "139": {
1118
+ "content": "▁▁",
1119
+ "lstrip": false,
1120
+ "normalized": true,
1121
+ "rstrip": false,
1122
+ "single_word": false,
1123
+ "special": false
1124
+ },
1125
+ "140": {
1126
+ "content": "▁▁▁",
1127
+ "lstrip": false,
1128
+ "normalized": true,
1129
+ "rstrip": false,
1130
+ "single_word": false,
1131
+ "special": false
1132
+ },
1133
+ "141": {
1134
+ "content": "▁▁▁▁",
1135
+ "lstrip": false,
1136
+ "normalized": true,
1137
+ "rstrip": false,
1138
+ "single_word": false,
1139
+ "special": false
1140
+ },
1141
+ "142": {
1142
+ "content": "▁▁▁▁▁",
1143
+ "lstrip": false,
1144
+ "normalized": true,
1145
+ "rstrip": false,
1146
+ "single_word": false,
1147
+ "special": false
1148
+ },
1149
+ "143": {
1150
+ "content": "▁▁▁▁▁▁",
1151
+ "lstrip": false,
1152
+ "normalized": true,
1153
+ "rstrip": false,
1154
+ "single_word": false,
1155
+ "special": false
1156
+ },
1157
+ "144": {
1158
+ "content": "▁▁▁▁▁▁▁",
1159
+ "lstrip": false,
1160
+ "normalized": true,
1161
+ "rstrip": false,
1162
+ "single_word": false,
1163
+ "special": false
1164
+ },
1165
+ "145": {
1166
+ "content": "▁▁▁▁▁▁▁▁",
1167
+ "lstrip": false,
1168
+ "normalized": true,
1169
+ "rstrip": false,
1170
+ "single_word": false,
1171
+ "special": false
1172
+ },
1173
+ "146": {
1174
+ "content": "▁▁▁▁▁▁▁▁▁",
1175
+ "lstrip": false,
1176
+ "normalized": true,
1177
+ "rstrip": false,
1178
+ "single_word": false,
1179
+ "special": false
1180
+ },
1181
+ "147": {
1182
+ "content": "▁▁▁▁▁▁▁▁▁▁",
1183
+ "lstrip": false,
1184
+ "normalized": true,
1185
+ "rstrip": false,
1186
+ "single_word": false,
1187
+ "special": false
1188
+ },
1189
+ "148": {
1190
+ "content": "▁▁▁▁▁▁▁▁▁▁▁",
1191
+ "lstrip": false,
1192
+ "normalized": true,
1193
+ "rstrip": false,
1194
+ "single_word": false,
1195
+ "special": false
1196
+ },
1197
+ "149": {
1198
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁",
1199
+ "lstrip": false,
1200
+ "normalized": true,
1201
+ "rstrip": false,
1202
+ "single_word": false,
1203
+ "special": false
1204
+ },
1205
+ "150": {
1206
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁",
1207
+ "lstrip": false,
1208
+ "normalized": true,
1209
+ "rstrip": false,
1210
+ "single_word": false,
1211
+ "special": false
1212
+ },
1213
+ "151": {
1214
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1215
+ "lstrip": false,
1216
+ "normalized": true,
1217
+ "rstrip": false,
1218
+ "single_word": false,
1219
+ "special": false
1220
+ },
1221
+ "152": {
1222
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1223
+ "lstrip": false,
1224
+ "normalized": true,
1225
+ "rstrip": false,
1226
+ "single_word": false,
1227
+ "special": false
1228
+ },
1229
+ "153": {
1230
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1231
+ "lstrip": false,
1232
+ "normalized": true,
1233
+ "rstrip": false,
1234
+ "single_word": false,
1235
+ "special": false
1236
+ },
1237
+ "154": {
1238
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1239
+ "lstrip": false,
1240
+ "normalized": true,
1241
+ "rstrip": false,
1242
+ "single_word": false,
1243
+ "special": false
1244
+ },
1245
+ "155": {
1246
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1247
+ "lstrip": false,
1248
+ "normalized": true,
1249
+ "rstrip": false,
1250
+ "single_word": false,
1251
+ "special": false
1252
+ },
1253
+ "156": {
1254
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1255
+ "lstrip": false,
1256
+ "normalized": true,
1257
+ "rstrip": false,
1258
+ "single_word": false,
1259
+ "special": false
1260
+ },
1261
+ "157": {
1262
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1263
+ "lstrip": false,
1264
+ "normalized": true,
1265
+ "rstrip": false,
1266
+ "single_word": false,
1267
+ "special": false
1268
+ },
1269
+ "158": {
1270
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1271
+ "lstrip": false,
1272
+ "normalized": true,
1273
+ "rstrip": false,
1274
+ "single_word": false,
1275
+ "special": false
1276
+ },
1277
+ "159": {
1278
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1279
+ "lstrip": false,
1280
+ "normalized": true,
1281
+ "rstrip": false,
1282
+ "single_word": false,
1283
+ "special": false
1284
+ },
1285
+ "160": {
1286
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1287
+ "lstrip": false,
1288
+ "normalized": true,
1289
+ "rstrip": false,
1290
+ "single_word": false,
1291
+ "special": false
1292
+ },
1293
+ "161": {
1294
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1295
+ "lstrip": false,
1296
+ "normalized": true,
1297
+ "rstrip": false,
1298
+ "single_word": false,
1299
+ "special": false
1300
+ },
1301
+ "162": {
1302
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1303
+ "lstrip": false,
1304
+ "normalized": true,
1305
+ "rstrip": false,
1306
+ "single_word": false,
1307
+ "special": false
1308
+ },
1309
+ "163": {
1310
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1311
+ "lstrip": false,
1312
+ "normalized": true,
1313
+ "rstrip": false,
1314
+ "single_word": false,
1315
+ "special": false
1316
+ },
1317
+ "164": {
1318
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1319
+ "lstrip": false,
1320
+ "normalized": true,
1321
+ "rstrip": false,
1322
+ "single_word": false,
1323
+ "special": false
1324
+ },
1325
+ "165": {
1326
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1327
+ "lstrip": false,
1328
+ "normalized": true,
1329
+ "rstrip": false,
1330
+ "single_word": false,
1331
+ "special": false
1332
+ },
1333
+ "166": {
1334
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1335
+ "lstrip": false,
1336
+ "normalized": true,
1337
+ "rstrip": false,
1338
+ "single_word": false,
1339
+ "special": false
1340
+ },
1341
+ "167": {
1342
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1343
+ "lstrip": false,
1344
+ "normalized": true,
1345
+ "rstrip": false,
1346
+ "single_word": false,
1347
+ "special": false
1348
+ },
1349
+ "168": {
1350
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1351
+ "lstrip": false,
1352
+ "normalized": true,
1353
+ "rstrip": false,
1354
+ "single_word": false,
1355
+ "special": false
1356
+ },
1357
+ "169": {
1358
+ "content": "<table>",
1359
+ "lstrip": false,
1360
+ "normalized": true,
1361
+ "rstrip": false,
1362
+ "single_word": false,
1363
+ "special": false
1364
+ },
1365
+ "170": {
1366
+ "content": "<caption>",
1367
+ "lstrip": false,
1368
+ "normalized": true,
1369
+ "rstrip": false,
1370
+ "single_word": false,
1371
+ "special": false
1372
+ },
1373
+ "171": {
1374
+ "content": "<thead>",
1375
+ "lstrip": false,
1376
+ "normalized": true,
1377
+ "rstrip": false,
1378
+ "single_word": false,
1379
+ "special": false
1380
+ },
1381
+ "172": {
1382
+ "content": "<tbody>",
1383
+ "lstrip": false,
1384
+ "normalized": true,
1385
+ "rstrip": false,
1386
+ "single_word": false,
1387
+ "special": false
1388
+ },
1389
+ "173": {
1390
+ "content": "<tfoot>",
1391
+ "lstrip": false,
1392
+ "normalized": true,
1393
+ "rstrip": false,
1394
+ "single_word": false,
1395
+ "special": false
1396
+ },
1397
+ "174": {
1398
+ "content": "<tr>",
1399
+ "lstrip": false,
1400
+ "normalized": true,
1401
+ "rstrip": false,
1402
+ "single_word": false,
1403
+ "special": false
1404
+ },
1405
+ "175": {
1406
+ "content": "<th>",
1407
+ "lstrip": false,
1408
+ "normalized": true,
1409
+ "rstrip": false,
1410
+ "single_word": false,
1411
+ "special": false
1412
+ },
1413
+ "176": {
1414
+ "content": "<td>",
1415
+ "lstrip": false,
1416
+ "normalized": true,
1417
+ "rstrip": false,
1418
+ "single_word": false,
1419
+ "special": false
1420
+ },
1421
+ "177": {
1422
+ "content": "</table>",
1423
+ "lstrip": false,
1424
+ "normalized": true,
1425
+ "rstrip": false,
1426
+ "single_word": false,
1427
+ "special": false
1428
+ },
1429
+ "178": {
1430
+ "content": "</caption>",
1431
+ "lstrip": false,
1432
+ "normalized": true,
1433
+ "rstrip": false,
1434
+ "single_word": false,
1435
+ "special": false
1436
+ },
1437
+ "179": {
1438
+ "content": "</thead>",
1439
+ "lstrip": false,
1440
+ "normalized": true,
1441
+ "rstrip": false,
1442
+ "single_word": false,
1443
+ "special": false
1444
+ },
1445
+ "180": {
1446
+ "content": "</tbody>",
1447
+ "lstrip": false,
1448
+ "normalized": true,
1449
+ "rstrip": false,
1450
+ "single_word": false,
1451
+ "special": false
1452
+ },
1453
+ "181": {
1454
+ "content": "</tfoot>",
1455
+ "lstrip": false,
1456
+ "normalized": true,
1457
+ "rstrip": false,
1458
+ "single_word": false,
1459
+ "special": false
1460
+ },
1461
+ "182": {
1462
+ "content": "</tr>",
1463
+ "lstrip": false,
1464
+ "normalized": true,
1465
+ "rstrip": false,
1466
+ "single_word": false,
1467
+ "special": false
1468
+ },
1469
+ "183": {
1470
+ "content": "</th>",
1471
+ "lstrip": false,
1472
+ "normalized": true,
1473
+ "rstrip": false,
1474
+ "single_word": false,
1475
+ "special": false
1476
+ },
1477
+ "184": {
1478
+ "content": "</td>",
1479
+ "lstrip": false,
1480
+ "normalized": true,
1481
+ "rstrip": false,
1482
+ "single_word": false,
1483
+ "special": false
1484
+ },
1485
+ "185": {
1486
+ "content": "<h1>",
1487
+ "lstrip": false,
1488
+ "normalized": true,
1489
+ "rstrip": false,
1490
+ "single_word": false,
1491
+ "special": false
1492
+ },
1493
+ "186": {
1494
+ "content": "<h2>",
1495
+ "lstrip": false,
1496
+ "normalized": true,
1497
+ "rstrip": false,
1498
+ "single_word": false,
1499
+ "special": false
1500
+ },
1501
+ "187": {
1502
+ "content": "<h3>",
1503
+ "lstrip": false,
1504
+ "normalized": true,
1505
+ "rstrip": false,
1506
+ "single_word": false,
1507
+ "special": false
1508
+ },
1509
+ "188": {
1510
+ "content": "<h4>",
1511
+ "lstrip": false,
1512
+ "normalized": true,
1513
+ "rstrip": false,
1514
+ "single_word": false,
1515
+ "special": false
1516
+ },
1517
+ "189": {
1518
+ "content": "<h5>",
1519
+ "lstrip": false,
1520
+ "normalized": true,
1521
+ "rstrip": false,
1522
+ "single_word": false,
1523
+ "special": false
1524
+ },
1525
+ "190": {
1526
+ "content": "<h6>",
1527
+ "lstrip": false,
1528
+ "normalized": true,
1529
+ "rstrip": false,
1530
+ "single_word": false,
1531
+ "special": false
1532
+ },
1533
+ "191": {
1534
+ "content": "<blockquote>",
1535
+ "lstrip": false,
1536
+ "normalized": true,
1537
+ "rstrip": false,
1538
+ "single_word": false,
1539
+ "special": false
1540
+ },
1541
+ "192": {
1542
+ "content": "</h1>",
1543
+ "lstrip": false,
1544
+ "normalized": true,
1545
+ "rstrip": false,
1546
+ "single_word": false,
1547
+ "special": false
1548
+ },
1549
+ "193": {
1550
+ "content": "</h2>",
1551
+ "lstrip": false,
1552
+ "normalized": true,
1553
+ "rstrip": false,
1554
+ "single_word": false,
1555
+ "special": false
1556
+ },
1557
+ "194": {
1558
+ "content": "</h3>",
1559
+ "lstrip": false,
1560
+ "normalized": true,
1561
+ "rstrip": false,
1562
+ "single_word": false,
1563
+ "special": false
1564
+ },
1565
+ "195": {
1566
+ "content": "</h4>",
1567
+ "lstrip": false,
1568
+ "normalized": true,
1569
+ "rstrip": false,
1570
+ "single_word": false,
1571
+ "special": false
1572
+ },
1573
+ "196": {
1574
+ "content": "</h5>",
1575
+ "lstrip": false,
1576
+ "normalized": true,
1577
+ "rstrip": false,
1578
+ "single_word": false,
1579
+ "special": false
1580
+ },
1581
+ "197": {
1582
+ "content": "</h6>",
1583
+ "lstrip": false,
1584
+ "normalized": true,
1585
+ "rstrip": false,
1586
+ "single_word": false,
1587
+ "special": false
1588
+ },
1589
+ "198": {
1590
+ "content": "</blockquote>",
1591
+ "lstrip": false,
1592
+ "normalized": true,
1593
+ "rstrip": false,
1594
+ "single_word": false,
1595
+ "special": false
1596
+ },
1597
+ "199": {
1598
+ "content": "<strong>",
1599
+ "lstrip": false,
1600
+ "normalized": true,
1601
+ "rstrip": false,
1602
+ "single_word": false,
1603
+ "special": false
1604
+ },
1605
+ "200": {
1606
+ "content": "<em>",
1607
+ "lstrip": false,
1608
+ "normalized": true,
1609
+ "rstrip": false,
1610
+ "single_word": false,
1611
+ "special": false
1612
+ },
1613
+ "201": {
1614
+ "content": "<b>",
1615
+ "lstrip": false,
1616
+ "normalized": true,
1617
+ "rstrip": false,
1618
+ "single_word": false,
1619
+ "special": false
1620
+ },
1621
+ "202": {
1622
+ "content": "<i>",
1623
+ "lstrip": false,
1624
+ "normalized": true,
1625
+ "rstrip": false,
1626
+ "single_word": false,
1627
+ "special": false
1628
+ },
1629
+ "203": {
1630
+ "content": "<u>",
1631
+ "lstrip": false,
1632
+ "normalized": true,
1633
+ "rstrip": false,
1634
+ "single_word": false,
1635
+ "special": false
1636
+ },
1637
+ "204": {
1638
+ "content": "<s>",
1639
+ "lstrip": false,
1640
+ "normalized": true,
1641
+ "rstrip": false,
1642
+ "single_word": false,
1643
+ "special": false
1644
+ },
1645
+ "205": {
1646
+ "content": "<sub>",
1647
+ "lstrip": false,
1648
+ "normalized": true,
1649
+ "rstrip": false,
1650
+ "single_word": false,
1651
+ "special": false
1652
+ },
1653
+ "206": {
1654
+ "content": "<sup>",
1655
+ "lstrip": false,
1656
+ "normalized": true,
1657
+ "rstrip": false,
1658
+ "single_word": false,
1659
+ "special": false
1660
+ },
1661
+ "207": {
1662
+ "content": "<code>",
1663
+ "lstrip": false,
1664
+ "normalized": true,
1665
+ "rstrip": false,
1666
+ "single_word": false,
1667
+ "special": false
1668
+ },
1669
+ "208": {
1670
+ "content": "</strong>",
1671
+ "lstrip": false,
1672
+ "normalized": true,
1673
+ "rstrip": false,
1674
+ "single_word": false,
1675
+ "special": false
1676
+ },
1677
+ "209": {
1678
+ "content": "</em>",
1679
+ "lstrip": false,
1680
+ "normalized": true,
1681
+ "rstrip": false,
1682
+ "single_word": false,
1683
+ "special": false
1684
+ },
1685
+ "210": {
1686
+ "content": "</b>",
1687
+ "lstrip": false,
1688
+ "normalized": true,
1689
+ "rstrip": false,
1690
+ "single_word": false,
1691
+ "special": false
1692
+ },
1693
+ "211": {
1694
+ "content": "</i>",
1695
+ "lstrip": false,
1696
+ "normalized": true,
1697
+ "rstrip": false,
1698
+ "single_word": false,
1699
+ "special": false
1700
+ },
1701
+ "212": {
1702
+ "content": "</u>",
1703
+ "lstrip": false,
1704
+ "normalized": true,
1705
+ "rstrip": false,
1706
+ "single_word": false,
1707
+ "special": false
1708
+ },
1709
+ "213": {
1710
+ "content": "</s>",
1711
+ "lstrip": false,
1712
+ "normalized": true,
1713
+ "rstrip": false,
1714
+ "single_word": false,
1715
+ "special": false
1716
+ },
1717
+ "214": {
1718
+ "content": "</sub>",
1719
+ "lstrip": false,
1720
+ "normalized": true,
1721
+ "rstrip": false,
1722
+ "single_word": false,
1723
+ "special": false
1724
+ },
1725
+ "215": {
1726
+ "content": "</sup>",
1727
+ "lstrip": false,
1728
+ "normalized": true,
1729
+ "rstrip": false,
1730
+ "single_word": false,
1731
+ "special": false
1732
+ },
1733
+ "216": {
1734
+ "content": "</code>",
1735
+ "lstrip": false,
1736
+ "normalized": true,
1737
+ "rstrip": false,
1738
+ "single_word": false,
1739
+ "special": false
1740
+ },
1741
+ "257152": {
1742
+ "content": "<image>",
1743
+ "lstrip": false,
1744
+ "normalized": false,
1745
+ "rstrip": false,
1746
+ "single_word": false,
1747
+ "special": true
1748
+ }
1749
+ },
1750
+ "additional_special_tokens": [
1751
+ "<image>"
1752
+ ],
1753
+ "bos_token": "<bos>",
1754
+ "clean_up_tokenization_spaces": false,
1755
+ "eos_token": "<eos>",
1756
+ "model_max_length": 1000000000000000019884624838656,
1757
+ "pad_token": "<pad>",
1758
+ "processor_class": "PaliGemmaProcessor",
1759
+ "sp_model_kwargs": {},
1760
+ "spaces_between_special_tokens": false,
1761
+ "tokenizer_class": "GemmaTokenizer",
1762
+ "unk_token": "<unk>",
1763
+ "use_default_system_prompt": false
1764
+ }
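
The tokenizer configuration above (a GemmaTokenizer with PaliGemmaProcessor as its processor class) registers the core special tokens <pad>, <eos>, <bos> and <unk>, a bank of <unusedN>, whitespace and markup tokens, and the <image> placeholder at id 257152 listed under additional_special_tokens. As a quick sanity check, the sketch below is only an illustration: it assumes the transformers library is installed and that these files are saved under a local tokenizer/ directory (the path is an assumption, not part of this upload).

from transformers import AutoTokenizer

# Load from the local directory holding tokenizer_config.json, tokenizer.json
# and tokenizer.model ("./tokenizer" is a hypothetical path for illustration).
tokenizer = AutoTokenizer.from_pretrained("./tokenizer")

# Special tokens declared in tokenizer_config.json
print(tokenizer.bos_token, tokenizer.eos_token, tokenizer.pad_token, tokenizer.unk_token)

# The <image> placeholder is registered as an additional special token
print(tokenizer.convert_tokens_to_ids("<image>"))  # expected: 257152 per added_tokens_decoder

Because add_bos_token is true and add_eos_token is false, encoding a prompt prepends <bos> but does not append <eos>; generation is expected to stop only when the model itself emits <eos>.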