joy-caption-pre-alpha-mod

Running on Zero

App Files Files Community

John6666 commited on Sep 27

Commit

458dcad

•

1 Parent(s): dbcc400

Upload 15 files

Browse files

Files changed (13) hide show

README.md +1 -1
app.py +49 -24
cgrkzexw-599808/clip_model.pt +3 -0
cgrkzexw-599808/config.yaml +39 -0
cgrkzexw-599808/image_adapter.pt +3 -0
cgrkzexw-599808/text_model/README.md +202 -0
cgrkzexw-599808/text_model/adapter_config.json +34 -0
cgrkzexw-599808/text_model/adapter_model.safetensors +3 -0
cgrkzexw-599808/text_model/special_tokens_map.json +23 -0
cgrkzexw-599808/text_model/tokenizer.json +0 -0
cgrkzexw-599808/text_model/tokenizer_config.json +2064 -0
joycaption.py +162 -80
requirements.txt +2 -2

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Joy Caption Alpha One Mod
 emoji: 💬
 colorFrom: yellow
 colorTo: purple

 ---
+title: Joy Caption Alpha Two Mod
 emoji: 💬
 colorFrom: yellow
 colorTo: purple

app.py CHANGED Viewed

@@ -2,8 +2,8 @@ import spaces
 import gradio as gr
 from joycaption import stream_chat_mod, get_text_model, change_text_model, get_repo_gguf
-JC_TITLE_MD = "<h1><center>JoyCaption Alpha One Mod</center></h1>"
-JC_DESC_MD = """This space is mod of [fancyfeast/joy-caption-alpha-one](https://huggingface.co/spaces/fancyfeast/joy-caption-alpha-one),
  [Wi-zz/joy-caption-pre-alpha](https://huggingface.co/Wi-zz/joy-caption-pre-alpha). Thanks to [dominic1021](https://huggingface.co/dominic1021)"""
 css = """
@@ -16,24 +16,47 @@ with gr.Blocks(fill_width=True, css=css, delete_cache=(60, 3600)) as demo:
         with gr.Column():
             with gr.Group():
                 jc_input_image = gr.Image(type="pil", label="Input Image", sources=["upload", "clipboard"], height=384)
-                with gr.Row():
-                    jc_caption_type = gr.Dropdown(
-                        choices=["descriptive", "training_prompt", "rng-tags"],
-                        label="Caption Type",
-                        value="descriptive",
-                    )
-                    jc_caption_tone = gr.Dropdown(
-                        choices=["formal", "informal"],
-                        label="Caption Tone",
-                        value="formal",
-                    )
-                    jc_caption_length = gr.Dropdown(
-                        choices=["any", "very short", "short", "medium-length", "long", "very long"] +
-                                [str(i) for i in range(20, 261, 10)],
-                        label="Caption Length",
-                        value="any",
                     )
-                gr.Markdown("**Note:** Caption tone doesn't affect `rng-tags` and `training_prompt`.", elem_classes="info")
                 with gr.Accordion("Advanced", open=False):
                     with gr.Row():
                         jc_text_model = gr.Dropdown(label="LLM Model", info="You can enter a huggingface model repo_id to want to use.",
@@ -42,7 +65,7 @@ with gr.Blocks(fill_width=True, css=css, delete_cache=(60, 3600)) as demo:
                         jc_gguf = gr.Dropdown(label=f"GGUF Filename", choices=[], value="",
                                               allow_custom_value=True, min_width=320, visible=False)
                         jc_nf4 = gr.Checkbox(label="Use NF4 quantization", value=True)
-                        jc_text_model_button = gr.Button("Load Model", variant="secondary")
                     jc_use_inference_client = gr.Checkbox(label="Use Inference Client", value=False, visible=False)
                     with gr.Row():
                         jc_tokens = gr.Slider(minimum=1, maximum=4096, value=300, step=1, label="Max tokens")
@@ -50,16 +73,18 @@ with gr.Blocks(fill_width=True, css=css, delete_cache=(60, 3600)) as demo:
                         jc_topp = gr.Slider(minimum=0, maximum=2.0, value=0.9, step=0.01, label="Top-P")
             jc_run_button = gr.Button("Caption", variant="primary")
         with gr.Column():
             jc_output_caption = gr.Textbox(label="Caption", show_copy_button=True)
     gr.Markdown(JC_DESC_MD, elem_classes="info")
     gr.LoginButton()
     gr.DuplicateButton(value="Duplicate Space for private use (This demo does not work on CPU. Requires GPU Space)")
-    jc_run_button.click(fn=stream_chat_mod, inputs=[jc_input_image, jc_caption_type, jc_caption_tone, jc_caption_length,
-                                                    jc_tokens, jc_topp, jc_temperature, jc_text_model], outputs=[jc_output_caption])
-    jc_text_model_button.click(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4], [jc_text_model], show_api=False)
     #jc_text_model.change(get_repo_gguf, [jc_text_model], [jc_gguf], show_api=False)
-    jc_use_inference_client.change(change_text_model, [jc_text_model, jc_use_inference_client], [jc_text_model], show_api=False)
 if __name__ == "__main__":
     #demo.queue()

 import gradio as gr
 from joycaption import stream_chat_mod, get_text_model, change_text_model, get_repo_gguf
+JC_TITLE_MD = "<h1><center>JoyCaption Alpha Two Mod</center></h1>"
+JC_DESC_MD = """This space is mod of [fancyfeast/joy-caption-alpha-two](https://huggingface.co/spaces/fancyfeast/joy-caption-alpha-two),
  [Wi-zz/joy-caption-pre-alpha](https://huggingface.co/Wi-zz/joy-caption-pre-alpha). Thanks to [dominic1021](https://huggingface.co/dominic1021)"""
 css = """
         with gr.Column():
             with gr.Group():
                 jc_input_image = gr.Image(type="pil", label="Input Image", sources=["upload", "clipboard"], height=384)
+                with gr.Accordion("Options", open=False):
+                    with gr.Row():
+                        jc_caption_type = gr.Dropdown(
+                            choices=["Descriptive", "Descriptive (Informal)", "Training Prompt", "MidJourney", "Booru tag list", "Booru-like tag list", "Art Critic", "Product Listing", "Social Media Post"],
+                            label="Caption Type",
+                            value="Descriptive",
+                        )
+                        jc_caption_length = gr.Dropdown(
+                            choices=["any", "very short", "short", "medium-length", "long", "very long"] +
+                                    [str(i) for i in range(20, 261, 10)],
+                            label="Caption Length",
+                            value="long",
+                        )
+                    jc_extra_options = gr.CheckboxGroup(
+                        choices=[
+                            "If there is a person/character in the image you must refer to them as {name}.",
+                            "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
+                            "Include information about lighting.",
+                            "Include information about camera angle.",
+                            "Include information about whether there is a watermark or not.",
+                            "Include information about whether there are JPEG artifacts or not.",
+                            "If it is a photo you MUST include information about what camera was likely used and details such as aperture, shutter speed, ISO, etc.",
+                            "Do NOT include anything sexual; keep it PG.",
+                            "Do NOT mention the image's resolution.",
+                            "You MUST include information about the subjective aesthetic quality of the image from low to very high.",
+                            "Include information on the image's composition style, such as leading lines, rule of thirds, or symmetry.",
+                            "Do NOT mention any text that is in the image.",
+                            "Specify the depth of field and whether the background is in focus or blurred.",
+                            "If applicable, mention the likely use of artificial or natural lighting sources.",
+                            "Do NOT use any ambiguous language.",
+                            "Include whether the image is sfw, suggestive, or nsfw.",
+                            "ONLY describe the most important elements of the image."
+                        ],
+                        label="Extra Options"
                     )
+                    with gr.Row():
+                        jc_name_input = gr.Textbox(label="Person/Character Name (if applicable)")
+                        gr.Markdown("**Note:** Name input is only used if an Extra Option is selected that requires it.")
+                        jc_custom_prompt = gr.Textbox(label="Custom Prompt (optional, will override all other settings)")
+                        gr.Markdown("**Note:** Alpha Two is not a general instruction follower and will not follow prompts outside its training data well. Use this feature with caution.")
                 with gr.Accordion("Advanced", open=False):
                     with gr.Row():
                         jc_text_model = gr.Dropdown(label="LLM Model", info="You can enter a huggingface model repo_id to want to use.",
                         jc_gguf = gr.Dropdown(label=f"GGUF Filename", choices=[], value="",
                                               allow_custom_value=True, min_width=320, visible=False)
                         jc_nf4 = gr.Checkbox(label="Use NF4 quantization", value=True)
+                        jc_text_model_button = gr.Button("Load Model", variant="secondary", visible=False)
                     jc_use_inference_client = gr.Checkbox(label="Use Inference Client", value=False, visible=False)
                     with gr.Row():
                         jc_tokens = gr.Slider(minimum=1, maximum=4096, value=300, step=1, label="Max tokens")
                         jc_topp = gr.Slider(minimum=0, maximum=2.0, value=0.9, step=0.01, label="Top-P")
             jc_run_button = gr.Button("Caption", variant="primary")
         with gr.Column():
+            jc_output_prompt = gr.Textbox(label="Prompt that was used")
             jc_output_caption = gr.Textbox(label="Caption", show_copy_button=True)
     gr.Markdown(JC_DESC_MD, elem_classes="info")
     gr.LoginButton()
     gr.DuplicateButton(value="Duplicate Space for private use (This demo does not work on CPU. Requires GPU Space)")
+    jc_run_button.click(fn=stream_chat_mod, inputs=[jc_input_image, jc_caption_type, jc_caption_length, jc_extra_options, jc_name_input, jc_custom_prompt,
+                                                    jc_tokens, jc_topp, jc_temperature, jc_text_model], outputs=[jc_output_prompt, jc_output_caption])
+    jc_text_model.change(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4], [jc_text_model], show_api=False)
+    #jc_text_model_button.click(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4], [jc_text_model], show_api=False)
     #jc_text_model.change(get_repo_gguf, [jc_text_model], [jc_gguf], show_api=False)
+    #jc_use_inference_client.change(change_text_model, [jc_text_model, jc_use_inference_client], [jc_text_model], show_api=False)
 if __name__ == "__main__":
     #demo.queue()

cgrkzexw-599808/clip_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9277e041aab3e7f20a8e6ecf7248b663aac1c281daf4472c12a6e5013cf9f0cc
+size 1713067838

cgrkzexw-599808/config.yaml ADDED Viewed

	@@ -0,0 +1,39 @@

+wandb_project: joy-caption-1
+device_batch_size: 2
+batch_size: 256
+learning_rate: 0.0002
+warmup_samples: 18000
+max_samples: 600000
+save_every: 50000
+test_every: 50000
+use_amp: true
+grad_scaler: true
+lr_scheduler_type: cosine
+min_lr_ratio: 0.0
+allow_tf32: true
+seed: 69
+num_workers: 8
+optimizer_type: adamw
+adam_beta1: 0.9
+adam_beta2: 0.999
+adam_eps: 1.0e-08
+adam_weight_decay: 0.0
+clip_grad_norm: 1.0
+dataset: fancyfeast/joy-captioning-20240924a
+clip_model: google/siglip-so400m-patch14-384
+text_model: ../lora-train/lora_model_vwbzycxh
+resume: null
+gradient_checkpointing: false
+test_size: 2048
+grad_scaler_init: 65536.0
+max_caption_length: 257
+num_image_tokens: 32
+adapter_type: mlp
+text_model_dtype: bfloat16
+pre_test: false
+train_image_model: true
+image_model_lr: null
+train_lora: true
+lora_r: 64
+lora_alpha: 16
+lora_dropout: 0.1

cgrkzexw-599808/image_adapter.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:38db2fe263be2d494a50be4a7bbfd7b23b76f9d03e4008a1b7df97d6b27894ef
+size 86067714

cgrkzexw-599808/text_model/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: unsloth/Meta-Llama-3.1-8B-Instruct
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.12.0

cgrkzexw-599808/text_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "gate_proj",
+    "down_proj",
+    "o_proj",
+    "k_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

cgrkzexw-599808/text_model/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd883ebd089f87e0fab7f17960c5f4451ceae43aecead44a9984b3369018dbdb
+size 671149168

cgrkzexw-599808/text_model/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<|begin_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|eot_id|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|finetune_right_pad_id|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

cgrkzexw-599808/text_model/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

cgrkzexw-599808/text_model/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,2064 @@

+{
+  "added_tokens_decoder": {
+    "128000": {
+      "content": "<|begin_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128001": {
+      "content": "<|end_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128002": {
+      "content": "<|reserved_special_token_0|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128003": {
+      "content": "<|reserved_special_token_1|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128004": {
+      "content": "<|finetune_right_pad_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128005": {
+      "content": "<|reserved_special_token_2|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128006": {
+      "content": "<|start_header_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128007": {
+      "content": "<|end_header_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128008": {
+      "content": "<|eom_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128009": {
+      "content": "<|eot_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128010": {
+      "content": "<|python_tag|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128011": {
+      "content": "<|reserved_special_token_3|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128012": {
+      "content": "<|reserved_special_token_4|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128013": {
+      "content": "<|reserved_special_token_5|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128014": {
+      "content": "<|reserved_special_token_6|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128015": {
+      "content": "<|reserved_special_token_7|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128016": {
+      "content": "<|reserved_special_token_8|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128017": {
+      "content": "<|reserved_special_token_9|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128018": {
+      "content": "<|reserved_special_token_10|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128019": {
+      "content": "<|reserved_special_token_11|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128020": {
+      "content": "<|reserved_special_token_12|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128021": {
+      "content": "<|reserved_special_token_13|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128022": {
+      "content": "<|reserved_special_token_14|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128023": {
+      "content": "<|reserved_special_token_15|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128024": {
+      "content": "<|reserved_special_token_16|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128025": {
+      "content": "<|reserved_special_token_17|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128026": {
+      "content": "<|reserved_special_token_18|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128027": {
+      "content": "<|reserved_special_token_19|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128028": {
+      "content": "<|reserved_special_token_20|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128029": {
+      "content": "<|reserved_special_token_21|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128030": {
+      "content": "<|reserved_special_token_22|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128031": {
+      "content": "<|reserved_special_token_23|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128032": {
+      "content": "<|reserved_special_token_24|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128033": {
+      "content": "<|reserved_special_token_25|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128034": {
+      "content": "<|reserved_special_token_26|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128035": {
+      "content": "<|reserved_special_token_27|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128036": {
+      "content": "<|reserved_special_token_28|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128037": {
+      "content": "<|reserved_special_token_29|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128038": {
+      "content": "<|reserved_special_token_30|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128039": {
+      "content": "<|reserved_special_token_31|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128040": {
+      "content": "<|reserved_special_token_32|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128041": {
+      "content": "<|reserved_special_token_33|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128042": {
+      "content": "<|reserved_special_token_34|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128043": {
+      "content": "<|reserved_special_token_35|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128044": {
+      "content": "<|reserved_special_token_36|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128045": {
+      "content": "<|reserved_special_token_37|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128046": {
+      "content": "<|reserved_special_token_38|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128047": {
+      "content": "<|reserved_special_token_39|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128048": {
+      "content": "<|reserved_special_token_40|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128049": {
+      "content": "<|reserved_special_token_41|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128050": {
+      "content": "<|reserved_special_token_42|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128051": {
+      "content": "<|reserved_special_token_43|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128052": {
+      "content": "<|reserved_special_token_44|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128053": {
+      "content": "<|reserved_special_token_45|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128054": {
+      "content": "<|reserved_special_token_46|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128055": {
+      "content": "<|reserved_special_token_47|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128056": {
+      "content": "<|reserved_special_token_48|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128057": {
+      "content": "<|reserved_special_token_49|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128058": {
+      "content": "<|reserved_special_token_50|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128059": {
+      "content": "<|reserved_special_token_51|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128060": {
+      "content": "<|reserved_special_token_52|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128061": {
+      "content": "<|reserved_special_token_53|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128062": {
+      "content": "<|reserved_special_token_54|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128063": {
+      "content": "<|reserved_special_token_55|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128064": {
+      "content": "<|reserved_special_token_56|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128065": {
+      "content": "<|reserved_special_token_57|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128066": {
+      "content": "<|reserved_special_token_58|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128067": {
+      "content": "<|reserved_special_token_59|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128068": {
+      "content": "<|reserved_special_token_60|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128069": {
+      "content": "<|reserved_special_token_61|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128070": {
+      "content": "<|reserved_special_token_62|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128071": {
+      "content": "<|reserved_special_token_63|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128072": {
+      "content": "<|reserved_special_token_64|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128073": {
+      "content": "<|reserved_special_token_65|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128074": {
+      "content": "<|reserved_special_token_66|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128075": {
+      "content": "<|reserved_special_token_67|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128076": {
+      "content": "<|reserved_special_token_68|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128077": {
+      "content": "<|reserved_special_token_69|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128078": {
+      "content": "<|reserved_special_token_70|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128079": {
+      "content": "<|reserved_special_token_71|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128080": {
+      "content": "<|reserved_special_token_72|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128081": {
+      "content": "<|reserved_special_token_73|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128082": {
+      "content": "<|reserved_special_token_74|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128083": {
+      "content": "<|reserved_special_token_75|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128084": {
+      "content": "<|reserved_special_token_76|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128085": {
+      "content": "<|reserved_special_token_77|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128086": {
+      "content": "<|reserved_special_token_78|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128087": {
+      "content": "<|reserved_special_token_79|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128088": {
+      "content": "<|reserved_special_token_80|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128089": {
+      "content": "<|reserved_special_token_81|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128090": {
+      "content": "<|reserved_special_token_82|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128091": {
+      "content": "<|reserved_special_token_83|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128092": {
+      "content": "<|reserved_special_token_84|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128093": {
+      "content": "<|reserved_special_token_85|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128094": {
+      "content": "<|reserved_special_token_86|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128095": {
+      "content": "<|reserved_special_token_87|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128096": {
+      "content": "<|reserved_special_token_88|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128097": {
+      "content": "<|reserved_special_token_89|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128098": {
+      "content": "<|reserved_special_token_90|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128099": {
+      "content": "<|reserved_special_token_91|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128100": {
+      "content": "<|reserved_special_token_92|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128101": {
+      "content": "<|reserved_special_token_93|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128102": {
+      "content": "<|reserved_special_token_94|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128103": {
+      "content": "<|reserved_special_token_95|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128104": {
+      "content": "<|reserved_special_token_96|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128105": {
+      "content": "<|reserved_special_token_97|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128106": {
+      "content": "<|reserved_special_token_98|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128107": {
+      "content": "<|reserved_special_token_99|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128108": {
+      "content": "<|reserved_special_token_100|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128109": {
+      "content": "<|reserved_special_token_101|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128110": {
+      "content": "<|reserved_special_token_102|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128111": {
+      "content": "<|reserved_special_token_103|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128112": {
+      "content": "<|reserved_special_token_104|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128113": {
+      "content": "<|reserved_special_token_105|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128114": {
+      "content": "<|reserved_special_token_106|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128115": {
+      "content": "<|reserved_special_token_107|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128116": {
+      "content": "<|reserved_special_token_108|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128117": {
+      "content": "<|reserved_special_token_109|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128118": {
+      "content": "<|reserved_special_token_110|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128119": {
+      "content": "<|reserved_special_token_111|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128120": {
+      "content": "<|reserved_special_token_112|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128121": {
+      "content": "<|reserved_special_token_113|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128122": {
+      "content": "<|reserved_special_token_114|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128123": {
+      "content": "<|reserved_special_token_115|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128124": {
+      "content": "<|reserved_special_token_116|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128125": {
+      "content": "<|reserved_special_token_117|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128126": {
+      "content": "<|reserved_special_token_118|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128127": {
+      "content": "<|reserved_special_token_119|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128128": {
+      "content": "<|reserved_special_token_120|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128129": {
+      "content": "<|reserved_special_token_121|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128130": {
+      "content": "<|reserved_special_token_122|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128131": {
+      "content": "<|reserved_special_token_123|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128132": {
+      "content": "<|reserved_special_token_124|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128133": {
+      "content": "<|reserved_special_token_125|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128134": {
+      "content": "<|reserved_special_token_126|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128135": {
+      "content": "<|reserved_special_token_127|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128136": {
+      "content": "<|reserved_special_token_128|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128137": {
+      "content": "<|reserved_special_token_129|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128138": {
+      "content": "<|reserved_special_token_130|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128139": {
+      "content": "<|reserved_special_token_131|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128140": {
+      "content": "<|reserved_special_token_132|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128141": {
+      "content": "<|reserved_special_token_133|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128142": {
+      "content": "<|reserved_special_token_134|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128143": {
+      "content": "<|reserved_special_token_135|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128144": {
+      "content": "<|reserved_special_token_136|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128145": {
+      "content": "<|reserved_special_token_137|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128146": {
+      "content": "<|reserved_special_token_138|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128147": {
+      "content": "<|reserved_special_token_139|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128148": {
+      "content": "<|reserved_special_token_140|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128149": {
+      "content": "<|reserved_special_token_141|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128150": {
+      "content": "<|reserved_special_token_142|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128151": {
+      "content": "<|reserved_special_token_143|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128152": {
+      "content": "<|reserved_special_token_144|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128153": {
+      "content": "<|reserved_special_token_145|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128154": {
+      "content": "<|reserved_special_token_146|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128155": {
+      "content": "<|reserved_special_token_147|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128156": {
+      "content": "<|reserved_special_token_148|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128157": {
+      "content": "<|reserved_special_token_149|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128158": {
+      "content": "<|reserved_special_token_150|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128159": {
+      "content": "<|reserved_special_token_151|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128160": {
+      "content": "<|reserved_special_token_152|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128161": {
+      "content": "<|reserved_special_token_153|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128162": {
+      "content": "<|reserved_special_token_154|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128163": {
+      "content": "<|reserved_special_token_155|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128164": {
+      "content": "<|reserved_special_token_156|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128165": {
+      "content": "<|reserved_special_token_157|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128166": {
+      "content": "<|reserved_special_token_158|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128167": {
+      "content": "<|reserved_special_token_159|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128168": {
+      "content": "<|reserved_special_token_160|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128169": {
+      "content": "<|reserved_special_token_161|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128170": {
+      "content": "<|reserved_special_token_162|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128171": {
+      "content": "<|reserved_special_token_163|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128172": {
+      "content": "<|reserved_special_token_164|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128173": {
+      "content": "<|reserved_special_token_165|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128174": {
+      "content": "<|reserved_special_token_166|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128175": {
+      "content": "<|reserved_special_token_167|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128176": {
+      "content": "<|reserved_special_token_168|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128177": {
+      "content": "<|reserved_special_token_169|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128178": {
+      "content": "<|reserved_special_token_170|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128179": {
+      "content": "<|reserved_special_token_171|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128180": {
+      "content": "<|reserved_special_token_172|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128181": {
+      "content": "<|reserved_special_token_173|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128182": {
+      "content": "<|reserved_special_token_174|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128183": {
+      "content": "<|reserved_special_token_175|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128184": {
+      "content": "<|reserved_special_token_176|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128185": {
+      "content": "<|reserved_special_token_177|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128186": {
+      "content": "<|reserved_special_token_178|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128187": {
+      "content": "<|reserved_special_token_179|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128188": {
+      "content": "<|reserved_special_token_180|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128189": {
+      "content": "<|reserved_special_token_181|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128190": {
+      "content": "<|reserved_special_token_182|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128191": {
+      "content": "<|reserved_special_token_183|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128192": {
+      "content": "<|reserved_special_token_184|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128193": {
+      "content": "<|reserved_special_token_185|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128194": {
+      "content": "<|reserved_special_token_186|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128195": {
+      "content": "<|reserved_special_token_187|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128196": {
+      "content": "<|reserved_special_token_188|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128197": {
+      "content": "<|reserved_special_token_189|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128198": {
+      "content": "<|reserved_special_token_190|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128199": {
+      "content": "<|reserved_special_token_191|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128200": {
+      "content": "<|reserved_special_token_192|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128201": {
+      "content": "<|reserved_special_token_193|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128202": {
+      "content": "<|reserved_special_token_194|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128203": {
+      "content": "<|reserved_special_token_195|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128204": {
+      "content": "<|reserved_special_token_196|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128205": {
+      "content": "<|reserved_special_token_197|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128206": {
+      "content": "<|reserved_special_token_198|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128207": {
+      "content": "<|reserved_special_token_199|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128208": {
+      "content": "<|reserved_special_token_200|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128209": {
+      "content": "<|reserved_special_token_201|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128210": {
+      "content": "<|reserved_special_token_202|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128211": {
+      "content": "<|reserved_special_token_203|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128212": {
+      "content": "<|reserved_special_token_204|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128213": {
+      "content": "<|reserved_special_token_205|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128214": {
+      "content": "<|reserved_special_token_206|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128215": {
+      "content": "<|reserved_special_token_207|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128216": {
+      "content": "<|reserved_special_token_208|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128217": {
+      "content": "<|reserved_special_token_209|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128218": {
+      "content": "<|reserved_special_token_210|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128219": {
+      "content": "<|reserved_special_token_211|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128220": {
+      "content": "<|reserved_special_token_212|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128221": {
+      "content": "<|reserved_special_token_213|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128222": {
+      "content": "<|reserved_special_token_214|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128223": {
+      "content": "<|reserved_special_token_215|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128224": {
+      "content": "<|reserved_special_token_216|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128225": {
+      "content": "<|reserved_special_token_217|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128226": {
+      "content": "<|reserved_special_token_218|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128227": {
+      "content": "<|reserved_special_token_219|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128228": {
+      "content": "<|reserved_special_token_220|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128229": {
+      "content": "<|reserved_special_token_221|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128230": {
+      "content": "<|reserved_special_token_222|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128231": {
+      "content": "<|reserved_special_token_223|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128232": {
+      "content": "<|reserved_special_token_224|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128233": {
+      "content": "<|reserved_special_token_225|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128234": {
+      "content": "<|reserved_special_token_226|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128235": {
+      "content": "<|reserved_special_token_227|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128236": {
+      "content": "<|reserved_special_token_228|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128237": {
+      "content": "<|reserved_special_token_229|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128238": {
+      "content": "<|reserved_special_token_230|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128239": {
+      "content": "<|reserved_special_token_231|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128240": {
+      "content": "<|reserved_special_token_232|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128241": {
+      "content": "<|reserved_special_token_233|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128242": {
+      "content": "<|reserved_special_token_234|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128243": {
+      "content": "<|reserved_special_token_235|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128244": {
+      "content": "<|reserved_special_token_236|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128245": {
+      "content": "<|reserved_special_token_237|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128246": {
+      "content": "<|reserved_special_token_238|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128247": {
+      "content": "<|reserved_special_token_239|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128248": {
+      "content": "<|reserved_special_token_240|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128249": {
+      "content": "<|reserved_special_token_241|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128250": {
+      "content": "<|reserved_special_token_242|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128251": {
+      "content": "<|reserved_special_token_243|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128252": {
+      "content": "<|reserved_special_token_244|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128253": {
+      "content": "<|reserved_special_token_245|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128254": {
+      "content": "<|reserved_special_token_246|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128255": {
+      "content": "<|reserved_special_token_247|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|begin_of_text|>",
+  "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n    {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n    {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n    {%- set date_string = \"26 July 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content'] %}\n    {%- set messages = messages[1:] %}\n{%- else %}\n    {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n    {{- \"Environment: ipython\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n    {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\n\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\n\" }}\n{{- \"Today Date: \" + date_string + \"\n\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n    {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\n\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\n\n\" }}\n    {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n    {#- Extract the first user message so we can plug it in here #}\n    {%- if messages | length != 0 %}\n        {%- set first_user_message = messages[0]['content'] %}\n        {%- set messages = messages[1:] %}\n    {%- else %}\n        {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n    {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}\n    {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n    {{- \"with its proper arguments that best answers the given prompt.\n\n\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\n\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\n\n\" }}\n    {%- endfor %}\n    {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] + '<|eot_id|>' }}\n    {%- elif 'tool_calls' in message %}\n        {%- if not message.tool_calls|length == 1 %}\n            {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n        {%- endif %}\n        {%- set tool_call = message.tool_calls[0].function %}\n        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n            {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n            {%- for arg_name, arg_val in tool_call.arguments | items %}\n                {{- arg_name + '=\"' + arg_val + '\"' }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- endif %}\n                {%- endfor %}\n            {{- \")\" }}\n        {%- else  %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n            {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n            {{- '\"parameters\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- \"}\" }}\n        {%- endif %}\n        {%- if builtin_tools is defined %}\n            {#- This means we're in ipython mode #}\n            {{- \"<|eom_id|>\" }}\n        {%- else %}\n            {{- \"<|eot_id|>\" }}\n        {%- endif %}\n    {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n        {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n        {%- if message.content is mapping or message.content is iterable %}\n            {{- message.content | tojson }}\n        {%- else %}\n            {{- message.content }}\n        {%- endif %}\n        {{- \"<|eot_id|>\" }}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}\n",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|eot_id|>",
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 131072,
+  "pad_token": "<|finetune_right_pad_id|>",
+  "padding_side": "right",
+  "tokenizer_class": "PreTrainedTokenizerFast"
+}

joycaption.py CHANGED Viewed

@@ -11,7 +11,7 @@ else:
 import gradio as gr
 from huggingface_hub import InferenceClient
 from torch import nn
-from transformers import AutoModel, AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast, AutoModelForCausalLM
 from pathlib import Path
 import torch
 import torch.amp.autocast_mode
@@ -24,13 +24,11 @@ from typing import Union
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-# Define the base directory
-BASE_DIR = Path(__file__).resolve().parent
 device = "cuda" if torch.cuda.is_available() else "cpu"
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 use_inference_client = False
-PIXTRAL_PATHS = ["mistral-community/pixtral-12b"]
 llm_models = {
     "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2": None,
@@ -45,24 +43,55 @@ llm_models = {
 CLIP_PATH = "google/siglip-so400m-patch14-384"
 MODEL_PATH = list(llm_models.keys())[0]
-CHECKPOINT_PATH = BASE_DIR / Path("9em124t2-499968")
 LORA_PATH = CHECKPOINT_PATH / "text_model"
-TITLE = "<h1><center>JoyCaption Alpha One (2024-09-20a)</center></h1>"
 CAPTION_TYPE_MAP = {
-    ("descriptive", "formal", False, False): ["Write a descriptive caption for this image in a formal tone."],
-    ("descriptive", "formal", False, True): ["Write a descriptive caption for this image in a formal tone within {word_count} words."],
-    ("descriptive", "formal", True, False): ["Write a {length} descriptive caption for this image in a formal tone."],
-    ("descriptive", "informal", False, False): ["Write a descriptive caption for this image in a casual tone."],
-    ("descriptive", "informal", False, True): ["Write a descriptive caption for this image in a casual tone within {word_count} words."],
-    ("descriptive", "informal", True, False): ["Write a {length} descriptive caption for this image in a casual tone."],
-    ("training_prompt", "formal", False, False): ["Write a stable diffusion prompt for this image."],
-    ("training_prompt", "formal", False, True): ["Write a stable diffusion prompt for this image within {word_count} words."],
-    ("training_prompt", "formal", True, False): ["Write a {length} stable diffusion prompt for this image."],
-    ("rng-tags", "formal", False, False): ["Write a list of Booru tags for this image."],
-    ("rng-tags", "formal", False, True): ["Write a list of Booru tags for this image within {word_count} words."],
-    ("rng-tags", "formal", True, False): ["Write a {length} list of Booru tags for this image."],
 }
 class ImageAdapter(nn.Module):
@@ -79,10 +108,6 @@ class ImageAdapter(nn.Module):
         self.ln1 = nn.Identity() if not ln1 else nn.LayerNorm(input_features)
         self.pos_emb = None if not pos_emb else nn.Parameter(torch.zeros(num_image_tokens, input_features))
-        # Mode token
-        #self.mode_token = nn.Embedding(n_modes, output_features)
-        #self.mode_token.weight.data.normal_(mean=0.0, std=0.02)   # Matches HF's implementation of llama3
         # Other tokens (<|image_start|>, <|image_end|>, <|eot_id|>)
         self.other_tokens = nn.Embedding(3, output_features)
         self.other_tokens.weight.data.normal_(mean=0.0, std=0.02)   # Matches HF's implementation of llama3
@@ -111,11 +136,6 @@ class ImageAdapter(nn.Module):
         x = self.activation(x)
         x = self.linear2(x)
-        # Mode token
-        #mode_token = self.mode_token(mode)
-        #assert mode_token.shape == (x.shape[0], mode_token.shape[1], x.shape[2]), f"Expected {(x.shape[0], 1, x.shape[2])}, got {mode_token.shape}"
-        #x = torch.cat((x, mode_token), dim=1)
         # <|image_start|>, IMAGE, <|image_end|>
         other_tokens = self.other_tokens(torch.tensor([0, 1], device=self.other_tokens.weight.device).expand(x.shape[0], -1))
         assert other_tokens.shape == (x.shape[0], 2, x.shape[2]), f"Expected {(x.shape[0], 2, x.shape[2])}, got {other_tokens.shape}"
@@ -143,23 +163,35 @@ text_model_client = None
 text_model = None
 image_adapter = None
 peft_config = None
 def load_text_model(model_name: str=MODEL_PATH, gguf_file: Union[str, None]=None, is_nf4: bool=True):
-    global tokenizer, text_model, image_adapter, peft_config, text_model_client, use_inference_client
     try:
         from transformers import BitsAndBytesConfig
         nf4_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4",
                                         bnb_4bit_use_double_quant=True, bnb_4bit_compute_dtype=torch.bfloat16)
-        if model_name in PIXTRAL_PATHS:
-            from transformers import AutoProcessor, LlavaForConditionalGeneration
             if is_nf4:
-                text_model = LlavaForConditionalGeneration.from_pretrained(model_name, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
-                image_adapter = AutoProcessor.from_pretrained(model_name, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16)
             else:
-                text_model = LlavaForConditionalGeneration.from_pretrained(model_name, device_map=device, torch_dtype=torch.bfloat16).eval()
-                image_adapter = AutoProcessor.from_pretrained(model_name, device_map=device, torch_dtype=torch.bfloat16)
-            tokenizer = None
-            peft_config = None
         print("Loading tokenizer")
         if gguf_file: tokenizer = AutoTokenizer.from_pretrained(model_name, gguf_file=gguf_file, use_fast=True, legacy=False)
@@ -191,7 +223,7 @@ def load_text_model(model_name: str=MODEL_PATH, gguf_file: Union[str, None]=None
         print("Loading image adapter")
         image_adapter = ImageAdapter(clip_model.config.hidden_size, text_model.config.hidden_size, False, False, 38, False).eval().to("cpu")
-        image_adapter.load_state_dict(torch.load(CHECKPOINT_PATH / "image_adapter.pt", map_location="cpu", weights_only=True))
         image_adapter.eval().to(device)
     except Exception as e:
         print(f"LLM load error: {e}")
@@ -208,7 +240,7 @@ clip_processor = AutoProcessor.from_pretrained(CLIP_PATH)
 clip_model = AutoModel.from_pretrained(CLIP_PATH).vision_model
 if (CHECKPOINT_PATH / "clip_model.pt").exists():
     print("Loading VLM's custom vision model")
-    checkpoint = torch.load(CHECKPOINT_PATH / "clip_model.pt", map_location='cpu', weights_only=True)
     checkpoint = {k.replace("_orig_mod.module.", ""): v for k, v in checkpoint.items()}
     clip_model.load_state_dict(checkpoint)
     del checkpoint
@@ -217,13 +249,18 @@ clip_model.eval().requires_grad_(False).to(device)
 # Tokenizer
 # LLM
 # Image Adapter
 load_text_model()
 @spaces.GPU()
 @torch.inference_mode()
-def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_tone: str, caption_length: Union[str, int],
-                    max_new_tokens: int=300, top_p: float=0.9, temperature: float=0.6, model_name: str=MODEL_PATH, progress=gr.Progress(track_tqdm=True)) -> str:
-    global tokenizer, text_model, image_adapter, peft_config, text_model_client, use_inference_client
     torch.cuda.empty_cache()
     gc.collect()
@@ -235,67 +272,112 @@ def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_tone: s
             length = int(length)
         except ValueError:
             pass
-    # 'rng-tags' and 'training_prompt' don't have formal/informal tones
-    if caption_type == "rng-tags" or caption_type == "training_prompt":
-        caption_tone = "formal"
     # Build prompt
-    prompt_key = (caption_type, caption_tone, isinstance(length, str), isinstance(length, int))
-    if prompt_key not in CAPTION_TYPE_MAP:
-        raise ValueError(f"Invalid caption type: {prompt_key}")
-    prompt_str = CAPTION_TYPE_MAP[prompt_key][0].format(length=length, word_count=length)
     print(f"Prompt: {prompt_str}")
     # Pixtral
     if model_name in PIXTRAL_PATHS:
-        input_images = [input_image]
-        inputs = image_adapter(text=prompt_str, images=input_images, return_tensors="pt").to(device)
-        generate_ids = text_model.generate(**inputs, max_new_tokens=max_new_tokens)
-        output = image_adapter.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
-        return output.strip()
     # Preprocess image
     image = input_image.resize((384, 384), Image.LANCZOS)
     pixel_values = TVF.pil_to_tensor(image).unsqueeze(0) / 255.0
     pixel_values = TVF.normalize(pixel_values, [0.5], [0.5])
     pixel_values = pixel_values.to(device)
-    # Tokenize the prompt
-    prompt = tokenizer.encode(prompt_str, return_tensors='pt', padding=False, truncation=False, add_special_tokens=False)
     # Embed image
     with torch.amp.autocast_mode.autocast(device, enabled=True):
         vision_outputs = clip_model(pixel_values=pixel_values, output_hidden_states=True)
         image_features = vision_outputs.hidden_states
         embedded_images = image_adapter(image_features)
         embedded_images = embedded_images.to(device)
-    # Embed prompt
-    prompt_embeds = text_model.model.embed_tokens(prompt.to(device))
-    assert prompt_embeds.shape == (1, prompt.shape[1], text_model.config.hidden_size), f"Prompt shape is {prompt_embeds.shape}, expected {(1, prompt.shape[1], text_model.config.hidden_size)}"
-    embedded_bos = text_model.model.embed_tokens(torch.tensor([[tokenizer.bos_token_id]], device=text_model.device, dtype=torch.int64))
-    eot_embed = image_adapter.get_eot_embedding().unsqueeze(0).to(dtype=text_model.dtype)
-    # Construct prompts
-    inputs_embeds = torch.cat([
-        embedded_bos.expand(embedded_images.shape[0], -1, -1),
-        embedded_images.to(dtype=embedded_bos.dtype),
-        prompt_embeds.expand(embedded_images.shape[0], -1, -1),
-        eot_embed.expand(embedded_images.shape[0], -1, -1),
-    ], dim=1)
     input_ids = torch.cat([
-        torch.tensor([[tokenizer.bos_token_id]], dtype=torch.long),
-        torch.zeros((1, embedded_images.shape[1]), dtype=torch.long),
-        prompt,
-        torch.tensor([[tokenizer.convert_tokens_to_ids("<|eot_id|>")]], dtype=torch.long),
     ], dim=1).to(device)
     attention_mask = torch.ones_like(input_ids)
     text_model.to(device)
-    generate_ids = text_model.generate(input_ids, inputs_embeds=inputs_embeds, attention_mask=attention_mask, max_new_tokens=max_new_tokens,
                                        do_sample=True, suppress_tokens=None, top_p=top_p, temperature=temperature)
     # Trim off the prompt
@@ -305,7 +387,7 @@ def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_tone: s
     caption = tokenizer.batch_decode(generate_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False)[0]
-    return caption.strip()
 # https://huggingface.co/docs/transformers/v4.44.2/main_classes/text_generation#transformers.FlaxGenerationMixin.generate

 import gradio as gr
 from huggingface_hub import InferenceClient
 from torch import nn
+from transformers import AutoModel, AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast, AutoModelForCausalLM, LlavaForConditionalGeneration
 from pathlib import Path
 import torch
 import torch.amp.autocast_mode
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+BASE_DIR = Path(__file__).resolve().parent # Define the base directory
 device = "cuda" if torch.cuda.is_available() else "cpu"
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 use_inference_client = False
+PIXTRAL_PATHS = ["SeanScripts/pixtral-12b-nf4", "mistral-community/pixtral-12b"]
 llm_models = {
     "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2": None,
 CLIP_PATH = "google/siglip-so400m-patch14-384"
 MODEL_PATH = list(llm_models.keys())[0]
+CHECKPOINT_PATH = BASE_DIR / Path("cgrkzexw-599808")
 LORA_PATH = CHECKPOINT_PATH / "text_model"
+TITLE = "<h1><center>JoyCaption Alpha Two (2024-09-26a)</center></h1>"
 CAPTION_TYPE_MAP = {
+    "Descriptive": [
+        "Write a descriptive caption for this image in a formal tone.",
+        "Write a descriptive caption for this image in a formal tone within {word_count} words.",
+        "Write a {length} descriptive caption for this image in a formal tone.",
+    ],
+    "Descriptive (Informal)": [
+        "Write a descriptive caption for this image in a casual tone.",
+        "Write a descriptive caption for this image in a casual tone within {word_count} words.",
+        "Write a {length} descriptive caption for this image in a casual tone.",
+    ],
+    "Training Prompt": [
+        "Write a stable diffusion prompt for this image.",
+        "Write a stable diffusion prompt for this image within {word_count} words.",
+        "Write a {length} stable diffusion prompt for this image.",
+    ],
+    "MidJourney": [
+        "Write a MidJourney prompt for this image.",
+        "Write a MidJourney prompt for this image within {word_count} words.",
+        "Write a {length} MidJourney prompt for this image.",
+    ],
+    "Booru tag list": [
+        "Write a list of Booru tags for this image.",
+        "Write a list of Booru tags for this image within {word_count} words.",
+        "Write a {length} list of Booru tags for this image.",
+    ],
+    "Booru-like tag list": [
+        "Write a list of Booru-like tags for this image.",
+        "Write a list of Booru-like tags for this image within {word_count} words.",
+        "Write a {length} list of Booru-like tags for this image.",
+    ],
+    "Art Critic": [
+        "Analyze this image like an art critic would with information about its composition, style, symbolism, the use of color, light, any artistic movement it might belong to, etc.",
+        "Analyze this image like an art critic would with information about its composition, style, symbolism, the use of color, light, any artistic movement it might belong to, etc. Keep it within {word_count} words.",
+        "Analyze this image like an art critic would with information about its composition, style, symbolism, the use of color, light, any artistic movement it might belong to, etc. Keep it {length}.",
+    ],
+    "Product Listing": [
+        "Write a caption for this image as though it were a product listing.",
+        "Write a caption for this image as though it were a product listing. Keep it under {word_count} words.",
+        "Write a {length} caption for this image as though it were a product listing.",
+    ],
+    "Social Media Post": [
+        "Write a caption for this image as if it were being used for a social media post.",
+        "Write a caption for this image as if it were being used for a social media post. Limit the caption to {word_count} words.",
+        "Write a {length} caption for this image as if it were being used for a social media post.",
+    ],
 }
 class ImageAdapter(nn.Module):
         self.ln1 = nn.Identity() if not ln1 else nn.LayerNorm(input_features)
         self.pos_emb = None if not pos_emb else nn.Parameter(torch.zeros(num_image_tokens, input_features))
         # Other tokens (<|image_start|>, <|image_end|>, <|eot_id|>)
         self.other_tokens = nn.Embedding(3, output_features)
         self.other_tokens.weight.data.normal_(mean=0.0, std=0.02)   # Matches HF's implementation of llama3
         x = self.activation(x)
         x = self.linear2(x)
         # <|image_start|>, IMAGE, <|image_end|>
         other_tokens = self.other_tokens(torch.tensor([0, 1], device=self.other_tokens.weight.device).expand(x.shape[0], -1))
         assert other_tokens.shape == (x.shape[0], 2, x.shape[2]), f"Expected {(x.shape[0], 2, x.shape[2])}, got {other_tokens.shape}"
 text_model = None
 image_adapter = None
 peft_config = None
+pixtral_model = None
+pixtral_processor = None
 def load_text_model(model_name: str=MODEL_PATH, gguf_file: Union[str, None]=None, is_nf4: bool=True):
+    global tokenizer, text_model, image_adapter, peft_config, pixtral_model, pixtral_processor, text_model_client, use_inference_client
     try:
+        tokenizer = None
+        text_model_client = None
+        text_model = None
+        image_adapter = None
+        peft_config = None
+        pixtral_model = None
+        pixtral_processor = None
+        torch.cuda.empty_cache()
+        gc.collect()
         from transformers import BitsAndBytesConfig
         nf4_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4",
                                         bnb_4bit_use_double_quant=True, bnb_4bit_compute_dtype=torch.bfloat16)
+        if model_name in PIXTRAL_PATHS: # Pixtral
+            print(f"Loading LLM: {model_name}")
             if is_nf4:
+                pixtral_model = LlavaForConditionalGeneration.from_pretrained(model_name, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
             else:
+                pixtral_model = LlavaForConditionalGeneration.from_pretrained(model_name, device_map=device, torch_dtype=torch.bfloat16).eval()
+            pixtral_processor = AutoProcessor.from_pretrained(model_name)
+            print(f"pixtral_model: {type(pixtral_model)}") #
+            print(f"pixtral_processor: {type(pixtral_processor)}") #
+            return
         print("Loading tokenizer")
         if gguf_file: tokenizer = AutoTokenizer.from_pretrained(model_name, gguf_file=gguf_file, use_fast=True, legacy=False)
         print("Loading image adapter")
         image_adapter = ImageAdapter(clip_model.config.hidden_size, text_model.config.hidden_size, False, False, 38, False).eval().to("cpu")
+        image_adapter.load_state_dict(torch.load(CHECKPOINT_PATH / "image_adapter.pt", map_location="cpu", weights_only=False))
         image_adapter.eval().to(device)
     except Exception as e:
         print(f"LLM load error: {e}")
 clip_model = AutoModel.from_pretrained(CLIP_PATH).vision_model
 if (CHECKPOINT_PATH / "clip_model.pt").exists():
     print("Loading VLM's custom vision model")
+    checkpoint = torch.load(CHECKPOINT_PATH / "clip_model.pt", map_location='cpu', weights_only=False)
     checkpoint = {k.replace("_orig_mod.module.", ""): v for k, v in checkpoint.items()}
     clip_model.load_state_dict(checkpoint)
     del checkpoint
 # Tokenizer
 # LLM
 # Image Adapter
+#load_text_model(PIXTRAL_PATHS[0])
+#print(f"pixtral_model: {type(pixtral_model)}") #
+#print(f"pixtral_processor: {type(pixtral_processor)}") #
 load_text_model()
+print(f"pixtral_model: {type(pixtral_model)}") #
+print(f"pixtral_processor: {type(pixtral_processor)}") #
 @spaces.GPU()
 @torch.inference_mode()
+def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_length: Union[str, int], extra_options: list[str], name_input: str, custom_prompt: str,
+                    max_new_tokens: int=300, top_p: float=0.9, temperature: float=0.6, model_name: str=MODEL_PATH, progress=gr.Progress(track_tqdm=True)) -> tuple[str, str]:
+    global tokenizer, text_model, image_adapter, peft_config, pixtral_model, pixtral_processor, text_model_client, use_inference_client
     torch.cuda.empty_cache()
     gc.collect()
             length = int(length)
         except ValueError:
             pass
     # Build prompt
+    if length is None:
+        map_idx = 0
+    elif isinstance(length, int):
+        map_idx = 1
+    elif isinstance(length, str):
+        map_idx = 2
+    else:
+        raise ValueError(f"Invalid caption length: {length}")
+    prompt_str = CAPTION_TYPE_MAP[caption_type][map_idx]
+    # Add extra options
+    if len(extra_options) > 0:
+        prompt_str += " " + " ".join(extra_options)
+    # Add name, length, word_count
+    prompt_str = prompt_str.format(name=name_input, length=caption_length, word_count=caption_length)
+    if custom_prompt.strip() != "":
+        prompt_str = custom_prompt.strip()
+    # For debugging
     print(f"Prompt: {prompt_str}")
     # Pixtral
     if model_name in PIXTRAL_PATHS:
+        print(f"pixtral_model: {type(pixtral_model)}") #
+        print(f"pixtral_processor: {type(pixtral_processor)}") #
+        input_images = [input_image.convert("RGB")]
+        #input_prompt = f"[INST]{prompt_str}\n[IMG][/INST]"
+        input_prompt = "[INST]Caption this image:\n[IMG][/INST]"
+        inputs = pixtral_processor(images=input_images, text=input_prompt, return_tensors="pt").to(device)
+        generate_ids = pixtral_model.generate(**inputs, max_new_tokens=max_new_tokens)
+        output = pixtral_processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+        return input_prompt, output.strip()
     # Preprocess image
+    # NOTE: I found the default processor for so400M to have worse results than just using PIL directly
+    #image = clip_processor(images=input_image, return_tensors='pt').pixel_values
     image = input_image.resize((384, 384), Image.LANCZOS)
     pixel_values = TVF.pil_to_tensor(image).unsqueeze(0) / 255.0
     pixel_values = TVF.normalize(pixel_values, [0.5], [0.5])
     pixel_values = pixel_values.to(device)
     # Embed image
+    # This results in Batch x Image Tokens x Features
     with torch.amp.autocast_mode.autocast(device, enabled=True):
         vision_outputs = clip_model(pixel_values=pixel_values, output_hidden_states=True)
         image_features = vision_outputs.hidden_states
         embedded_images = image_adapter(image_features)
         embedded_images = embedded_images.to(device)
+    # Build the conversation
+    convo = [
+        {
+            "role": "system",
+            "content": "You are a helpful image captioner.",
+        },
+        {
+            "role": "user",
+            "content": prompt_str,
+        },
+    ]
+    # Format the conversation
+    convo_string = tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = True)
+    assert isinstance(convo_string, str)
+    # Tokenize the conversation
+    # prompt_str is tokenized separately so we can do the calculations below
+    convo_tokens = tokenizer.encode(convo_string, return_tensors="pt", add_special_tokens=False, truncation=False)
+    prompt_tokens = tokenizer.encode(prompt_str, return_tensors="pt", add_special_tokens=False, truncation=False)
+    assert isinstance(convo_tokens, torch.Tensor) and isinstance(prompt_tokens, torch.Tensor)
+    convo_tokens = convo_tokens.squeeze(0)   # Squeeze just to make the following easier
+    prompt_tokens = prompt_tokens.squeeze(0)
+    # Calculate where to inject the image
+    eot_id_indices = (convo_tokens == tokenizer.convert_tokens_to_ids("<|eot_id|>")).nonzero(as_tuple=True)[0].tolist()
+    assert len(eot_id_indices) == 2, f"Expected 2 <|eot_id|> tokens, got {len(eot_id_indices)}"
+    preamble_len = eot_id_indices[1] - prompt_tokens.shape[0]   # Number of tokens before the prompt
+    # Embed the tokens
+    convo_embeds = text_model.model.embed_tokens(convo_tokens.unsqueeze(0).to(device))
+    # Construct the input
+    input_embeds = torch.cat([
+        convo_embeds[:, :preamble_len],   # Part before the prompt
+        embedded_images.to(dtype=convo_embeds.dtype),   # Image
+        convo_embeds[:, preamble_len:],   # The prompt and anything after it
+    ], dim=1).to(device)
     input_ids = torch.cat([
+        convo_tokens[:preamble_len].unsqueeze(0),
+        torch.zeros((1, embedded_images.shape[1]), dtype=torch.long),   # Dummy tokens for the image (TODO: Should probably use a special token here so as not to confuse any generation algorithms that might be inspecting the input)
+        convo_tokens[preamble_len:].unsqueeze(0),
     ], dim=1).to(device)
     attention_mask = torch.ones_like(input_ids)
+    # Debugging
+    print(f"Input to model: {repr(tokenizer.decode(input_ids[0]))}")
     text_model.to(device)
+    generate_ids = text_model.generate(input_ids, inputs_embeds=input_embeds, attention_mask=attention_mask, max_new_tokens=max_new_tokens,
                                        do_sample=True, suppress_tokens=None, top_p=top_p, temperature=temperature)
     # Trim off the prompt
     caption = tokenizer.batch_decode(generate_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False)[0]
+    return prompt_str, caption.strip()
 # https://huggingface.co/docs/transformers/v4.44.2/main_classes/text_generation#transformers.FlaxGenerationMixin.generate

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-huggingface_hub
 accelerate
 torch
 git+https://github.com/huggingface/transformers
@@ -8,5 +8,5 @@ Pillow
 protobuf
 gguf
 numpy<2.0.0
-peft
 torchvision

+huggingface_hub>=0.23.4
 accelerate
 torch
 git+https://github.com/huggingface/transformers
 protobuf
 gguf
 numpy<2.0.0
+peft>=0.12.0
 torchvision