# This is a combination of 22 commits.
# This is the 1st commit message:
Init
# This is the commit message #2:
[Enhancement] Update README
# This is the commit message #3:
Upload SM80 so files
# This is the commit message #4:
Track model files
# This is the commit message #5:
Update gitignore
# This is the commit message #6:
Upload converted XVERSE 13B Chat model files
# This is the commit message #7:
Upload converted Baichuan2 13B Chat model files
# This is the commit message #8:
Upload converted Baichuan2 7B model files
# This is the commit message #9:
Upload converted Baichuan2 13B Base model files
# This is the commit message #10:
Upload converted Baichuan 7B Base model files
# This is the commit message #11:
Upload converted Baichuan 13B Chat model files
# This is the commit message #12:
Upload converted Baichuan 13B Base model files
# This is the commit message #13:
Upload converted LLaMA Ziya 13B model files
# This is the commit message #14:
Upload converted Yi 6B model files
# This is the commit message #15:
Update README and .gitattributes
# This is the commit message #16:
Remove SM70 so files
# This is the commit message #17:
Update README
# This is the commit message #18:
Update SM80 Cuda11 so file
# This is the commit message #19:
Update README
# This is the commit message #20:
Update Python codes
# This is the commit message #21:
Update Python codes
# This is the commit message #22:
Update gitattributes
- .gitattributes +2 -0
- .gitignore +8 -0
- README.md +157 -0
- lyralib/.gitattributes +3 -0
- lyralib/.gitkeep +0 -0
- lyralib/sm80/cuda11/lyraOp.cpython-38-x86_64-linux-gnu.so +3 -0
- lyralib/sm80/cuda12/lyraOp.cpython-38-x86_64-linux-gnu.so +3 -0
- lyrallms/LyraBaichuanPy/README.md +88 -0
- lyrallms/LyraBaichuanPy/configuration_baichuan.py +69 -0
- lyrallms/LyraBaichuanPy/examples/README.md +105 -0
- lyrallms/LyraBaichuanPy/examples/batch_demo.py +103 -0
- lyrallms/LyraBaichuanPy/examples/batch_stream_demo.py +101 -0
- lyrallms/LyraBaichuanPy/examples/random_batch_demo.py +116 -0
- lyrallms/LyraBaichuanPy/examples/varlen_prompts.json +6 -0
- lyrallms/LyraBaichuanPy/generation_utils.py +83 -0
- lyrallms/LyraBaichuanPy/lyra_baichuan/__init__.py +1 -0
- lyrallms/LyraBaichuanPy/lyra_baichuan/config.py +34 -0
- lyrallms/LyraBaichuanPy/lyra_baichuan/lyra_baichuan.py +391 -0
- lyrallms/LyraBaichuanPy/lyra_baichuan/model.py +258 -0
- lyrallms/LyraBaichuanPy/lyra_baichuan/tokenization_baichuan.py +232 -0
- lyrallms/LyraLlamaPy/README.md +75 -0
- lyrallms/LyraLlamaPy/examples/README.md +114 -0
- lyrallms/LyraLlamaPy/examples/batch_demo.py +109 -0
- lyrallms/LyraLlamaPy/examples/batch_stream_demo.py +135 -0
- lyrallms/LyraLlamaPy/examples/random_batch_demo.py +123 -0
- lyrallms/LyraLlamaPy/examples/test.sh +20 -0
- lyrallms/LyraLlamaPy/examples/test_stream.sh +21 -0
- lyrallms/LyraLlamaPy/examples/torch_benchmark.py +111 -0
- lyrallms/LyraLlamaPy/examples/varlen_prompts.json +6 -0
- lyrallms/LyraLlamaPy/lyra_llama/__init__.py +1 -0
- lyrallms/LyraLlamaPy/lyra_llama/config.py +34 -0
- lyrallms/LyraLlamaPy/lyra_llama/lyra_llama.py +232 -0
- lyrallms/LyraLlamaPy/lyra_llama/model.py +270 -0
- lyrallms/README.md +27 -0
- models/.gitkeep +0 -0
- models/Baichuan/Baichuan2_13B_Base/1-gpu-fp16.bin +3 -0
- models/Baichuan/Baichuan2_13B_Base/config.ini +14 -0
- models/Baichuan/Baichuan2_13B_Base/config.json +28 -0
- models/Baichuan/Baichuan2_13B_Base/special_tokens_map.json +30 -0
- models/Baichuan/Baichuan2_13B_Base/tokenizer.model +3 -0
- models/Baichuan/Baichuan2_13B_Base/tokenizer_config.json +46 -0
- models/Baichuan/Baichuan2_13B_Chat/1-gpu-fp16.bin +3 -0
- models/Baichuan/Baichuan2_13B_Chat/config.ini +14 -0
- models/Baichuan/Baichuan2_13B_Chat/config.json +29 -0
- models/Baichuan/Baichuan2_13B_Chat/special_tokens_map.json +30 -0
- models/Baichuan/Baichuan2_13B_Chat/tokenizer.model +3 -0
- models/Baichuan/Baichuan2_13B_Chat/tokenizer_config.json +46 -0
- models/Baichuan/Baichuan2_7B_Base/1-gpu-fp16.bin +3 -0
- models/Baichuan/Baichuan2_7B_Base/config.ini +14 -0
- models/Baichuan/Baichuan2_7B_Base/config.json +28 -0
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+models/* filter=lfs diff=lfs merge=lfs -text
+lyralib/* filter=lfs diff=lfs merge=lfs -text
@@ -0,0 +1,8 @@
*~
*.o
*build*/
__pycache__/
.vscode
.idea
.cache
**/.ipynb_checkpoints/
@@ -1,3 +1,160 @@
---
license: mit
language: en
tags:
- LLM
- LLaMA
- Baichuan
- Baichuan2
- XVERSE
---

# Model Card for lyraLLMs

## Introduction

We have released **lyraLLMs**, a highly optimized and easy-to-use inference engine for LLMs.

**lyraLLMs** runs on the following NVIDIA GPUs:
- Volta (V100)
- Turing (T4)
- Ampere (A100/A10)
- Ada Lovelace (RTX 4090, etc.)

**lyraLLMs** supports many popular HuggingFace models, including:
- [BELLE](https://huggingface.co/TMElyralab/lyraBELLE)
- [ChatGLM](https://huggingface.co/TMElyralab/lyraChatGLM)
- LLaMA
- LLaMA 2
- XVERSE
- Baichuan 1 & 2

**lyraLLMs** is fast, memory-efficient and easy to use, offering:
- State-of-the-art throughput (up to 7K tokens/s for LLaMA 13B)
- Memory-efficient attention with FlashAttention2
- Quantization: MEMOPT mode (W8A16, W4A16), KVCache Int8
- An easy-to-use Python API for serving LLMs
- Streaming outputs

If you like our work and would like to join us, feel free to drop a line at [email protected]

## Speed

### Settings
* Evaluated in tokens/s (input + output)
* Tested on A100 40G, CUDA 12.0
* MEMOPT mode and KVCache Int8 enabled

### Throughputs

### XVERSE-13B-Chat

#### Input
北京的景点:故宫、天坛、万里长城等。\n深圳的景点:

| Version | Batch Size 1 | Batch Size 64 | Batch Size 128 | Batch Size 256 | Batch Size 512 |
| --- | --- | --- | --- | --- | --- |
| Torch 2.1.0 | 52.9 | 2308.1 | OOM | | |
| lyraXVERSE | 200.4 | 4624.8 | 5759.7 | 6075.6 | 5733 |

### Baichuan2-7B-Base

#### Input
北京的景点:登鹳雀楼->王之涣\n夜雨寄北->

| Version | Batch Size 1 | Batch Size 8 | Batch Size 16 | Batch Size 32 | Batch Size 64 |
| --- | --- | --- | --- | --- | --- |
| Torch 2.0.1 | 41.2 | 323.2 | 640.0 | 1256.8 | 2231.0 |
| lyraBaichuan | 125.9 | 948.1 | 1749.3 | 2974.0 | 4370.1 |

### Baichuan2-13B-Base

#### Input
北京的景点:登鹳雀楼->王之涣\n夜雨寄北->

| Version | Batch Size 1 | Batch Size 8 | Batch Size 16 | Batch Size 32 | Batch Size 64 |
| --- | --- | --- | --- | --- | --- |
| Torch 2.0.1 | 40.9 | 307.9 | 555.6 | 1010.4 | 1601.0 |
| lyraBaichuan | 80.0 | 568.2 | 1124.4 | 1942.6 | 2828.0 |

### Yi-6B

#### Input
\# write the quick sort algorithm

| Version | Batch Size 1 | Batch Size 8 | Batch Size 16 | Batch Size 32 | Batch Size 64 |
| --- | --- | --- | --- | --- | --- |
| Torch 2.1.0 | 31.4 | 247.5 | 490.4 | 987.2 | 1796.3 |
| lyraLLaMA | 93.8 | 735.6 | 2339.8 | 3020.9 | 4630.8 |

### Yi-34B

Due to VRAM limitations, we cannot profile the throughput of Yi-34B on A100 40G using Torch.

#### Input
Let me tell you an interesting story about cat Tom and mouse Jerry,

| Version | Batch Size 1 | Batch Size 8 | Batch Size 16 | Batch Size 32 | Batch Size 64 |
| --- | --- | --- | --- | --- | --- |
| lyraLLaMA | 52.5 | 399.4 | 753.0 | 1138.2 | 1926.2 |

## Usage

### Environment (Docker recommended)

- For CUDA 11.X: we recommend `nvcr.io/nvidia/pytorch:22.12-py3`
- For CUDA 12.0: we recommend `nvcr.io/nvidia/pytorch:23.02-py3`

```bash
docker pull nvcr.io/nvidia/pytorch:23.02-py3
docker run --rm -it --gpus all -v ./:/lyraLLMs nvcr.io/nvidia/pytorch:23.02-py3

pip install -r requirements.txt
```

### Convert Models

We have released multiple optimized models converted from the original HuggingFace ones:
- ChatGLM-6B
- XVERSE-13B-Chat
- LLaMA-Ziya-13B
- Baichuan-7B, Baichuan-13B-Base, Baichuan-13B-Chat, Baichuan2-7B-Base, Baichuan2-7B-Chat, Baichuan2-13B-Base and lyraBaichuan2-13B-Chat
- Yi-6B

Feel free to contact us if you would like a finetuned version of these LLMs converted.

### Inference

Refer to [README.md](./lyrallms/README.md) for inference of converted models with **lyraLLMs**.

### Python Demo

```python
from lyra_llama import lyraLlama

model_path = 'XXX'  # directory containing the converted model weights, config and tokenizer files
data_type = 'fp16'
memopt_mode = 0     # set memopt_mode=1 to run inference in MEMOPT mode

model = lyraLlama(model_path, data_type, memopt_mode)

prompts = '列出3个不同的机器学习算法,并说明它们的适用范围.'
prompts = [prompts,] * 64

output_texts = model.generate(prompts, output_length=150, do_sample=False, top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0)
print(output_texts)
```
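The throughput figures in the Speed section are counted over input plus output tokens. As a rough, non-authoritative sketch of how such a figure could be reproduced with the API above (loading a HuggingFace tokenizer from the converted directory is an assumption for counting only, not part of lyraLLMs itself):

```python
import time

from transformers import AutoTokenizer  # assumption: an HF tokenizer for the same model loads from this directory
from lyra_llama import lyraLlama

model_path = 'XXX'  # converted model directory, as above
model = lyraLlama(model_path, 'fp16', 0)
tokenizer = AutoTokenizer.from_pretrained(model_path)

prompts = ['北京的景点:故宫、天坛、万里长城等。\n深圳的景点:'] * 64

start = time.time()
output_texts = model.generate(prompts, output_length=150, do_sample=False,
                              top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0)
elapsed = time.time() - start

# tokens/s over input + output, matching the Settings section above
n_tokens = sum(len(tokenizer.encode(t)) for t in prompts + list(output_texts))
print(f'{n_tokens / elapsed:.1f} tokens/s')
```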

## Citation
```bibtex
@Misc{lyraLLMs2024,
  author =       {Kangjian Wu, Zhengtao Wang, Yibo Lu, Haoxiong Su, Bin Wu},
  title =        {lyraLLMs: A highly optimized and easy-to-use inference engine for LLMs},
  howpublished = {\url{https://huggingface.co/TMElyralab/lyraLLMs}},
  year =         {2024}
}
```

## Report bug
- Start a discussion to report any bugs: https://huggingface.co/TMElyralab/lyraLLMs/discussions
- Report bugs with a `[bug]` mark in the title.
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:213543e928e2727580c3f1dcbddfaf56b7a778ec7dfb29f4b3b66ab0009bfd0b
size 41

File without changes

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f50ec3dbe390bffc052e754a294614025fb423b23c6bc8a26a8dadf52d1b29c2
size 233586480

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8b8189f3321cd0578da920f15d4b74e7a96be7556731e1de3cb313b8700e3c45
size 234352496
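These hunks are Git LFS pointer files: the actual `lyralib` blobs live in LFS storage, and each pointer records only the sha256 and size of the tracked file (which is why `lyralib/*` is listed in .gitattributes above). A minimal sketch of checking a downloaded blob against its pointer (file names are placeholders, not files in this repo):

```python
import hashlib
from pathlib import Path

def check_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    """Compare a downloaded blob against the oid/size recorded in its LFS pointer."""
    fields = dict(line.split(" ", 1)
                  for line in Path(pointer_path).read_text().splitlines() if line.strip())
    expected_oid = fields["oid"].strip().split(":", 1)[1]   # strip the "sha256:" prefix
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

# e.g. check_lfs_pointer("lyraOp.pointer", "lyraOp.cpython-38-x86_64-linux-gnu.so")
```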
@@ -0,0 +1,88 @@
## Model and environment

### Environment setup
```shell
# Pull the NGC PyTorch base image
docker pull nvcr.io/nvidia/pytorch:23.02-py3

# Start the container
docker run --gpus all -itd --rm --name lyrallms_cu12 nvcr.io/nvidia/pytorch:23.02-py3
docker exec -it lyrallms_cu12 bash
```

After fetching the code, install the dependencies:
```shell
pip install -r requirements.txt
```

Copy the [so file](../../lyralib/sm80) for your CUDA version from `lyralib` into `/usr/lib/lyralib`.

## Inference usage

### Core usage snippet

```python
from lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B

model_path = 'XXX'      # directory containing the converted model weights, config and tokenizer files
tokenizer_path = 'XXX'
data_type = 'fp16'      # inference precision
memopt_mode = 1

# Load the accelerated model. The C++ internals are hidden away; they rely on the precompiled
# .so libraries under /usr/lib/ftlib, which are already baked into the image.
# Loading takes a while, so unpack the downloaded weights onto a local disk.
# For a Baichuan1/2-7B model, use lyraBaichuan7B(model_path, tokenizer_path, data_type, memopt_mode) instead.
model = lyraBaichuan13B(model_path, tokenizer_path, data_type, memopt_mode)

# Input. Multiple inputs can be batched: prompts accepts a list. To simulate multiple inputs
# we simply replicate one prompt 32 times, giving a batch size of 32.
prompts = "登鹳雀楼->王之涣\n夜雨寄北->"
prompts = [prompts,]*32

# Generation. The maximum length is adjustable (64 here); the batch stops when the model
# emits the end token or reaches the maximum length.
output_texts = model.generate(prompts, output_length=64, do_sample=False, top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0)

# Inspect the outputs. The input string also takes part in the computation and is counted
# when evaluating throughput, but for readability each sample in output_texts has the
# input string stripped from it.
print(output_texts)

# Example output
>>> Inputs: 登鹳雀楼->王之涣
夜雨寄北->
>>> Outputs:
李商隐
望洞庭->刘禹锡
黄鹤楼送孟浩然之广陵->李白
登岳阳楼->杜甫
秋词->刘禹锡
枫桥夜泊->张继
饮湖上初晴后雨->苏轼
浪淘沙->刘禹锡
```

### Demo scripts

`examples/batch_demo.py` contains a usage example similar to the one above and runs a simple speed test. Since everyone counts tokens differently, we evaluate speed directly by character count; you can derive your own token-based figures from the number of characters generated.

More test scripts and usage details can be found in the [README.md](./examples/README.md) under `examples`, e.g.:
- Batch inference
- Variable-length batch inference
- Batch streaming inference

## Custom model weights

The conversion script `parse_model_params.py` converts HuggingFace-format Baichuan1/2 weights into the per-layer weights required by the accelerated model. A `-model_name` argument is provided so you can fill in a model name and generate a distinguishable config.ini.

```shell
python parse_model_params.py -i your_model_dir -o output_dir -t_g 1 -i_g 1 -weight_data_type "fp16" -model_name "baichuan2-13b"
```

The script also copies `tokenizer.model`, `special_tokens_map.json` and `tokenizer_config.json` from the adjacent tokenizer_source directory into output_dir, so that the tokenizer of the accelerated Baichuan model can be initialised directly later on.

The converted weights are written one file per parameter under `output_dir/{i_g}-gpu-{weight_data_type}`; use `merge_bin.py` to merge the many bin files into a single one.

```shell
layer_num=40 # 13B->40, 7B->32
python merge_bin.py -i model_dir/{i_g}-gpu-{weight_data_type} -o output_dir -l ${layer_num}
```

Finally, copy the five files `config.ini`, `config.json`, `tokenizer.model`, `special_tokens_map.json` and `tokenizer_config.json` into output_dir.
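The final copy step can be scripted if you prefer; the sketch below only assumes the directory layout described above, and the paths are placeholders rather than anything fixed by the converter:

```python
# A sketch of the final copy step above -- adjust src/dst to your actual layout.
import shutil
from pathlib import Path

src = Path("model_dir/1-gpu-fp16")   # wherever parse_model_params.py / merge_bin.py left these files
dst = Path("output_dir")
dst.mkdir(parents=True, exist_ok=True)
for name in ["config.ini", "config.json", "tokenizer.model",
             "special_tokens_map.json", "tokenizer_config.json"]:
    shutil.copy(src / name, dst / name)
```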
@@ -0,0 +1,69 @@
# Copyright 2023 Baichuan Inc. All Rights Reserved.

# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging


logger = logging.get_logger(__name__)


class BaichuanConfig(PretrainedConfig):
    model_type = "baichuan"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=125696,
        hidden_size=4096,
        intermediate_size=11008,
        num_hidden_layers=32,
        num_attention_heads=32,
        hidden_act="silu",
        max_position_embeddings=4096,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        tie_word_embeddings=False,
        z_loss_weight=0,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.z_loss_weight = z_loss_weight
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
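This is the standard HuggingFace-style config class the converter starts from; its defaults above are the 7B-style values. A minimal sketch of instantiating it for a larger model (the 13B-style numbers below are illustrative assumptions, not the official Baichuan-13B config, which should be read from that model's config.json):

```python
from configuration_baichuan import BaichuanConfig

# Illustrative, assumed hyperparameters only.
config = BaichuanConfig(
    vocab_size=125696,
    hidden_size=5120,
    intermediate_size=13696,
    num_hidden_layers=40,
    num_attention_heads=40,
)
print(config.model_type, config.num_hidden_layers, config.rms_norm_eps)
```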
@@ -0,0 +1,105 @@
## Test scripts

### Batch inference

```sh
export FMHA_VERSION=OFF
export KV_CACHE_DTYPE=DEFAULT # set KV_CACHE_DTYPE=INT8 to use KVCache Int8

model_path=ModelPath # folder containing the converted model
data_type=fp16       # precision the weights are saved in
memopt_mode=0        # MEMOPT mode: 0/1
quant_type="int8"    # quantization precision: int4/int8
max_output_length=256
warmups=1
avgnums=1

python batch_demo.py --model-path $model_path\
                     --tokenizer-path $model_path\
                     --data-type $data_type\
                     --memopt_mode $memopt_mode\
                     --quant-type ${quant_type}\
                     --max-output-length $max_output_length\
                     --warmups $warmups\
                     --avgnums $avgnums
```

### Batch streaming inference

```sh
export FMHA_VERSION=OFF
export KV_CACHE_DTYPE=DEFAULT # set KV_CACHE_DTYPE=INT8 to use KVCache Int8
export LYRA_STREAM_CB_STEP=30 # number of steps between streaming callbacks

model_path=ModelPath # folder containing the converted model
data_type=fp16       # precision the weights are saved in
memopt_mode=0        # MEMOPT mode: 0/1
quant_type="int8"    # quantization precision: int4/int8
max_output_length=256
warmups=1
avgnums=1

python batch_stream_demo.py --model-path $model_path\
                            --tokenizer-path $model_path\
                            --data-type $data_type\
                            --memopt_mode $memopt_mode\
                            --quant-type ${quant_type}\
                            --max-output-length $max_output_length\
                            --warmups $warmups\
                            --avgnums $avgnums
```

### Variable-length batch inference

```sh
export FMHA_VERSION=OFF
export KV_CACHE_DTYPE=DEFAULT # set KV_CACHE_DTYPE=INT8 to use KVCache Int8

model_path=ModelPath # folder containing the converted model
prompt_filepath=varlen_prompts.json # file of variable-length prompts to sample from
data_type=fp16       # precision the weights are saved in
memopt_mode=0        # MEMOPT mode: 0/1
quant_type="int8"    # quantization precision: int4/int8
max_output_length=256
warmups=1
avgnums=1

python random_batch_demo.py --model-path $model_path\
                            --tokenizer-path $model_path\
                            --data-type $data_type\
                            --memopt_mode $memopt_mode\
                            --quant-type ${quant_type}\
                            --prompt_filepath $prompt_filepath\
                            --max-output-length $max_output_length\
                            --warmups $warmups\
                            --avgnums $avgnums
```
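The same switches can also be set from Python before the lyra modules are used, which is convenient in notebooks. A minimal sketch follows; the environment-variable names are the ones used by the scripts above, everything else mirrors the core usage snippet, and since it is not documented here whether the variables are read at import time or at model-load time, we set them before both to be safe:

```python
import os

# Same switches as the shell scripts above; set them before the model is loaded.
os.environ["FMHA_VERSION"] = "OFF"
os.environ["KV_CACHE_DTYPE"] = "INT8"   # or "DEFAULT"

from lyra_baichuan import lyraBaichuan13B

# (model_path, tokenizer_path, data_type, memopt_mode) -- placeholders as in the scripts above
model = lyraBaichuan13B("ModelPath", "ModelPath", "fp16", 1)
print(model.generate(["登鹳雀楼->王之涣\n夜雨寄北->"], output_length=64, do_sample=False,
                     top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0))
```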

## Prompt examples

### Short sequences
```
北京的景点:故宫、天坛、万里长城等。\n深圳的景点:
```
```
今天天气大概 25度,有点小雨,吹着风,我想去户外散步,应该穿什么样的衣服 裤子鞋子搭配
```

### 1K sequence
```
《Bela Lugosi's Dead 》是英国后朋克乐队Bauhaus的首张单曲,于 1979 年 8 月 6 日在Small Wonder厂牌上发行。[4]它通常被认为是第一张哥特式摇滚唱片。\n1979 年 1 月 26 日,“Bela Lugosi's Dead”在威灵伯勒的贝克录音室进行了六个小时的“录音室现场”录制。这是他们在乐队成立六周后一起录制的第一首歌曲。[6]所有四位乐队成员都被认为是这首歌的作者:主唱彼得·墨菲、吉他手丹尼尔·阿什、鼓手凯文·哈斯金斯和贝斯手大卫·J (大卫·哈斯金斯)。David J 声称这首歌的歌词是他写的。[5] “Bela Lugosi's Dead”的替代版本还包括他们下一首单曲“ Dark Entries ”的早期演示录音的一部分。\n\n在同一场会议中还录制了另外四首歌曲:“Boys”;“咬我的臀部”;“Some Faces”和斯卡雷鬼曲调“Harry”,这是关于Blondie主唱Deborah Harry的。[7] [8]关于这次会议,凯文·哈斯金斯 (Kevin Haskins) 说,“那里有力量流行音乐,还有斯卡。我们试图找到我们的声音。” [9]\n\n在那次录制期间录制的歌曲中(除了“Bela Lugosi's Dead”),只有“Harry”获得了官方发行;1982年作为单曲“ Kick in the Eye ”的B面。1979 年晚些时候在 Beck Studios 录制的《Boys》版本被用作原版单曲《Bela Lugosi's Dead》的 B 面。[10]其余曲目,包括“Boys”的原始录音,一直未发行,直到 2018 年The Bela Session以黑胶唱片和CD 形式发行,并可供乐队数字下载。[11]在额外的曲目中,《经典摇滚》杂志写道:“其余的材料发现乐队正在摸索方向,甚至触及了斯卡。”\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:
```

### 2K sequence
```
根据所给刑事法律文书中的案情描述,预测被告人被判的罪名。你需要从这些罪名中选择最恰当的一项:妨害公务,寻衅滋事,盗窃、侮辱尸体,危险物品肇事,非法采矿,组织、强迫、引诱、容留、介绍卖淫,开设赌场,聚众斗殴,绑架,非法持有毒品,销售假冒注册商标的商品,容留他人吸毒,假冒注册商标,交通肇事,破坏电力设备,组织卖淫,合同诈骗,走私武器、弹药,抢劫,非法处置查封、扣押、冻结的财产,以危险方法危害公共安全,过失投放危险物质,非法制造、买卖、运输、邮寄、储存枪支、弹药、爆炸物,伪造、变造、买卖武装部队公文、证件、印章,持有、使用假币,重婚,聚众冲击国家机关,生产、销售伪劣农药、兽药、化肥、种子,收买被拐卖的妇女、儿童,聚众哄抢,重大劳动安全事故,侵占,包庇毒品犯罪分子,虚报注册资本,违法发放贷款,制造、贩卖、传播淫秽物品,窝藏、包庇,帮助毁灭、伪造证据,放火,强奸,非法携带枪支、弹药、管制刀具、危险物品危及公共安全,伪造、变造金融票证,爆炸,玩忽职守,对非国家工作人员行贿,伪造、倒卖伪造的有价票证,私分国有资产,非法收购、运输、加工、出售国家重点保护植物、国家重点保护植物制品,生产、销售假药,挪用特定款物,过失致人死亡,走私国家禁止进出口的货物、物品,非法制造、买卖、运输、储存危险物质,洗钱,骗取贷款、票据承兑、金融票证,非法买卖制毒物品,非法买卖、运输、携带、持有毒品原植物种子、幼苗,生产、销售有毒、有害食品,滥用职权,招收公务员、学生徇私舞弊,诬告陷害,非法获取国家秘密,非法行医,非法收购、运输、出售珍贵、濒危野生动物、珍贵、濒危野生动物制品,非法出售发票,行贿,高利转贷,非法吸收公众存款,传播淫秽物品,非法进行节育手术,盗伐林木,聚众扰乱社会秩序,走私、贩卖、运输、制造毒品,滥伐林木,赌博,非法经营,生产、销售不符合安全标准的食品,提供侵入、非法控制计算机信息系统程序、工具,倒卖文物,窃取、收买、非法提供信用卡信息,盗掘古文化遗址、古墓葬,协助组织卖淫,破坏广播电视设施、公用电信设施,走私普通货物、物品,逃税,破坏监管秩序,失火,受贿,组织、领导、参加黑社会性质组织,票据诈骗,非法制造、销售非法制造的注册商标标识,侵犯著作权,伪造、变造、买卖国家机关公文、证件、印章,徇私舞弊不征、少征税款,强迫劳动,贷款诈骗,劫持船只、汽车,诈骗,非法种植毒品原植物,非法狩猎,挪用资金,非法收购、运输盗伐、滥伐的林木,出售、购买、运输假币,抢夺,虐待被监管人,窝藏、转移、收购、销售赃物,破坏计算机信息系统,制作、复制、出版、贩卖、传播淫秽物品牟利,拒不支付劳动报酬,盗窃、抢夺枪支、弹药、爆炸物,强迫他人吸毒,走私珍贵动物、珍贵动物制品,虐待,非法获取公民个人信息,破坏交通设施,非法转让、倒卖土地使用权,非法捕捞水产品,非法占用农用地,非法制造、出售非法制造的发票,非法持有、私藏枪支、弹药,集资诈骗,强迫卖淫,伪造公司、企业、事业单位、人民团体印章,利用影响力受贿,编造、故意传播虚假恐怖信息,介绍贿赂,传播性病,拐卖妇女、儿童,倒卖车票、船票,窝藏、转移、隐瞒毒品、毒赃,徇私舞弊不移交刑事案件,过失损坏广播电视设施、公用电信设施,动植物检疫徇私舞弊,破坏交通工具,猥亵儿童,挪用公款,伪造货币,冒充军人招摇撞骗,非法采伐、毁坏国家重点保护植物,故意毁坏财物,非法拘禁,招摇撞骗,伪造、变造居民身份证,徇私枉法,非法生产、买卖警用装备,掩饰、隐瞒犯罪所得、犯罪所得收益,生产、销售伪劣产品,破坏生产经营,帮助犯罪分子逃避处罚,贪污,投放危险物质,持有伪造的发票,危险驾驶,妨害作证,非法猎捕、杀害珍贵、濒危野生动物,重大责任事故,诽谤,虚开发票,引诱、教唆、欺骗他人吸毒,脱逃,扰乱无线电通讯管理秩序,保险诈骗,非法生产、销售间谍专用器材,非法组织卖血,强迫交易,串通投标,破坏易燃易爆设备,传授犯罪方法,妨害信用卡管理,拐骗儿童,单位行贿,打击报复证人,拒不执行判决、裁定,经济犯,金融凭证诈骗,虚开增值税专用发票、用于骗取出口退税、抵扣税款发票,走私废物,组织、领导传销活动,单位受贿,盗窃、抢夺枪支、弹药、爆炸物、危险物质,过失以危险方法危害公共安全,过失致人重伤,引诱、容留、介绍卖淫,遗弃,走私,信用卡诈骗,对单位行贿,故意杀人,聚众扰乱公共场��秩序、交通秩序,盗窃,故意伤害,非法侵入住宅,强制猥亵、侮辱妇女,伪证,污染环境,巨额财产来源不明,非国家工作人员受贿,侮辱,隐匿、故意销毁会计凭证、会计帐簿、财务会计报告,过失损坏武器装备、军事设施、军事通信,敲诈勒索,职务侵占。\n经审理查明:2013年9月底的一天晚上,被告人陆某德酒后经过沭阳县某镇某村张某荣家时,发现张某荣家没有人,即用石头砸破张某荣家房门玻璃,打开房门进入张某荣家中。因进入张某荣时被房门遗留的玻璃划伤,被告人陆某德在张某荣家北屋门和北屋东首间墙面遗留两处血迹。2014年1月7日,被告人陆某德被公安民警从其家中传唤到案,并如实供述自己的罪行。上述事实,有公诉机关提交的,经过庭审质证的,且均具有证据证明效力的以下证据予以证明:被告人陆某德供述其非法侵入他人住宅的时间、地点、经过等事实。该供述得到了被害人张某荣的陈述、证人周某花、李某华等人的证言、法庭科学DNA检验鉴定书、现场勘验检查笔录、现场图、现场照片等证据予以证实,足以认定。刑事判决书证明证明了被告人陆某德有前科;公安机关出具的“发破案经过”及“抓获经过”证明了本案案发及被告人陆某德的归案情况。\n
```

### 4K sequence
```
<context>/*\n * Implement the \"Falling Rocks\" game in the text console. \n * A small dwarf stays at the bottom of the screen and can \n * move left and right (by the arrows keys). A number of rocks \n * of different sizes and forms constantly fall down and you \n * need to avoid a crash.\n * Rocks are the symbols ^, @, *, &, +, %, $, #, !, ., ;, - distributed \n * with appropriate density. The dwarf is (O). \n * Ensure a constant game speed by Thread.Sleep(150).\n * Implement collision detection and scoring system.\n*/\n\nusing System;\nusing System.Threading;\nusing System.Collections.Generic;\nusing System.Threading.Tasks;\n\nclass FallingRocks\n{\n struct Position\n {\n public int X, Y;\n public string symbol;\n public ConsoleColor color;\n\n public Position(int x, int y, string symbol, ConsoleColor color)\n {\n this.X = x;\n this.Y = y;\n this.symbol = symbol;\n this.color = color;\n }\n }\n\n static void Main()\n {\n Thread oThread = new Thread(new ThreadStart(Mainn));\n Thread aThread = new Thread(new ThreadStart(Clr));\n \n aThread.Start();\n oThread.Start();\n oThread.Join();\n aThread.Join();\n }\n\n static void Clr()\n {\n while (true)\n {\n Thread.Sleep(10);\n Console.Clear();\n }\n }\n static void Mainn()\n {\n //Random generator for rocks color, position and symbol\n Random randomGenerator = new Random();\n \n //Sleep time for the game loop\n double sleepTime = 150;\n //Console settings\n Console.CursorVisible = false;\n Console.BufferHeight = Console.WindowHeight;\n \n //number of rocks in the Array rocks\n int rocksCount = 0;\n\n //array with the symbols of the rocks\n string[] symbols = new string[] { \"^\", \"@\", \"*\", \"&\", \"+\", \"%\", \"$\", \"#\", \"!\", \".\", \";\" };\n \n //array with colors for the rocks\n ConsoleColor[] colors = new ConsoleColor[] {ConsoleColor.Yellow, ConsoleColor.White, ConsoleColor.Gray};\n \n //array with rocks\n Position[] rocks = new Position[200];\n \n //position for the dwarf\n Position dwarf = new Position(10, Console.WindowHeight - 1,\"(0)\",ConsoleColor.Red);\n \n //bool variable to say when the game loop to be over\n bool gameLoop = true;\n\n //variable keeping the score\n ulong score = 0;\n\n //the game loop\n while (gameLoop)\n {\n //score is growing as the cycle runs\n score++;\n\n //setting the Y component for all the rocks in the array to grow with 2\n for (int i = 0; i <= rocks.Length - 1; i++)\n {\n rocks[i].Y = rocks[i].Y + 2;\n }\n\n //generating rocks\n for (int x = 0; x <= randomGenerator.Next(2, 4); x++)\n {\n rocks[rocksCount] = new Position(randomGenerator.Next(x * 15, x * 15 + 20), 0\n , symbols[randomGenerator.Next(0, symbols.Length - 1)]\n , colors[randomGenerator.Next(0, colors.Length - 1)]);\n if (rocksCount >= 199) rocksCount = 0;\n rocksCount++;\n }\n\n //printing the rocks and other stuff\n foreach (var item in rocks)\n {\n foreach (var rock in rocks)\n {\n //checking for colision\n if ((rock.X >= dwarf.X) && (rock.X <= (dwarf.X + 2)) && (rock.Y == dwarf.Y))\n {\n gameLoop = false;\n break;\n }\n } \n\n //printing the rocks\n if (item.Y < Console.WindowHeight)\n { \n Console.SetCursorPosition(item.X, item.Y);\n Console.ForegroundColor = item.color;\n Console.Write(item.symbol);\n }\n\n //checking for key pressed\n if (Console.KeyAvailable)\n {\n ConsoleKeyInfo pressedKey = Console.ReadKey();\n if (pressedKey.Key == ConsoleKey.RightArrow)\n {\n if(dwarf.X < Console.WindowWidth - 20)\n {\n //removing the old positions of the dwarf and increasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n 
Console.Write(\" \");\n dwarf.X++;\n }\n }\n if (pressedKey.Key == ConsoleKey.LeftArrow) \n {\n if(dwarf.X >= 1)\n {\n //removing the old positions of the dwarf and decreasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.Write(\" \");\n dwarf.X--;\n }\n }\n }\n }\n \n //printing the dwarf\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.ForegroundColor = dwarf.color;\n Console.Write(dwarf.symbol); \n \n //sleeping the loop for sometime\n //Thread.Sleep((int)sleepTime);\n\n //reducing the sleep time of the loop\n sleepTime -= 0.5;\n\n \n //removing the rocks \n //foreach (var item in rocks)\n //{\n // if (item.Y < Console.WindowHeight)\n // {\n // Console.SetCursorPosition(item.X, item.Y);\n // Console.Write(\" \");\n // }\n //} \n }\n //Printing the score after the game is over\n Console.Clear();\n Console.WriteLine(\"Game over! Your score is: \" + score);\n\n }\n}\n</context>\n\n这个\"Falling Rocks\"游戏是如何工作的呢?可以详细解释一下代码的运作机制吗? \n\n\n\n
```

### 8K sequence
```
<context># -*- coding: utf-8 -*-\n# This code is part of Amoco\n# Copyright (C) 2021 Axel Tillequin ([email protected])\n# published under GPLv2 license\nfrom amoco.arch.tricore import env\nfrom amoco.arch.core import *\n# -------------------------------------------------------\n# from TriCore TC1.6.2 core architecture manual V1.2.2\n# (32-bit Unified Processor Core), 2020-01-15\n# define all except FPU instructions\n# -------------------------------------------------------\nISPECS = []\n@ispec("32<[ disp1(16) disp2(8) {6d} ]", mnemonic="CALL")\n@ispec("32<[ disp1(16) disp2(8) {61} ]", mnemonic="FCALL")\n@ispec("32<[ disp1(16) disp2(8) {1d} ]", mnemonic="J")\n@ispec("32<[ disp1(16) disp2(8) {5d} ]", mnemonic="JL")\ndef tricore_branch(obj, disp1, disp2):\n v = env.cst(((disp2<<16)+disp1)<<1,24)\n obj.operands = [disp.signextend(32)]\n obj.type = type_control_flow\n@ispec("32<[ disp1(16) disp2(8) {ed} ]", mnemonic="CALLA")\n@ispec("32<[ disp1(16) disp2(8) {e1} ]", mnemonic="FCALLA")\n@ispec("32<[ disp1(16) disp2(8) {9d} ]", mnemonic="JA")\n@ispec("32<[ disp1(16) disp2(8) {dd} ]", mnemonic="JLA")\ndef tricore_branch(obj, disp1, disp2):\n v = env.cst((disp2<<16)+disp1,24)\n addr = composer([env.bit0,v[0:20],env.cst(0,7),v[20:24]])\n obj.operands = [addr]\n obj.type = type_control_flow\n@ispec("32<[ ---- {00} ---- ---- a(4) {2d} ]", mnemonic="CALLI")\n@ispec("32<[ ---- {01} ---- ---- a(4) {2d} ]", mnemonic="FCALLI")\n@ispec("32<[ ---- {03} ---- ---- a(4) {2d} ]", mnemonic="JI")\n@ispec("32<[ ---- {02} ---- ---- a(4) {2d} ]", mnemonic="JLI")\ndef tricore_branchI(obj, a):\n src = env.A[a]\n obj.operands = [src]\n obj.type = type_control_flow\n@ispec("16<[ disp(8) {5c} ]", mnemonic="CALL")\n@ispec("16<[ disp(8) {3c} ]", mnemonic="J")\n@ispec("16<[ disp(8) {ee} ]", mnemonic="JNZ")\n@ispec("16<[ disp(8) {6e} ]", mnemonic="JZ")\ndef tricore_branch(obj, disp):\n disp = env.cst(disp<<1,8)\n obj.operands = [disp.signextend(32)]\n obj.type = type_control_flow\n@ispec("32<[ ---- 0000000 const9(9) ---- {ad} ]", mnemonic="BISR")\n@ispec("32<[ ---- 0000100 const9(9) ---- {ad} ]", mnemonic="SYSCALL")\ndef tricore_system(obj, const9):\n obj.operands = [env.cst(const9,9)]\n obj.type = type_system\n@ispec("32<[ c(4) {1c} ---- b(4) ---- {0b} ]", mnemonic="ABS")\n@ispec("32<[ c(4) {5c} ---- b(4) ---- {0b} ]", mnemonic="ABS_B")\n@ispec("32<[ c(4) {7c} ---- b(4) ---- {0b} ]", mnemonic="ABS_H")\n@ispec("32<[ c(4) {1d} ---- b(4) ---- {0b} ]", mnemonic="ABSS")\n@ispec("32<[ c(4) {7d} ---- b(4) ---- {0b} ]", mnemonic="ABSS_H")\n@ispec("32<[ c(4) {1f} ---- b(4) ---- {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b):\n src = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {80} ---- b(4) ---- {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b):\n src = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, src.signextend(64)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {81} ---- b(4) a(4) {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b, a):\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, composer([src2,src1])]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {0e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF")\n@ispec("32<[ c(4) {4e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF_B")\n@ispec("32<[ c(4) {6e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF_H")\n@ispec("32<[ c(4) {0f} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIFS")\n@ispec("32<[ c(4) {6f} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIFS_H")\n@ispec("32<[ c(4) {00} ---- b(4) 
a(4) {0b} ]", mnemonic="ADD")\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {0b} ]", mnemonic="ADD_B")\n@ispec("32<[ c(4) {60} ---- b(4) a(4) {0b} ]", mnemonic="ADD_H")\n@ispec("32<[ c(4) {05} ---- b(4) a(4) {0b} ]", mnemonic="ADDC")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {0b} ]", mnemonic="ADDS")\n@ispec("32<[ c(4) {62} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_H")\n@ispec("32<[ c(4) {63} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_HU")\n@ispec("32<[ c(4) {03} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_U")\n@ispec("32<[ c(4) {04} ---- b(4) a(4) {0b} ]", mnemonic="ADDX")\n@ispec("32<[ c(4) {08} ---- b(4) a(4) {0f} ]", mnemonic="AND")\n@ispec("32<[ c(4) {20} ---- b(4) a(4) {0b} ]", mnemonic="AND_EQ")\n@ispec("32<[ c(4) {24} ---- b(4) a(4) {0b} ]", mnemonic="AND_GE")\n@ispec("32<[ c(4) {25} ---- b(4) a(4) {0b} ]", mnemonic="AND_GE_U")\n@ispec("32<[ c(4) {22} ---- b(4) a(4) {0b} ]", mnemonic="AND_LT")\n@ispec("32<[ c(4) {23} ---- b(4) a(4) {0b} ]", mnemonic="AND_LT_U")\n@ispec("32<[ c(4) {21} ---- b(4) a(4) {0b} ]", mnemonic="AND_NE")\n@ispec("32<[ c(4) {0e} ---- b(4) a(4) {0f} ]", mnemonic="ANDN")\n@ispec("32<[ c(4) {10} ---- b(4) a(4) {0b} ]", mnemonic="EQ")\n@ispec("32<[ c(4) {50} ---- b(4) a(4) {0b} ]", mnemonic="EQ_B")\n@ispec("32<[ c(4) {70} ---- b(4) a(4) {0b} ]", mnemonic="EQ_H")\n@ispec("32<[ c(4) {90} ---- b(4) a(4) {0b} ]", mnemonic="EQ_W")\n@ispec("32<[ c(4) {56} ---- b(4) a(4) {0b} ]", mnemonic="EQANY_B")\n@ispec("32<[ c(4) {76} ---- b(4) a(4) {0b} ]", mnemonic="EQANY_H")\n@ispec("32<[ c(4) {14} ---- b(4) a(4) {0b} ]", mnemonic="GE")\n@ispec("32<[ c(4) {15} ---- b(4) a(4) {0b} ]", mnemonic="GE_U")\n@ispec("32<[ c(4) {12} ---- b(4) a(4) {0b} ]", mnemonic="LT")\n@ispec("32<[ c(4) {13} ---- b(4) a(4) {0b} ]", mnemonic="LT_U")\n@ispec("32<[ c(4) {52} ---- b(4) a(4) {0b} ]", mnemonic="LT_B")\n@ispec("32<[ c(4) {53} ---- b(4) a(4) {0b} ]", mnemonic="LT_BU")\n@ispec("32<[ c(4) {72} ---- b(4) a(4) {0b} ]", mnemonic="LT_H")\n@ispec("32<[ c(4) {73} ---- b(4) a(4) {0b} ]", mnemonic="LT_HU")\n@ispec("32<[ c(4) {92} ---- b(4) a(4) {0b} ]", mnemonic="LT_W")\n@ispec("32<[ c(4) {93} ---- b(4) a(4) {0b} ]", mnemonic="LT_WU")\n@ispec("32<[ c(4) {1a} ---- b(4) a(4) {0b} ]", mnemonic="MAX")\n@ispec("32<[ c(4) {1b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_U")\n@ispec("32<[ c(4) {5a} ---- b(4) a(4) {0b} ]", mnemonic="MAX_B")\n@ispec("32<[ c(4) {5b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_BU")\n@ispec("32<[ c(4) {7a} ---- b(4) a(4) {0b} ]", mnemonic="MAX_H")\n@ispec("32<[ c(4) {7b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_HU")\n@ispec("32<[ c(4) {18} ---- b(4) a(4) {0b} ]", mnemonic="MIN")\n@ispec("32<[ c(4) {19} ---- b(4) a(4) {0b} ]", mnemonic="MIN_U")\n@ispec("32<[ c(4) {58} ---- b(4) a(4) {0b} ]", mnemonic="MIN_B")\n@ispec("32<[ c(4) {59} ---- b(4) a(4) {0b} ]", mnemonic="MIN_BU")\n@ispec("32<[ c(4) {78} ---- b(4) a(4) {0b} ]", mnemonic="MIN_H")\n@ispec("32<[ c(4) {79} ---- b(4) a(4) {0b} ]", mnemonic="MIN_HU")\n@ispec("32<[ c(4) {09} ---- b(4) a(4) {0f} ]", mnemonic="NAND")\n@ispec("32<[ c(4) {11} ---- b(4) a(4) {0b} ]", mnemonic="NE")\n@ispec("32<[ c(4) {0b} ---- b(4) a(4) {0f} ]", mnemonic="NOR")\n@ispec("32<[ c(4) {0a} ---- b(4) a(4) {0f} ]", mnemonic="OR")\n@ispec("32<[ c(4) {27} ---- b(4) a(4) {0b} ]", mnemonic="OR_EQ")\n@ispec("32<[ c(4) {2b} ---- b(4) a(4) {0b} ]", mnemonic="OR_GE")\n@ispec("32<[ c(4) {2c} ---- b(4) a(4) {0b} ]", mnemonic="OR_GE_U")\n@ispec("32<[ c(4) {29} ---- b(4) a(4) {0b} ]", mnemonic="OR_LT")\n@ispec("32<[ c(4) {2a} ---- b(4) a(4) {0b} ]", mnemonic="OR_LT_U")\n@ispec("32<[ c(4) {28} ---- b(4) 
a(4) {0b} ]", mnemonic="OR_NE")\n@ispec("32<[ c(4) {0f} ---- b(4) a(4) {0f} ]", mnemonic="ORN")\n@ispec("32<[ c(4) {00} ---- b(4) a(4) {0f} ]", mnemonic="SH")\n@ispec("32<[ c(4) {37} ---- b(4) a(4) {0b} ]", mnemonic="SH_EQ")\n@ispec("32<[ c(4) {3b} ---- b(4) a(4) {0b} ]", mnemonic="SH_GE")\n@ispec("32<[ c(4) {3c} ---- b(4) a(4) {0b} ]", mnemonic="SH_GE_U")\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {0f} ]", mnemonic="SH_H")\n@ispec("32<[ c(4) {39} ---- b(4) a(4) {0b} ]", mnemonic="SH_LT")\n@ispec("32<[ c(4) {3a} ---- b(4) a(4) {0b} ]", mnemonic="SH_LT_U")\n@ispec("32<[ c(4) {38} ---- b(4) a(4) {0b} ]", mnemonic="SH_NE")\n@ispec("32<[ c(4) {01} ---- b(4) a(4) {0f} ]", mnemonic="SHA")\n@ispec("32<[ c(4) {41} ---- b(4) a(4) {0f} ]", mnemonic="SHA_H")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {0f} ]", mnemonic="SHAS")\n@ispec("32<[ c(4) {08} ---- b(4) a(4) {0b} ]", mnemonic="SUB")\n@ispec("32<[ c(4) {48} ---- b(4) a(4) {0b} ]", mnemonic="SUB_B")\n@ispec("32<[ c(4) {68} ---- b(4) a(4) {0b} ]", mnemonic="SUB_H")\n@ispec("32<[ c(4) {0d} ---- b(4) a(4) {0b} ]", mnemonic="SUBC")\n@ispec("32<[ c(4) {0a} ---- b(4) a(4) {0b} ]", mnemonic="SUBS")\n@ispec("32<[ c(4) {0b} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_U")\n@ispec("32<[ c(4) {6a} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_H")\n@ispec("32<[ c(4) {6b} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_HU")\n@ispec("32<[ c(4) {0c} ---- b(4) a(4) {0b} ]", mnemonic="SUBX")\n@ispec("32<[ c(4) {0d} ---- b(4) a(4) {0f} ]", mnemonic="XNOR")\n@ispec("32<[ c(4) {0c} ---- b(4) a(4) {0f} ]", mnemonic="XOR")\n@ispec("32<[ c(4) {2f} ---- b(4) a(4) {0b} ]", mnemonic="XOR_EQ")\n@ispec("32<[ c(4) {30} ---- b(4) a(4) {0b} ]", mnemonic="XOR_NE")\ndef tricore_ddd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {01} ]", mnemonic="EQ_A")\n@ispec("32<[ c(4) {43} ---- b(4) a(4) {01} ]", mnemonic="GE_A")\n@ispec("32<[ c(4) {42} ---- b(4) a(4) {01} ]", mnemonic="LT_A")\n@ispec("32<[ c(4) {41} ---- b(4) a(4) {01} ]", mnemonic="NE_A")\ndef tricore_daa_arithmetic(obj, c, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {63} ---- b(4) ---- {01} ]", mnemonic="MOV_A", _dst=env.A, _src=env.D)\n@ispec("32<[ c(4) {00} ---- b(4) ---- {01} ]", mnemonic="MOV_AA", _dst=env.A, _src=env.A)\n@ispec("32<[ c(4) {4c} ---- b(4) ---- {01} ]", mnemonic="MOV_D", _dst=env.D, _src=env.A)\ndef tricore_daa_arithmetic(obj, c, b, _dst, _src):\n dst = _dst[c]\n src = _src[b]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {48} ---- ---- a(4) {01} ]", mnemonic="EQZ_A")\n@ispec("32<[ c(4) {49} ---- ---- a(4) {01} ]", mnemonic="NEZ_A")\ndef tricore_da_arithmetic(obj, c, a):\n src1 = env.A[a]\n dst = env.D[c]\n obj.operands = [dst, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {01} --00 b(4) a(4) {4b} ]", mnemonic="BMERGE")\ndef tricore_ddd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {06} --00 b(4) a(4) {4b} ]", mnemonic="CRC32_B")\n@ispec("32<[ c(4) {03} --00 b(4) a(4) {4b} ]", mnemonic="CRC32B_W")\n@ispec("32<[ c(4) {03} --00 b(4) a(4) {4b} ]", mnemonic="CRC32L_W")\ndef tricore_crc32(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src2, src1]\n obj.type = 
type_data_processing\n@ispec("32<[ c(4) {20} --01 b(4) a(4) {4b} ]", mnemonic="DIV")\n@ispec("32<[ c(4) {21} --01 b(4) a(4) {4b} ]", mnemonic="DIV_U")\n@ispec("32<[ c(4) {5a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_B")\n@ispec("32<[ c(4) {4a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_BU")\n@ispec("32<[ c(4) {3a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_H")\n@ispec("32<[ c(4) {2a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_HU")\n@ispec("32<[ c(4) {1a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT")\n@ispec("32<[ c(4) {0a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_U")\ndef tricore_edd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 100 ----- b(4) a(4) {17} ]", mnemonic="DEXTR")\ndef tricore_dddc(obj, c, d, b, a):\n shift = env.D[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, shift]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 010 ----- ---- a(4) {17} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) d(4) 011 ----- ---- a(4) {17} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, d, a):\n if d%2:\n raise InstructionError(obj)\n width = env.E[d][32:37]\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, width]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 0--00 ---- a(4) {6b} ]", mnemonic="PACK")\ndef tricore_extr(obj, c, d, a):\n if d%2:\n raise InstructionError(obj)\n src1 = env.E[d]\n src2 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {08} -- 00 ---- a(4) {4b} ]", mnemonic="UNPACK")\ndef tricore_extr(obj, c, d, a):\n src = env.D[a]\n dst = env.E[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {02} -- 00 ---- a(4) {4b} ]", mnemonic="PARITY")\n@ispec("32<[ c(4) {22} -- 00 ---- a(4) {4b} ]", mnemonic="POPCNT_W")\ndef tricore_extr(obj, c, d, a):\n src = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 ----- b(4) a(4) {77} ]", mnemonic="DEXTR")\ndef tricore_dextr(obj, c, pos, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, env.cst(pos,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 10 width(5) ---- a(4) {37} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) pos(5) 11 width(5) ---- a(4) {37} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, pos, width, a):\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 01 width(5) const(4) ---- {b7} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, pos, width, const):\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, env.cst(const,4), env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 001 width(5) const(4) ---- {d7} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, d, width, const):\n src2 = env.D[d]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, env.cst(const,4), src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 01 width(5) b(4) ---- {37} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, pos, width, b):\n src1 = env.D[b]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, env.cst(pos,5), env.cst(width,5)]\n obj.type = 
type_data_processing\n@ispec("32<[ c(4) d(4) 001 width(5) b(4) ---- {57} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, d, width, b):\n src1 = env.D[b]\n src2 = env.D[d]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 width(5) const(4) a(4) {b7} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, pos, width, const, a):\n dst = env.D[c]\n src1 = env.D[a]\n obj.operands = [dst, src1, env.cst(const,4), env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ----- const(4) a(4) {97} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, const, a):\n src1 = env.D[a]\n if d%2:\n raise InstructionError(obj)\n src3 = env.E[d]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(const,4), src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 width(5) const(4) a(4) {d7} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, width, const, a):\n src1 = env.D[a]\n src3 = env.D[d]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(const,4), src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 width(5) b(4) a(4) {37} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, pos, width, b, a):\n dst = env.D[c]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ----- b(4) a(4) {17} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n if d%2:\n raise InstructionError(obj)\n src3 = env.E[d]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 width(5) b(4) a(4) {57} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, width, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n src3 = env.D[d]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, src3, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 010 width(5) ---- a(4) {57} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) d(4) 011 width(5) ---- a(4) {57} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, d, width, a):\n src2 = env.D[d]\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {09} --00 ---- a(4) {4b} ]", mnemonic="BSPLIT")\ndef tricore_edd_arithmetic(obj, c, a):\n src1 = env.D[a]\n dst = env.E[c]\n obj.operands = [dst, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) 0001110 ~const9(9) a(4) {8b} ]", mnemonic="ABSDIF")\n@ispec("32<[ c(4) 0001111 ~const9(9) a(4) {8b} ]", mnemonic="ABSDIFS")\n@ispec("32<[ c(4) 0000000 ~const9(9) a(4) {8b} ]", mnemonic="ADD")\n@ispec("32<[ c(4) 0000101 ~const9(9) a(4) {8b} ]", mnemonic="ADDC")\n@ispec("32<[ c(4) 0000010 ~const9(9) a(4) {8b} ]", mnemonic="ADDS")\n@ispec("32<[ c(4) 0000011 ~const9(9) a(4) {8b} ]", mnemonic="ADDS_U") #const9 is signed\n@ispec("32<[ c(4) 0000100 ~const9(9) a(4) {8b} ]", mnemonic="ADDX")\n@ispec("32<[ c(4) 0100000 ~const9(9) a(4) {8b} ]", mnemonic="AND_EQ")\n@ispec("32<[ c(4) 0100100 ~const9(9) a(4) {8b} ]", mnemonic="AND_GE")\n@ispec("32<[ c(4) 0100010 ~const9(9) a(4) {8b} ]", mnemonic="AND_LT")\n@ispec("32<[ c(4) 0100001 ~const9(9) a(4) {8b} ]", mnemonic="AND_NE")\n@ispec("32<[ c(4) 0010000 ~const9(9) a(4) {8b} ]", mnemonic="EQ")\n@ispec("32<[ c(4) 1010110 ~const9(9) a(4) {8b} ]", mnemonic="EQANY_B")\n@ispec("32<[ c(4) 1110110 ~const9(9) a(4) {8b} ]", 
mnemonic="EQANY_H")\n@ispec("32<[ c(4) 0010100 ~const9(9) a(4) {8b} ]", mnemonic="GE")\n@ispec("32<[ c(4) 0010010 ~const9(9) a(4) {8b} ]", mnemonic="LT")\n@ispec("32<[ c(4) 0011010 ~const9(9) a(4) {8b} ]", mnemonic="MAX")\n@ispec("32<[ c(4) 0010001 ~const9(9) a(4) {8b} ]", mnemonic="NE")\n@ispec("32<[ c(4) 0100111 ~const9(9) a(4) {8b} ]", mnemonic="OR_EQ")\n@ispec("32<[ c(4) 0101011 ~const9(9) a(4) {8b} ]", mnemonic="OR_GE")\n@ispec("32<[ c(4) 0101001 ~const9(9) a(4) {8b} ]", mnemonic="OR_LT")\n@ispec("32<[ c(4) 0001000 ~const9(9) a(4) {8b} ]", mnemonic="RSUB")\n@ispec("32<[ c(4) 0001001 ~const9(9) a(4) {8b} ]", mnemonic="RSUBS")\n@ispec("32<[ c(4) 0001011 ~const9(9) a(4) {8b} ]", mnemonic="RSUBS_U") #const9 is signed\n@ispec("32<[ c(4) 0000000 ~const9(9) a(4) {8f} ]", mnemonic="SH")\n@ispec("32<[ c(4) 1000000 ~const9(9) a(4) {8f} ]", mnemonic="SH_H")\n@ispec("32<[ c(4) 0110111 ~const9(9) a(4) {8b} ]", mnemonic="SH_EQ")\n@ispec("32<[ c(4) 0111011 ~const9(9) a(4) {8b} ]", mnemonic="SH_GE")\n@ispec("32<[ c(4) 0111001 ~const9(9) a(4) {8b} ]", mnemonic="SH_LT")\n@ispec("32<[ c(4) 0111000 ~const9(9) a(4) {8b} ]", mnemonic="SH_NE")\n@ispec("32<[ c(4) 0000001 ~const9(9) a(4) {8f} ]", mnemonic="SHA")\n@ispec("32<[ c(4) 1000001 ~const9(9) a(4) {8f} ]", mnemonic="SHA_H")\n@ispec("32<[ c(4) 0000010 ~const9(9) a(4) {8f} ]", mnemonic="SHAS")\n@ispec("32<[ c(4) 0101111 ~const9(9) a(4) {8b} ]", mnemonic="XOR_EQ")\n@ispec("32<[ c(4) 0110011 ~const9(9) a(4) {8b} ]", mnemonic="XOR_GE")\n@ispec("32<[ c(4) 0110001 ~const9(9) a(4) {8b} ]", mnemonic="XOR_LT")\n@ispec("32<[ c(4) 0110000 ~const9(9) a(4) {8b} ]", mnemonic="XOR_NE")\ndef tricore_ddc_arithmetic(obj, c, const9, a):\n src1 = env.D[a]\n if obj.mnemonic in ("SH","SHA","SHAS"):\n const9 = const9[0:6]\n elif obj.mnemonic in ("SH_H","SHA_H"):\n const9 = const9[0:5]\n src2 = env.cst(const9.int(-1),32)\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_OR_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {87} ]", mnemonic="AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {87} ]", mnemonic="ANDN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {67} ]", mnemonic="INS_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {67} ]", mnemonic="INSN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {07} ]", mnemonic="NAND_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {87} ]", mnemonic="NOR_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {87} ]", mnemonic="OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {07} ]", mnemonic="ORN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_NAND_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {27} ]", 
mnemonic="SH_OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_ORN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_XNOR_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_XOR_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {07} ]", mnemonic="XNOR_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {07} ]", mnemonic="XOR_T")\ndef tricore_ddd_arithmetic(obj, c, pos2, pos1, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1[pos1:pos1+1], src2[pos2:pos2+1]]\n obj.type = type_data_processing\n@ispec("32<[ c(4) 0001000 const9(9) a(4) {8f} ]", mnemonic="AND")\n@ispec("32<[ c(4) 0100101 const9(9) a(4) {8b} ]", mnemonic="AND_GE_U")\n@ispec("32<[ c(4) 0100011 const9(9) a(4) {8b} ]", mnemonic="AND_LT_U")\n@ispec("32<[ c(4) 0001110 const9(9) a(4) {8f} ]", mnemonic="ANDN")\n@ispec("32<[ c(4) 0001001 const9(9) a(4) {8f} ]", mnemonic="NAND")\n@ispec("32<[ c(4) 0001011 const9(9) a(4) {8f} ]", mnemonic="NOR")\n@ispec("32<[ c(4) 0010101 const9(9) a(4) {8b} ]", mnemonic="GE_U")\n@ispec("32<[ c(4) 0001010 const9(9) a(4) {8f} ]", mnemonic="OR")\n@ispec("32<[ c(4) 0101100 const9(9) a(4) {8b} ]", mnemonic="OR_GE_U")\n@ispec("32<[ c(4) 0101010 const9(9) a(4) {8b} ]", mnemonic="OR_LT_U")\n@ispec("32<[ c(4) 0101000 const9(9) a(4) {8b} ]", mnemonic="OR_NE")\n@ispec("32<[ c(4) 0001111 const9(9) a(4) {8f} ]", mnemonic="ORN")\n@ispec("32<[ c(4) 0000111 const9(9) a(4) {8f} ]", mnemonic="SHUFFLE")\n@ispec("32<[ c(4) 0001101 const9(9) a(4) {8f} ]", mnemonic="XNOR")\n@ispec("32<[ c(4) 0001100 const9(9) a(4) {8f} ]", mnemonic="XOR")\n@ispec("32<[ c(4) 0111100 const9(9) a(4) {8b} ]", mnemonic="SH_GE_U")\n@ispec("32<[ c(4) 0111010 const9(9) a(4) {8b} ]", mnemonic="SH_LT_U")\n@ispec("32<[ c(4) 0110100 const9(9) a(4) {8b} ]", mnemonic="XOR_GE_U")\n@ispec("32<[ c(4) 0110011 const9(9) a(4) {8b} ]", mnemonic="XOR_LT_U")\n@ispec("32<[ c(4) 0011011 const9(9) a(4) {8b} ]", mnemonic="MAX_U")\n@ispec("32<[ c(4) 0010011 const9(9) a(4) {8b} ]", mnemonic="LT_U")\ndef tricore_ddc_arithmetic(obj, c, const9, a):\n src1 = env.D[a]\n src2 = env.cst(const9,32)\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {c2} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {06} ]", mnemonic="SH")\n@ispec("16<[ ~const4(4) a(4) {86} ]", mnemonic="SHA")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[a]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {92} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {8a} ]", mnemonic="CADD")\n@ispec("16<[ ~const4(4) a(4) {ca} ]", mnemonic="CADDN")\n@ispec("16<[ ~const4(4) a(4) {aa} ]", mnemonic="CMOV")\n@ispec("16<[ ~const4(4) a(4) {ea} ]", mnemonic="CMOVN")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[15]\n obj.operands = [dst, src1, src2]\n if "CADD" in obj.mnemonic:\n obj.operands = [dst, src1, dst, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {9a} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {ba} ]", mnemonic="EQ")\n@ispec("16<[ ~const4(4) a(4) {fa} ]", mnemonic="LT")\n@ispec("16<[ ~const4(4) a(4) {82} ]", mnemonic="MOV")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[15]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[a]\n obj.operands = [dst, src1, src2]\n if obj.mnemonic=="MOV":\n obj.operands = 
[src1,src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {d2} ]", mnemonic="MOV")\ndef tricore_ec_arithmetic(obj, const4, a):\n dst = env.E[a]\n src = env.cst(const4.int(-1),64)\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ const4(4) a(4) {a0} ]", mnemonic="MOV_A")\ndef tricore_ec_arithmetic(obj, const4, a):\n dst = env.A[a]\n src = env.cst(const4,32)\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ const8(8) {16} ]", mnemonic="AND")\n@ispec("16<[ const8(8) {da} ]", mnemonic="MOV")\n@ispec("16<[ const8(8) {96} ]", mnemonic="OR")\ndef tricore_ddc_arithmetic(obj, const8):\n dst = env.D[15]\n src2 = env.cst(const8,32)\n src1 = env.D[15]\n obj.operands = [dst, src1, src2]\n if obj.mnemonic=="MOV":\n obj.operands = [src1,src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {42} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {26} ]", mnemonic="AND")\n@ispec("16<[ b(4) a(4) {a6} ]", mnemonic="OR")\n@ispec("16<[ b(4) a(4) {a2} ]", mnemonic="SUB")\n@ispec("16<[ b(4) a(4) {62} ]", mnemonic="SUBS")\n@ispec("16<[ b(4) a(4) {c6} ]", mnemonic="XOR")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[a]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {02} ]", mnemonic="MOV" , _dst=env.D, _src=env.D)\n@ispec("16<[ b(4) a(4) {60} ]", mnemonic="MOV_A" , _dst=env.A, _src=env.D)\n@ispec("16<[ b(4) a(4) {40} ]", mnemonic="MOV_AA" , _dst=env.A, _src=env.A)\n@ispec("16<[ b(4) a(4) {80} ]", mnemonic="MOV_D" , _dst=env.D, _src=env.A)\ndef tricore_mov(obj, b, a, _dst, _src):\n dst = _dst[a]\n src = _src[b]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {12} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {2a} ]", mnemonic="CMOV")\n@ispec("16<[ b(4) a(4) {6a} ]", mnemonic="CMOVN")\n@ispec("16<[ b(4) a(4) {52} ]", mnemonic="SUB")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[a]\n src1 = env.D[15]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {1a} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {22} ]", mnemonic="ADDS")\n@ispec("16<[ b(4) a(4) {3a} ]", mnemonic="EQ")\n@ispec("16<[ b(4) a(4) {7a} ]", mnemonic="LT")\n@ispec("16<[ b(4) a(4) {5a} ]", mnemonic="SUB")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[15]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {01} ---- b(4) a(4) {01} ]", mnemonic="ADD_A")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {01} ]", mnemonic="SUB_A")\ndef tricore_aaa_arithmetic(obj, c, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {b0} ]", mnemonic="ADD_A")\ndef tricore_aac_arithmetic(obj, const4, a):\n dst = env.A[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.A[a]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ const8(8) {20} ]", mnemonic="SUB_A")\ndef tricore_aac_arithmetic(obj, const8, a):\n dst = env.A[10]\n src2 = env.cst(const8,32)\n src1 = env.A[10]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {30} ]", mnemonic="ADD_A")\ndef tricore_aa_arithmetic(obj, b, a):\n dst = env.A[a]\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) ~const16(16) a(4) {1b} ]", 
mnemonic="ADDI")\n@ispec("32<[ c(4) ~const16(16) a(4) {9b} ]", mnemonic="ADDIH")\ndef tricore_di_arithmetic(obj, c, const16, a):\n src1 = env.D[a]\n src2 = env.cst(const16.int(-1),32)\n if self.mnemonic=="ADDIH": src2=src2<<16\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) ~const16(16) a(4) {11} ]", mnemonic="ADDIH_A")\ndef tricore_ai_arithmetic(obj, c, const16, a):\n src1 = env.A[a]\n src2 = env.cst(const16.int(-1),32)<<16\n dst = env.A[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {60} -- n(2) b(4) a(4) {01} ]", mnemonic="ADDSC_A")\ndef tricore_aaa_arithmetic(obj, c, n, b, a):\n src1 = env.D[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src2, src1, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {62} ---- b(4) a(4) {01} ]", mnemonic="ADDSC_AT")\ndef tricore_aaa_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src2, src1]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) n(2) 010000 ]", mnemonic="ADDSC_A")\ndef tricore_aa_arithmetic(obj, b, a, n):\n dst = env.A[a]\n src1 = env.D[15]\n src2 = env.A[b]\n obj.operands = [dst, src2, src1, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ off2(4) 10 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1110 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_I", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1110 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_I", mode="Circular")\n@ispec("32<[ off2(4) 00 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1100 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1100 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_W", mode="Circular")\n@ispec("32<[ off2(4) 00 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1101 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_WI", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1101 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_WI", mode="Circular")\n@ispec("32<[ off2(4) 00 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1111 off1(6) b(4) ---- {89} ]", 
mnemonic="CACHEI_WI", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Pre-increment")\ndef tricore_cache(obj, off2, off1, b):\n src2 = env.A[b]\n src1 = env.cst((off2<<6)+off1,10)\n obj.operands = [src2, src1]\n obj.type = type_system\n@ispec("32<[ off2(4) 10 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 0011 off1(6) b(4) a(4) {69} ]", mnemonic="CMPSWAP_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 0011 off1(6) b(4) a(4) {69} ]", mnemonic="CMPSWAP_W", mode="Circular")\n@ispec("32<[ off2(4) 00 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 0010 off1(6) b(4) a(4) {69} ]", mnemonic="SWAPMSK_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 0010 off1(6) b(4) a(4) {69} ]", mnemonic="SWAPMSK_W", mode="Circular")\n@ispec("32<[ off2(4) 00 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Pre-increment")\ndef tricore_swap(obj, off2, off1, b, a):\n if a%2:\n raise InstructionError(obj)\n dst = env.D[a]\n src1 = env.A[b]\n src2 = env.cst((off2<<6)+off1,10)\n src3 = env.E[a]\n obj.operands = [dst, src1, src2, src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ~const9(9) a(4) {ab} ]", mnemonic="CADD")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {ab} ]", mnemonic="CADDN")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {13} ]", mnemonic="MADD", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {13} ]", mnemonic="MADDS", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {13} ]", mnemonic="MADDS_U", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {33} ]", mnemonic="MSUB", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {33} ]", mnemonic="MSUBS", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {33} ]", mnemonic="MSUBS_U", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {ab} ]", mnemonic="SEL")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {ab} ]", mnemonic="SELN")\ndef tricore_cond_ddc(obj, c, d, const9, a):\n cond = env.D[d]\n src1 = env.D[a]\n src2 = env.cst(const9.int(-1),32)\n dst = env.D[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 011 ~const9(9) a(4) {13} ]", mnemonic="MADD", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {13} ]", mnemonic="MADDS", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 010 ~const9(9) a(4) {13} ]", mnemonic="MADD_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {13} ]", mnemonic="MADDS_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 011 ~const9(9) a(4) {33} ]", mnemonic="MSUB", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {33} ]", mnemonic="MSUBS", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 010 ~const9(9) a(4) {33} ]", mnemonic="MSUB_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {33} ]", mnemonic="MSUBS_U", opt4="64+(32+K9)->64")\ndef tricore_cond_eec(obj, c, d, const9, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.cst(const9.int(-1),32)\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = 
type_data_processing\n@ispec("32<[ c(4) d(4) 011010 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="LL")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="LU")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="UL")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="UU")\n@ispec("32<[ c(4) d(4) 111010 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="LL")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="LU")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="UL")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="UU")\n@ispec("32<[ c(4) d(4) 000010 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 000001 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 000000 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 000101 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 011101 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 000100 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 011100 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 100010 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 100001 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 100000 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 100101 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 111101 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 100100 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 111100 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 011010 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="LL")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="LU")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="UL")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="UU")\n@ispec("32<[ c(4) d(4) 111010 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="LL")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="LU")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="UL")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="UU")\n@ispec("32<[ c(4) d(4) 000010 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) 
d(4) 000001 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 000000 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 000101 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 011101 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 000100 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 011100 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 100010 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 100001 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 100000 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 100101 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 111101 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 100100 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 111100 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16U*16U)->64")\ndef tricore_cond_eec(obj, c, d, n, b, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 0000 ---- b(4) a(4) {2b} ]", mnemonic="CADD")\n@ispec("32<[ c(4) d(4) 0001 ---- b(4) a(4) {2b} ]", mnemonic="CADDN")\n@ispec("32<[ c(4) d(4) 0010 ---- b(4) a(4) {2b} ]", mnemonic="CSUB")\n@ispec("32<[ c(4) d(4) 0011 ---- b(4) a(4) {2b} ]", mnemonic="CSUBN")\n@ispec("32<[ c(4) d(4) {0a} b(4) a(4) {03} ]", mnemonic="MADD", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) {8a} b(4) a(4) {03} ]", mnemonic="MADDS", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) {88} b(4) a(4) {03} ]", mnemonic="MADDS_U", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) 0100 ---- b(4) a(4) {2b} ]", mnemonic="SEL")\n@ispec("32<[ c(4) d(4) 0101 ---- b(4) a(4) {2b} ]", mnemonic="SELN")\ndef tricore_cond_ddd(obj, c, d, b, a):\n cond = env.D[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) {6a} b(4) a(4) {03} ]", mnemonic="MADD", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {ea} b(4) a(4) {03} ]", mnemonic="MADDS", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {68} b(4) a(4) {03} ]", mnemonic="MADD_U", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {e8} b(4) a(4) {03} ]", mnemonic="MADDS_U", opt4="64+(32*32)->64")\ndef tricore_cond_ddd(obj, c, d, b, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {1c} ---- ---- a(4) {0f} ]", mnemonic="CLO")\n@ispec("32<[ c(4) {7d} ---- ---- a(4) {0f} ]", mnemonic="CLO_H")\n@ispec("32<[ c(4) {1d} ---- ---- a(4) {0f} ]", 
mnemonic="CLS")\n@ispec("32<[ c(4) {7e} ---- ---- a(4) {0f} ]", mnemonic="CLS_H")\n@ispec("32<[ c(4) {1b} ---- ---- a(4) {0f} ]", mnemonic="CLZ")\n@ispec("32<[ c(4) {7c} ---- ---- a(4) {0f} ]", mnemonic="CLZ_H")\n@ispec("32<[ c(4) {5e} ---- ---- a(4) {0b} ]", mnemonic="SAT_B")\n@ispec("32<[ c(4) {5f} ---- ---- a(4) {0b} ]", mnemonic="SAT_BU")\n@ispec("32<[ c(4) {7e} ---- ---- a(4) {0b} ]", mnemonic="SAT_H")\n@ispec("32<[ c(4) {7f} ---- ---- a(4) {0b} ]", mnemonic="SAT_HU")\ndef tricore_dd_arithmetic(obj, c, a):\n src = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ 1010 ---- {00} ]", mnemonic="DEBUG")\n@ispec("16<[ 0000 ---- {00} ]", mnemonic="NOP")\ndef tricore_system(obj):\n obj.operands = []\n obj.type = type_system\n@ispec("16<[ 0111 ---- {00} ]", mnemonic="FRET")\n@ispec("16<[ 1001 ---- {00} ]", mnemonic="RET")\n@ispec("16<[ 1000 ---- {00} ]", mnemonic="RFE")\ndef tricore_ret(obj):\n obj.operands = []\n obj.type = type_control_flow\n@ispec("32<[ ---- 000100 ---------- ---- {0d} ]", mnemonic="DEBUG")\n@ispec("32<[ ---- 001101 ---------- ---- {0d} ]", mnemonic="DISABLE")\n@ispec("32<[ ---- 010010 ---------- ---- {0d} ]", mnemonic="DSYNC")\n@ispec("32<[ ---- 001100 ---------- ---- {0d} ]", mnemonic="ENABLE")\n@ispec("32<[ ---- 010011 ---------- ---- {0d} ]", mnemonic="ISYNC")\n@ispec("32<[ ---- 010101 ---------- ---- {0d} ]", mnemonic="TRAPSV")\n@ispec("32<[ ---- 010100 ---------- ---- {0d} ]", mnemonic="TRAPV")\n@ispec("32<[ ---- 000000 ---------- ---- {0d} ]", mnemonic="NOP")\n@ispec("32<[ ---- 001001 ---------- ---- {0d} ]", mnemonic="RSLCX")\n@ispec("32<[ ---- 000000 ---------- ---- {2f} ]", mnemonic="RSTV")\n@ispec("32<[ ---- 001000 ---------- ---- {0d} ]", mnemonic="SVLCX")\n@ispec("32<[ ---- 010110 ---------- ---- {0d} ]", mnemonic="WAIT")\ndef tricore_system(obj):\n obj.operands = []\n obj.type = type_system\n@ispec("32<[ ---- 000011 ---------- ---- {0d} ]", mnemonic="FRET")\n@ispec("32<[ ---- 000110 ---------- ---- {0d} ]", mnemonic="RET")\n@ispec("32<[ ---- 000111 ---------- ---- {0d} ]", mnemonic="RFE")\n@ispec("32<[ ---- 000101 ---------- ---- {0d} ]", mnemonic="RFM")\ndef tricore_ret(obj):\n obj.operands = []\n obj.type = type_control_flow\n@ispec("32<[ ---- 001111 ---------- a(4) {0d} ]", mnemonic="DISABLE")\n@ispec("32<[ ---- 001110 ---------- a(4) {0d} ]", mnemonic="RESTORE")\ndef tricore_system(obj, a):\n obj.operands = [env.D[a]]\n obj.type = type_system\n@ispec("32<[ c(4) d(4) 1101 -- 00 b(4) ---- {6b} ]", mnemonic="DVADJ")\n@ispec("32<[ c(4) d(4) 1111 -- 00 b(4) ---- {6b} ]", mnemonic="DVSTEP")\n@ispec("32<[ c(4) d(4) 1110 -- 00 b(4) ---- {6b} ]", mnemonic="DVSTEP_U")\n@ispec("32<[ c(4) d(4) 1010 -- 00 b(4) ---- {6b} ]", mnemonic="IXMAX")\n@ispec("32<[ c(4) d(4) 1011 -- 00 b(4) ---- {6b} ]", mnemonic="IXMAX_U")\n@ispec("32<[ c(4) d(4) 1000 -- 00 b(4) ---- {6b} ]", mnemonic="IXMIN")\n@ispec("32<[ c(4) d(4) 1001 -- 00 b(4) ---- {6b} ]", mnemonic="IXMIN_U")\ndef tricore_eee(obj, c, d, b):\n if d%2 or b%2 or c%2:\n raise InstructionError(obj)\n src1 = env.E[d]\n src2 = env.E[b]\n dst = env.E[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) disp(4) {1e} ]", mnemonic="JEQ", _off=0)\n@ispec("16<[ ~const4(4) disp(4) {9e} ]", mnemonic="JEQ", _off=16)\n@ispec("16<[ ~const4(4) disp(4) {5e} ]", mnemonic="JNE", _off=0)\n@ispec("16<[ ~const4(4) disp(4) {de} ]", mnemonic="JNE", _off=16)\ndef tricore_jcc(obj, const4, disp, _off):\n dst = env.D[15]\n src1 = 
env.cst(const4.int(-1),32)\n src2 = env.cst(disp,32)+_off\n obj.operands = [dst, src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {3e} ]", mnemonic="JEQ", _off=0)\n@ispec("16<[ b(4) disp(4) {be} ]", mnemonic="JEQ", _off=16)\n@ispec("16<[ b(4) disp(4) {7e} ]", mnemonic="JNE", _off=0)\n@ispec("16<[ b(4) disp(4) {fe} ]", mnemonic="JNE", _off=16)\ndef tricore_jcc(obj, b, disp, _off):\n dst = env.D[15]\n src1 = env.D[b]\n src2 = env.cst(disp,32)+_off\n obj.operands = [dst, src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {ce} ]", mnemonic="JGEZ")\n@ispec("16<[ b(4) disp(4) {4e} ]", mnemonic="JGTZ")\n@ispec("16<[ b(4) disp(4) {8e} ]", mnemonic="JLEZ")\n@ispec("16<[ b(4) disp(4) {0e} ]", mnemonic="JLTZ")\n@ispec("16<[ b(4) disp(4) {f6} ]", mnemonic="JNZ")\n@ispec("16<[ b(4) disp(4) {76} ]", mnemonic="JZ")\ndef tricore_jcc(obj, b, disp):\n src1 = env.D[b]\n src2 = env.cst(disp,32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {df} ]", mnemonic="JEQ")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {df} ]", mnemonic="JNE")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {ff} ]", mnemonic="JGE")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {ff} ]", mnemonic="JGE_U")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {bf} ]", mnemonic="JLT")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {bf} ]", mnemonic="JLT_U")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {9f} ]", mnemonic="JNED")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {9f} ]", mnemonic="JNEI")\ndef tricore_jcc(obj, disp, const, a):\n src1 = env.D[a]\n src2 = env.cst(const,4)\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {5f} ]", mnemonic="JEQ")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {5f} ]", mnemonic="JNE")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {7f} ]", mnemonic="JGE")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {7f} ]", mnemonic="JGE_U")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {3f} ]", mnemonic="JLT")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {3f} ]", mnemonic="JLT_U")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {1f} ]", mnemonic="JNED")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {1f} ]", mnemonic="JNEI")\ndef tricore_jcc(obj, disp, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {7d} ]", mnemonic="JEQ_A")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {7d} ]", mnemonic="JNE_A")\ndef tricore_jcc(obj, disp, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 1 ~disp(15) ---- a(4) {bd} ]", mnemonic="JNZ_A")\n@ispec("32<[ 0 ~disp(15) ---- a(4) {bd} ]", mnemonic="JZ_A")\ndef tricore_jcc(obj, disp, a):\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) ---- {fd} ]", mnemonic="LOOP")\n@ispec("32<[ 1 ~disp(15) b(4) ---- {fd} ]", mnemonic="LOOPU")\ndef tricore_jcc(obj, disp, b):\n src1 = env.A[b]\n src2 = env.cst(disp.int(-1)*2,32)\n obj.operands = [src1, src2]\n if obj.mnemonic=="LOOPU":\n obj.operands = [src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {7c} ]", mnemonic="JNZ_A")\n@ispec("16<[ b(4) disp(4) {bc} ]", mnemonic="JZ_A")\ndef tricore_jcc(obj, b, disp):\n src1 = env.A[b]\n src2 = env.cst(disp,32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) #disp(4) {fc} ]", mnemonic="LOOP")\ndef tricore_jcc(obj, b, 
disp):\n src1 = env.A[b]\n src2 = env.cst(int(("1"*27)+disp+"0",2),32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ 0000 a(4) {dc} ]", mnemonic="JI")\ndef tricore_ji(obj, a):\n src = env.A[a]\n obj.operands = [src]\n obj.type = type_control_flow\n@ispec("16<[ 0000 a(4) {46} ]", mnemonic="NOT")\n@ispec("16<[ 0101 a(4) {32} ]", mnemonic="RSUB")\n@ispec("16<[ 0000 a(4) {32} ]", mnemonic="SAT_B")\n@ispec("16<[ 0001 a(4) {32} ]", mnemonic="SAT_BU")\n@ispec("16<[ 0010 a(4) {32} ]", mnemonic="SAT_H")\n@ispec("16<[ 0011 a(4) {32} ]", mnemonic="SAT_HU")\ndef tricore_a(obj, a):\n src = env.D[a]\n obj.operands = [src]\n obj.type = type_data_processing\n@ispec("16<[ n(4) disp(4) {ae} ]", mnemonic="JNZ_T")\n@ispec("16<[ n(4) disp(4) {2e} ]", mnemonic="JZ_T")\ndef tricore_ji(obj, n, disp):\n obj.operands = [env.D[15][n:n+1], env.cst(disp,32)]\n obj.type = type_control_flow\n@ispec("32<[ 1 ~disp(15) n(4) a(4) h 1101111 ]", mnemonic="JNZ_T")\n@ispec("32<[ 0 ~disp(15) n(4) a(4) h 1101111 ]", mnemonic="JZ_T")\ndef tricore_jcc(obj, disp, n, a, h):\n i = n+(h<<4)\n src = env.D[a][i:i+1]\n obj.operands = [src, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_A", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_B", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_BU", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_D", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_DA", mode="Absolute")\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_H", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_HU", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {45} ]", mnemonic="LD_Q", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {c5} ]", mnemonic="LEA", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n dst = env.D[a]\n if obj.mnemonic in ("LD_A", "LEA") : dst = env.A[a]\n if obj.mnemonic in ("LD_D","LDMST") : dst = env.E[a]\n if obj.mnemonic=="LD_DA": dst = env.P[a]\n src = off1//off2//off3\n obj.operands = [dst, composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {c5} ]", mnemonic="LHA", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n dst = env.A[a]\n src = off1//off2//off3//off4\n obj.operands = [dst, composer([env.cst(0,14),env.cst(src.int(),18)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_A", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {25} ]", mnemonic="ST_B", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_D", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_DA", mode="Absolute")\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {25} ]", mnemonic="ST_H", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {65} ]", mnemonic="ST_Q", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_W", 
mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {e5} ]", mnemonic="SWAP_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {e5} ]", mnemonic="LDMST", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4, a):\n src = env.D[a]\n if obj.mnemonic in ("ST_A",) : src = env.A[a]\n if obj.mnemonic in ("ST_D","LDMST") : src = env.E[a]\n if obj.mnemonic=="ST_DA": src = env.P[a]\n addr = off1//off2//off3\n obj.operands = [composer([env.cst(addr.int(),28),env.cst(off4,4)]), src]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) b bpos(3) {d5} ]", mnemonic="ST_T", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4, b, bpos):\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)]), env.cst(bpos,3), env.cst(b,1)]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) ---- {15} ]", mnemonic="STLCX", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4):\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {15} ]", mnemonic="LDLCX", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {15} ]", mnemonic="LDUCX", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n src = off1//off2//off3\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_A", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_A", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_B", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_B", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_BU", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_BU", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_D", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_D", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Short-offset")\n@ispec("32<[ ~off2(4) 
00 0111 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_DA", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_DA", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_H", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_H", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0011 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_HU", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0011 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_HU", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_Q", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_Q", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="LEA", mode="Short-offset")\ndef tricore_ld(obj, off2, off1, b, a):\n dst = env.D[a]\n if obj.mnemonic=="LD_A" : dst = env.A[a]\n elif obj.mnemonic=="LEA" : dst = env.A[a]\n elif obj.mnemonic=="LD_D" : dst = env.E[a]\n elif obj.mnemonic=="LDMST" : dst = env.E[a]\n elif obj.mnemonic=="LD_DA" : dst = env.P[a]\n obj.b = b\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [dst, src1, src2]\n if obj.mode == "Bit-Reverse":\n obj.operands.pop()\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_A", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_A", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_B", 
mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_B", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_D", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_D", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_DA", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_DA", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_H", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_H", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_Q", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_Q", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {69} ]", mnemonic="LDMST", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {69} ]", mnemonic="LDMST", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Pre-increment")\ndef tricore_st(obj, off2, off1, b, a):\n dst = env.D[a]\n if obj.mnemonic=="ST_A" : dst = env.A[a]\n elif obj.mnemonic=="ST_D" : dst = env.E[a]\n elif obj.mnemonic=="ST_DA" : dst = env.P[a]\n elif obj.mnemonic=="LDMST" : dst = env.E[a]\n obj.b = b\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n 
obj.operands = [src1, src2, dst]\n if obj.mode == "Bit-Reverse":\n obj.operands.pop()\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {69} ]", mnemonic="SWAP_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {69} ]", mnemonic="SWAP_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Pre-increment")\ndef tricore_ld(obj, off2, off1, b, a):\n dst = env.D[a]\n src1 = env.P[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [src1, src2, dst]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) ---- {49} ]", mnemonic="LDLCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) ---- {49} ]", mnemonic="LDUCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) ---- {49} ]", mnemonic="STLCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) ---- {49} ]", mnemonic="STUCX", mode="Short-offset")\ndef tricore_ld(obj, off2, off1, b):\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {99} ]", mnemonic="LD_A", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {79} ]", mnemonic="LD_B", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {39} ]", mnemonic="LD_BU", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {b9} ]", mnemonic="LD_HU", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {19} ]", mnemonic="LD_W", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {d9} ]", mnemonic="LEA", mode="Long-offset")\ndef tricore_ld(obj, off2, off3, off1, b, a):\n dst = env.D[a]\n\n</context>\n\n假设一个实际的场景,我们需要采用这段代码来实现工作流程,可以给我解释一下这段代码的作用吗? \n\n\n\n
|
105 |
+
```
|
@@ -0,0 +1,103 @@
import argparse
from time import perf_counter

import sys
sys.path.append('../')

from lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B


def get_args():
    parser = argparse.ArgumentParser(description="Faster Baichuan Demo")

    parser.add_argument('--model-path', type=str, required=True,
                        help='Model Path, include config.ini and tokenizer files')
    # parser.add_argument('--tokenizer-path', type=str, default='/group/30063/users/vanewu/LocalModels/ChatGLM6B-Torch/chatglm-6b')
    parser.add_argument('--tokenizer-path', type=str, default=None)

    parser.add_argument(
        '--data-type', type=str, metavar='TYPE', default='fp16',
        choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
        help='The data type to inference. If None, the data type follows the '
             'checkpoint data type.')

    parser.add_argument(
        '--memopt_mode', type=int, default=0, choices=[0, 1],
        help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
             ' 0: FP16 mode'
             ' 1: Use MEMOPT mode')

    parser.add_argument(
        '--quant-type', type=str, metavar='TYPE', default='int8',
        choices=['int4', 'int8'],
        help='The data type of quantization. Only used in MEMOPT.')

    parser.add_argument("--prompt", type=str, required=False)
    parser.add_argument("--max-output-length", type=int, default=512)
    parser.add_argument("--warmups", type=int, default=10)
    parser.add_argument("--avgnums", type=int, default=10)
    args = parser.parse_args()

    print('\n=================== Arguments ===================')
    for k, v in vars(args).items():
        print(f' - {k.ljust(25, ".")}: {v}')
    print('=================================================')

    return args


def main():
    args = get_args()

    # model = lyraBaichuan7B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode, args.quant_type)
    model = lyraBaichuan13B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode, args.quant_type)

    # prompt_template = "<reserved_106>{}\n<reserved_107>" # baichuan chat
    prompt_template = "{}" # baichuan

    prompt = prompt_template.format(args.prompt)

    test_batch_size = [1, 2, 4] # 8, 16, 32, 64
    print("test_batch_size: ", test_batch_size)

    for i, bs in enumerate(test_batch_size):
        prompts = [prompt, ]*bs

        # warmup gpu
        for _ in range(args.warmups):
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.1, do_sample=False)

        start = perf_counter()
        for _ in range(args.avgnums):
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
        end = perf_counter()
        cost = (end - start) / args.avgnums

        input_output_texts = [prompt+' ' + gtext for prompt,
                              gtext in zip(prompts, output_texts)]
        tokens = 0
        input_tokens = len(model.tokenizer.encode(prompt))
        words = 0
        for text in input_output_texts:
            tokens += len(model.tokenizer.encode(text))
            words += len(text)
        print(
            f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Input tokens: {input_tokens}. Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
        )
        print(
            f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
        )

        if i == 0:
            for k in range(bs):
                print(
                    f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
                if k>2:
                    break

if __name__ == "__main__":
    main()
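Stripped of the benchmarking loop, the runtime interaction that this batch demo exercises is small. The following sketch is not part of the commit: the model directory is a placeholder, the constructor is called positionally exactly as above (model path, tokenizer path, data type, memopt mode, quant type), and it assumes the matching lyraOp shared library is installed and that the script is run from the examples/ directory so that `sys.path.append('../')` finds the package.

```python
import sys
sys.path.append('../')  # same layout assumption as the demos: run from examples/

from lyra_baichuan import lyraBaichuan13B

# Positional arguments mirror the demo above:
#   model path (placeholder), tokenizer path (None -> use model path),
#   data type, memopt_mode (0 = FP16, 1 = MEMOPT), quant type used by MEMOPT.
model = lyraBaichuan13B('path/to/converted/Baichuan2_13B', None, 'fp16', 0, 'int8')

outputs = model.generate(
    ["Write one sentence about efficient LLM inference."],
    output_length=64,
    top_k=30, top_p=0.85, temperature=1.0,
    repetition_penalty=1.0, do_sample=False)
print(outputs[0])
```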
@@ -0,0 +1,101 @@
import argparse
from time import perf_counter

import sys
sys.path.append('../')

from lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B


def print_list(lines):
    # clear the terminal output
    print("\033c", end="")

    # print the list of strings line by line
    print('\n'.join(lines))

def get_args():
    parser = argparse.ArgumentParser(description="Faster Baichuan Demo")

    parser.add_argument('--model-path', type=str, required=True,
                        help='Model Path, include config.ini and tokenizer files')
    parser.add_argument('--tokenizer-path', type=str, default=None)

    parser.add_argument(
        '--data-type', type=str, metavar='TYPE', default='fp16',
        choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
        help='The data type to inference. If None, the data type follows the '
             'checkpoint data type.')

    parser.add_argument(
        '--memopt_mode', type=int, default=0, choices=[0, 1],
        help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
             ' 0: FP16 mode'
             ' 1: Use MEMOPT mode')

    parser.add_argument("--prompt", type=str, required=False)
    parser.add_argument("--max-output-length", type=int, default=512)
    parser.add_argument("--warmups", type=int, default=10)
    parser.add_argument("--avgnums", type=int, default=10)
    args = parser.parse_args()

    print('\n=================== Arguments ===================')
    for k, v in vars(args).items():
        print(f' - {k.ljust(25, ".")}: {v}')
    print('=================================================')

    return args


def main():
    args = get_args()

    # model = lyraBaichuan7B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode)
    model = lyraBaichuan13B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode)

    # prompt_template = "<reserved_106>{}\n<reserved_107>" # baichuan chat
    prompt_template = "{}" # baichuan

    prompt = prompt_template.format(args.prompt)

    test_batch_size = [1, 2, 4] # 8, 16, 32, 64
    print("test_batch_size: ", test_batch_size)

    for i, bs in enumerate(test_batch_size):
        prompts = [prompt, ]*bs

        # warmup gpu
        for _ in range(args.warmups):
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.1, do_sample=False)

        start = perf_counter()
        for _ in range(args.avgnums):
            for finish, output_texts in model.stream_generate(prompts,
                                                              output_length=args.max_output_length,
                                                              top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False):
                print_list(output_texts)

                if finish:
                    break
        end = perf_counter()
        cost = (end - start) / args.avgnums

        input_output_texts = [prompt+' ' + gtext for prompt,
                              gtext in zip(prompts, output_texts)]
        tokens = 0
        input_tokens = len(model.tokenizer.encode(prompt))
        words = 0
        for text in input_output_texts:
            tokens += len(model.tokenizer.encode(text))
            words += len(text)
        print(
            f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Input tokens: {input_tokens}. Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
        )
        print(
            f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
        )

if __name__ == "__main__":
    main()
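The streaming demo above redraws the whole terminal on every step because `stream_generate` appears to yield a `(finish, output_texts)` pair in which each entry of `output_texts` is the cumulative decode for one prompt. Under that assumption, a caller that wants incremental chunks instead of full redraws can remember the previously seen text and emit only the suffix; the helper below is a sketch built on nothing but the tuple shape used in the demo, and the sampling keyword arguments simply mirror the ones above.

```python
def stream_deltas(model, prompt, max_new_tokens=256):
    """Yield only the newly generated characters for a single prompt.

    Assumes model.stream_generate yields (finish, texts) where texts[i] is the
    cumulative output for prompt i, as the redraw-based demo above suggests.
    """
    seen = ""
    for finish, texts in model.stream_generate(
            [prompt], output_length=max_new_tokens,
            top_k=30, top_p=0.85, temperature=1.0,
            repetition_penalty=1.0, do_sample=False):
        current = texts[0]
        delta = current[len(seen):]
        if delta:
            seen = current
            yield delta
        if finish:
            break

# Usage (model constructed as in the demos):
#   for chunk in stream_deltas(model, "Hello"):
#       print(chunk, end="", flush=True)
```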
@@ -0,0 +1,116 @@
import argparse
import json
import random
import numpy as np

from time import perf_counter

import sys
sys.path.append('../')
from lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B


def get_args():
    parser = argparse.ArgumentParser(description="Faster Baichuan Demo")

    parser.add_argument('--model-path', type=str, required=True,
                        help='Model Path, include config.ini and tokenizer files')
    # parser.add_argument('--tokenizer-path', type=str, default='/group/30063/users/vanewu/LocalModels/ChatGLM6B-Torch/chatglm-6b')
    parser.add_argument('--tokenizer-path', type=str, default=None)

    parser.add_argument(
        '--data-type', type=str, metavar='TYPE', default='fp16',
        choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
        help='The data type to inference. If None, the data type follows the '
             'checkpoint data type.')

    parser.add_argument(
        '--memopt_mode', type=int, default=0, choices=[0, 1],
        help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
             ' 0: FP16 mode'
             ' 1: Use MEMOPT mode')

    parser.add_argument("--prompt_filepath", type=str, required=True)
    parser.add_argument("--max-output-length", type=int, default=512)
    parser.add_argument("--warmups", type=int, default=10)
    parser.add_argument("--avgnums", type=int, default=10)
    args = parser.parse_args()

    print('\n=================== Arguments ===================')
    for k, v in vars(args).items():
        print(f' - {k.ljust(25, ".")}: {v}')
    print('=================================================')

    return args


def main():
    args = get_args()

    # model = lyraBaichuan7B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode)
    model = lyraBaichuan13B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode)

    with open(args.prompt_filepath, "rb") as f:
        input_datas = json.loads(f.read())

    used_input_data = input_datas[0]

    # prompt_template = "<reserved_106>{}\n<reserved_107>" # baichuan chat
    prompt_template = "{}" # baichuan

    test_batch_size = [1, 2, 4,] # 8, 16, 32, 64
    print("test_batch_size: ", test_batch_size)

    for i, bs in enumerate(test_batch_size):
        all_use_prompts = []
        all_output_texts = []

        # warmup gpu
        for _ in range(args.warmups):
            # sample bs argument tuples at random and format them into the prompt template
            prompts = [prompt_template.format( used_input_data['prompts'].format(*x) ) for x in random.choices(used_input_data['contents'], k=bs)]
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)

        all_cost_s = 0.0

        for _ in range(args.avgnums):
            prompts = [prompt_template.format( used_input_data['prompts'].format(*x) ) for x in random.choices(used_input_data['contents'], k=bs)]
            all_use_prompts.extend(prompts)

            start = perf_counter()
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
            all_cost_s += perf_counter() - start

            all_output_texts.extend(output_texts)

        cost = all_cost_s / args.avgnums

        input_output_texts = [prompt + ' ' + gtext for prompt,gtext in zip(all_use_prompts, all_output_texts)]

        tokens = 0
        avg_input_tokens = np.mean([len(model.tokenizer.encode(prompt)) for prompt in all_use_prompts])

        words = 0
        for text in input_output_texts:
            tokens += len(model.tokenizer.encode(text))
            words += len(text)
        print(
            f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Avg Input tokens: {avg_input_tokens}. Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
        )
        print(
            f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
        )

        if i == 0:
            for k in range(bs):
                print(
                    f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
                if k>2:
                    break

if __name__ == "__main__":
    main()
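This random-batch demo expects a different input layout than a flat list of prompt strings: the file passed via `--prompt_filepath` is loaded as JSON, its first element must carry a `prompts` format string and a `contents` list of argument tuples, and each batch is built by formatting randomly sampled tuples into that template. The snippet below writes a minimal file in that shape; the output file name and the English placeholder texts are made up for illustration, only the key names come from the script.

```python
import json

# Minimal example of the structure the script reads:
# input_datas[0]['prompts'] is a format template, and each entry of
# input_datas[0]['contents'] supplies the positional arguments for it.
example = [
    {
        "prompts": "Context: {}\nQuestion: {}\nAnswer:",
        "contents": [
            ["First reference passage ...", "First question ..."],
            ["Second reference passage ...", "Second question ..."],
        ],
    }
]

with open("random_prompts.json", "w", encoding="utf-8") as f:
    json.dump(example, f, ensure_ascii=False, indent=2)
```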
@@ -0,0 +1,6 @@
[
    "歌曲名:《幸福万年长》;歌手名:汤灿;歌曲描述:汤灿的幸福万年长创作背景:2001年,汤灿决定推出一首能够贴近听众和潮流的民歌。为此,她邀请了创作过歌曲《为你》《快乐老家》的音乐人浮克合作,邀其担任该曲的制作工作。虽然浮克此前一直从事流行歌曲的工作,但他其实也是一位衷情民歌风格的音乐人,于是两人一拍即合,合作了该曲。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
    "歌曲名:《小丑面具》;歌手名:韩庚;歌曲描述:韩庚的小丑面具的歌曲鉴赏:韩庚在这首歌化身为“小丑”,带上面具调侃这社会上的表面功夫,用幽默又神经质的方式批判愈形冷酷的人心。在这首独特的电子舞曲当中,韩庚尝试了各种不同的发声方式,冷笑、哭喊、啜泣……甚至用声乐融合鬼魅的方法演唱,让人不禁陷入他建构的虚幻氛围而随之起舞。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
    "《Bela Lugosi's Dead 》是英国后朋克乐队Bauhaus的首张单曲,于 1979 年 8 月 6 日在Small Wonder厂牌上发行。[4]它通常被认为是第一张哥特式摇滚唱片。\n1979 年 1 月 26 日,“Bela Lugosi's Dead”在威灵伯勒的贝克录音室进行了六个小时的“录音室现场”录制。这是他们在乐队成立六周后一起录制的第一首歌曲。[6]所有四位乐队成员都被认为是这首歌的作者:主唱彼得·墨菲、吉他手丹尼尔·阿什、鼓手凯文·哈斯金斯和贝斯手大卫·J (大卫·哈斯金斯)。David J 声称这首歌的歌词是他写的。[5] “Bela Lugosi's Dead”的替代版本还包括他们下一首单曲“ Dark Entries ”的早期演示录音的一部分。\n\n在同一场会议中还录制了另外四首歌曲:“Boys”;“咬我的臀部”;“Some Faces”和斯卡雷鬼曲调“Harry”,这是关于Blondie主唱Deborah Harry的。[7] [8]关于这次会议,凯文·哈斯金斯 (Kevin Haskins) 说,“那里有力量流行音乐,还有斯卡。我们试图找到我们的声音。” [9]\n\n在那次录制期间录制的歌曲中(除了“Bela Lugosi's Dead”),只有“Harry”获得了官方发行;1982年作为单曲“ Kick in the Eye ”的B面。1979 年晚些时候在 Beck Studios 录制的《Boys》版本被用作原版单曲《Bela Lugosi's Dead》的 B 面。[10]其余曲目,包括“Boys”的原始录音,一直未发行,直到 2018 年The Bela Session以黑胶唱片和CD 形式发行,并可供乐队数字下载。[11]在额外的曲目中,《经典摇滚》杂志写道:“其余的材料发现乐队正在摸索方向,甚至触及了斯卡。”\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
    "歌曲名:《仓颉》;歌手名:五月天;歌曲描述:五月天的仓颉的歌曲鉴赏:五月天 仓颉(2张)《仓颉》是一首写在文明即将消失前的情诗,陈信宏的词写得颇有味道。《仓颉》这样淡淡的歌曲,或许不够大气,但是陈信宏真诚的演唱足以令人感动,而且《仓颉》的歌词也写得很有哲理。这首歌曲朗朗上口的旋律和诗意的文字使得它很适合在KTV演唱。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:"
]
@@ -0,0 +1,83 @@
from typing import List
from queue import Queue

import torch


def build_chat_input(model, tokenizer, messages: List[dict], max_new_tokens: int=0):
    def _parse_messages(messages, split_role="user"):
        system, rounds = "", []
        round = []
        for i, message in enumerate(messages):
            if message["role"] == "system":
                assert i == 0
                system = message["content"]
                continue
            if message["role"] == split_role and round:
                rounds.append(round)
                round = []
            round.append(message)
        if round:
            rounds.append(round)
        return system, rounds

    max_new_tokens = max_new_tokens or model.generation_config.max_new_tokens
    max_input_tokens = model.config.model_max_length - max_new_tokens
    system, rounds = _parse_messages(messages, split_role="user")
    system_tokens = tokenizer.encode(system)
    max_history_tokens = max_input_tokens - len(system_tokens)

    history_tokens = []
    for round in rounds[::-1]:
        round_tokens = []
        for message in round:
            if message["role"] == "user":
                round_tokens.append(model.generation_config.user_token_id)
            else:
                round_tokens.append(model.generation_config.assistant_token_id)
            round_tokens.extend(tokenizer.encode(message["content"]))
        if len(history_tokens) == 0 or len(history_tokens) + len(round_tokens) <= max_history_tokens:
            history_tokens = round_tokens + history_tokens # concat left
            if len(history_tokens) < max_history_tokens:
                continue
        break

    input_tokens = system_tokens + history_tokens
    if messages[-1]["role"] != "assistant":
        input_tokens.append(model.generation_config.assistant_token_id)
    input_tokens = input_tokens[-max_input_tokens:] # truncate left
    return torch.LongTensor([input_tokens]).to(model.device)


class TextIterStreamer:
    def __init__(self, tokenizer, skip_prompt=False, skip_special_tokens=False):
        self.tokenizer = tokenizer
        self.skip_prompt = skip_prompt
        self.skip_special_tokens = skip_special_tokens
        self.tokens = []
        self.text_queue = Queue()
        self.next_tokens_are_prompt = True

    def put(self, value):
        if self.skip_prompt and self.next_tokens_are_prompt:
            self.next_tokens_are_prompt = False
        else:
            if len(value.shape) > 1:
                value = value[0]
            self.tokens.extend(value.tolist())
            self.text_queue.put(
                self.tokenizer.decode(self.tokens, skip_special_tokens=self.skip_special_tokens))

    def end(self):
        self.text_queue.put(None)

    def __iter__(self):
        return self

    def __next__(self):
        value = self.text_queue.get()
        if value is None:
            raise StopIteration()
        else:
            return value
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from .lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import dataclasses
|
2 |
+
from typing import Optional
|
3 |
+
|
4 |
+
|
5 |
+
@dataclasses.dataclass
|
6 |
+
class LyraBaichuanParam:
|
7 |
+
num_heads: int = 40
|
8 |
+
size_per_head: int = 128
|
9 |
+
inter_size: int = 13824
|
10 |
+
num_layers: int = 40
|
11 |
+
vocab_size: int = 39424
|
12 |
+
start_id: Optional[int] = 1
|
13 |
+
end_id: Optional[int] = 2
|
14 |
+
tensor_para_size: int = 1
|
15 |
+
pipeline_para_size: int = 1
|
16 |
+
remove_padding: bool = True
|
17 |
+
shared_contexts_ratio: float = 1.0
|
18 |
+
layernorm_eps: float = 1e-6
|
19 |
+
weights_data_type: str = "fp16"
|
20 |
+
rotary_embedding: int = 128
|
21 |
+
use_gptj_residual: bool = False
|
22 |
+
|
23 |
+
def __post_init__(self):
|
24 |
+
if not 0.0 <= self.shared_contexts_ratio <= 1.0:
|
25 |
+
raise ValueError(
|
26 |
+
f'Got an invalid value of shared_context_ratio '
|
27 |
+
f'{self.shared_contexts_ratio} - range: [0.0, 1.0]')
|
28 |
+
|
29 |
+
def asdict(self):
|
30 |
+
return dataclasses.asdict(self)
|
31 |
+
|
32 |
+
|
33 |
+
LYRA_BAICHUAN_PARAM = LyraBaichuanParam()
|
34 |
+
LIB_SO_PATH = '/usr/lib/ftlib/libth_lyrallms.so'
|
@@ -0,0 +1,391 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import configparser
|
4 |
+
import pathlib
|
5 |
+
import typing
|
6 |
+
import os
|
7 |
+
|
8 |
+
import torch
|
9 |
+
import transformers
|
10 |
+
from torch.nn.utils.rnn import pad_sequence
|
11 |
+
|
12 |
+
from .config import LYRA_BAICHUAN_PARAM, LIB_SO_PATH
|
13 |
+
from .model import BaichuanModel
|
14 |
+
from .tokenization_baichuan import BaichuanTokenizer
|
15 |
+
|
16 |
+
class lyraBaichuan7B:
|
17 |
+
def __init__(self, model_path, tokenizer_path=None, dtype='fp16', memopt_mode=0, quant_dtype="int4") -> None:
|
18 |
+
self.model_path = model_path
|
19 |
+
self.tokenizer_path = tokenizer_path
|
20 |
+
self.dtype = dtype
|
21 |
+
|
22 |
+
self.memopt_mode = memopt_mode
|
23 |
+
self.quant_data_type = quant_dtype
|
24 |
+
|
25 |
+
self.model, self.tokenizer = self.load_model_and_tokenizer()
|
26 |
+
print("Got model and tokenizer")
|
27 |
+
|
28 |
+
def load_model_and_tokenizer(self):
|
29 |
+
if self.tokenizer_path is None:
|
30 |
+
tokenizer_path = self.model_path
|
31 |
+
else:
|
32 |
+
tokenizer_path = self.tokenizer_path
|
33 |
+
|
34 |
+
print(f'Loading tokenizer from {tokenizer_path}')
|
35 |
+
tokenizer = BaichuanTokenizer.from_pretrained(tokenizer_path)
|
36 |
+
|
37 |
+
checkpoint_path = pathlib.Path(self.model_path)
|
38 |
+
config_path = checkpoint_path / 'config.ini'
|
39 |
+
|
40 |
+
if config_path.exists():
|
41 |
+
# Read model params from config.
|
42 |
+
cfg = configparser.ConfigParser()
|
43 |
+
cfg.read(config_path)
|
44 |
+
model_name = 'baichuan'
|
45 |
+
inference_data_type = self.dtype
|
46 |
+
if inference_data_type == None:
|
47 |
+
inference_data_type = cfg.get(model_name, "weight_data_type")
|
48 |
+
model_args = dict(
|
49 |
+
head_num=cfg.getint(model_name, 'head_num'),
|
50 |
+
size_per_head=cfg.getint(model_name, "size_per_head"),
|
51 |
+
inter_size=cfg.getint(model_name, 'inter_size'),
|
52 |
+
layer_num=cfg.getint(model_name, "num_layer"),
|
53 |
+
rotary_embedding_dim=cfg.getint(model_name, 'rotary_embedding'),
|
54 |
+
layernorm_eps=cfg.getfloat(model_name, 'layernorm_eps'),
|
55 |
+
vocab_size=cfg.getint(model_name, "vocab_size"),
|
56 |
+
start_id=cfg.getint(model_name, "start_id"),
|
57 |
+
end_id=cfg.getint(model_name, "end_id"),
|
58 |
+
weights_data_type=cfg.get(model_name, "weight_data_type"),
|
59 |
+
tensor_para_size=cfg.getint(model_name, "tensor_para_size"),
|
60 |
+
inference_data_type=inference_data_type)
|
61 |
+
else:
|
62 |
+
inference_data_type = self.dtype
|
63 |
+
if inference_data_type == None:
|
64 |
+
inference_data_type = LYRA_BAICHUAN_PARAM.weights_data_type
|
65 |
+
model_args = dict(head_num=LYRA_BAICHUAN_PARAM.num_heads,
|
66 |
+
size_per_head=LYRA_BAICHUAN_PARAM.size_per_head,
|
67 |
+
inter_size=LYRA_BAICHUAN_PARAM.inter_size,
|
68 |
+
layer_num=LYRA_BAICHUAN_PARAM.num_layers,
|
69 |
+
rotary_embedding_dim=LYRA_BAICHUAN_PARAM.rotary_embedding,
|
70 |
+
layernorm_eps=LYRA_BAICHUAN_PARAM.layernorm_eps,
|
71 |
+
vocab_size=LYRA_BAICHUAN_PARAM.vocab_size,
|
72 |
+
start_id=LYRA_BAICHUAN_PARAM.start_id or tokenizer.bos_token_id,
|
73 |
+
end_id=LYRA_BAICHUAN_PARAM.end_id or tokenizer.eos_token_id,
|
74 |
+
weights_data_type=LYRA_BAICHUAN_PARAM.weights_data_type,
|
75 |
+
tensor_para_size=LYRA_BAICHUAN_PARAM.tensor_para_size,
|
76 |
+
inference_data_type=inference_data_type)
|
77 |
+
|
78 |
+
# update common parameters
|
79 |
+
model_args.update(dict(
|
80 |
+
lib_path=LIB_SO_PATH,
|
81 |
+
model_path=os.path.join(self.model_path, "1-gpu-fp16.bin"),
|
82 |
+
max_seq_len=0, # for position seq embedding
|
83 |
+
pipeline_para_size=LYRA_BAICHUAN_PARAM.pipeline_para_size,
|
84 |
+
use_gptj_residual=LYRA_BAICHUAN_PARAM.use_gptj_residual,
|
85 |
+
memopt_mode=self.memopt_mode,
|
86 |
+
quant_data_type=self.quant_data_type
|
87 |
+
))
|
88 |
+
|
89 |
+
print('[FT][INFO] Load Our FT Highly Optimized Baichuan-7B model')
|
90 |
+
for k, v in model_args.items():
|
91 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
92 |
+
|
93 |
+
# Check sanity and consistency between the model and tokenizer.
|
94 |
+
checklist = ['head_num', 'size_per_head', 'vocab_size', 'layer_num',
|
95 |
+
'tensor_para_size', 'tensor_para_size', 'weights_data_type']
|
96 |
+
if None in [model_args[k] for k in checklist]:
|
97 |
+
none_params = [p for p in checklist if model_args[p] is None]
|
98 |
+
print(f'[FT][WARNING] Found None parameters {none_params}. They must '
|
99 |
+
f'be provided either by config file or CLI arguments.')
|
100 |
+
if model_args['start_id'] != tokenizer.bos_token_id:
|
101 |
+
print('[FT][WARNING] Given start_id is not matched with the bos token '
|
102 |
+
'id of the pretrained tokenizer.')
|
103 |
+
if model_args['end_id'] not in (tokenizer.pad_token_id, tokenizer.eos_token_id):
|
104 |
+
print('[FT][WARNING] Given end_id is not matched with neither pad '
|
105 |
+
'token id nor eos token id of the pretrained tokenizer.')
|
106 |
+
|
107 |
+
print(f'Loading model from {self.model_path}')
|
108 |
+
model = BaichuanModel(**model_args)
|
109 |
+
return model, tokenizer
|
110 |
+
|
111 |
+
def generate(self, prompts: typing.List[str] | str,
|
112 |
+
output_length: int = 512,
|
113 |
+
beam_width: int = 1,
|
114 |
+
top_k: typing.Optional[torch.IntTensor] = 1,
|
115 |
+
top_p: typing.Optional[torch.FloatTensor] = 1.0,
|
116 |
+
beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
|
117 |
+
temperature: typing.Optional[torch.FloatTensor] = 1.0,
|
118 |
+
len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
|
119 |
+
repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
|
120 |
+
presence_penalty: typing.Optional[torch.FloatTensor] = None,
|
121 |
+
min_length: typing.Optional[torch.IntTensor] = None,
|
122 |
+
bad_words_list: typing.Optional[torch.IntTensor] = None,
|
123 |
+
do_sample: bool = False,
|
124 |
+
return_output_length: bool = False,
|
125 |
+
return_cum_log_probs: int = 0):
|
126 |
+
#
|
127 |
+
if isinstance(prompts, str):
|
128 |
+
prompts = [prompts, ]
|
129 |
+
|
130 |
+
inputs = prompts
|
131 |
+
|
132 |
+
batch_size = len(inputs)
|
133 |
+
ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
|
134 |
+
ones_float = torch.ones(size=[batch_size], dtype=torch.float32)
|
135 |
+
|
136 |
+
# we must encode the raw prompt text one by one in order to compute the length of the original text.
|
137 |
+
input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
|
138 |
+
input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
|
139 |
+
# after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
|
140 |
+
input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)
|
141 |
+
|
142 |
+
random_seed = None
|
143 |
+
if do_sample:
|
144 |
+
random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)
|
145 |
+
|
146 |
+
outputs = self.model(start_ids=input_token_ids,
|
147 |
+
start_lengths=input_lengths,
|
148 |
+
output_len=output_length,
|
149 |
+
beam_width=beam_width,
|
150 |
+
top_k=top_k * ones_int,
|
151 |
+
top_p=top_p * ones_float,
|
152 |
+
beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
|
153 |
+
temperature=temperature * ones_float,
|
154 |
+
len_penalty=len_penalty * ones_float,
|
155 |
+
repetition_penalty=repetition_penalty * ones_float,
|
156 |
+
random_seed=random_seed,
|
157 |
+
return_output_length=return_output_length,
|
158 |
+
return_cum_log_probs=return_cum_log_probs)
|
159 |
+
|
160 |
+
if return_cum_log_probs > 0:
|
161 |
+
outputs = outputs[0] # output_token_ids.
|
162 |
+
|
163 |
+
# Slice the generated token ids of the 1st beam result.
|
164 |
+
# output = input tokens + generated tokens.
|
165 |
+
output_token_ids = [out[0, length:].cpu()
|
166 |
+
for out, length in zip(outputs, input_lengths)]
|
167 |
+
|
168 |
+
output_texts = self.tokenizer.batch_decode(
|
169 |
+
output_token_ids, skip_special_tokens=True)
|
170 |
+
|
171 |
+
return output_texts
|
172 |
+
|
173 |
+
class lyraBaichuan13B:
|
174 |
+
def __init__(self, model_path, tokenizer_path=None, dtype='fp16', memopt_mode=0, quant_dtype="int4") -> None:
|
175 |
+
self.model_path = model_path
|
176 |
+
self.tokenizer_path = tokenizer_path
|
177 |
+
self.dtype = dtype
|
178 |
+
|
179 |
+
self.memopt_mode = memopt_mode
|
180 |
+
self.quant_data_type = quant_dtype
|
181 |
+
|
182 |
+
self.model, self.tokenizer = self.load_model_and_tokenizer()
|
183 |
+
print("Got model and tokenizer")
|
184 |
+
|
185 |
+
def load_model_and_tokenizer(self):
|
186 |
+
if self.tokenizer_path is None:
|
187 |
+
tokenizer_path = self.model_path
|
188 |
+
else:
|
189 |
+
tokenizer_path = self.tokenizer_path
|
190 |
+
|
191 |
+
print(f'Loading tokenizer from {tokenizer_path}')
|
192 |
+
tokenizer = BaichuanTokenizer.from_pretrained(tokenizer_path)
|
193 |
+
|
194 |
+
checkpoint_path = pathlib.Path(self.model_path)
|
195 |
+
config_path = checkpoint_path / 'config.ini'
|
196 |
+
|
197 |
+
if config_path.exists():
|
198 |
+
# Read model params from config.
|
199 |
+
cfg = configparser.ConfigParser()
|
200 |
+
cfg.read(config_path)
|
201 |
+
model_name = 'baichuan'
|
202 |
+
inference_data_type = self.dtype
|
203 |
+
if inference_data_type == None:
|
204 |
+
inference_data_type = cfg.get(model_name, "weight_data_type")
|
205 |
+
model_args = dict(
|
206 |
+
head_num=cfg.getint(model_name, 'head_num'),
|
207 |
+
size_per_head=cfg.getint(model_name, "size_per_head"),
|
208 |
+
inter_size=cfg.getint(model_name, 'inter_size'),
|
209 |
+
layer_num=cfg.getint(model_name, "num_layer"),
|
210 |
+
rotary_embedding_dim=0,
|
211 |
+
layernorm_eps=cfg.getfloat(model_name, 'layernorm_eps'),
|
212 |
+
vocab_size=cfg.getint(model_name, "vocab_size"),
|
213 |
+
start_id=cfg.getint(model_name, "start_id"),
|
214 |
+
end_id=cfg.getint(model_name, "end_id"),
|
215 |
+
weights_data_type=cfg.get(model_name, "weight_data_type"),
|
216 |
+
tensor_para_size=cfg.getint(model_name, "tensor_para_size"),
|
217 |
+
inference_data_type=inference_data_type)
|
218 |
+
else:
|
219 |
+
inference_data_type = self.dtype
|
220 |
+
if inference_data_type == None:
|
221 |
+
inference_data_type = LYRA_BAICHUAN_PARAM.weights_data_type
|
222 |
+
model_args = dict(head_num=LYRA_BAICHUAN_PARAM.num_heads,
|
223 |
+
size_per_head=LYRA_BAICHUAN_PARAM.size_per_head,
|
224 |
+
inter_size=LYRA_BAICHUAN_PARAM.inter_size,
|
225 |
+
layer_num=LYRA_BAICHUAN_PARAM.num_layers,
|
226 |
+
rotary_embedding_dim=0,
|
227 |
+
layernorm_eps=LYRA_BAICHUAN_PARAM.layernorm_eps,
|
228 |
+
vocab_size=LYRA_BAICHUAN_PARAM.vocab_size,
|
229 |
+
start_id=LYRA_BAICHUAN_PARAM.start_id or tokenizer.bos_token_id,
|
230 |
+
end_id=LYRA_BAICHUAN_PARAM.end_id or tokenizer.eos_token_id,
|
231 |
+
weights_data_type=LYRA_BAICHUAN_PARAM.weights_data_type,
|
232 |
+
tensor_para_size=LYRA_BAICHUAN_PARAM.tensor_para_size,
|
233 |
+
inference_data_type=inference_data_type)
|
234 |
+
|
235 |
+
# update common parameters
|
236 |
+
model_args.update(dict(
|
237 |
+
lib_path=LIB_SO_PATH,
|
238 |
+
model_path=os.path.join(self.model_path, "1-gpu-fp16.bin"),
|
239 |
+
max_seq_len=0, # for position seq embedding
|
240 |
+
pipeline_para_size=LYRA_BAICHUAN_PARAM.pipeline_para_size,
|
241 |
+
use_gptj_residual=LYRA_BAICHUAN_PARAM.use_gptj_residual,
|
242 |
+
memopt_mode=self.memopt_mode,
|
243 |
+
quant_data_type=self.quant_data_type
|
244 |
+
))
|
245 |
+
|
246 |
+
print('[FT][INFO] Load Our FT Highly Optimized Baichuan-13B model')
|
247 |
+
for k, v in model_args.items():
|
248 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
249 |
+
|
250 |
+
# Check sanity and consistency between the model and tokenizer.
|
251 |
+
checklist = ['head_num', 'size_per_head', 'vocab_size', 'layer_num',
|
252 |
+
'tensor_para_size', 'tensor_para_size', 'weights_data_type']
|
253 |
+
if None in [model_args[k] for k in checklist]:
|
254 |
+
none_params = [p for p in checklist if model_args[p] is None]
|
255 |
+
print(f'[FT][WARNING] Found None parameters {none_params}. They must '
|
256 |
+
f'be provided either by config file or CLI arguments.')
|
257 |
+
if model_args['start_id'] != tokenizer.bos_token_id:
|
258 |
+
print('[FT][WARNING] Given start_id is not matched with the bos token '
|
259 |
+
'id of the pretrained tokenizer.')
|
260 |
+
if model_args['end_id'] not in (tokenizer.pad_token_id, tokenizer.eos_token_id):
|
261 |
+
print('[FT][WARNING] Given end_id is not matched with neither pad '
|
262 |
+
'token id nor eos token id of the pretrained tokenizer.')
|
263 |
+
|
264 |
+
print(f'Loading model from {self.model_path}')
|
265 |
+
model = BaichuanModel(**model_args)
|
266 |
+
return model, tokenizer
|
267 |
+
|
268 |
+
def generate(self, prompts: typing.List[str] | str,
|
269 |
+
output_length: int = 512,
|
270 |
+
beam_width: int = 1,
|
271 |
+
top_k: typing.Optional[torch.IntTensor] = 1,
|
272 |
+
top_p: typing.Optional[torch.FloatTensor] = 1.0,
|
273 |
+
beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
|
274 |
+
temperature: typing.Optional[torch.FloatTensor] = 1.0,
|
275 |
+
len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
|
276 |
+
repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
|
277 |
+
presence_penalty: typing.Optional[torch.FloatTensor] = None,
|
278 |
+
min_length: typing.Optional[torch.IntTensor] = None,
|
279 |
+
bad_words_list: typing.Optional[torch.IntTensor] = None,
|
280 |
+
do_sample: bool = False,
|
281 |
+
return_output_length: bool = False,
|
282 |
+
return_cum_log_probs: int = 0):
|
283 |
+
#
|
284 |
+
if isinstance(prompts, str):
|
285 |
+
prompts = [prompts, ]
|
286 |
+
|
287 |
+
inputs = prompts
|
288 |
+
|
289 |
+
batch_size = len(inputs)
|
290 |
+
ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
|
291 |
+
ones_float = torch.ones(size=[batch_size], dtype=torch.float32)
|
292 |
+
|
293 |
+
# we must encode the raw prompt text one by one in order to compute the length of the original text.
|
294 |
+
input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
|
295 |
+
input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
|
296 |
+
# after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
|
297 |
+
input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)
|
298 |
+
|
299 |
+
random_seed = None
|
300 |
+
if do_sample:
|
301 |
+
random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)
|
302 |
+
|
303 |
+
outputs = self.model(start_ids=input_token_ids,
|
304 |
+
start_lengths=input_lengths,
|
305 |
+
output_len=output_length,
|
306 |
+
beam_width=beam_width,
|
307 |
+
top_k=top_k * ones_int,
|
308 |
+
top_p=top_p * ones_float,
|
309 |
+
beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
|
310 |
+
temperature=temperature * ones_float,
|
311 |
+
len_penalty=len_penalty * ones_float,
|
312 |
+
repetition_penalty=repetition_penalty * ones_float,
|
313 |
+
random_seed=random_seed,
|
314 |
+
return_output_length=return_output_length,
|
315 |
+
return_cum_log_probs=return_cum_log_probs)
|
316 |
+
|
317 |
+
if return_cum_log_probs > 0:
|
318 |
+
outputs = outputs[0] # output_token_ids.
|
319 |
+
|
320 |
+
# Slice the generated token ids of the 1st beam result.
|
321 |
+
# output = input tokens + generated tokens.
|
322 |
+
output_token_ids = [out[0, length:].cpu()
|
323 |
+
for out, length in zip(outputs, input_lengths)]
|
324 |
+
|
325 |
+
output_texts = self.tokenizer.batch_decode(
|
326 |
+
output_token_ids, skip_special_tokens=True)
|
327 |
+
|
328 |
+
return output_texts
|
329 |
+
|
330 |
+
def stream_generate(self, prompts: typing.List[str] | str,
|
331 |
+
output_length: int = 512,
|
332 |
+
beam_width: int = 1,
|
333 |
+
top_k: typing.Optional[torch.IntTensor] = 1,
|
334 |
+
top_p: typing.Optional[torch.FloatTensor] = 1.0,
|
335 |
+
beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
|
336 |
+
temperature: typing.Optional[torch.FloatTensor] = 1.0,
|
337 |
+
len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
|
338 |
+
repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
|
339 |
+
presence_penalty: typing.Optional[torch.FloatTensor] = None,
|
340 |
+
min_length: typing.Optional[torch.IntTensor] = None,
|
341 |
+
bad_words_list: typing.Optional[torch.IntTensor] = None,
|
342 |
+
do_sample: bool = False,
|
343 |
+
return_output_length: bool = False,
|
344 |
+
return_cum_log_probs: int = 0):
|
345 |
+
if isinstance(prompts, str):
|
346 |
+
prompts = [prompts, ]
|
347 |
+
|
348 |
+
inputs = prompts
|
349 |
+
|
350 |
+
batch_size = len(inputs)
|
351 |
+
ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
|
352 |
+
ones_float = torch.ones(size=[batch_size], dtype=torch.float32)
|
353 |
+
|
354 |
+
# we must encode the raw prompt text one by one in order to compute the length of the original text.
|
355 |
+
input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
|
356 |
+
input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
|
357 |
+
# after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
|
358 |
+
input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)
|
359 |
+
|
360 |
+
random_seed = None
|
361 |
+
if do_sample:
|
362 |
+
random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)
|
363 |
+
|
364 |
+
for finish, output_ids, sequence_length, output_cum_log_probs in self.model.stream_forward(start_ids=input_token_ids,
|
365 |
+
start_lengths=input_lengths,
|
366 |
+
output_len=output_length,
|
367 |
+
beam_width=beam_width,
|
368 |
+
top_k=top_k * ones_int,
|
369 |
+
top_p=top_p * ones_float,
|
370 |
+
beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
|
371 |
+
temperature=temperature * ones_float,
|
372 |
+
len_penalty=len_penalty * ones_float,
|
373 |
+
repetition_penalty=repetition_penalty * ones_float,
|
374 |
+
random_seed=random_seed,
|
375 |
+
return_output_length=return_output_length,
|
376 |
+
return_cum_log_probs=return_cum_log_probs):
|
377 |
+
|
378 |
+
# Slice the generated token ids of the 1st beam result.
|
379 |
+
# output = input tokens + generated tokens.
|
380 |
+
output_token_ids = [out[0, length:].cpu()
|
381 |
+
for out, length in zip(output_ids, input_lengths)]
|
382 |
+
|
383 |
+
output_texts = self.tokenizer.batch_decode(
|
384 |
+
output_token_ids, skip_special_tokens=True)
|
385 |
+
|
386 |
+
if finish:
|
387 |
+
break
|
388 |
+
|
389 |
+
yield finish, output_texts
|
390 |
+
|
391 |
+
return finish, output_texts
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import print_function
|
2 |
+
|
3 |
+
import copy
|
4 |
+
import os
|
5 |
+
import pathlib
|
6 |
+
import typing
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
import torch
|
10 |
+
import torch.distributed as dist
|
11 |
+
import torch.nn as nn
|
12 |
+
|
13 |
+
from queue import Queue
|
14 |
+
from threading import Thread
|
15 |
+
|
16 |
+
import sys
|
17 |
+
sys.path.append('/usr/lib/lyralib')
|
18 |
+
import lyraOp
|
19 |
+
|
20 |
+
str_type_map = {"fp32": torch.float32, "fp16": torch.float16, "bf16": torch.bfloat16}
|
21 |
+
|
22 |
+
class BaichuanModel(nn.Module):
|
23 |
+
def __init__(self,
|
24 |
+
head_num,
|
25 |
+
size_per_head,
|
26 |
+
inter_size,
|
27 |
+
vocab_size,
|
28 |
+
rotary_embedding_dim,
|
29 |
+
start_id, end_id, layer_num,
|
30 |
+
max_seq_len: int,
|
31 |
+
layernorm_eps,
|
32 |
+
tensor_para_size: int,
|
33 |
+
pipeline_para_size: int,
|
34 |
+
use_gptj_residual,
|
35 |
+
lib_path: typing.Union[str, pathlib.Path],
|
36 |
+
model_path,
|
37 |
+
memopt_mode: int = 0,
|
38 |
+
quant_data_type: str = "int8",
|
39 |
+
inference_data_type: str = "fp16",
|
40 |
+
weights_data_type: typing.Union[str, np.dtype] = np.float32):
|
41 |
+
super().__init__()
|
42 |
+
self.head_num = head_num
|
43 |
+
self.size_per_head = size_per_head
|
44 |
+
self.inter_size = inter_size
|
45 |
+
self.vocab_size = vocab_size
|
46 |
+
self.rotary_embedding_dim = rotary_embedding_dim
|
47 |
+
self.start_id = start_id
|
48 |
+
self.end_id = end_id
|
49 |
+
self.max_seq_len = max_seq_len
|
50 |
+
self.layer_num = layer_num
|
51 |
+
self.use_gptj_residual = use_gptj_residual
|
52 |
+
self.layernorm_eps = layernorm_eps
|
53 |
+
self.memopt_mode = memopt_mode
|
54 |
+
self.quant_data_type = quant_data_type
|
55 |
+
|
56 |
+
# multi-gpu params
|
57 |
+
self.tensor_para_size = tensor_para_size
|
58 |
+
self.pipeline_para_size = pipeline_para_size
|
59 |
+
self.build_model = False
|
60 |
+
self.weights_data_type = weights_data_type
|
61 |
+
self.inference_data_type = inference_data_type
|
62 |
+
|
63 |
+
assert torch.cuda.is_available(), "CUDA is required for this model."
|
64 |
+
|
65 |
+
assert head_num % tensor_para_size == 0, "head_num must be a multiple of tensor_para_size."
|
66 |
+
assert layer_num % pipeline_para_size == 0, "layer_num must be a multiple of pipeline_para_size."
|
67 |
+
|
68 |
+
# queue for streaming
|
69 |
+
self.que = Queue()
|
70 |
+
self.threads = [None] * self.tensor_para_size
|
71 |
+
|
72 |
+
# Load the C++ model into Pytorch model.
|
73 |
+
# torch.classes.load_library(os.path.abspath(lib_path))
|
74 |
+
|
75 |
+
# Prepare for tensor/pipeline parallel
|
76 |
+
try:
|
77 |
+
dist.init_process_group(backend='mpi')
|
78 |
+
except:
|
79 |
+
print("[INFO] WARNING: Have initialized the process group")
|
80 |
+
self.rank = dist.get_rank()
|
81 |
+
self.device_count = torch.cuda.device_count()
|
82 |
+
self.device = self.rank % self.device_count
|
83 |
+
torch.cuda.set_device(self.device)
|
84 |
+
|
85 |
+
world_size = dist.get_world_size()
|
86 |
+
# print(tensor_para_size * pipeline_para_size)
|
87 |
+
assert world_size == tensor_para_size * pipeline_para_size, "tensor_para_size * pipeline_para_size must be equal to world_size."
|
88 |
+
|
89 |
+
self.tensor_para_rank = self.rank % self.tensor_para_size
|
90 |
+
self.pipeline_para_rank = self.rank // self.tensor_para_size
|
91 |
+
|
92 |
+
self.model = lyraOp.LyraBaichuan(
|
93 |
+
self.head_num, self.size_per_head, self.inter_size,
|
94 |
+
self.layer_num,
|
95 |
+
self.vocab_size,
|
96 |
+
self.rotary_embedding_dim,
|
97 |
+
self.layernorm_eps,
|
98 |
+
self.start_id, self.end_id,
|
99 |
+
self.tensor_para_size, self.pipeline_para_size,
|
100 |
+
self.max_seq_len,
|
101 |
+
self.use_gptj_residual,
|
102 |
+
self.memopt_mode,
|
103 |
+
self.quant_data_type,
|
104 |
+
model_path,
|
105 |
+
self.weights_data_type,
|
106 |
+
self.inference_data_type)
|
107 |
+
|
108 |
+
self.build_model = True
|
109 |
+
torch.cuda.empty_cache()
|
110 |
+
|
111 |
+
def forward(self,
|
112 |
+
start_ids: torch.Tensor,
|
113 |
+
start_lengths: torch.Tensor,
|
114 |
+
output_len,
|
115 |
+
beam_width=1,
|
116 |
+
top_k: torch.Tensor = None,
|
117 |
+
top_p: torch.Tensor = None,
|
118 |
+
beam_search_diversity_rate: torch.Tensor = None,
|
119 |
+
temperature: torch.Tensor = None,
|
120 |
+
len_penalty: torch.Tensor = None,
|
121 |
+
repetition_penalty: torch.Tensor = None,
|
122 |
+
random_seed: torch.Tensor = None,
|
123 |
+
return_output_length=False,
|
124 |
+
return_cum_log_probs=0):
|
125 |
+
|
126 |
+
input_len = start_ids.size(1)
|
127 |
+
assert input_len > 0, "input len must be larger than zero. For an unconditional case, use start_id as the first token."
|
128 |
+
|
129 |
+
# Inputs to device
|
130 |
+
input_ids = start_ids.cuda(self.device)
|
131 |
+
input_lengths = start_lengths.cuda(self.device)
|
132 |
+
# outputs: output_ids, output_lengths, output_cum_log_probs (optional)
|
133 |
+
outputs = self.model.forward(input_ids,
|
134 |
+
input_lengths,
|
135 |
+
output_len,
|
136 |
+
beam_width, # optional, can be None
|
137 |
+
top_k, # optional, can be None
|
138 |
+
top_p, # optional, can be None
|
139 |
+
beam_search_diversity_rate, # optional, can be None
|
140 |
+
temperature, # optional, can be None
|
141 |
+
len_penalty, # optional, can be None
|
142 |
+
repetition_penalty, # optional, can be None
|
143 |
+
random_seed, # optional, can be None
|
144 |
+
return_cum_log_probs) # optional, can be None
|
145 |
+
|
146 |
+
if return_cum_log_probs == 0:
|
147 |
+
output_ids, output_lengths = outputs
|
148 |
+
else:
|
149 |
+
output_ids, output_lengths, output_cum_log_probs = outputs
|
150 |
+
if return_output_length:
|
151 |
+
if return_cum_log_probs > 0:
|
152 |
+
return output_ids, output_lengths, output_cum_log_probs
|
153 |
+
else:
|
154 |
+
return output_ids, output_lengths
|
155 |
+
else:
|
156 |
+
return output_ids
|
157 |
+
|
158 |
+
def set_input_tensor(self, input_tensor):
|
159 |
+
"""Set input tensor to be used instead of forward()'s input.
|
160 |
+
|
161 |
+
When doing pipeline parallelism the input from the previous
|
162 |
+
stage comes from communication, not from the input, so the
|
163 |
+
model's forward_step_func won't have it. This function is thus
|
164 |
+
used by internal code to bypass the input provided by the
|
165 |
+
forward_step_func"""
|
166 |
+
self.input_tensor = input_tensor
|
167 |
+
|
168 |
+
def _forward_callback(self, output_ids, seq_lengths, ctx):
|
169 |
+
self.que.put((False, (list(output_ids), list(seq_lengths))))
|
170 |
+
|
171 |
+
|
172 |
+
def _tensormap_dict_to_py_dict(self, tensormap_dict: lyraOp.TensorMap):
|
173 |
+
"""map torch tensormap to py dict."""
|
174 |
+
ret = dict()
|
175 |
+
for k, v in tensormap_dict.items():
|
176 |
+
ret[k] = v
|
177 |
+
|
178 |
+
return ret
|
179 |
+
|
180 |
+
def stream_forward(self,
|
181 |
+
start_ids: torch.Tensor,
|
182 |
+
start_lengths: torch.Tensor,
|
183 |
+
output_len,
|
184 |
+
beam_width=1,
|
185 |
+
top_k: torch.Tensor = None,
|
186 |
+
top_p: torch.Tensor = None,
|
187 |
+
beam_search_diversity_rate: torch.Tensor = None,
|
188 |
+
temperature: torch.Tensor = None,
|
189 |
+
len_penalty: torch.Tensor = None,
|
190 |
+
repetition_penalty: torch.Tensor = None,
|
191 |
+
random_seed: torch.Tensor = None,
|
192 |
+
return_output_length=False,
|
193 |
+
return_cum_log_probs=0):
|
194 |
+
|
195 |
+
# Register callback func to model
|
196 |
+
self.model.registerCallback(self._forward_callback)
|
197 |
+
|
198 |
+
batch_size = start_ids.size(0)
|
199 |
+
input_len = start_ids.size(1)
|
200 |
+
assert input_len > 0, "input len must be larger than zero. For an unconditional case, use start_id as the first token."
|
201 |
+
|
202 |
+
# Inputs to device
|
203 |
+
input_ids = start_ids.cuda(self.device)
|
204 |
+
input_lengths = start_lengths.cuda(self.device)
|
205 |
+
# outputs: output_ids, output_lengths, output_cum_log_probs (optional)
|
206 |
+
|
207 |
+
# Init thread of model inference
|
208 |
+
def _func(enque_output):
|
209 |
+
outputs = self.model.forward(input_ids,
|
210 |
+
input_lengths,
|
211 |
+
output_len,
|
212 |
+
beam_width, # optional, can be None
|
213 |
+
top_k, # optional, can be None
|
214 |
+
top_p, # optional, can be None
|
215 |
+
beam_search_diversity_rate, # optional, can be None
|
216 |
+
temperature, # optional, can be None
|
217 |
+
len_penalty, # optional, can be None
|
218 |
+
repetition_penalty, # optional, can be None
|
219 |
+
random_seed, # optional, can be None
|
220 |
+
return_cum_log_probs) # optional, can be None
|
221 |
+
if enque_output:
|
222 |
+
self.que.put((True, (outputs[0].tolist(), outputs[1].tolist())))
|
223 |
+
|
224 |
+
# Start thread of model inference
|
225 |
+
t = Thread(target=_func,
|
226 |
+
args=(True,),
|
227 |
+
daemon=True)
|
228 |
+
t.start()
|
229 |
+
self.threads[0] = t
|
230 |
+
|
231 |
+
# Generate streaming output
|
232 |
+
while True:
|
233 |
+
# while self.que.qsize() > 1:
|
234 |
+
# self.que.get()
|
235 |
+
|
236 |
+
finish, outputs = self.que.get()
|
237 |
+
|
238 |
+
output_ids, sequence_length = outputs
|
239 |
+
output_ids = torch.tensor(output_ids).view(batch_size, beam_width, -1)
|
240 |
+
sequence_length = torch.tensor(sequence_length).view(batch_size, beam_width)
|
241 |
+
|
242 |
+
if return_output_length:
|
243 |
+
if return_cum_log_probs > 0:
|
244 |
+
yield finish, output_ids, sequence_length, None
|
245 |
+
else:
|
246 |
+
yield finish, output_ids, sequence_length, None
|
247 |
+
else:
|
248 |
+
yield finish, output_ids, None, None
|
249 |
+
|
250 |
+
if finish:
|
251 |
+
for t in self.threads:
|
252 |
+
t.join()
|
253 |
+
while self.que.qsize() > 0:
|
254 |
+
self.que.get()
|
255 |
+
break
|
256 |
+
|
257 |
+
self.model.unRegisterCallback()
|
258 |
+
return finish, output_ids, None, None
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.
|
2 |
+
|
3 |
+
import os
|
4 |
+
from shutil import copyfile
|
5 |
+
from typing import Any, Dict, List, Optional, Tuple
|
6 |
+
|
7 |
+
import sentencepiece as spm
|
8 |
+
from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
|
9 |
+
from transformers.utils import logging
|
10 |
+
|
11 |
+
|
12 |
+
logger = logging.get_logger(__name__)
|
13 |
+
|
14 |
+
VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}
|
15 |
+
|
16 |
+
PRETRAINED_VOCAB_FILES_MAP = {
|
17 |
+
"vocab_file": {},
|
18 |
+
"tokenizer_file": {},
|
19 |
+
}
|
20 |
+
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {}
|
21 |
+
|
22 |
+
|
23 |
+
class BaichuanTokenizer(PreTrainedTokenizer):
|
24 |
+
"""
|
25 |
+
Construct a Baichuan tokenizer. Based on byte-level Byte-Pair-Encoding.
|
26 |
+
|
27 |
+
Args:
|
28 |
+
vocab_file (`str`):
|
29 |
+
Path to the vocabulary file.
|
30 |
+
"""
|
31 |
+
|
32 |
+
vocab_files_names = VOCAB_FILES_NAMES
|
33 |
+
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
34 |
+
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
35 |
+
model_input_names = ["input_ids", "attention_mask"]
|
36 |
+
|
37 |
+
def __init__(
|
38 |
+
self,
|
39 |
+
vocab_file,
|
40 |
+
unk_token="<unk>",
|
41 |
+
bos_token="<s>",
|
42 |
+
eos_token="</s>",
|
43 |
+
pad_token=None,
|
44 |
+
sp_model_kwargs: Optional[Dict[str, Any]] = None,
|
45 |
+
add_bos_token=True,
|
46 |
+
add_eos_token=False,
|
47 |
+
clean_up_tokenization_spaces=False,
|
48 |
+
**kwargs,
|
49 |
+
):
|
50 |
+
self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
|
51 |
+
bos_token = AddedToken(bos_token, lstrip=False, rstrip=False) if isinstance(bos_token, str) else bos_token
|
52 |
+
eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
|
53 |
+
unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
|
54 |
+
pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
|
55 |
+
super().__init__(
|
56 |
+
bos_token=bos_token,
|
57 |
+
eos_token=eos_token,
|
58 |
+
unk_token=unk_token,
|
59 |
+
pad_token=pad_token,
|
60 |
+
add_bos_token=add_bos_token,
|
61 |
+
add_eos_token=add_eos_token,
|
62 |
+
sp_model_kwargs=self.sp_model_kwargs,
|
63 |
+
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
|
64 |
+
**kwargs,
|
65 |
+
)
|
66 |
+
self.vocab_file = vocab_file
|
67 |
+
self.add_bos_token = add_bos_token
|
68 |
+
self.add_eos_token = add_eos_token
|
69 |
+
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
|
70 |
+
self.sp_model.Load(vocab_file)
|
71 |
+
|
72 |
+
def __getstate__(self):
|
73 |
+
state = self.__dict__.copy()
|
74 |
+
state["sp_model"] = None
|
75 |
+
return state
|
76 |
+
|
77 |
+
def __setstate__(self, d):
|
78 |
+
self.__dict__ = d
|
79 |
+
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
|
80 |
+
self.sp_model.Load(self.vocab_file)
|
81 |
+
|
82 |
+
@property
|
83 |
+
def vocab_size(self):
|
84 |
+
"""Returns vocab size"""
|
85 |
+
return self.sp_model.get_piece_size()
|
86 |
+
|
87 |
+
def get_vocab(self):
|
88 |
+
"""Returns vocab as a dict"""
|
89 |
+
vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
|
90 |
+
vocab.update(self.added_tokens_encoder)
|
91 |
+
return vocab
|
92 |
+
|
93 |
+
def _tokenize(self, text):
|
94 |
+
"""Returns a tokenized string."""
|
95 |
+
return self.sp_model.encode(text, out_type=str)
|
96 |
+
|
97 |
+
def _convert_token_to_id(self, token):
|
98 |
+
"""Converts a token (str) in an id using the vocab."""
|
99 |
+
return self.sp_model.piece_to_id(token)
|
100 |
+
|
101 |
+
def _convert_id_to_token(self, index):
|
102 |
+
"""Converts an index (integer) in a token (str) using the vocab."""
|
103 |
+
token = self.sp_model.IdToPiece(index)
|
104 |
+
return token
|
105 |
+
|
106 |
+
def convert_tokens_to_string(self, tokens):
|
107 |
+
"""Converts a sequence of tokens (string) in a single string."""
|
108 |
+
current_sub_tokens = []
|
109 |
+
out_string = ""
|
110 |
+
prev_is_special = False
|
111 |
+
for i, token in enumerate(tokens):
|
112 |
+
# make sure that special tokens are not decoded using sentencepiece model
|
113 |
+
if token in self.all_special_tokens:
|
114 |
+
if not prev_is_special and i != 0:
|
115 |
+
out_string += " "
|
116 |
+
out_string += self.sp_model.decode(current_sub_tokens) + token
|
117 |
+
prev_is_special = True
|
118 |
+
current_sub_tokens = []
|
119 |
+
else:
|
120 |
+
current_sub_tokens.append(token)
|
121 |
+
prev_is_special = False
|
122 |
+
out_string += self.sp_model.decode(current_sub_tokens)
|
123 |
+
return out_string
|
124 |
+
|
125 |
+
def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
|
126 |
+
"""
|
127 |
+
Save the vocabulary and special tokens file to a directory.
|
128 |
+
|
129 |
+
Args:
|
130 |
+
save_directory (`str`):
|
131 |
+
The directory in which to save the vocabulary.
|
132 |
+
|
133 |
+
Returns:
|
134 |
+
`Tuple(str)`: Paths to the files saved.
|
135 |
+
"""
|
136 |
+
if not os.path.isdir(save_directory):
|
137 |
+
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
|
138 |
+
return
|
139 |
+
out_vocab_file = os.path.join(
|
140 |
+
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
|
141 |
+
)
|
142 |
+
|
143 |
+
if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
|
144 |
+
copyfile(self.vocab_file, out_vocab_file)
|
145 |
+
elif not os.path.isfile(self.vocab_file):
|
146 |
+
with open(out_vocab_file, "wb") as fi:
|
147 |
+
content_spiece_model = self.sp_model.serialized_model_proto()
|
148 |
+
fi.write(content_spiece_model)
|
149 |
+
|
150 |
+
return (out_vocab_file,)
|
151 |
+
|
152 |
+
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
|
153 |
+
bos_token_id = [self.bos_token_id] if self.add_bos_token else []
|
154 |
+
eos_token_id = [self.eos_token_id] if self.add_eos_token else []
|
155 |
+
|
156 |
+
output = bos_token_id + token_ids_0 + eos_token_id
|
157 |
+
|
158 |
+
if token_ids_1 is not None:
|
159 |
+
output = output + bos_token_id + token_ids_1 + eos_token_id
|
160 |
+
|
161 |
+
return output
|
162 |
+
|
163 |
+
def get_special_tokens_mask(
|
164 |
+
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
|
165 |
+
) -> List[int]:
|
166 |
+
"""
|
167 |
+
Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
|
168 |
+
special tokens using the tokenizer `prepare_for_model` method.
|
169 |
+
|
170 |
+
Args:
|
171 |
+
token_ids_0 (`List[int]`):
|
172 |
+
List of IDs.
|
173 |
+
token_ids_1 (`List[int]`, *optional*):
|
174 |
+
Optional second list of IDs for sequence pairs.
|
175 |
+
already_has_special_tokens (`bool`, *optional*, defaults to `False`):
|
176 |
+
Whether or not the token list is already formatted with special tokens for the model.
|
177 |
+
|
178 |
+
Returns:
|
179 |
+
`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
|
180 |
+
"""
|
181 |
+
if already_has_special_tokens:
|
182 |
+
return super().get_special_tokens_mask(
|
183 |
+
token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
|
184 |
+
)
|
185 |
+
|
186 |
+
bos_token_id = [1] if self.add_bos_token else []
|
187 |
+
eos_token_id = [1] if self.add_eos_token else []
|
188 |
+
|
189 |
+
if token_ids_1 is None:
|
190 |
+
return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
|
191 |
+
return (
|
192 |
+
bos_token_id
|
193 |
+
+ ([0] * len(token_ids_0))
|
194 |
+
+ eos_token_id
|
195 |
+
+ bos_token_id
|
196 |
+
+ ([0] * len(token_ids_1))
|
197 |
+
+ eos_token_id
|
198 |
+
)
|
199 |
+
|
200 |
+
def create_token_type_ids_from_sequences(
|
201 |
+
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
202 |
+
) -> List[int]:
|
203 |
+
"""
|
204 |
+
Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
|
205 |
+
sequence pair mask has the following format:
|
206 |
+
|
207 |
+
```
|
208 |
+
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
|
209 |
+
| first sequence | second sequence |
|
210 |
+
```
|
211 |
+
|
212 |
+
if token_ids_1 is None, only returns the first portion of the mask (0s).
|
213 |
+
|
214 |
+
Args:
|
215 |
+
token_ids_0 (`List[int]`):
|
216 |
+
List of ids.
|
217 |
+
token_ids_1 (`List[int]`, *optional*):
|
218 |
+
Optional second list of IDs for sequence pairs.
|
219 |
+
|
220 |
+
Returns:
|
221 |
+
`List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
|
222 |
+
"""
|
223 |
+
bos_token_id = [self.bos_token_id] if self.add_bos_token else []
|
224 |
+
eos_token_id = [self.eos_token_id] if self.add_eos_token else []
|
225 |
+
|
226 |
+
output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)
|
227 |
+
|
228 |
+
if token_ids_1 is not None:
|
229 |
+
output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)
|
230 |
+
|
231 |
+
return output
|
232 |
+
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## 模型和环境
|
2 |
+
|
3 |
+
### 构建环境
|
4 |
+
```shell
|
5 |
+
# 本地获取ngc pytorch cuda12原生镜像
|
6 |
+
docker pull nvcr.io/nvidia/pytorch:23.02-py3
|
7 |
+
|
8 |
+
# 启动容器
|
9 |
+
docker run --gpus all -itd --rm --name lyrallms_cu12 nvcr.io/nvidia/pytorch:23.02-py3
|
10 |
+
docker exec -it lyrallms_cu12 bash
|
11 |
+
```
|
12 |
+
|
13 |
+
获取代码后安装依赖
|
14 |
+
```shell
|
15 |
+
pip install -r requirements.txt
|
16 |
+
```
|
17 |
+
|
18 |
+
将`lyralib`下对应cuda版本的[so文件](../../lyralib/sm80) 复制到`/usr/lib/lyralib`下。
|
19 |
+
|
20 |
+
## 推理使用
|
21 |
+
|
22 |
+
### 使用核心片段
|
23 |
+
|
24 |
+
```python
|
25 |
+
from lyra_llama import lyraLlama
|
26 |
+
|
27 |
+
model_path = 'XXX' # 包含转换后的模型参数,配置,tokenizer文件目录
|
28 |
+
data_type = 'fp16'
|
29 |
+
memopt_mode = 0 # 如需使用MEMOPT模式推理, memopt_mode=1
|
30 |
+
|
31 |
+
# 加载加速后的模型,C++ 底层已经掩盖,依赖加速编译的 /usr/lib/ftlib 下的 so 库,已经打在镜像中
|
32 |
+
# 模型加载需要花一些时间,因为现在 IO 参数是多个小文件,建议把下载的模型参数解压到本地磁盘
|
33 |
+
model = lyraLlama(model_path, data_type, memopt_mode)
|
34 |
+
|
35 |
+
# 输入, 若有多个输入,可batch 推理,prompts 支持列表,这里为模拟多个输入,直接复制 32 分,batchsize 达到32
|
36 |
+
prompts = '列出3个不同的机器学习算法,并说明它们的适用范围.'
|
37 |
+
prompts = [prompts,]*64
|
38 |
+
|
39 |
+
# 生成, 最大长度可自行调整,这里设置 150,模型遇到 end token 或者达到最大计算长度时会停止当前批次计算.
|
40 |
+
# 因为 LLaMA-ZIYA 词表是按字切分,导致存储和计算量非常大,若是长序列生成情况,请自行缩小 batch_size
|
41 |
+
output_texts = model.generate(prompts, output_length=150, do_sample=False, top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0)
|
42 |
+
|
43 |
+
# 输出查看, 虽然 输入字符串也是参与计算,用于评估模型吞吐量和计算速度。
|
44 |
+
# 这里为了显示应用方便, output_texts 中每个样本的输出已经去掉了输入字符串
|
45 |
+
print(output_texts)
|
46 |
+
|
47 |
+
# 输出示例
|
48 |
+
>>> Inputs: 列出3个不同的机器学习算法,并说明它们的适用范围.
|
49 |
+
>>> Outputs:
|
50 |
+
1. 线性回归(Linear Regression):适用于解决两个变量之间的关系问题,例如预测房价或销售额。它可以用于回归分析和回归聚类分析。
|
51 |
+
2. 决策树(Decision Tree):适用于解决分类和回归问题。它可以用于分类、回归、异常检测和聚类分析。
|
52 |
+
3. 神经网络(Neural Network):适用于解决分类、回归和聚类问题。它可以用于图像识别、语音识别
|
53 |
+
```
|
54 |
+
|
55 |
+
### demo 脚本
|
56 |
+
|
57 |
+
`examples/batch_demo.py` 中有类似上面的使用示例,做了简单的跑速测试,考虑大家对 token 的理解各有不同,我们这里直接按字符数来评估,不同 token 的理解可以自行根据生成结果字符数去观测。注意:在 `LLaMA-ZIYA` 中,tokenizer 对中文的切分,约等于一个字是一个 token.
|
58 |
+
|
59 |
+
更多测试脚本及用法详见参考 `examples` 下的 [README.md](./examples/README.md) ,如:
|
60 |
+
- Batch推理
|
61 |
+
- 不等长Batch推理
|
62 |
+
- Batch流式推理
|
63 |
+
|
64 |
+
## 自定义模型参数
|
65 |
+
|
66 |
+
已提供转换脚本 `parse_model_params.py` 可以将 LLaMa 模型的 HuggingFace 格式参数,转换为加速版本下各层模型需要的模型参数。因为 LLaMa 有很多变体,所以这里我们提供一个模型名字 `-model_name` 的转换参数,可以自行填入,以便生成可区分的 config.in 文件。
|
67 |
+
|
68 |
+
```shell
|
69 |
+
python parse_model_params.py -i your_model_dir -o output_dir -t_g 1 -i_g 1 -weight_data_type "fp16" -model_name "llama"
|
70 |
+
```
|
71 |
+
|
72 |
+
转换后的模型参数将以每个参数一个文件的形式存放在 `output_dir/{i_g}-gpu-{weight_data_type}` 下,分割的形式有助于并发 IO,但缺陷是不便捷。
|
73 |
+
|
74 |
+
同时该转换脚本还会将同目录下 tokenizer_source 里的 `tokenizer.model` `tokenizer.json` `special_tokens_map.json` `tokenizer_config.json` 四个文件拷贝到 output_dir 下,以便后续使用加速模型时直接能初始化对应的 加速后的 LLaMa 的 tokenizer.
|
75 |
+
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## 测试脚本
|
2 |
+
|
3 |
+
### batch推理
|
4 |
+
|
5 |
+
```sh
|
6 |
+
export FMHA_VERSION=V2 # 如使用旧版本Attn,设置 FMHA_VERSION=OFF
|
7 |
+
export KV_CACHE_DTYPE=DEFAULT # 如使用KVCache Int8,设置 KV_CACHE_DTYPE=INT8
|
8 |
+
|
9 |
+
model_path=ModelPath # 转换后模型所处文件夹路径 (1-gpu-fp16.bin等文件所在目录)
|
10 |
+
kv_qparams_fpath=KVScalesPath # (可选) 校准后的KVCache量化Scales文件路径 (past_kv_scale.bin)
|
11 |
+
|
12 |
+
data_type=fp16 # 权重保存精度
|
13 |
+
memopt_mode=0 # MEMOPT模式: 0/1
|
14 |
+
quant_type="int8" # 量化精度: int4/int8
|
15 |
+
max_output_length=256
|
16 |
+
warmups=1
|
17 |
+
avgnums=1
|
18 |
+
|
19 |
+
python batch_demo.py --model-path $model_path\
|
20 |
+
--tokenizer-path $model_path\
|
21 |
+
--data-type $data_type\
|
22 |
+
--memopt_mode $memopt_mode\
|
23 |
+
--quant-type ${quant_type}\
|
24 |
+
--max-output-length $max_output_length\
|
25 |
+
--warmups $warmups\
|
26 |
+
--avgnums $avgnums\
|
27 |
+
--kvqparams-fpath $kv_qparams_fpath
|
28 |
+
```
|
29 |
+
|
30 |
+
### batch流式推理
|
31 |
+
|
32 |
+
```sh
|
33 |
+
export FMHA_VERSION=V2 # 如使用旧版本Attn,设置为OFF
|
34 |
+
export KV_CACHE_DTYPE=DEFAULT # 如使用KVCache Int8,设置 KV_CACHE_DTYPE=INT8
|
35 |
+
export LYRA_STREAM_CB_STEP=30 # 回调函数间隔步数
|
36 |
+
|
37 |
+
model_path=ModelPath # 转换后模型所处文件夹路径 (1-gpu-fp16.bin等文件所在目录)
|
38 |
+
kv_qparams_fpath=KVScalesPath # (可选) 校准后的KVCache量化Scales文件路径 (past_kv_scale.bin)
|
39 |
+
|
40 |
+
data_type=fp16 # 权重保存精度
|
41 |
+
memopt_mode=0 # MEMOPT模式: 0/1
|
42 |
+
quant_type="int8" # 量化精度: int4/int8
|
43 |
+
max_output_length=256
|
44 |
+
warmups=1
|
45 |
+
avgnums=1
|
46 |
+
|
47 |
+
python batch_stream_demo.py --model-path $model_path\
|
48 |
+
--tokenizer-path $model_path\
|
49 |
+
--data-type $data_type\
|
50 |
+
--memopt_mode $memopt_mode\
|
51 |
+
--quant-type ${quant_type}\
|
52 |
+
--max-output-length $max_output_length\
|
53 |
+
--warmups $warmups\
|
54 |
+
--avgnums $avgnums\
|
55 |
+
--kvqparams-fpath $kv_qparams_fpath
|
56 |
+
```
|
57 |
+
### 不等长batch推理
|
58 |
+
|
59 |
+
```sh
|
60 |
+
export FMHA_VERSION=V2 # 如使用旧版本Attn,设置为OFF
|
61 |
+
export KV_CACHE_DTYPE=DEFAULT # 如使用KVCache Int8,设置 KV_CACHE_DTYPE=INT8
|
62 |
+
|
63 |
+
model_path=ModelPath # 转换后模型所处文件夹路径 (1-gpu-fp16.bin等文件所在目录)
|
64 |
+
kv_qparams_fpath=KVScalesPath # (可选) 校准后的KVCache量化Scales文件路径 (past_kv_scale.bin)
|
65 |
+
|
66 |
+
prompt_filepath=valen_prompts.json # 用于测试的不等长prompts文件,从中采样
|
67 |
+
data_type=fp16 # 权重保存精度
|
68 |
+
memopt_mode=0 # MEMOPT模式: 0/1
|
69 |
+
quant_type="int8" # 量化精度: int4/int8
|
70 |
+
max_output_length=256
|
71 |
+
warmups=1
|
72 |
+
avgnums=1
|
73 |
+
|
74 |
+
python random_batch_demo.py --model-path $model_path\
|
75 |
+
--tokenizer-path $model_path\
|
76 |
+
--data-type $data_type\
|
77 |
+
--memopt_mode $memopt_mode\
|
78 |
+
--quant-type ${quant_type}\
|
79 |
+
--prompt_filepath $prompt_filepath\
|
80 |
+
--max-output-length $max_output_length\
|
81 |
+
--warmups $warmups\
|
82 |
+
--avgnums $avgnums\
|
83 |
+
--kvqparams-fpath $kv_qparams_fpath
|
84 |
+
```
|
85 |
+
|
86 |
+
## Prompt例子
|
87 |
+
|
88 |
+
### 短序列
|
89 |
+
```
|
90 |
+
北京的景点:故宫、天坛、万里长城等。\n深圳的景点:
|
91 |
+
```
|
92 |
+
```
|
93 |
+
今天天气大概 25度,有点小雨,吹着风,我想去户外散步,应该穿什么样的衣服 裤子鞋子搭配
|
94 |
+
```
|
95 |
+
|
96 |
+
### 1K序列
|
97 |
+
```
|
98 |
+
《Bela Lugosi's Dead 》是英国后朋克乐队Bauhaus的首张单曲,于 1979 年 8 月 6 日在Small Wonder厂牌上发行。[4]它通常被认为是第一张哥特式摇滚唱片。\n1979 年 1 月 26 日,“Bela Lugosi's Dead”在威灵伯勒的贝克录音室进行了六个小时的“录音室现场”录制。这是他们在乐队成立六周后一起录制的第一首歌曲。[6]所有四位乐队成员都被认为是这首歌的作者:主唱彼得·墨菲、吉他手丹尼尔·阿什、鼓手凯文·哈斯金斯和贝斯手大卫·J (大卫·哈斯金斯)。David J 声称这首歌的歌词是他写的。[5] “Bela Lugosi's Dead”的替代版本还包括他们下一首单曲“ Dark Entries ”的早期演示录音的一部分。\n\n在同一场会议中还录制了另外四首歌曲:“Boys”;“咬我的臀部”;“Some Faces”和斯卡雷鬼曲调“Harry”,这是关于Blondie主唱Deborah Harry的。[7] [8]关于这次会议,凯文·哈斯金斯 (Kevin Haskins) 说,“那里有力量流行音乐,还有斯卡。我们试图找到我们的声音。” [9]\n\n在那次录制期间录制的歌曲中(除了“Bela Lugosi's Dead”),只有“Harry”获得了官方发行;1982年作为单曲“ Kick in the Eye ”的B面。1979 年晚些时候在 Beck Studios 录制的《Boys》版本被用作原版单曲《Bela Lugosi's Dead》的 B 面。[10]其余曲目,��括“Boys”的原始录音,一直未发行,直到 2018 年The Bela Session以黑胶唱片和CD 形式发行,并可供乐队数字下载。[11]在额外的曲目中,《经典摇滚》杂志写道:“其余的材料发现乐队正在摸索方向,甚至触及了斯卡。”\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:
|
99 |
+
```
|
100 |
+
|
101 |
+
### 2K序列
|
102 |
+
```
|
103 |
+
根据所给刑事法律文书中的案情描述,预测被告人被判的罪名。你需要从这些罪名中选择最恰当的一项:妨害公务,寻衅滋事,盗窃、侮辱尸体,危险物品肇事,非法采矿,组织、强迫、引诱、容留、介绍卖淫,开设赌场,聚众斗殴,绑架,非法持有毒品,销售假冒注册商标的商品,容留他人吸毒,假冒注册商标,交通肇事,破坏电力设备,组织卖淫,合同诈骗,走私武器、弹药,抢劫,非法处置查封、扣押、冻结的财产,以危险方法危害公共安全,过失投放危险物质,非法制造、买卖、运输、邮寄、储存枪支、弹药、爆炸物,伪造、变造、买卖武装部队公文、证件、印章,持有、使用假币,重婚,聚众冲击国家机关,生产、销售伪劣农药、兽药、化肥、种子,收买被拐卖的妇女、儿童,聚众哄抢,重大劳动安全事故,侵占,包庇毒品犯罪分子,虚报注册资本,违法发放贷款,制造、贩卖、传播淫秽物品,窝藏、包庇,帮助毁灭、伪造证据,放火,强奸,非法携带枪支、弹药、管制刀具、危险物品危及公共安全,伪造、变造金融票证,爆炸,玩忽职守,对非国家工作人员行贿,伪造、倒卖伪造的有价票证,私分国有资产,非法收购、运输、加工、出售国家重点保护植物、国家重点保护植物制品,生产、销售假药,挪用特定款物,过失致人死亡,走私国家禁止进出口的货物、物品,非法制造、买卖、运输、储存危险物质,洗钱,骗取贷款、票据承兑、金融票证,非法买卖制毒物品,非法买卖、运输、携带、持有毒品原植物种子、幼苗,生产、销售有毒、有害食品,滥用职权,招收公务员、学生徇私舞弊,诬告陷害,非法获取国家秘密,非法行医,非法收购、运输、出售珍贵、濒危野生动物、珍贵、濒危野生动物制品,非法出售发票,行贿,高利转贷,非法吸收公众存款,传播淫秽物品,非法进行节育手术,盗伐林木,聚众扰乱社会秩序,走私、贩卖、运输、制造毒品,滥伐林木,赌博,非法经营,生产、销售不符合安全标准的食品,提供侵入、非法控制计算机信息系统程序、工具,倒卖文物,窃取、收买、非法提供信用卡信息,盗掘古文化遗址、古墓葬,协助组织卖淫,破坏广播电视设施、公用电信设施,走私普通货物、物品,逃税,破坏监管秩序,失火,受贿,组织、领导、参加黑社会性质组织,票据诈骗,非法制造、销售非法制造的注册商标标识,侵犯著作权,伪造、变造、买卖国家机关公文、证件、印章,徇私舞弊不征、少征税款,强迫劳动,贷款诈骗,劫持船只、汽车,诈骗,非法种植毒品原植物,非法狩猎,挪用资金,非法收购、运输盗伐、滥伐的林木,出售、购买、运输假币,抢夺,虐待被监管人,窝藏、转移、收购、销售赃物,破坏计算机信息系统,制作、复制、出版、贩卖、传播淫秽物品牟利,拒不支付劳动报酬,盗窃、抢夺枪支、弹药、爆炸物,强迫他人吸毒,走私珍贵动物、珍贵动物制品,虐待,非法获取公民个人信息,破坏交通设施,非法转让、倒卖土地使用权,非法捕捞水产品,非法占用农用地,非法制造、出售非法制造的发票,非法持有、私藏枪支、弹药,集资诈骗,强迫卖淫,伪造公司、企业、事业单位、人民团体印章,利用影响力受贿,编造、故意传播虚假恐怖信息,介绍贿赂,传播性病,拐卖妇女、儿童,倒卖车票、船票,窝藏、转移、隐瞒毒品、毒赃,徇私舞弊不移交刑事案件,过失损坏广播电视设施、公用电信设施,动植物检疫徇私舞弊,破坏交通工具,猥亵儿童,挪用公款,伪造货币,冒充军人招摇撞骗,非法采伐、毁坏国家重点保护植物,故意毁坏财物,非法拘禁,招摇撞骗,伪造、变造居民身份证,徇私枉法,非法生产、买卖警用装备,掩饰、隐瞒犯罪所得、犯罪所得收益,生产、销售伪劣产品,破坏生产经营,帮助犯罪分子逃避处罚,贪污,投放危险物质,持有伪造的发票,危险驾驶,妨害作证,非法猎捕、杀害珍贵、濒危野生动物,重大责任事故,诽谤,虚开发票,引诱���教唆、欺骗他人吸毒,脱逃,扰乱无线电通讯管理秩序,保险诈骗,非法生产、销售间谍专用器材,非法组织卖血,强迫交易,串通投标,破坏易燃易爆设备,传授犯罪方法,妨害信用卡管理,拐骗儿童,单位行贿,打击报复证人,拒不执行判决、裁定,经济犯,金融凭证诈骗,虚开增值税专用发票、用于骗取出口退税、抵扣税款发票,走私废物,组织、领导传销活动,单位受贿,盗窃、抢夺枪支、弹药、爆炸物、危险物质,过失以危险方法危害公共安全,过失致人重伤,引诱、容留、介绍卖淫,遗弃,走私,信用卡诈骗,对单位行贿,故意杀人,聚众扰乱公共场所秩序、交通秩序,盗窃,故意伤害,非法侵入住宅,强制猥亵、侮辱妇女,伪证,污染环境,巨额财产来源不明,非国家工作人员受贿,侮辱,隐匿、故意销毁会计凭证、会计帐簿、财务会计报告,过失损坏武器装备、军事设施、军事通信,敲诈勒索,职务侵占。\n经审理查明:2013年9月底的一天晚上,被告人陆某德酒后经过沭阳县某镇某村张某荣家时,发现张某荣家没有人,即用石头砸破张某荣家房门玻璃,打开房门进入张某荣家中。因进入张某荣时被房门遗留的玻璃划伤,被告人陆某德在张某荣家北屋门和北屋东首间墙面遗留两处血迹。2014年1月7日,被告人陆某德被公安民警从其家中传唤到案,并如实供述自己的罪行。上述事实,有公诉机关提交的,经过庭审质证的,且均具有证据证明效力的以下证据予以证明:被告人陆某德供述其非法侵入他人住宅的时间、地点、经过等事实。该供述得到了被害人张某荣的陈述、证人周某花、李某华等人的证言、法庭科学DNA检验鉴定书、现场勘验检查笔录、现场图、现场照片等证据予以证实,足以认定。刑事判决书证明证明了被告人陆某德有前科;公安机关出具的“发破案经过”及“抓获经过”证明了本案案发及被告人陆某德的归案情况。\n
|
104 |
+
```
|
105 |
+
|
106 |
+
### 4K序列
|
107 |
+
```
|
108 |
+
<context>/*\n * Implement the \"Falling Rocks\" game in the text console. \n * A small dwarf stays at the bottom of the screen and can \n * move left and right (by the arrows keys). A number of rocks \n * of different sizes and forms constantly fall down and you \n * need to avoid a crash.\n * Rocks are the symbols ^, @, *, &, +, %, $, #, !, ., ;, - distributed \n * with appropriate density. The dwarf is (O). \n * Ensure a constant game speed by Thread.Sleep(150).\n * Implement collision detection and scoring system.\n*/\n\nusing System;\nusing System.Threading;\nusing System.Collections.Generic;\nusing System.Threading.Tasks;\n\nclass FallingRocks\n{\n struct Position\n {\n public int X, Y;\n public string symbol;\n public ConsoleColor color;\n\n public Position(int x, int y, string symbol, ConsoleColor color)\n {\n this.X = x;\n this.Y = y;\n this.symbol = symbol;\n this.color = color;\n }\n }\n\n static void Main()\n {\n Thread oThread = new Thread(new ThreadStart(Mainn));\n Thread aThread = new Thread(new ThreadStart(Clr));\n \n aThread.Start();\n oThread.Start();\n oThread.Join();\n aThread.Join();\n }\n\n static void Clr()\n {\n while (true)\n {\n Thread.Sleep(10);\n Console.Clear();\n }\n }\n static void Mainn()\n {\n //Random generator for rocks color, position and symbol\n Random randomGenerator = new Random();\n \n //Sleep time for the game loop\n double sleepTime = 150;\n //Console settings\n Console.CursorVisible = false;\n Console.BufferHeight = Console.WindowHeight;\n \n //number of rocks in the Array rocks\n int rocksCount = 0;\n\n //array with the symbols of the rocks\n string[] symbols = new string[] { \"^\", \"@\", \"*\", \"&\", \"+\", \"%\", \"$\", \"#\", \"!\", \".\", \";\" };\n \n //array with colors for the rocks\n ConsoleColor[] colors = new ConsoleColor[] {ConsoleColor.Yellow, ConsoleColor.White, ConsoleColor.Gray};\n \n //array with rocks\n Position[] rocks = new Position[200];\n \n //position for the dwarf\n Position dwarf = new Position(10, Console.WindowHeight - 1,\"(0)\",ConsoleColor.Red);\n \n //bool variable to say when the game loop to be over\n bool gameLoop = true;\n\n //variable keeping the score\n ulong score = 0;\n\n //the game loop\n while (gameLoop)\n {\n //score is growing as the cycle runs\n score++;\n\n //setting the Y component for all the rocks in the array to grow with 2\n for (int i = 0; i <= rocks.Length - 1; i++)\n {\n rocks[i].Y = rocks[i].Y + 2;\n }\n\n //generating rocks\n for (int x = 0; x <= randomGenerator.Next(2, 4); x++)\n {\n rocks[rocksCount] = new Position(randomGenerator.Next(x * 15, x * 15 + 20), 0\n , symbols[randomGenerator.Next(0, symbols.Length - 1)]\n , colors[randomGenerator.Next(0, colors.Length - 1)]);\n if (rocksCount >= 199) rocksCount = 0;\n rocksCount++;\n }\n\n //printing the rocks and other stuff\n foreach (var item in rocks)\n {\n foreach (var rock in rocks)\n {\n //checking for colision\n if ((rock.X >= dwarf.X) && (rock.X <= (dwarf.X + 2)) && (rock.Y == dwarf.Y))\n {\n gameLoop = false;\n break;\n }\n } \n\n //printing the rocks\n if (item.Y < Console.WindowHeight)\n { \n Console.SetCursorPosition(item.X, item.Y);\n Console.ForegroundColor = item.color;\n Console.Write(item.symbol);\n }\n\n //checking for key pressed\n if (Console.KeyAvailable)\n {\n ConsoleKeyInfo pressedKey = Console.ReadKey();\n if (pressedKey.Key == ConsoleKey.RightArrow)\n {\n if(dwarf.X < Console.WindowWidth - 20)\n {\n //removing the old positions of the dwarf and increasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n 
Console.Write(\" \");\n dwarf.X++;\n }\n }\n if (pressedKey.Key == ConsoleKey.LeftArrow) \n {\n if(dwarf.X >= 1)\n {\n //removing the old positions of the dwarf and decreasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.Write(\" \");\n dwarf.X--;\n }\n }\n }\n }\n \n //printing the dwarf\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.ForegroundColor = dwarf.color;\n Console.Write(dwarf.symbol); \n \n //sleeping the loop for sometime\n //Thread.Sleep((int)sleepTime);\n\n //reducing the sleep time of the loop\n sleepTime -= 0.5;\n\n \n //removing the rocks \n //foreach (var item in rocks)\n //{\n // if (item.Y < Console.WindowHeight)\n // {\n // Console.SetCursorPosition(item.X, item.Y);\n // Console.Write(\" \");\n // }\n //} \n }\n //Printing the score after the game is over\n Console.Clear();\n Console.WriteLine(\"Game over! Your score is: \" + score);\n\n }\n}\n</context>\n\n这个\"Falling Rocks\"游戏是如何工作的呢?可以详细解释一下代码的运作机制吗? \n\n\n\n
```

### 8K Sequence
```
<context># -*- coding: utf-8 -*-\n# This code is part of Amoco\n# Copyright (C) 2021 Axel Tillequin ([email protected])\n# published under GPLv2 license\nfrom amoco.arch.tricore import env\nfrom amoco.arch.core import *\n# -------------------------------------------------------\n# from TriCore TC1.6.2 core architecture manual V1.2.2\n# (32-bit Unified Processor Core), 2020-01-15\n# define all except FPU instructions\n# -------------------------------------------------------\nISPECS = []\n@ispec("32<[ disp1(16) disp2(8) {6d} ]", mnemonic="CALL")\n@ispec("32<[ disp1(16) disp2(8) {61} ]", mnemonic="FCALL")\n@ispec("32<[ disp1(16) disp2(8) {1d} ]", mnemonic="J")\n@ispec("32<[ disp1(16) disp2(8) {5d} ]", mnemonic="JL")\ndef tricore_branch(obj, disp1, disp2):\n v = env.cst(((disp2<<16)+disp1)<<1,24)\n obj.operands = [disp.signextend(32)]\n obj.type = type_control_flow\n@ispec("32<[ disp1(16) disp2(8) {ed} ]", mnemonic="CALLA")\n@ispec("32<[ disp1(16) disp2(8) {e1} ]", mnemonic="FCALLA")\n@ispec("32<[ disp1(16) disp2(8) {9d} ]", mnemonic="JA")\n@ispec("32<[ disp1(16) disp2(8) {dd} ]", mnemonic="JLA")\ndef tricore_branch(obj, disp1, disp2):\n v = env.cst((disp2<<16)+disp1,24)\n addr = composer([env.bit0,v[0:20],env.cst(0,7),v[20:24]])\n obj.operands = [addr]\n obj.type = type_control_flow\n@ispec("32<[ ---- {00} ---- ---- a(4) {2d} ]", mnemonic="CALLI")\n@ispec("32<[ ---- {01} ---- ---- a(4) {2d} ]", mnemonic="FCALLI")\n@ispec("32<[ ---- {03} ---- ---- a(4) {2d} ]", mnemonic="JI")\n@ispec("32<[ ---- {02} ---- ---- a(4) {2d} ]", mnemonic="JLI")\ndef tricore_branchI(obj, a):\n src = env.A[a]\n obj.operands = [src]\n obj.type = type_control_flow\n@ispec("16<[ disp(8) {5c} ]", mnemonic="CALL")\n@ispec("16<[ disp(8) {3c} ]", mnemonic="J")\n@ispec("16<[ disp(8) {ee} ]", mnemonic="JNZ")\n@ispec("16<[ disp(8) {6e} ]", mnemonic="JZ")\ndef tricore_branch(obj, disp):\n disp = env.cst(disp<<1,8)\n obj.operands = [disp.signextend(32)]\n obj.type = type_control_flow\n@ispec("32<[ ---- 0000000 const9(9) ---- {ad} ]", mnemonic="BISR")\n@ispec("32<[ ---- 0000100 const9(9) ---- {ad} ]", mnemonic="SYSCALL")\ndef tricore_system(obj, const9):\n obj.operands = [env.cst(const9,9)]\n obj.type = type_system\n@ispec("32<[ c(4) {1c} ---- b(4) ---- {0b} ]", mnemonic="ABS")\n@ispec("32<[ c(4) {5c} ---- b(4) ---- {0b} ]", mnemonic="ABS_B")\n@ispec("32<[ c(4) {7c} ---- b(4) ---- {0b} ]", mnemonic="ABS_H")\n@ispec("32<[ c(4) {1d} ---- b(4) ---- {0b} ]", mnemonic="ABSS")\n@ispec("32<[ c(4) {7d} ---- b(4) ---- {0b} ]", mnemonic="ABSS_H")\n@ispec("32<[ c(4) {1f} ---- b(4) ---- {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b):\n src = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {80} ---- b(4) ---- {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b):\n src = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, src.signextend(64)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {81} ---- b(4) a(4) {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b, a):\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, composer([src2,src1])]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {0e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF")\n@ispec("32<[ c(4) {4e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF_B")\n@ispec("32<[ c(4) {6e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF_H")\n@ispec("32<[ c(4) {0f} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIFS")\n@ispec("32<[ c(4) {6f} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIFS_H")\n@ispec("32<[ c(4) {00} ---- b(4) 
a(4) {0b} ]", mnemonic="ADD")\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {0b} ]", mnemonic="ADD_B")\n@ispec("32<[ c(4) {60} ---- b(4) a(4) {0b} ]", mnemonic="ADD_H")\n@ispec("32<[ c(4) {05} ---- b(4) a(4) {0b} ]", mnemonic="ADDC")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {0b} ]", mnemonic="ADDS")\n@ispec("32<[ c(4) {62} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_H")\n@ispec("32<[ c(4) {63} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_HU")\n@ispec("32<[ c(4) {03} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_U")\n@ispec("32<[ c(4) {04} ---- b(4) a(4) {0b} ]", mnemonic="ADDX")\n@ispec("32<[ c(4) {08} ---- b(4) a(4) {0f} ]", mnemonic="AND")\n@ispec("32<[ c(4) {20} ---- b(4) a(4) {0b} ]", mnemonic="AND_EQ")\n@ispec("32<[ c(4) {24} ---- b(4) a(4) {0b} ]", mnemonic="AND_GE")\n@ispec("32<[ c(4) {25} ---- b(4) a(4) {0b} ]", mnemonic="AND_GE_U")\n@ispec("32<[ c(4) {22} ---- b(4) a(4) {0b} ]", mnemonic="AND_LT")\n@ispec("32<[ c(4) {23} ---- b(4) a(4) {0b} ]", mnemonic="AND_LT_U")\n@ispec("32<[ c(4) {21} ---- b(4) a(4) {0b} ]", mnemonic="AND_NE")\n@ispec("32<[ c(4) {0e} ---- b(4) a(4) {0f} ]", mnemonic="ANDN")\n@ispec("32<[ c(4) {10} ---- b(4) a(4) {0b} ]", mnemonic="EQ")\n@ispec("32<[ c(4) {50} ---- b(4) a(4) {0b} ]", mnemonic="EQ_B")\n@ispec("32<[ c(4) {70} ---- b(4) a(4) {0b} ]", mnemonic="EQ_H")\n@ispec("32<[ c(4) {90} ---- b(4) a(4) {0b} ]", mnemonic="EQ_W")\n@ispec("32<[ c(4) {56} ---- b(4) a(4) {0b} ]", mnemonic="EQANY_B")\n@ispec("32<[ c(4) {76} ---- b(4) a(4) {0b} ]", mnemonic="EQANY_H")\n@ispec("32<[ c(4) {14} ---- b(4) a(4) {0b} ]", mnemonic="GE")\n@ispec("32<[ c(4) {15} ---- b(4) a(4) {0b} ]", mnemonic="GE_U")\n@ispec("32<[ c(4) {12} ---- b(4) a(4) {0b} ]", mnemonic="LT")\n@ispec("32<[ c(4) {13} ---- b(4) a(4) {0b} ]", mnemonic="LT_U")\n@ispec("32<[ c(4) {52} ---- b(4) a(4) {0b} ]", mnemonic="LT_B")\n@ispec("32<[ c(4) {53} ---- b(4) a(4) {0b} ]", mnemonic="LT_BU")\n@ispec("32<[ c(4) {72} ---- b(4) a(4) {0b} ]", mnemonic="LT_H")\n@ispec("32<[ c(4) {73} ---- b(4) a(4) {0b} ]", mnemonic="LT_HU")\n@ispec("32<[ c(4) {92} ---- b(4) a(4) {0b} ]", mnemonic="LT_W")\n@ispec("32<[ c(4) {93} ---- b(4) a(4) {0b} ]", mnemonic="LT_WU")\n@ispec("32<[ c(4) {1a} ---- b(4) a(4) {0b} ]", mnemonic="MAX")\n@ispec("32<[ c(4) {1b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_U")\n@ispec("32<[ c(4) {5a} ---- b(4) a(4) {0b} ]", mnemonic="MAX_B")\n@ispec("32<[ c(4) {5b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_BU")\n@ispec("32<[ c(4) {7a} ---- b(4) a(4) {0b} ]", mnemonic="MAX_H")\n@ispec("32<[ c(4) {7b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_HU")\n@ispec("32<[ c(4) {18} ---- b(4) a(4) {0b} ]", mnemonic="MIN")\n@ispec("32<[ c(4) {19} ---- b(4) a(4) {0b} ]", mnemonic="MIN_U")\n@ispec("32<[ c(4) {58} ---- b(4) a(4) {0b} ]", mnemonic="MIN_B")\n@ispec("32<[ c(4) {59} ---- b(4) a(4) {0b} ]", mnemonic="MIN_BU")\n@ispec("32<[ c(4) {78} ---- b(4) a(4) {0b} ]", mnemonic="MIN_H")\n@ispec("32<[ c(4) {79} ---- b(4) a(4) {0b} ]", mnemonic="MIN_HU")\n@ispec("32<[ c(4) {09} ---- b(4) a(4) {0f} ]", mnemonic="NAND")\n@ispec("32<[ c(4) {11} ---- b(4) a(4) {0b} ]", mnemonic="NE")\n@ispec("32<[ c(4) {0b} ---- b(4) a(4) {0f} ]", mnemonic="NOR")\n@ispec("32<[ c(4) {0a} ---- b(4) a(4) {0f} ]", mnemonic="OR")\n@ispec("32<[ c(4) {27} ---- b(4) a(4) {0b} ]", mnemonic="OR_EQ")\n@ispec("32<[ c(4) {2b} ---- b(4) a(4) {0b} ]", mnemonic="OR_GE")\n@ispec("32<[ c(4) {2c} ---- b(4) a(4) {0b} ]", mnemonic="OR_GE_U")\n@ispec("32<[ c(4) {29} ---- b(4) a(4) {0b} ]", mnemonic="OR_LT")\n@ispec("32<[ c(4) {2a} ---- b(4) a(4) {0b} ]", mnemonic="OR_LT_U")\n@ispec("32<[ c(4) {28} ---- b(4) 
a(4) {0b} ]", mnemonic="OR_NE")\n@ispec("32<[ c(4) {0f} ---- b(4) a(4) {0f} ]", mnemonic="ORN")\n@ispec("32<[ c(4) {00} ---- b(4) a(4) {0f} ]", mnemonic="SH")\n@ispec("32<[ c(4) {37} ---- b(4) a(4) {0b} ]", mnemonic="SH_EQ")\n@ispec("32<[ c(4) {3b} ---- b(4) a(4) {0b} ]", mnemonic="SH_GE")\n@ispec("32<[ c(4) {3c} ---- b(4) a(4) {0b} ]", mnemonic="SH_GE_U")\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {0f} ]", mnemonic="SH_H")\n@ispec("32<[ c(4) {39} ---- b(4) a(4) {0b} ]", mnemonic="SH_LT")\n@ispec("32<[ c(4) {3a} ---- b(4) a(4) {0b} ]", mnemonic="SH_LT_U")\n@ispec("32<[ c(4) {38} ---- b(4) a(4) {0b} ]", mnemonic="SH_NE")\n@ispec("32<[ c(4) {01} ---- b(4) a(4) {0f} ]", mnemonic="SHA")\n@ispec("32<[ c(4) {41} ---- b(4) a(4) {0f} ]", mnemonic="SHA_H")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {0f} ]", mnemonic="SHAS")\n@ispec("32<[ c(4) {08} ---- b(4) a(4) {0b} ]", mnemonic="SUB")\n@ispec("32<[ c(4) {48} ---- b(4) a(4) {0b} ]", mnemonic="SUB_B")\n@ispec("32<[ c(4) {68} ---- b(4) a(4) {0b} ]", mnemonic="SUB_H")\n@ispec("32<[ c(4) {0d} ---- b(4) a(4) {0b} ]", mnemonic="SUBC")\n@ispec("32<[ c(4) {0a} ---- b(4) a(4) {0b} ]", mnemonic="SUBS")\n@ispec("32<[ c(4) {0b} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_U")\n@ispec("32<[ c(4) {6a} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_H")\n@ispec("32<[ c(4) {6b} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_HU")\n@ispec("32<[ c(4) {0c} ---- b(4) a(4) {0b} ]", mnemonic="SUBX")\n@ispec("32<[ c(4) {0d} ---- b(4) a(4) {0f} ]", mnemonic="XNOR")\n@ispec("32<[ c(4) {0c} ---- b(4) a(4) {0f} ]", mnemonic="XOR")\n@ispec("32<[ c(4) {2f} ---- b(4) a(4) {0b} ]", mnemonic="XOR_EQ")\n@ispec("32<[ c(4) {30} ---- b(4) a(4) {0b} ]", mnemonic="XOR_NE")\ndef tricore_ddd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {01} ]", mnemonic="EQ_A")\n@ispec("32<[ c(4) {43} ---- b(4) a(4) {01} ]", mnemonic="GE_A")\n@ispec("32<[ c(4) {42} ---- b(4) a(4) {01} ]", mnemonic="LT_A")\n@ispec("32<[ c(4) {41} ---- b(4) a(4) {01} ]", mnemonic="NE_A")\ndef tricore_daa_arithmetic(obj, c, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {63} ---- b(4) ---- {01} ]", mnemonic="MOV_A", _dst=env.A, _src=env.D)\n@ispec("32<[ c(4) {00} ---- b(4) ---- {01} ]", mnemonic="MOV_AA", _dst=env.A, _src=env.A)\n@ispec("32<[ c(4) {4c} ---- b(4) ---- {01} ]", mnemonic="MOV_D", _dst=env.D, _src=env.A)\ndef tricore_daa_arithmetic(obj, c, b, _dst, _src):\n dst = _dst[c]\n src = _src[b]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {48} ---- ---- a(4) {01} ]", mnemonic="EQZ_A")\n@ispec("32<[ c(4) {49} ---- ---- a(4) {01} ]", mnemonic="NEZ_A")\ndef tricore_da_arithmetic(obj, c, a):\n src1 = env.A[a]\n dst = env.D[c]\n obj.operands = [dst, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {01} --00 b(4) a(4) {4b} ]", mnemonic="BMERGE")\ndef tricore_ddd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {06} --00 b(4) a(4) {4b} ]", mnemonic="CRC32_B")\n@ispec("32<[ c(4) {03} --00 b(4) a(4) {4b} ]", mnemonic="CRC32B_W")\n@ispec("32<[ c(4) {03} --00 b(4) a(4) {4b} ]", mnemonic="CRC32L_W")\ndef tricore_crc32(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src2, src1]\n obj.type = 
type_data_processing\n@ispec("32<[ c(4) {20} --01 b(4) a(4) {4b} ]", mnemonic="DIV")\n@ispec("32<[ c(4) {21} --01 b(4) a(4) {4b} ]", mnemonic="DIV_U")\n@ispec("32<[ c(4) {5a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_B")\n@ispec("32<[ c(4) {4a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_BU")\n@ispec("32<[ c(4) {3a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_H")\n@ispec("32<[ c(4) {2a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_HU")\n@ispec("32<[ c(4) {1a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT")\n@ispec("32<[ c(4) {0a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_U")\ndef tricore_edd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 100 ----- b(4) a(4) {17} ]", mnemonic="DEXTR")\ndef tricore_dddc(obj, c, d, b, a):\n shift = env.D[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, shift]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 010 ----- ---- a(4) {17} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) d(4) 011 ----- ---- a(4) {17} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, d, a):\n if d%2:\n raise InstructionError(obj)\n width = env.E[d][32:37]\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, width]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 0--00 ---- a(4) {6b} ]", mnemonic="PACK")\ndef tricore_extr(obj, c, d, a):\n if d%2:\n raise InstructionError(obj)\n src1 = env.E[d]\n src2 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {08} -- 00 ---- a(4) {4b} ]", mnemonic="UNPACK")\ndef tricore_extr(obj, c, d, a):\n src = env.D[a]\n dst = env.E[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {02} -- 00 ---- a(4) {4b} ]", mnemonic="PARITY")\n@ispec("32<[ c(4) {22} -- 00 ---- a(4) {4b} ]", mnemonic="POPCNT_W")\ndef tricore_extr(obj, c, d, a):\n src = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 ----- b(4) a(4) {77} ]", mnemonic="DEXTR")\ndef tricore_dextr(obj, c, pos, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, env.cst(pos,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 10 width(5) ---- a(4) {37} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) pos(5) 11 width(5) ---- a(4) {37} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, pos, width, a):\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 01 width(5) const(4) ---- {b7} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, pos, width, const):\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, env.cst(const,4), env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 001 width(5) const(4) ---- {d7} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, d, width, const):\n src2 = env.D[d]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, env.cst(const,4), src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 01 width(5) b(4) ---- {37} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, pos, width, b):\n src1 = env.D[b]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, env.cst(pos,5), env.cst(width,5)]\n obj.type = 
type_data_processing\n@ispec("32<[ c(4) d(4) 001 width(5) b(4) ---- {57} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, d, width, b):\n src1 = env.D[b]\n src2 = env.D[d]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 width(5) const(4) a(4) {b7} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, pos, width, const, a):\n dst = env.D[c]\n src1 = env.D[a]\n obj.operands = [dst, src1, env.cst(const,4), env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ----- const(4) a(4) {97} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, const, a):\n src1 = env.D[a]\n if d%2:\n raise InstructionError(obj)\n src3 = env.E[d]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(const,4), src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 width(5) const(4) a(4) {d7} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, width, const, a):\n src1 = env.D[a]\n src3 = env.D[d]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(const,4), src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 width(5) b(4) a(4) {37} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, pos, width, b, a):\n dst = env.D[c]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ----- b(4) a(4) {17} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n if d%2:\n raise InstructionError(obj)\n src3 = env.E[d]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 width(5) b(4) a(4) {57} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, width, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n src3 = env.D[d]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, src3, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 010 width(5) ---- a(4) {57} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) d(4) 011 width(5) ---- a(4) {57} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, d, width, a):\n src2 = env.D[d]\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {09} --00 ---- a(4) {4b} ]", mnemonic="BSPLIT")\ndef tricore_edd_arithmetic(obj, c, a):\n src1 = env.D[a]\n dst = env.E[c]\n obj.operands = [dst, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) 0001110 ~const9(9) a(4) {8b} ]", mnemonic="ABSDIF")\n@ispec("32<[ c(4) 0001111 ~const9(9) a(4) {8b} ]", mnemonic="ABSDIFS")\n@ispec("32<[ c(4) 0000000 ~const9(9) a(4) {8b} ]", mnemonic="ADD")\n@ispec("32<[ c(4) 0000101 ~const9(9) a(4) {8b} ]", mnemonic="ADDC")\n@ispec("32<[ c(4) 0000010 ~const9(9) a(4) {8b} ]", mnemonic="ADDS")\n@ispec("32<[ c(4) 0000011 ~const9(9) a(4) {8b} ]", mnemonic="ADDS_U") #const9 is signed\n@ispec("32<[ c(4) 0000100 ~const9(9) a(4) {8b} ]", mnemonic="ADDX")\n@ispec("32<[ c(4) 0100000 ~const9(9) a(4) {8b} ]", mnemonic="AND_EQ")\n@ispec("32<[ c(4) 0100100 ~const9(9) a(4) {8b} ]", mnemonic="AND_GE")\n@ispec("32<[ c(4) 0100010 ~const9(9) a(4) {8b} ]", mnemonic="AND_LT")\n@ispec("32<[ c(4) 0100001 ~const9(9) a(4) {8b} ]", mnemonic="AND_NE")\n@ispec("32<[ c(4) 0010000 ~const9(9) a(4) {8b} ]", mnemonic="EQ")\n@ispec("32<[ c(4) 1010110 ~const9(9) a(4) {8b} ]", mnemonic="EQANY_B")\n@ispec("32<[ c(4) 1110110 ~const9(9) a(4) {8b} ]", 
mnemonic="EQANY_H")\n@ispec("32<[ c(4) 0010100 ~const9(9) a(4) {8b} ]", mnemonic="GE")\n@ispec("32<[ c(4) 0010010 ~const9(9) a(4) {8b} ]", mnemonic="LT")\n@ispec("32<[ c(4) 0011010 ~const9(9) a(4) {8b} ]", mnemonic="MAX")\n@ispec("32<[ c(4) 0010001 ~const9(9) a(4) {8b} ]", mnemonic="NE")\n@ispec("32<[ c(4) 0100111 ~const9(9) a(4) {8b} ]", mnemonic="OR_EQ")\n@ispec("32<[ c(4) 0101011 ~const9(9) a(4) {8b} ]", mnemonic="OR_GE")\n@ispec("32<[ c(4) 0101001 ~const9(9) a(4) {8b} ]", mnemonic="OR_LT")\n@ispec("32<[ c(4) 0001000 ~const9(9) a(4) {8b} ]", mnemonic="RSUB")\n@ispec("32<[ c(4) 0001001 ~const9(9) a(4) {8b} ]", mnemonic="RSUBS")\n@ispec("32<[ c(4) 0001011 ~const9(9) a(4) {8b} ]", mnemonic="RSUBS_U") #const9 is signed\n@ispec("32<[ c(4) 0000000 ~const9(9) a(4) {8f} ]", mnemonic="SH")\n@ispec("32<[ c(4) 1000000 ~const9(9) a(4) {8f} ]", mnemonic="SH_H")\n@ispec("32<[ c(4) 0110111 ~const9(9) a(4) {8b} ]", mnemonic="SH_EQ")\n@ispec("32<[ c(4) 0111011 ~const9(9) a(4) {8b} ]", mnemonic="SH_GE")\n@ispec("32<[ c(4) 0111001 ~const9(9) a(4) {8b} ]", mnemonic="SH_LT")\n@ispec("32<[ c(4) 0111000 ~const9(9) a(4) {8b} ]", mnemonic="SH_NE")\n@ispec("32<[ c(4) 0000001 ~const9(9) a(4) {8f} ]", mnemonic="SHA")\n@ispec("32<[ c(4) 1000001 ~const9(9) a(4) {8f} ]", mnemonic="SHA_H")\n@ispec("32<[ c(4) 0000010 ~const9(9) a(4) {8f} ]", mnemonic="SHAS")\n@ispec("32<[ c(4) 0101111 ~const9(9) a(4) {8b} ]", mnemonic="XOR_EQ")\n@ispec("32<[ c(4) 0110011 ~const9(9) a(4) {8b} ]", mnemonic="XOR_GE")\n@ispec("32<[ c(4) 0110001 ~const9(9) a(4) {8b} ]", mnemonic="XOR_LT")\n@ispec("32<[ c(4) 0110000 ~const9(9) a(4) {8b} ]", mnemonic="XOR_NE")\ndef tricore_ddc_arithmetic(obj, c, const9, a):\n src1 = env.D[a]\n if obj.mnemonic in ("SH","SHA","SHAS"):\n const9 = const9[0:6]\n elif obj.mnemonic in ("SH_H","SHA_H"):\n const9 = const9[0:5]\n src2 = env.cst(const9.int(-1),32)\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_OR_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {87} ]", mnemonic="AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {87} ]", mnemonic="ANDN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {67} ]", mnemonic="INS_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {67} ]", mnemonic="INSN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {07} ]", mnemonic="NAND_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {87} ]", mnemonic="NOR_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {87} ]", mnemonic="OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {07} ]", mnemonic="ORN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_NAND_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {27} ]", 
mnemonic="SH_OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_ORN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_XNOR_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_XOR_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {07} ]", mnemonic="XNOR_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {07} ]", mnemonic="XOR_T")\ndef tricore_ddd_arithmetic(obj, c, pos2, pos1, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1[pos1:pos1+1], src2[pos2:pos2+1]]\n obj.type = type_data_processing\n@ispec("32<[ c(4) 0001000 const9(9) a(4) {8f} ]", mnemonic="AND")\n@ispec("32<[ c(4) 0100101 const9(9) a(4) {8b} ]", mnemonic="AND_GE_U")\n@ispec("32<[ c(4) 0100011 const9(9) a(4) {8b} ]", mnemonic="AND_LT_U")\n@ispec("32<[ c(4) 0001110 const9(9) a(4) {8f} ]", mnemonic="ANDN")\n@ispec("32<[ c(4) 0001001 const9(9) a(4) {8f} ]", mnemonic="NAND")\n@ispec("32<[ c(4) 0001011 const9(9) a(4) {8f} ]", mnemonic="NOR")\n@ispec("32<[ c(4) 0010101 const9(9) a(4) {8b} ]", mnemonic="GE_U")\n@ispec("32<[ c(4) 0001010 const9(9) a(4) {8f} ]", mnemonic="OR")\n@ispec("32<[ c(4) 0101100 const9(9) a(4) {8b} ]", mnemonic="OR_GE_U")\n@ispec("32<[ c(4) 0101010 const9(9) a(4) {8b} ]", mnemonic="OR_LT_U")\n@ispec("32<[ c(4) 0101000 const9(9) a(4) {8b} ]", mnemonic="OR_NE")\n@ispec("32<[ c(4) 0001111 const9(9) a(4) {8f} ]", mnemonic="ORN")\n@ispec("32<[ c(4) 0000111 const9(9) a(4) {8f} ]", mnemonic="SHUFFLE")\n@ispec("32<[ c(4) 0001101 const9(9) a(4) {8f} ]", mnemonic="XNOR")\n@ispec("32<[ c(4) 0001100 const9(9) a(4) {8f} ]", mnemonic="XOR")\n@ispec("32<[ c(4) 0111100 const9(9) a(4) {8b} ]", mnemonic="SH_GE_U")\n@ispec("32<[ c(4) 0111010 const9(9) a(4) {8b} ]", mnemonic="SH_LT_U")\n@ispec("32<[ c(4) 0110100 const9(9) a(4) {8b} ]", mnemonic="XOR_GE_U")\n@ispec("32<[ c(4) 0110011 const9(9) a(4) {8b} ]", mnemonic="XOR_LT_U")\n@ispec("32<[ c(4) 0011011 const9(9) a(4) {8b} ]", mnemonic="MAX_U")\n@ispec("32<[ c(4) 0010011 const9(9) a(4) {8b} ]", mnemonic="LT_U")\ndef tricore_ddc_arithmetic(obj, c, const9, a):\n src1 = env.D[a]\n src2 = env.cst(const9,32)\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {c2} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {06} ]", mnemonic="SH")\n@ispec("16<[ ~const4(4) a(4) {86} ]", mnemonic="SHA")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[a]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {92} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {8a} ]", mnemonic="CADD")\n@ispec("16<[ ~const4(4) a(4) {ca} ]", mnemonic="CADDN")\n@ispec("16<[ ~const4(4) a(4) {aa} ]", mnemonic="CMOV")\n@ispec("16<[ ~const4(4) a(4) {ea} ]", mnemonic="CMOVN")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[15]\n obj.operands = [dst, src1, src2]\n if "CADD" in obj.mnemonic:\n obj.operands = [dst, src1, dst, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {9a} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {ba} ]", mnemonic="EQ")\n@ispec("16<[ ~const4(4) a(4) {fa} ]", mnemonic="LT")\n@ispec("16<[ ~const4(4) a(4) {82} ]", mnemonic="MOV")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[15]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[a]\n obj.operands = [dst, src1, src2]\n if obj.mnemonic=="MOV":\n obj.operands = 
[src1,src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {d2} ]", mnemonic="MOV")\ndef tricore_ec_arithmetic(obj, const4, a):\n dst = env.E[a]\n src = env.cst(const4.int(-1),64)\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ const4(4) a(4) {a0} ]", mnemonic="MOV_A")\ndef tricore_ec_arithmetic(obj, const4, a):\n dst = env.A[a]\n src = env.cst(const4,32)\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ const8(8) {16} ]", mnemonic="AND")\n@ispec("16<[ const8(8) {da} ]", mnemonic="MOV")\n@ispec("16<[ const8(8) {96} ]", mnemonic="OR")\ndef tricore_ddc_arithmetic(obj, const8):\n dst = env.D[15]\n src2 = env.cst(const8,32)\n src1 = env.D[15]\n obj.operands = [dst, src1, src2]\n if obj.mnemonic=="MOV":\n obj.operands = [src1,src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {42} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {26} ]", mnemonic="AND")\n@ispec("16<[ b(4) a(4) {a6} ]", mnemonic="OR")\n@ispec("16<[ b(4) a(4) {a2} ]", mnemonic="SUB")\n@ispec("16<[ b(4) a(4) {62} ]", mnemonic="SUBS")\n@ispec("16<[ b(4) a(4) {c6} ]", mnemonic="XOR")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[a]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {02} ]", mnemonic="MOV" , _dst=env.D, _src=env.D)\n@ispec("16<[ b(4) a(4) {60} ]", mnemonic="MOV_A" , _dst=env.A, _src=env.D)\n@ispec("16<[ b(4) a(4) {40} ]", mnemonic="MOV_AA" , _dst=env.A, _src=env.A)\n@ispec("16<[ b(4) a(4) {80} ]", mnemonic="MOV_D" , _dst=env.D, _src=env.A)\ndef tricore_mov(obj, b, a, _dst, _src):\n dst = _dst[a]\n src = _src[b]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {12} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {2a} ]", mnemonic="CMOV")\n@ispec("16<[ b(4) a(4) {6a} ]", mnemonic="CMOVN")\n@ispec("16<[ b(4) a(4) {52} ]", mnemonic="SUB")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[a]\n src1 = env.D[15]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {1a} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {22} ]", mnemonic="ADDS")\n@ispec("16<[ b(4) a(4) {3a} ]", mnemonic="EQ")\n@ispec("16<[ b(4) a(4) {7a} ]", mnemonic="LT")\n@ispec("16<[ b(4) a(4) {5a} ]", mnemonic="SUB")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[15]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {01} ---- b(4) a(4) {01} ]", mnemonic="ADD_A")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {01} ]", mnemonic="SUB_A")\ndef tricore_aaa_arithmetic(obj, c, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {b0} ]", mnemonic="ADD_A")\ndef tricore_aac_arithmetic(obj, const4, a):\n dst = env.A[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.A[a]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ const8(8) {20} ]", mnemonic="SUB_A")\ndef tricore_aac_arithmetic(obj, const8, a):\n dst = env.A[10]\n src2 = env.cst(const8,32)\n src1 = env.A[10]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {30} ]", mnemonic="ADD_A")\ndef tricore_aa_arithmetic(obj, b, a):\n dst = env.A[a]\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) ~const16(16) a(4) {1b} ]", 
mnemonic="ADDI")\n@ispec("32<[ c(4) ~const16(16) a(4) {9b} ]", mnemonic="ADDIH")\ndef tricore_di_arithmetic(obj, c, const16, a):\n src1 = env.D[a]\n src2 = env.cst(const16.int(-1),32)\n if self.mnemonic=="ADDIH": src2=src2<<16\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) ~const16(16) a(4) {11} ]", mnemonic="ADDIH_A")\ndef tricore_ai_arithmetic(obj, c, const16, a):\n src1 = env.A[a]\n src2 = env.cst(const16.int(-1),32)<<16\n dst = env.A[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {60} -- n(2) b(4) a(4) {01} ]", mnemonic="ADDSC_A")\ndef tricore_aaa_arithmetic(obj, c, n, b, a):\n src1 = env.D[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src2, src1, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {62} ---- b(4) a(4) {01} ]", mnemonic="ADDSC_AT")\ndef tricore_aaa_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src2, src1]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) n(2) 010000 ]", mnemonic="ADDSC_A")\ndef tricore_aa_arithmetic(obj, b, a, n):\n dst = env.A[a]\n src1 = env.D[15]\n src2 = env.A[b]\n obj.operands = [dst, src2, src1, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ off2(4) 10 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1110 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_I", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1110 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_I", mode="Circular")\n@ispec("32<[ off2(4) 00 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1100 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1100 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_W", mode="Circular")\n@ispec("32<[ off2(4) 00 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1101 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_WI", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1101 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_WI", mode="Circular")\n@ispec("32<[ off2(4) 00 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1111 off1(6) b(4) ---- {89} ]", 
mnemonic="CACHEI_WI", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Pre-increment")\ndef tricore_cache(obj, off2, off1, b):\n src2 = env.A[b]\n src1 = env.cst((off2<<6)+off1,10)\n obj.operands = [src2, src1]\n obj.type = type_system\n@ispec("32<[ off2(4) 10 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 0011 off1(6) b(4) a(4) {69} ]", mnemonic="CMPSWAP_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 0011 off1(6) b(4) a(4) {69} ]", mnemonic="CMPSWAP_W", mode="Circular")\n@ispec("32<[ off2(4) 00 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 0010 off1(6) b(4) a(4) {69} ]", mnemonic="SWAPMSK_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 0010 off1(6) b(4) a(4) {69} ]", mnemonic="SWAPMSK_W", mode="Circular")\n@ispec("32<[ off2(4) 00 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Pre-increment")\ndef tricore_swap(obj, off2, off1, b, a):\n if a%2:\n raise InstructionError(obj)\n dst = env.D[a]\n src1 = env.A[b]\n src2 = env.cst((off2<<6)+off1,10)\n src3 = env.E[a]\n obj.operands = [dst, src1, src2, src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ~const9(9) a(4) {ab} ]", mnemonic="CADD")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {ab} ]", mnemonic="CADDN")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {13} ]", mnemonic="MADD", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {13} ]", mnemonic="MADDS", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {13} ]", mnemonic="MADDS_U", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {33} ]", mnemonic="MSUB", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {33} ]", mnemonic="MSUBS", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {33} ]", mnemonic="MSUBS_U", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {ab} ]", mnemonic="SEL")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {ab} ]", mnemonic="SELN")\ndef tricore_cond_ddc(obj, c, d, const9, a):\n cond = env.D[d]\n src1 = env.D[a]\n src2 = env.cst(const9.int(-1),32)\n dst = env.D[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 011 ~const9(9) a(4) {13} ]", mnemonic="MADD", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {13} ]", mnemonic="MADDS", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 010 ~const9(9) a(4) {13} ]", mnemonic="MADD_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {13} ]", mnemonic="MADDS_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 011 ~const9(9) a(4) {33} ]", mnemonic="MSUB", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {33} ]", mnemonic="MSUBS", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 010 ~const9(9) a(4) {33} ]", mnemonic="MSUB_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {33} ]", mnemonic="MSUBS_U", opt4="64+(32+K9)->64")\ndef tricore_cond_eec(obj, c, d, const9, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.cst(const9.int(-1),32)\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = 
type_data_processing\n@ispec("32<[ c(4) d(4) 011010 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="LL")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="LU")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="UL")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="UU")\n@ispec("32<[ c(4) d(4) 111010 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="LL")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="LU")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="UL")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="UU")\n@ispec("32<[ c(4) d(4) 000010 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 000001 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 000000 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 000101 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 011101 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 000100 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 011100 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 100010 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 100001 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 100000 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 100101 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 111101 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 100100 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 111100 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 011010 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="LL")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="LU")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="UL")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="UU")\n@ispec("32<[ c(4) d(4) 111010 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="LL")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="LU")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="UL")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="UU")\n@ispec("32<[ c(4) d(4) 000010 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) 
d(4) 000001 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 000000 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 000101 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 011101 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 000100 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 011100 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 100010 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 100001 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 100000 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 100101 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 111101 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 100100 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 111100 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16U*16U)->64")\ndef tricore_cond_eec(obj, c, d, n, b, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 0000 ---- b(4) a(4) {2b} ]", mnemonic="CADD")\n@ispec("32<[ c(4) d(4) 0001 ---- b(4) a(4) {2b} ]", mnemonic="CADDN")\n@ispec("32<[ c(4) d(4) 0010 ---- b(4) a(4) {2b} ]", mnemonic="CSUB")\n@ispec("32<[ c(4) d(4) 0011 ---- b(4) a(4) {2b} ]", mnemonic="CSUBN")\n@ispec("32<[ c(4) d(4) {0a} b(4) a(4) {03} ]", mnemonic="MADD", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) {8a} b(4) a(4) {03} ]", mnemonic="MADDS", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) {88} b(4) a(4) {03} ]", mnemonic="MADDS_U", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) 0100 ---- b(4) a(4) {2b} ]", mnemonic="SEL")\n@ispec("32<[ c(4) d(4) 0101 ---- b(4) a(4) {2b} ]", mnemonic="SELN")\ndef tricore_cond_ddd(obj, c, d, b, a):\n cond = env.D[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) {6a} b(4) a(4) {03} ]", mnemonic="MADD", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {ea} b(4) a(4) {03} ]", mnemonic="MADDS", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {68} b(4) a(4) {03} ]", mnemonic="MADD_U", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {e8} b(4) a(4) {03} ]", mnemonic="MADDS_U", opt4="64+(32*32)->64")\ndef tricore_cond_ddd(obj, c, d, b, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {1c} ---- ---- a(4) {0f} ]", mnemonic="CLO")\n@ispec("32<[ c(4) {7d} ---- ---- a(4) {0f} ]", mnemonic="CLO_H")\n@ispec("32<[ c(4) {1d} ---- ---- a(4) {0f} ]", 
mnemonic="CLS")\n@ispec("32<[ c(4) {7e} ---- ---- a(4) {0f} ]", mnemonic="CLS_H")\n@ispec("32<[ c(4) {1b} ---- ---- a(4) {0f} ]", mnemonic="CLZ")\n@ispec("32<[ c(4) {7c} ---- ---- a(4) {0f} ]", mnemonic="CLZ_H")\n@ispec("32<[ c(4) {5e} ---- ---- a(4) {0b} ]", mnemonic="SAT_B")\n@ispec("32<[ c(4) {5f} ---- ---- a(4) {0b} ]", mnemonic="SAT_BU")\n@ispec("32<[ c(4) {7e} ---- ---- a(4) {0b} ]", mnemonic="SAT_H")\n@ispec("32<[ c(4) {7f} ---- ---- a(4) {0b} ]", mnemonic="SAT_HU")\ndef tricore_dd_arithmetic(obj, c, a):\n src = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ 1010 ---- {00} ]", mnemonic="DEBUG")\n@ispec("16<[ 0000 ---- {00} ]", mnemonic="NOP")\ndef tricore_system(obj):\n obj.operands = []\n obj.type = type_system\n@ispec("16<[ 0111 ---- {00} ]", mnemonic="FRET")\n@ispec("16<[ 1001 ---- {00} ]", mnemonic="RET")\n@ispec("16<[ 1000 ---- {00} ]", mnemonic="RFE")\ndef tricore_ret(obj):\n obj.operands = []\n obj.type = type_control_flow\n@ispec("32<[ ---- 000100 ---------- ---- {0d} ]", mnemonic="DEBUG")\n@ispec("32<[ ---- 001101 ---------- ---- {0d} ]", mnemonic="DISABLE")\n@ispec("32<[ ---- 010010 ---------- ---- {0d} ]", mnemonic="DSYNC")\n@ispec("32<[ ---- 001100 ---------- ---- {0d} ]", mnemonic="ENABLE")\n@ispec("32<[ ---- 010011 ---------- ---- {0d} ]", mnemonic="ISYNC")\n@ispec("32<[ ---- 010101 ---------- ---- {0d} ]", mnemonic="TRAPSV")\n@ispec("32<[ ---- 010100 ---------- ---- {0d} ]", mnemonic="TRAPV")\n@ispec("32<[ ---- 000000 ---------- ---- {0d} ]", mnemonic="NOP")\n@ispec("32<[ ---- 001001 ---------- ---- {0d} ]", mnemonic="RSLCX")\n@ispec("32<[ ---- 000000 ---------- ---- {2f} ]", mnemonic="RSTV")\n@ispec("32<[ ---- 001000 ---------- ---- {0d} ]", mnemonic="SVLCX")\n@ispec("32<[ ---- 010110 ---------- ---- {0d} ]", mnemonic="WAIT")\ndef tricore_system(obj):\n obj.operands = []\n obj.type = type_system\n@ispec("32<[ ---- 000011 ---------- ---- {0d} ]", mnemonic="FRET")\n@ispec("32<[ ---- 000110 ---------- ---- {0d} ]", mnemonic="RET")\n@ispec("32<[ ---- 000111 ---------- ---- {0d} ]", mnemonic="RFE")\n@ispec("32<[ ---- 000101 ---------- ---- {0d} ]", mnemonic="RFM")\ndef tricore_ret(obj):\n obj.operands = []\n obj.type = type_control_flow\n@ispec("32<[ ---- 001111 ---------- a(4) {0d} ]", mnemonic="DISABLE")\n@ispec("32<[ ---- 001110 ---------- a(4) {0d} ]", mnemonic="RESTORE")\ndef tricore_system(obj, a):\n obj.operands = [env.D[a]]\n obj.type = type_system\n@ispec("32<[ c(4) d(4) 1101 -- 00 b(4) ---- {6b} ]", mnemonic="DVADJ")\n@ispec("32<[ c(4) d(4) 1111 -- 00 b(4) ---- {6b} ]", mnemonic="DVSTEP")\n@ispec("32<[ c(4) d(4) 1110 -- 00 b(4) ---- {6b} ]", mnemonic="DVSTEP_U")\n@ispec("32<[ c(4) d(4) 1010 -- 00 b(4) ---- {6b} ]", mnemonic="IXMAX")\n@ispec("32<[ c(4) d(4) 1011 -- 00 b(4) ---- {6b} ]", mnemonic="IXMAX_U")\n@ispec("32<[ c(4) d(4) 1000 -- 00 b(4) ---- {6b} ]", mnemonic="IXMIN")\n@ispec("32<[ c(4) d(4) 1001 -- 00 b(4) ---- {6b} ]", mnemonic="IXMIN_U")\ndef tricore_eee(obj, c, d, b):\n if d%2 or b%2 or c%2:\n raise InstructionError(obj)\n src1 = env.E[d]\n src2 = env.E[b]\n dst = env.E[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) disp(4) {1e} ]", mnemonic="JEQ", _off=0)\n@ispec("16<[ ~const4(4) disp(4) {9e} ]", mnemonic="JEQ", _off=16)\n@ispec("16<[ ~const4(4) disp(4) {5e} ]", mnemonic="JNE", _off=0)\n@ispec("16<[ ~const4(4) disp(4) {de} ]", mnemonic="JNE", _off=16)\ndef tricore_jcc(obj, const4, disp, _off):\n dst = env.D[15]\n src1 = 
env.cst(const4.int(-1),32)\n src2 = env.cst(disp,32)+_off\n obj.operands = [dst, src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {3e} ]", mnemonic="JEQ", _off=0)\n@ispec("16<[ b(4) disp(4) {be} ]", mnemonic="JEQ", _off=16)\n@ispec("16<[ b(4) disp(4) {7e} ]", mnemonic="JNE", _off=0)\n@ispec("16<[ b(4) disp(4) {fe} ]", mnemonic="JNE", _off=16)\ndef tricore_jcc(obj, b, disp, _off):\n dst = env.D[15]\n src1 = env.D[b]\n src2 = env.cst(disp,32)+_off\n obj.operands = [dst, src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {ce} ]", mnemonic="JGEZ")\n@ispec("16<[ b(4) disp(4) {4e} ]", mnemonic="JGTZ")\n@ispec("16<[ b(4) disp(4) {8e} ]", mnemonic="JLEZ")\n@ispec("16<[ b(4) disp(4) {0e} ]", mnemonic="JLTZ")\n@ispec("16<[ b(4) disp(4) {f6} ]", mnemonic="JNZ")\n@ispec("16<[ b(4) disp(4) {76} ]", mnemonic="JZ")\ndef tricore_jcc(obj, b, disp):\n src1 = env.D[b]\n src2 = env.cst(disp,32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {df} ]", mnemonic="JEQ")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {df} ]", mnemonic="JNE")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {ff} ]", mnemonic="JGE")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {ff} ]", mnemonic="JGE_U")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {bf} ]", mnemonic="JLT")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {bf} ]", mnemonic="JLT_U")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {9f} ]", mnemonic="JNED")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {9f} ]", mnemonic="JNEI")\ndef tricore_jcc(obj, disp, const, a):\n src1 = env.D[a]\n src2 = env.cst(const,4)\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {5f} ]", mnemonic="JEQ")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {5f} ]", mnemonic="JNE")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {7f} ]", mnemonic="JGE")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {7f} ]", mnemonic="JGE_U")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {3f} ]", mnemonic="JLT")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {3f} ]", mnemonic="JLT_U")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {1f} ]", mnemonic="JNED")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {1f} ]", mnemonic="JNEI")\ndef tricore_jcc(obj, disp, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {7d} ]", mnemonic="JEQ_A")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {7d} ]", mnemonic="JNE_A")\ndef tricore_jcc(obj, disp, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 1 ~disp(15) ---- a(4) {bd} ]", mnemonic="JNZ_A")\n@ispec("32<[ 0 ~disp(15) ---- a(4) {bd} ]", mnemonic="JZ_A")\ndef tricore_jcc(obj, disp, a):\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) ---- {fd} ]", mnemonic="LOOP")\n@ispec("32<[ 1 ~disp(15) b(4) ---- {fd} ]", mnemonic="LOOPU")\ndef tricore_jcc(obj, disp, b):\n src1 = env.A[b]\n src2 = env.cst(disp.int(-1)*2,32)\n obj.operands = [src1, src2]\n if obj.mnemonic=="LOOPU":\n obj.operands = [src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {7c} ]", mnemonic="JNZ_A")\n@ispec("16<[ b(4) disp(4) {bc} ]", mnemonic="JZ_A")\ndef tricore_jcc(obj, b, disp):\n src1 = env.A[b]\n src2 = env.cst(disp,32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) #disp(4) {fc} ]", mnemonic="LOOP")\ndef tricore_jcc(obj, b, 
disp):\n src1 = env.A[b]\n src2 = env.cst(int(("1"*27)+disp+"0",2),32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ 0000 a(4) {dc} ]", mnemonic="JI")\ndef tricore_ji(obj, a):\n src = env.A[a]\n obj.operands = [src]\n obj.type = type_control_flow\n@ispec("16<[ 0000 a(4) {46} ]", mnemonic="NOT")\n@ispec("16<[ 0101 a(4) {32} ]", mnemonic="RSUB")\n@ispec("16<[ 0000 a(4) {32} ]", mnemonic="SAT_B")\n@ispec("16<[ 0001 a(4) {32} ]", mnemonic="SAT_BU")\n@ispec("16<[ 0010 a(4) {32} ]", mnemonic="SAT_H")\n@ispec("16<[ 0011 a(4) {32} ]", mnemonic="SAT_HU")\ndef tricore_a(obj, a):\n src = env.D[a]\n obj.operands = [src]\n obj.type = type_data_processing\n@ispec("16<[ n(4) disp(4) {ae} ]", mnemonic="JNZ_T")\n@ispec("16<[ n(4) disp(4) {2e} ]", mnemonic="JZ_T")\ndef tricore_ji(obj, n, disp):\n obj.operands = [env.D[15][n:n+1], env.cst(disp,32)]\n obj.type = type_control_flow\n@ispec("32<[ 1 ~disp(15) n(4) a(4) h 1101111 ]", mnemonic="JNZ_T")\n@ispec("32<[ 0 ~disp(15) n(4) a(4) h 1101111 ]", mnemonic="JZ_T")\ndef tricore_jcc(obj, disp, n, a, h):\n i = n+(h<<4)\n src = env.D[a][i:i+1]\n obj.operands = [src, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_A", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_B", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_BU", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_D", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_DA", mode="Absolute")\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_H", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_HU", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {45} ]", mnemonic="LD_Q", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {c5} ]", mnemonic="LEA", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n dst = env.D[a]\n if obj.mnemonic in ("LD_A", "LEA") : dst = env.A[a]\n if obj.mnemonic in ("LD_D","LDMST") : dst = env.E[a]\n if obj.mnemonic=="LD_DA": dst = env.P[a]\n src = off1//off2//off3\n obj.operands = [dst, composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {c5} ]", mnemonic="LHA", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n dst = env.A[a]\n src = off1//off2//off3//off4\n obj.operands = [dst, composer([env.cst(0,14),env.cst(src.int(),18)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_A", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {25} ]", mnemonic="ST_B", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_D", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_DA", mode="Absolute")\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {25} ]", mnemonic="ST_H", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {65} ]", mnemonic="ST_Q", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_W", 
mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {e5} ]", mnemonic="SWAP_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {e5} ]", mnemonic="LDMST", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4, a):\n src = env.D[a]\n if obj.mnemonic in ("ST_A",) : src = env.A[a]\n if obj.mnemonic in ("ST_D","LDMST") : src = env.E[a]\n if obj.mnemonic=="ST_DA": src = env.P[a]\n addr = off1//off2//off3\n obj.operands = [composer([env.cst(addr.int(),28),env.cst(off4,4)]), src]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) b bpos(3) {d5} ]", mnemonic="ST_T", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4, b, bpos):\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)]), env.cst(bpos,3), env.cst(b,1)]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) ---- {15} ]", mnemonic="STLCX", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4):\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {15} ]", mnemonic="LDLCX", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {15} ]", mnemonic="LDUCX", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n src = off1//off2//off3\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_A", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_A", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_B", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_B", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_BU", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_BU", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_D", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_D", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Short-offset")\n@ispec("32<[ ~off2(4) 
00 0111 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_DA", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_DA", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_H", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_H", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0011 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_HU", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0011 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_HU", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_Q", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_Q", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="LEA", mode="Short-offset")\ndef tricore_ld(obj, off2, off1, b, a):\n dst = env.D[a]\n if obj.mnemonic=="LD_A" : dst = env.A[a]\n elif obj.mnemonic=="LEA" : dst = env.A[a]\n elif obj.mnemonic=="LD_D" : dst = env.E[a]\n elif obj.mnemonic=="LDMST" : dst = env.E[a]\n elif obj.mnemonic=="LD_DA" : dst = env.P[a]\n obj.b = b\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [dst, src1, src2]\n if obj.mode == "Bit-Reverse":\n obj.operands.pop()\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_A", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_A", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_B", 
mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_B", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_D", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_D", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_DA", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_DA", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_H", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_H", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_Q", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_Q", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {69} ]", mnemonic="LDMST", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {69} ]", mnemonic="LDMST", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Pre-increment")\ndef tricore_st(obj, off2, off1, b, a):\n dst = env.D[a]\n if obj.mnemonic=="ST_A" : dst = env.A[a]\n elif obj.mnemonic=="ST_D" : dst = env.E[a]\n elif obj.mnemonic=="ST_DA" : dst = env.P[a]\n elif obj.mnemonic=="LDMST" : dst = env.E[a]\n obj.b = b\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n 
obj.operands = [src1, src2, dst]\n if obj.mode == "Bit-Reverse":\n obj.operands.pop()\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {69} ]", mnemonic="SWAP_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {69} ]", mnemonic="SWAP_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Pre-increment")\ndef tricore_ld(obj, off2, off1, b, a):\n dst = env.D[a]\n src1 = env.P[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [src1, src2, dst]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) ---- {49} ]", mnemonic="LDLCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) ---- {49} ]", mnemonic="LDUCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) ---- {49} ]", mnemonic="STLCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) ---- {49} ]", mnemonic="STUCX", mode="Short-offset")\ndef tricore_ld(obj, off2, off1, b):\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {99} ]", mnemonic="LD_A", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {79} ]", mnemonic="LD_B", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {39} ]", mnemonic="LD_BU", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {b9} ]", mnemonic="LD_HU", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {19} ]", mnemonic="LD_W", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {d9} ]", mnemonic="LEA", mode="Long-offset")\ndef tricore_ld(obj, off2, off3, off1, b, a):\n dst = env.D[a]\n\n</context>\n\n假设一个实际的场景,我们需要采用这段代码来实现工作流程,可以给我解释一下这段代码的作用吗? \n\n\n\n
|
114 |
+
```
|
@@ -0,0 +1,109 @@
1 |
+
import argparse
|
2 |
+
import sys
|
3 |
+
sys.path.append('../')
|
4 |
+
from time import perf_counter
|
5 |
+
|
6 |
+
from lyra_llama import lyraLlama
|
7 |
+
|
8 |
+
|
9 |
+
def get_args():
|
10 |
+
parser = argparse.ArgumentParser(description="Faster LyraLlama Demo")
|
11 |
+
|
12 |
+
parser.add_argument('--model-path', type=str, required=True,
|
13 |
+
help='Model Path, include config.ini and tokenizer files')
|
14 |
+
parser.add_argument('--tokenizer-path', type=str, default=None)
|
15 |
+
|
16 |
+
parser.add_argument(
|
17 |
+
'--data-type', type=str, metavar='TYPE', default='fp16',
|
18 |
+
choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
|
19 |
+
help='The data type to inference. If None, the data type follows the '
|
20 |
+
'checkpoint data type.')
|
21 |
+
|
22 |
+
parser.add_argument(
|
23 |
+
'--memopt-mode', type=int, default=0, choices=[0, 1],
|
24 |
+
help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
|
25 |
+
' 0: FP16 mode'
|
26 |
+
' 1: Use MEMOPT mode')
|
27 |
+
|
28 |
+
parser.add_argument(
|
29 |
+
'--quant-type', type=str, metavar='TYPE', default='int8',
|
30 |
+
choices=['int4', 'int8'],
|
31 |
+
help='The data type of quantization. Only used in MEMOPT.')
|
32 |
+
|
33 |
+
parser.add_argument(
|
34 |
+
'--kvqparams-fpath', type=str, required=False, default="",
|
35 |
+
help='File path of kv quantized params.')
|
36 |
+
|
37 |
+
parser.add_argument("--prompt", type=str, required=False)
|
38 |
+
parser.add_argument("--max-output-length", type=int, default=512)
|
39 |
+
parser.add_argument("--warmups", type=int, default=10)
|
40 |
+
parser.add_argument("--avgnums", type=int, default=10)
|
41 |
+
args = parser.parse_args()
|
42 |
+
|
43 |
+
print('\n=================== Arguments ===================')
|
44 |
+
for k, v in vars(args).items():
|
45 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
46 |
+
print('=================================================')
|
47 |
+
|
48 |
+
return args
|
49 |
+
|
50 |
+
|
51 |
+
def main():
|
52 |
+
args = get_args()
|
53 |
+
|
54 |
+
model = lyraLlama(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode, args.quant_type, args.kvqparams_fpath)
|
55 |
+
|
56 |
+
# args.prompt = '''<context>/*\n * Implement the \"Falling Rocks\" game in the text console. \n * A small dwarf stays at the bottom of the screen and can \n * move left and right (by the arrows keys). A number of rocks \n * of different sizes and forms constantly fall down and you \n * need to avoid a crash.\n * Rocks are the symbols ^, @, *, &, +, %, $, #, !, ., ;, - distributed \n * with appropriate density. The dwarf is (O). \n * Ensure a constant game speed by Thread.Sleep(150).\n * Implement collision detection and scoring system.\n*/\n\nusing System;\nusing System.Threading;\nusing System.Collections.Generic;\nusing System.Threading.Tasks;\n\nclass FallingRocks\n{\n struct Position\n {\n public int X, Y;\n public string symbol;\n public ConsoleColor color;\n\n public Position(int x, int y, string symbol, ConsoleColor color)\n {\n this.X = x;\n this.Y = y;\n this.symbol = symbol;\n this.color = color;\n }\n }\n\n static void Main()\n {\n Thread oThread = new Thread(new ThreadStart(Mainn));\n Thread aThread = new Thread(new ThreadStart(Clr));\n \n aThread.Start();\n oThread.Start();\n oThread.Join();\n aThread.Join();\n }\n\n static void Clr()\n {\n while (true)\n {\n Thread.Sleep(10);\n Console.Clear();\n }\n }\n static void Mainn()\n {\n //Random generator for rocks color, position and symbol\n Random randomGenerator = new Random();\n \n //Sleep time for the game loop\n double sleepTime = 150;\n //Console settings\n Console.CursorVisible = false;\n Console.BufferHeight = Console.WindowHeight;\n \n //number of rocks in the Array rocks\n int rocksCount = 0;\n\n //array with the symbols of the rocks\n string[] symbols = new string[] { \"^\", \"@\", \"*\", \"&\", \"+\", \"%\", \"$\", \"#\", \"!\", \".\", \";\" };\n \n //array with colors for the rocks\n ConsoleColor[] colors = new ConsoleColor[] {ConsoleColor.Yellow, ConsoleColor.White, ConsoleColor.Gray};\n \n //array with rocks\n Position[] rocks = new Position[200];\n \n //position for the dwarf\n Position dwarf = new Position(10, Console.WindowHeight - 1,\"(0)\",ConsoleColor.Red);\n \n //bool variable to say when the game loop to be over\n bool gameLoop = true;\n\n //variable keeping the score\n ulong score = 0;\n\n //the game loop\n while (gameLoop)\n {\n //score is growing as the cycle runs\n score++;\n\n //setting the Y component for all the rocks in the array to grow with 2\n for (int i = 0; i <= rocks.Length - 1; i++)\n {\n rocks[i].Y = rocks[i].Y + 2;\n }\n\n //generating rocks\n for (int x = 0; x <= randomGenerator.Next(2, 4); x++)\n {\n rocks[rocksCount] = new Position(randomGenerator.Next(x * 15, x * 15 + 20), 0\n , symbols[randomGenerator.Next(0, symbols.Length - 1)]\n , colors[randomGenerator.Next(0, colors.Length - 1)]);\n if (rocksCount >= 199) rocksCount = 0;\n rocksCount++;\n }\n\n //printing the rocks and other stuff\n foreach (var item in rocks)\n {\n foreach (var rock in rocks)\n {\n //checking for colision\n if ((rock.X >= dwarf.X) && (rock.X <= (dwarf.X + 2)) && (rock.Y == dwarf.Y))\n {\n gameLoop = false;\n break;\n }\n } \n\n //printing the rocks\n if (item.Y < Console.WindowHeight)\n { \n Console.SetCursorPosition(item.X, item.Y);\n Console.ForegroundColor = item.color;\n Console.Write(item.symbol);\n }\n\n //checking for key pressed\n if (Console.KeyAvailable)\n {\n ConsoleKeyInfo pressedKey = Console.ReadKey();\n if (pressedKey.Key == ConsoleKey.RightArrow)\n {\n if(dwarf.X < Console.WindowWidth - 20)\n {\n //removing the old positions of the dwarf and increasing his X value\n 
Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.Write(\" \");\n dwarf.X++;\n }\n }\n if (pressedKey.Key == ConsoleKey.LeftArrow) \n {\n if(dwarf.X >= 1)\n {\n //removing the old positions of the dwarf and decreasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.Write(\" \");\n dwarf.X--;\n }\n }\n }\n }\n \n //printing the dwarf\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.ForegroundColor = dwarf.color;\n Console.Write(dwarf.symbol); \n \n //sleeping the loop for sometime\n //Thread.Sleep((int)sleepTime);\n\n //reducing the sleep time of the loop\n sleepTime -= 0.5;\n\n \n //removing the rocks \n //foreach (var item in rocks)\n //{\n // if (item.Y < Console.WindowHeight)\n // {\n // Console.SetCursorPosition(item.X, item.Y);\n // Console.Write(\" \");\n // }\n //} \n }\n //Printing the score after the game is over\n Console.Clear();\n Console.WriteLine(\"Game over! Your score is: \" + score);\n\n }\n}\n</context>\n\n这个\"Falling Rocks\"游戏是如何工作的呢?可以详细解释一下代码的运作机制吗? \n\n\n\n'''
|
57 |
+
|
58 |
+
prompt_template = "Human: {}\n\nAssistant:" # xverse
|
59 |
+
# prompt_template = "<human>:{}\n<bot>:" # llama-ziya 13b
|
60 |
+
|
61 |
+
prompt = prompt_template.format(args.prompt)
|
62 |
+
|
63 |
+
test_batch_size = [1, 8, 16, 32, 64] # 8, 16, 32, 64
|
64 |
+
print("test_batch_size: ", test_batch_size)
|
65 |
+
|
66 |
+
for i, bs in enumerate(test_batch_size):
|
67 |
+
prompts = [prompt, ] * bs
|
68 |
+
|
69 |
+
# warmup gpu
|
70 |
+
for _ in range(args.warmups):
|
71 |
+
output_texts = model.generate(
|
72 |
+
prompts, output_length=args.max_output_length,
|
73 |
+
top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
|
74 |
+
|
75 |
+
start = perf_counter()
|
76 |
+
for _ in range(args.avgnums):
|
77 |
+
output_texts = model.generate(
|
78 |
+
prompts, output_length=args.max_output_length,
|
79 |
+
top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
|
80 |
+
end = perf_counter()
|
81 |
+
cost = (end - start) / args.avgnums
|
82 |
+
|
83 |
+
input_output_texts = [prompt + ' ' + gtext for prompt,
|
84 |
+
gtext in zip(prompts, output_texts)]
|
85 |
+
tokens = 0
|
86 |
+
input_tokens = len(model.tokenizer.encode(prompt))
|
87 |
+
words = 0
|
88 |
+
for text in input_output_texts:
|
89 |
+
tokens += len(model.tokenizer.encode(text))
|
90 |
+
words += len(text)
|
91 |
+
|
92 |
+
avg_output_tokens = tokens / len(input_output_texts) - input_tokens
|
93 |
+
print(
|
94 |
+
f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Input tokens: {input_tokens}. Output tokens: {avg_output_tokens} Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
|
95 |
+
)
|
96 |
+
print(
|
97 |
+
f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
|
98 |
+
)
|
99 |
+
|
100 |
+
if i == 0:
|
101 |
+
for k in range(bs):
|
102 |
+
print(
|
103 |
+
f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
|
104 |
+
if k > 2:
|
105 |
+
break
|
106 |
+
|
107 |
+
|
108 |
+
if __name__ == "__main__":
|
109 |
+
main()
|
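For reference, a minimal (non-benchmark) sketch of the call pattern that batch_demo.py wraps. This assumes the lyra_llama package above is importable from the parent directory and that the model directory (the path below is hypothetical) holds the converted 1-gpu-fp16.bin plus config.ini and tokenizer files:

```python
import sys
sys.path.append('../')

from lyra_llama import lyraLlama

# Hypothetical location of a converted model directory.
model_path = "/path/to/converted/llama"

# memopt_mode=0 keeps plain FP16 weights; quant_dtype only matters when memopt_mode=1.
model = lyraLlama(model_path, model_path, 'fp16', memopt_mode=0, quant_dtype="int8")

texts = model.generate(["Human: 你好,请介绍一下你自己。\n\nAssistant:"],
                       output_length=128,
                       top_k=30, top_p=0.85, temperature=1.0,
                       repetition_penalty=1.0, do_sample=False)
print(texts[0].lstrip())
```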
@@ -0,0 +1,135 @@
1 |
+
import argparse
|
2 |
+
import sys
|
3 |
+
from time import perf_counter
|
4 |
+
|
5 |
+
import sys
|
6 |
+
# import ipdb
|
7 |
+
sys.path.append('../')
|
8 |
+
import threading
|
9 |
+
import time
|
10 |
+
|
11 |
+
from lyra_llama import lyraLlama
|
12 |
+
|
13 |
+
|
14 |
+
def print_string(string, prev_seq_length=None, finish=False):
|
15 |
+
if finish:
|
16 |
+
print_list([string])
|
17 |
+
return
|
18 |
+
|
19 |
+
print("\033c", end="")
|
20 |
+
|
21 |
+
if prev_seq_length:
|
22 |
+
print(string[:prev_seq_length], end='', flush=True)
|
23 |
+
string = string[prev_seq_length:]
|
24 |
+
|
25 |
+
for c_char in string:
|
26 |
+
print(c_char, end='', flush=True)
|
27 |
+
time.sleep(0.025) # pause between printed characters; adjust the interval as needed
|
28 |
+
|
29 |
+
|
30 |
+
def print_list(lines):
|
31 |
+
# clear the terminal output
|
32 |
+
print("\033c", end="")
|
33 |
+
|
34 |
+
# print the list of strings line by line
|
35 |
+
print('\n'.join(lines))
|
36 |
+
|
37 |
+
|
38 |
+
def get_args():
|
39 |
+
parser = argparse.ArgumentParser(description="Faster LyraLlama Demo")
|
40 |
+
|
41 |
+
parser.add_argument('--model-path', type=str, required=True,
|
42 |
+
help='Model Path, include config.ini and tokenizer files')
|
43 |
+
parser.add_argument('--tokenizer-path', type=str, default=None)
|
44 |
+
|
45 |
+
parser.add_argument(
|
46 |
+
'--data-type', type=str, metavar='TYPE', default='fp16',
|
47 |
+
choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
|
48 |
+
help='The data type to inference. If None, the data type follows the '
|
49 |
+
'checkpoint data type.')
|
50 |
+
|
51 |
+
parser.add_argument(
|
52 |
+
'--memopt_mode', type=int, default=0, choices=[0, 1],
|
53 |
+
help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
|
54 |
+
' 0: FP16 mode'
|
55 |
+
' 1: Use MEMOPT mode')
|
56 |
+
|
57 |
+
parser.add_argument(
|
58 |
+
'--quant-type', type=str, metavar='TYPE', default='int8',
|
59 |
+
choices=['int4', 'int8'],
|
60 |
+
help='The data type of quantization. Only used in MEMOPT.')
|
61 |
+
|
62 |
+
parser.add_argument(
|
63 |
+
'--kvqparams-fpath', type=str, required=False, default="",
|
64 |
+
help='File path of kv quantized params.')
|
65 |
+
|
66 |
+
parser.add_argument("--prompt", type=str, required=False)
|
67 |
+
parser.add_argument("--max-output-length", type=int, default=512)
|
68 |
+
parser.add_argument("--warmups", type=int, default=10)
|
69 |
+
parser.add_argument("--avgnums", type=int, default=10)
|
70 |
+
args = parser.parse_args()
|
71 |
+
|
72 |
+
print('\n=================== Arguments ===================')
|
73 |
+
for k, v in vars(args).items():
|
74 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
75 |
+
print('=================================================')
|
76 |
+
|
77 |
+
return args
|
78 |
+
|
79 |
+
|
80 |
+
def main():
|
81 |
+
args = get_args()
|
82 |
+
|
83 |
+
model = lyraLlama(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode, args.quant_type, args.kvqparams_fpath)
|
84 |
+
|
85 |
+
prompt_template = "Human: {}\n\nAssistant:" # xverse
|
86 |
+
# prompt_template = "<human>:{}\n<bot>:" # llama-ziya 13b
|
87 |
+
|
88 |
+
prompt = prompt_template.format(args.prompt)
|
89 |
+
|
90 |
+
test_batch_size = [1] # 8, 16, 32, 64
|
91 |
+
print("test_batch_size: ", test_batch_size)
|
92 |
+
|
93 |
+
for i, bs in enumerate(test_batch_size):
|
94 |
+
prompts = [prompt, ] * bs
|
95 |
+
|
96 |
+
# warmup gpu
|
97 |
+
for _ in range(args.warmups):
|
98 |
+
for finish, output_texts in model.stream_generate(prompts,
|
99 |
+
output_length=args.max_output_length,
|
100 |
+
top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False):
|
101 |
+
pass
|
102 |
+
|
103 |
+
start = perf_counter()
|
104 |
+
for _ in range(args.avgnums):
|
105 |
+
prev_sequence_lengths = None
|
106 |
+
stream_counter = 0
|
107 |
+
for finish, output_texts in model.stream_generate(prompts,
|
108 |
+
output_length=args.max_output_length,
|
109 |
+
top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False):
|
110 |
+
|
111 |
+
if len(output_texts) == 1:
|
112 |
+
print_string(output_texts[0], prev_sequence_lengths, finish)
|
113 |
+
prev_sequence_lengths = len(output_texts[0])
|
114 |
+
else:
|
115 |
+
print_list(output_texts)
|
116 |
+
|
117 |
+
stream_counter += 1
|
118 |
+
|
119 |
+
end = perf_counter()
|
120 |
+
cost = (end - start) / args.avgnums
|
121 |
+
|
122 |
+
input_output_texts = [prompt + ' ' + gtext for prompt,
|
123 |
+
gtext in zip(prompts, output_texts)]
|
124 |
+
tokens = 0
|
125 |
+
input_tokens = len(model.tokenizer.encode(prompt))
|
126 |
+
words = 0
|
127 |
+
for text in input_output_texts:
|
128 |
+
tokens += len(model.tokenizer.encode(text))
|
129 |
+
words += len(text)
|
130 |
+
|
131 |
+
avg_output_tokens = tokens / len(input_output_texts) - input_tokens
|
132 |
+
|
133 |
+
|
134 |
+
if __name__ == "__main__":
|
135 |
+
main()
|
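A smaller hedged sketch of consuming stream_generate without the terminal-clearing printer above: each yield carries (finish, output_texts), so printing only the newly generated suffix gives an incremental stream (model and prompts are assumed to exist as in the demo):

```python
prev_len = 0
for finish, output_texts in model.stream_generate(prompts,
                                                   output_length=256,
                                                   top_k=30, top_p=0.85,
                                                   temperature=1.0,
                                                   repetition_penalty=1.0,
                                                   do_sample=False):
    text = output_texts[0]
    # Print only the characters generated since the previous callback.
    print(text[prev_len:], end='', flush=True)
    prev_len = len(text)
    if finish:
        print()
```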
@@ -0,0 +1,123 @@
1 |
+
import argparse
|
2 |
+
import json
|
3 |
+
import random
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
from time import perf_counter
|
7 |
+
|
8 |
+
import sys
|
9 |
+
sys.path.append('../')
|
10 |
+
from lyra_llama import lyraLlama
|
11 |
+
|
12 |
+
|
13 |
+
def get_args():
|
14 |
+
parser = argparse.ArgumentParser(description="Faster LyraLlama Demo")
|
15 |
+
|
16 |
+
parser.add_argument('--model-path', type=str, required=True,
|
17 |
+
help='Model Path, include config.ini and tokenizer files')
|
18 |
+
parser.add_argument('--tokenizer-path', type=str, default=None)
|
19 |
+
|
20 |
+
parser.add_argument(
|
21 |
+
'--data-type', type=str, metavar='TYPE', default='fp16',
|
22 |
+
choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
|
23 |
+
help='The data type to inference. If None, the data type follows the '
|
24 |
+
'checkpoint data type.')
|
25 |
+
|
26 |
+
parser.add_argument(
|
27 |
+
'--memopt_mode', type=int, default=0, choices=[0, 1],
|
28 |
+
help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
|
29 |
+
' 0: FP16 mode'
|
30 |
+
' 1: Use MEMOPT mode')
|
31 |
+
|
32 |
+
parser.add_argument(
|
33 |
+
'--quant-type', type=str, metavar='TYPE', default='int8',
|
34 |
+
choices=['int4', 'int8'],
|
35 |
+
help='The data type of quantization. Only used in MEMOPT.')
|
36 |
+
|
37 |
+
parser.add_argument(
|
38 |
+
'--kvqparams-fpath', type=str, required=False, default="",
|
39 |
+
help='File path of kv quantized params.')
|
40 |
+
|
41 |
+
parser.add_argument("--prompt_filepath", type=str, required=True)
|
42 |
+
parser.add_argument("--max-output-length", type=int, default=512)
|
43 |
+
parser.add_argument("--warmups", type=int, default=10)
|
44 |
+
parser.add_argument("--avgnums", type=int, default=10)
|
45 |
+
args = parser.parse_args()
|
46 |
+
|
47 |
+
print('\n=================== Arguments ===================')
|
48 |
+
for k, v in vars(args).items():
|
49 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
50 |
+
print('=================================================')
|
51 |
+
|
52 |
+
return args
|
53 |
+
|
54 |
+
|
55 |
+
def main():
|
56 |
+
args = get_args()
|
57 |
+
|
58 |
+
model = lyraLlama(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode, args.quant_type, args.kvqparams_fpath)
|
59 |
+
|
60 |
+
with open(args.prompt_filepath, "rb") as f:
|
61 |
+
input_datas = json.loads(f.read())
|
62 |
+
|
63 |
+
used_input_data = input_datas[0]
|
64 |
+
|
65 |
+
prompt_template = "Human: {}\n\nAssistant:" # xverse
|
66 |
+
# prompt_template = "<human>:{}\n<bot>:" # llama-ziya 13b
|
67 |
+
|
68 |
+
test_batch_size = [1, 2, 4,] # 8, 16, 32, 64
|
69 |
+
print("test_batch_size: ", test_batch_size)
|
70 |
+
|
71 |
+
for i, bs in enumerate(test_batch_size):
|
72 |
+
all_use_prompts = []
|
73 |
+
all_output_texts = []
|
74 |
+
|
75 |
+
# warmup gpu
|
76 |
+
for _ in range(args.warmups):
|
77 |
+
prompts = [prompt_template.format( used_input_data['prompts'].format(*x) ) for x in random.choices(used_input_data['contents'], k=bs)]
|
78 |
+
output_texts = model.generate(
|
79 |
+
prompts, output_length=args.max_output_length,
|
80 |
+
top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
|
81 |
+
|
82 |
+
all_cost_s = 0.0
|
83 |
+
|
84 |
+
for _ in range(args.avgnums):
|
85 |
+
prompts = [prompt_template.format( used_input_data['prompts'].format(*x) ) for x in random.choices(used_input_data['contents'], k=bs)]
|
86 |
+
all_use_prompts.extend(prompts)
|
87 |
+
|
88 |
+
start = perf_counter()
|
89 |
+
output_texts = model.generate(
|
90 |
+
prompts, output_length=args.max_output_length,
|
91 |
+
top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
|
92 |
+
all_cost_s += perf_counter() - start
|
93 |
+
|
94 |
+
all_output_texts.extend(output_texts)
|
95 |
+
|
96 |
+
cost = all_cost_s / args.avgnums
|
97 |
+
|
98 |
+
input_output_texts = [prompt + ' ' + gtext for prompt,gtext in zip(all_use_prompts, all_output_texts)]
|
99 |
+
|
100 |
+
tokens = 0
|
101 |
+
avg_input_tokens = np.mean([len(model.tokenizer.encode(prompt)) for prompt in all_use_prompts])
|
102 |
+
|
103 |
+
words = 0
|
104 |
+
for text in input_output_texts:
|
105 |
+
tokens += len(model.tokenizer.encode(text))
|
106 |
+
words += len(text)
|
107 |
+
print(
|
108 |
+
f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Avg Input tokens: {avg_input_tokens}. Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
|
109 |
+
)
|
110 |
+
print(
|
111 |
+
f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
|
112 |
+
)
|
113 |
+
|
114 |
+
if i == 0:
|
115 |
+
for k in range(bs):
|
116 |
+
print(
|
117 |
+
f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
|
118 |
+
if k>2:
|
119 |
+
break
|
120 |
+
|
121 |
+
if __name__ == "__main__":
|
122 |
+
main()
|
123 |
+
|
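random_batch_demo.py indexes the first element of the JSON file passed via --prompt_filepath and expects it to carry a format template ('prompts') plus argument tuples ('contents') that are sampled per batch. A hypothetical file with that shape (field values made up purely for illustration) could be produced like this:

```python
import json

# Illustrative only: the structure random_batch_demo.py reads as input_datas[0].
example = [{
    "prompts": "请从创作背景和音乐风格两个角度介绍歌曲《{}》,歌手是{}。回答:",
    "contents": [
        ["幸福万年长", "汤灿"],
        ["仓颉", "五月天"],
    ],
}]

with open("random_prompts_demo.json", "w", encoding="utf-8") as f:
    json.dump(example, f, ensure_ascii=False, indent=2)
```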
@@ -0,0 +1,20 @@
export FMHA_VERSION=V2        # to use the older attention kernels, set FMHA_VERSION=OFF
export KV_CACHE_DTYPE=DEFAULT # to enable KV-cache int8, set KV_CACHE_DTYPE=INT8

model_path=ModelPath          # directory of the converted model (where 1-gpu-fp16.bin etc. live)

data_type=fp16                # precision the weights were saved in
memopt_mode=0                 # MEMOPT mode: 0/1
quant_type="int8"             # quantization precision: int4/int8
max_output_length=256
warmups=1
avgnums=1

python batch_demo.py --model-path $model_path\
                     --tokenizer-path $model_path\
                     --data-type $data_type\
                     --memopt-mode $memopt_mode\
                     --quant-type ${quant_type}\
                     --max-output-length $max_output_length\
                     --warmups $warmups\
                     --avgnums $avgnums
|
@@ -0,0 +1,21 @@
export FMHA_VERSION=V2        # to use the older attention kernels, set FMHA_VERSION=OFF
export KV_CACHE_DTYPE=DEFAULT # to enable KV-cache int8, set KV_CACHE_DTYPE=INT8
export LYRA_STREAM_CB_STEP=30 # number of decoding steps between streaming callbacks

model_path=ModelPath          # directory of the converted model (where 1-gpu-fp16.bin etc. live)

data_type=fp16                # precision the weights were saved in
memopt_mode=0                 # MEMOPT mode: 0/1
quant_type="int8"             # quantization precision: int4/int8
max_output_length=256
warmups=1
avgnums=1

python batch_stream_demo.py --model-path $model_path\
                            --tokenizer-path $model_path\
                            --data-type $data_type\
                            --memopt_mode $memopt_mode\
                            --quant-type ${quant_type}\
                            --max-output-length $max_output_length\
                            --warmups $warmups\
                            --avgnums $avgnums
|
@@ -0,0 +1,111 @@
1 |
+
from transformers import AutoTokenizer
|
2 |
+
from transformers import LlamaForCausalLM, AutoModelForCausalLM
|
3 |
+
from time import perf_counter
|
4 |
+
import torch
|
5 |
+
import argparse
|
6 |
+
|
7 |
+
def get_args():
|
8 |
+
parser = argparse.ArgumentParser(description="Torch model Demo")
|
9 |
+
|
10 |
+
parser.add_argument('--model-path', type=str, required=True,
|
11 |
+
help='Model Path, include config.ini and tokenizer files')
|
12 |
+
parser.add_argument('--tokenizer-path', type=str, default=None)
|
13 |
+
|
14 |
+
parser.add_argument("--prompt", type=str, required=False)
|
15 |
+
parser.add_argument("--max-output-length", type=int, default=512)
|
16 |
+
parser.add_argument("--warmups", type=int, default=10)
|
17 |
+
parser.add_argument("--avgnums", type=int, default=10)
|
18 |
+
args = parser.parse_args()
|
19 |
+
|
20 |
+
print('\n=================== Arguments ===================')
|
21 |
+
for k, v in vars(args).items():
|
22 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
23 |
+
print('=================================================')
|
24 |
+
|
25 |
+
return args
|
26 |
+
|
27 |
+
def main():
|
28 |
+
args = get_args()
|
29 |
+
device = torch.device("cuda")
|
30 |
+
|
31 |
+
prompt_template = "Human: {}\n\nAssistant:" # xverse
|
32 |
+
# prompt_template = "<human>:{}\n<bot>:" # llama-ziya 13b
|
33 |
+
|
34 |
+
prompt = prompt_template.format(args.prompt)
|
35 |
+
|
36 |
+
model = AutoModelForCausalLM.from_pretrained(args.model_path, torch_dtype=torch.float16, trust_remote_code=True).eval().to(device)
|
37 |
+
tokenizer = AutoTokenizer.from_pretrained(args.model_path, use_fast=False, trust_remote_code=True)
|
38 |
+
|
39 |
+
test_batch_size = [1, 8, 16, 32, 64]
|
40 |
+
print("test_batch_size: ", test_batch_size)
|
41 |
+
|
42 |
+
for i, bs in enumerate(test_batch_size):
|
43 |
+
|
44 |
+
prompts = [prompt] * bs
|
45 |
+
|
46 |
+
# warmup gpu
|
47 |
+
for _ in range(args.warmups):
|
48 |
+
input_ids = tokenizer(prompts, return_tensors="pt").input_ids.to(device)
|
49 |
+
generate_ids = model.generate(
|
50 |
+
input_ids,
|
51 |
+
max_new_tokens=args.max_output_length,
|
52 |
+
do_sample = False,
|
53 |
+
top_k = 30,
|
54 |
+
top_p = 0.85,
|
55 |
+
temperature = 1.0,
|
56 |
+
repetition_penalty=1.,
|
57 |
+
eos_token_id=2,
|
58 |
+
bos_token_id=1,
|
59 |
+
pad_token_id=0)
|
60 |
+
|
61 |
+
generate_ids = [output_ids[len(single_input_id):] for single_input_id, output_ids in zip(input_ids, generate_ids)]
|
62 |
+
outputs = tokenizer.batch_decode(generate_ids)
|
63 |
+
|
64 |
+
# test
|
65 |
+
start = perf_counter()
|
66 |
+
for _ in range(args.avgnums):
|
67 |
+
input_ids = tokenizer(prompts, return_tensors="pt").input_ids.to(device)
|
68 |
+
generate_ids = model.generate(
|
69 |
+
input_ids,
|
70 |
+
max_new_tokens=args.max_output_length,
|
71 |
+
do_sample = False,
|
72 |
+
top_k = 30,
|
73 |
+
top_p = 0.85,
|
74 |
+
temperature = 1.0,
|
75 |
+
repetition_penalty=1.,
|
76 |
+
eos_token_id=2,
|
77 |
+
bos_token_id=1,
|
78 |
+
pad_token_id=0)
|
79 |
+
|
80 |
+
generate_ids = [output_ids[len(single_input_id):] for single_input_id, output_ids in zip(input_ids, generate_ids)]
|
81 |
+
output_texts = tokenizer.batch_decode(generate_ids)
|
82 |
+
|
83 |
+
end = perf_counter()
|
84 |
+
cost = (end - start) / args.avgnums
|
85 |
+
|
86 |
+
# compute throughput
|
87 |
+
input_output_texts = [prompt + ' ' + gtext for prompt, gtext in zip(prompts, output_texts)]
|
88 |
+
tokens = 0
|
89 |
+
input_tokens = len(tokenizer.encode(prompt))
|
90 |
+
words = 0
|
91 |
+
for text in input_output_texts:
|
92 |
+
tokens += len(tokenizer.encode(text))
|
93 |
+
words += len(text)
|
94 |
+
|
95 |
+
avg_output_tokens = tokens / len(input_output_texts) - input_tokens
|
96 |
+
print(
|
97 |
+
f"\nBatch Size: {bs}, All tokens: {tokens}. Input tokens: {input_tokens}. Output tokens: {avg_output_tokens} Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
|
98 |
+
)
|
99 |
+
print(
|
100 |
+
f"Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
|
101 |
+
)
|
102 |
+
|
103 |
+
if i == 0:
|
104 |
+
for k in range(bs):
|
105 |
+
print(
|
106 |
+
f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
|
107 |
+
if k > 2:
|
108 |
+
break
|
109 |
+
|
110 |
+
if __name__ == "__main__":
|
111 |
+
main()
|
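One design note on the timing loop above: tokenizer.batch_decode runs inside the timed region and forces the generated tokens to be materialized, so the wall-clock numbers already cover the GPU work. If the decode step were ever moved out of the loop, an explicit synchronize would keep the measurement honest; a small sketch, not part of the original script:

```python
import torch
from time import perf_counter

torch.cuda.synchronize()   # make sure pending kernels are done before starting the clock
start = perf_counter()
# ... model.generate(...) calls being measured go here ...
torch.cuda.synchronize()   # wait for all queued GPU work before stopping the clock
cost = perf_counter() - start
```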
@@ -0,0 +1,6 @@
1 |
+
[
|
2 |
+
"歌曲名:《幸福万年长》;歌手名:汤灿;歌曲描述:汤灿的幸福万年长创作背景:2001年,汤灿决定推出一首能够贴近听众和潮流的民歌。为此,她邀请了创作过歌曲《为你》《快乐老家》的音乐人浮克合作,邀其担任该曲的制作工作。虽然浮克此前一直从事流行歌曲的工作,但他其实也是一位衷情民歌风格的音乐人,于是两人一拍即合,合作了该曲。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
|
3 |
+
"歌曲名:《小丑面具》;歌手名:韩庚;歌曲描述:韩庚的小丑面具的歌曲鉴赏:韩庚在这首歌化身为“小丑”,带上面具调侃这社会上的表面功夫,用幽默又神经质的方式批判愈形冷酷的人心。在这首独特的电子舞曲当中,韩庚尝试了各种不同的发声方式,冷笑、哭喊、啜泣……甚至用声乐融合鬼魅的方法演唱,让人不禁陷入他建构的虚幻氛围而随之起舞。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
|
4 |
+
"《Bela Lugosi's Dead 》是英国后朋克乐队Bauhaus的首张单曲,于 1979 年 8 月 6 日在Small Wonder厂牌上发行。[4]它通常被认为是第一张哥特式摇滚唱片。\n1979 年 1 月 26 日,“Bela Lugosi's Dead”在威灵伯勒的贝克录音室进行了六个小时的“录音室现场”录制。这是他们在乐队成立六周后一起录制的第一首歌曲。[6]所有四位乐队成员都被认为是这首歌的作者:主唱彼得·墨菲、吉他手丹尼尔·阿什、鼓手凯文·哈斯金斯和贝斯手大卫·J (大卫·哈斯金斯)。David J 声称这首歌的歌词是他写的。[5] “Bela Lugosi's Dead”的替代版本还包括他们下一首单曲“ Dark Entries ”的早期演示录音的一部分。\n\n在同一场会议中还录制了另外四首歌曲:“Boys”;“咬我的臀部”;“Some Faces”和斯卡雷鬼曲调“Harry”,这是关于Blondie主唱Deborah Harry的。[7] [8]关于这次会议,凯文·哈斯金斯 (Kevin Haskins) 说,“那里有力量流行音乐,还有斯卡。我们试图找到我们的声音。” [9]\n\n在那次录制期间录制的歌曲中(除了“Bela Lugosi's Dead”),只有“Harry”获得了官方发行;1982年作为单曲“ Kick in the Eye ”的B面。1979 年晚些时候在 Beck Studios 录制的《Boys》版本被用作原版单曲《Bela Lugosi's Dead》的 B 面。[10]其余曲目,包括“Boys”的原始录音,一直未发行,直到 2018 年The Bela Session以黑胶唱片和CD 形式发行,并可供乐队数字下载。[11]在额外的曲目中,《经典摇滚》杂志写道:“其余的材料发现乐队正在摸索方向,甚至触及了斯卡。”\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
|
5 |
+
"歌曲名:《仓颉》;歌手名:五月天;歌曲描述:五月天的仓颉的歌曲鉴赏:五月天 仓颉(2张)《仓颉》是一首写在文明即将消失前的情诗,陈信宏的词写得颇有味道。《仓颉》这样淡淡的歌曲,或许不够大气,但是陈信宏真诚的演唱足以令人感动,而且《仓颉》的歌词也写得很有哲理。这首歌曲朗朗上口的旋律和诗意的文字使得它很适合在KTV演唱。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:"
|
6 |
+
]
|
@@ -0,0 +1 @@
from .lyra_llama import lyraLlama
|
@@ -0,0 +1,34 @@
import dataclasses
from typing import Optional


@dataclasses.dataclass
class LyraLlamaParam:
    num_heads: int = 40
    size_per_head: int = 128
    inter_size: int = 13824
    num_layers: int = 40
    vocab_size: int = 39424
    start_id: Optional[int] = 1
    end_id: Optional[int] = 2
    tensor_para_size: int = 1
    pipeline_para_size: int = 1
    remove_padding: bool = True
    shared_contexts_ratio: float = 1.0
    layernorm_eps: float = 1e-6
    weights_data_type: str = "fp16"
    rotary_embedding: int = 128
    use_gptj_residual: bool = False

    def __post_init__(self):
        if not 0.0 <= self.shared_contexts_ratio <= 1.0:
            raise ValueError(
                f'Got an invalid value of shared_context_ratio '
                f'{self.shared_contexts_ratio} - range: [0.0, 1.0]')

    def asdict(self):
        return dataclasses.asdict(self)


LYRA_LLAMA_PARAM = LyraLlamaParam()
LIB_SO_PATH = '/usr/lib/ftlib/lyraOp.cpython-38-x86_64-linux-gnu.so'
|
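Since LyraLlamaParam is a plain dataclass used only as a fallback when no config.ini is found next to the weights, its defaults can be overridden per model; a tiny illustrative sketch (the numbers below are hypothetical and not tied to any shipped checkpoint):

```python
# Illustrative only: override the fallback hyper-parameters for a smaller model.
custom_param = LyraLlamaParam(num_heads=32, inter_size=11008,
                              num_layers=32, vocab_size=32000)
print(custom_param.asdict()["num_layers"])  # 32
```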
@@ -0,0 +1,232 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import configparser
|
4 |
+
import pathlib
|
5 |
+
import typing
|
6 |
+
import os
|
7 |
+
|
8 |
+
import torch
|
9 |
+
import transformers
|
10 |
+
from torch.nn.utils.rnn import pad_sequence
|
11 |
+
|
12 |
+
from .config import LYRA_LLAMA_PARAM, LIB_SO_PATH
|
13 |
+
from .model import LlamaModel
|
14 |
+
|
15 |
+
|
16 |
+
class lyraLlama:
|
17 |
+
def __init__(self, model_path, tokenizer_path=None, dtype='fp16', memopt_mode=0, quant_dtype="int4", kvqparams_fpath="") -> None:
|
18 |
+
self.model_path = model_path
|
19 |
+
self.tokenizer_path = tokenizer_path
|
20 |
+
self.kvqparams_fpath = kvqparams_fpath
|
21 |
+
|
22 |
+
self.dtype = dtype
|
23 |
+
|
24 |
+
self.memopt_mode = memopt_mode
|
25 |
+
self.quant_data_type = quant_dtype
|
26 |
+
|
27 |
+
self.model, self.tokenizer = self.load_model_and_tokenizer()
|
28 |
+
print("Got model and tokenizer")
|
29 |
+
|
30 |
+
def load_model_and_tokenizer(self):
|
31 |
+
if self.tokenizer_path is None:
|
32 |
+
tokenizer_path = self.model_path
|
33 |
+
else:
|
34 |
+
tokenizer_path = self.tokenizer_path
|
35 |
+
|
36 |
+
print(f'Loading tokenizer from {tokenizer_path}')
|
37 |
+
tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_path)
|
38 |
+
|
39 |
+
checkpoint_path = pathlib.Path(self.model_path)
|
40 |
+
config_path = checkpoint_path / 'config.ini'
|
41 |
+
|
42 |
+
if config_path.exists():
|
43 |
+
# Read model params from config.
|
44 |
+
cfg = configparser.ConfigParser()
|
45 |
+
cfg.read(config_path)
|
46 |
+
model_name = 'llama'
|
47 |
+
inference_data_type = self.dtype
|
48 |
+
if inference_data_type == None:
|
49 |
+
inference_data_type = cfg.get(model_name, "weight_data_type")
|
50 |
+
model_args = dict(
|
51 |
+
head_num=cfg.getint(model_name, 'head_num'),
|
52 |
+
kv_head_num=cfg.getint(model_name, 'kv_head_num', fallback=0),
|
53 |
+
size_per_head=cfg.getint(model_name, "size_per_head"),
|
54 |
+
inter_size=cfg.getint(model_name, 'inter_size'),
|
55 |
+
layer_num=cfg.getint(model_name, "num_layer"),
|
56 |
+
rotary_embedding_dim=cfg.getint(model_name, 'rotary_embedding'),
|
57 |
+
layernorm_eps=cfg.getfloat(model_name, 'layernorm_eps'),
|
58 |
+
vocab_size=cfg.getint(model_name, "vocab_size"),
|
59 |
+
start_id=cfg.getint(model_name, "start_id"),
|
60 |
+
end_id=cfg.getint(model_name, "end_id"),
|
61 |
+
weights_data_type=cfg.get(model_name, "weight_data_type"),
|
62 |
+
tensor_para_size=cfg.getint(model_name, "tensor_para_size"),
|
63 |
+
inference_data_type=inference_data_type,
|
64 |
+
rope_theta=cfg.getfloat(model_name, "rope_theta", fallback=float(10000.0)))
|
65 |
+
else:
|
66 |
+
inference_data_type = self.dtype
|
67 |
+
if inference_data_type == None:
|
68 |
+
inference_data_type = LYRA_LLAMA_PARAM.weights_data_type
|
69 |
+
model_args = dict(head_num=LYRA_LLAMA_PARAM.num_heads,
|
70 |
+
size_per_head=LYRA_LLAMA_PARAM.size_per_head,
|
71 |
+
inter_size=LYRA_LLAMA_PARAM.inter_size,
|
72 |
+
layer_num=LYRA_LLAMA_PARAM.num_layers,
|
73 |
+
rotary_embedding_dim=LYRA_LLAMA_PARAM.rotary_embedding,
|
74 |
+
layernorm_eps=LYRA_LLAMA_PARAM.layernorm_eps,
|
75 |
+
vocab_size=LYRA_LLAMA_PARAM.vocab_size,
|
76 |
+
start_id=LYRA_LLAMA_PARAM.start_id or tokenizer.bos_token_id,
|
77 |
+
end_id=LYRA_LLAMA_PARAM.end_id or tokenizer.eos_token_id,
|
78 |
+
weights_data_type=LYRA_LLAMA_PARAM.weights_data_type,
|
79 |
+
tensor_para_size=LYRA_LLAMA_PARAM.tensor_para_size,
|
80 |
+
inference_data_type=inference_data_type)
|
81 |
+
|
82 |
+
# update common parameters
|
83 |
+
model_args.update(dict(
|
84 |
+
lib_path=LIB_SO_PATH,
|
85 |
+
model_path=os.path.join(self.model_path, "1-gpu-fp16.bin"),
|
86 |
+
kvqparams_fpath=self.kvqparams_fpath, # kv quantized scales (calibrated)
|
87 |
+
max_seq_len=0, # for position seq embedding
|
88 |
+
pipeline_para_size=LYRA_LLAMA_PARAM.pipeline_para_size,
|
89 |
+
use_gptj_residual=LYRA_LLAMA_PARAM.use_gptj_residual,
|
90 |
+
memopt_mode=self.memopt_mode,
|
91 |
+
quant_data_type=self.quant_data_type
|
92 |
+
# shared_contexts_ratio=LYRA_LLAMA_PARAM.shared_contexts_ratio,
|
93 |
+
))
|
94 |
+
|
95 |
+
print('[LYRA][INFO] Load Our LYRA Highly Optimized LLaMA model')
|
96 |
+
for k, v in model_args.items():
|
97 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
98 |
+
|
99 |
+
# Check sanity and consistency between the model and tokenizer.
|
100 |
+
checklist = ['head_num', 'size_per_head', 'vocab_size', 'layer_num',
|
101 |
+
'tensor_para_size', 'tensor_para_size', 'weights_data_type']
|
102 |
+
if None in [model_args[k] for k in checklist]:
|
103 |
+
none_params = [p for p in checklist if model_args[p] is None]
|
104 |
+
print(f'[LYRA][WARNING] Found None parameters {none_params}. They must '
|
105 |
+
f'be provided either by config file or CLI arguments.')
|
106 |
+
if model_args['start_id'] != tokenizer.bos_token_id:
|
107 |
+
print('[LYRA][WARNING] Given start_id is not matched with the bos token '
|
108 |
+
'id of the pretrained tokenizer.')
|
109 |
+
if model_args['end_id'] not in (tokenizer.pad_token_id, tokenizer.eos_token_id):
|
110 |
+
print('[LYRA][WARNING] Given end_id is not matched with neither pad '
|
111 |
+
'token id nor eos token id of the pretrained tokenizer.')
|
112 |
+
|
113 |
+
print(f'Loading model from {self.model_path}')
|
114 |
+
model = LlamaModel(**model_args)
|
115 |
+
return model, tokenizer
|
116 |
+
|
117 |
+
def generate(self, prompts: typing.List[str] | str,
|
118 |
+
output_length: int = 512,
|
119 |
+
beam_width: int = 1,
|
120 |
+
top_k: typing.Optional[torch.IntTensor] = 1,
|
121 |
+
top_p: typing.Optional[torch.FloatTensor] = 1.0,
|
122 |
+
beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
|
123 |
+
temperature: typing.Optional[torch.FloatTensor] = 1.0,
|
124 |
+
len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
|
125 |
+
repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
|
126 |
+
presence_penalty: typing.Optional[torch.FloatTensor] = None,
|
127 |
+
min_length: typing.Optional[torch.IntTensor] = None,
|
128 |
+
bad_words_list: typing.Optional[torch.IntTensor] = None,
|
129 |
+
do_sample: bool = False,
|
130 |
+
return_output_length: bool = False,
|
131 |
+
return_cum_log_probs: int = 0):
|
132 |
+
if isinstance(prompts, str):
|
133 |
+
prompts = [prompts, ]
|
134 |
+
inputs = prompts
|
135 |
+
|
136 |
+
batch_size = len(inputs)
|
137 |
+
ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
|
138 |
+
ones_float = torch.ones(size=[batch_size], dtype=torch.float32)
|
139 |
+
|
140 |
+
# we must encode the raw prompt text one by one in order to compute the length of the original text.
|
141 |
+
input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
|
142 |
+
input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
|
143 |
+
# after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
|
144 |
+
input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)
|
145 |
+
|
146 |
+
random_seed = None
|
147 |
+
if do_sample:
|
148 |
+
random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)
|
149 |
+
|
150 |
+
outputs = self.model(start_ids=input_token_ids,
|
151 |
+
start_lengths=input_lengths,
|
152 |
+
output_len=output_length,
|
153 |
+
beam_width=beam_width,
|
154 |
+
top_k=top_k * ones_int,
|
155 |
+
top_p=top_p * ones_float,
|
156 |
+
beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
|
157 |
+
temperature=temperature * ones_float,
|
158 |
+
len_penalty=len_penalty * ones_float,
|
159 |
+
repetition_penalty=repetition_penalty * ones_float,
|
160 |
+
random_seed=random_seed,
|
161 |
+
return_output_length=return_output_length,
|
162 |
+
return_cum_log_probs=return_cum_log_probs)
|
163 |
+
|
164 |
+
if return_cum_log_probs > 0:
|
165 |
+
outputs = outputs[0] # output_token_ids.
|
166 |
+
|
167 |
+
# Slice the generated token ids of the 1st beam result.
|
168 |
+
# output = input tokens + generated tokens.
|
169 |
+
output_token_ids = [out[0, length:].cpu()
|
170 |
+
for out, length in zip(outputs, input_lengths)]
|
171 |
+
|
172 |
+
output_texts = self.tokenizer.batch_decode(
|
173 |
+
output_token_ids, skip_special_tokens=True)
|
174 |
+
|
175 |
+
return output_texts
|
176 |
+
|
177 |
+
def stream_generate(self, prompts: typing.List[str] | str,
|
178 |
+
output_length: int = 512,
|
179 |
+
beam_width: int = 1,
|
180 |
+
top_k: typing.Optional[torch.IntTensor] = 1,
|
181 |
+
top_p: typing.Optional[torch.FloatTensor] = 1.0,
|
182 |
+
beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
|
183 |
+
temperature: typing.Optional[torch.FloatTensor] = 1.0,
|
184 |
+
len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
|
185 |
+
repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
|
186 |
+
presence_penalty: typing.Optional[torch.FloatTensor] = None,
|
187 |
+
min_length: typing.Optional[torch.IntTensor] = None,
|
188 |
+
bad_words_list: typing.Optional[torch.IntTensor] = None,
|
189 |
+
do_sample: bool = False,
|
190 |
+
return_output_length: bool = False,
|
191 |
+
return_cum_log_probs: int = 0):
|
192 |
+
if isinstance(prompts, str):
|
193 |
+
prompts = [prompts, ]
|
194 |
+
|
195 |
+
inputs = prompts
|
196 |
+
|
197 |
+
batch_size = len(inputs)
|
198 |
+
ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
|
199 |
+
ones_float = torch.ones(size=[batch_size], dtype=torch.float32)
|
200 |
+
|
201 |
+
# we must encode the raw prompt text one by one in order to compute the length of the original text.
|
202 |
+
input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
|
203 |
+
input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
|
204 |
+
# after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
|
205 |
+
input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)
|
206 |
+
|
207 |
+
random_seed = None
|
208 |
+
if do_sample:
|
209 |
+
random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)
|
210 |
+
|
211 |
+
for finish, output_ids, sequence_length, output_cum_log_probs in self.model.stream_forward(start_ids=input_token_ids,
|
212 |
+
start_lengths=input_lengths,
|
213 |
+
output_len=output_length,
|
214 |
+
beam_width=beam_width,
|
215 |
+
top_k=top_k * ones_int,
|
216 |
+
top_p=top_p * ones_float,
|
217 |
+
beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
|
218 |
+
temperature=temperature * ones_float,
|
219 |
+
len_penalty=len_penalty * ones_float,
|
220 |
+
repetition_penalty=repetition_penalty * ones_float,
|
221 |
+
random_seed=random_seed,
|
222 |
+
return_output_length=return_output_length,
|
223 |
+
return_cum_log_probs=return_cum_log_probs):
|
224 |
+
|
225 |
+
# Slice the generated token ids of the 1st beam result.
|
226 |
+
# output = input tokens + generated tokens.
|
227 |
+
output_token_ids = [out[0, length:].cpu()
|
228 |
+
for out, length in zip(output_ids, input_lengths)]
|
229 |
+
output_texts = self.tokenizer.batch_decode(
|
230 |
+
output_token_ids, skip_special_tokens=True)
|
231 |
+
|
232 |
+
yield finish, output_texts
|
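Both generate() and stream_generate() accept either a single string or a list of prompts and always return the decoded texts as a list (a single string is wrapped into a one-element batch); a minimal sketch reusing a loaded model object:

```python
# A single prompt is treated as a batch of one.
texts = model.generate("Human: 你好\n\nAssistant:", output_length=32)
assert isinstance(texts, list) and len(texts) == 1
print(texts[0])
```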
@@ -0,0 +1,270 @@
1 |
+
from __future__ import print_function
|
2 |
+
|
3 |
+
import copy
|
4 |
+
import os
|
5 |
+
import pathlib
|
6 |
+
import typing
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
import torch
|
10 |
+
import torch.distributed as dist
|
11 |
+
import torch.nn as nn
|
12 |
+
|
13 |
+
import time
|
14 |
+
from queue import Queue
|
15 |
+
from threading import Thread
|
16 |
+
|
17 |
+
import sys
|
18 |
+
sys.path.append('/usr/lib/lyralib')
|
19 |
+
import lyraOp
|
20 |
+
|
21 |
+
str_type_map = {"fp32": torch.float32, "fp16": torch.float16, "bf16": torch.bfloat16}
|
22 |
+
|
23 |
+
class LlamaModel(nn.Module):
|
24 |
+
def __init__(self,
|
25 |
+
head_num,
|
26 |
+
size_per_head,
|
27 |
+
inter_size,
|
28 |
+
vocab_size,
|
29 |
+
rotary_embedding_dim,
|
30 |
+
start_id, end_id, layer_num,
|
31 |
+
max_seq_len: int,
|
32 |
+
layernorm_eps,
|
33 |
+
tensor_para_size: int,
|
34 |
+
pipeline_para_size: int,
|
35 |
+
use_gptj_residual,
|
36 |
+
                 lib_path: typing.Union[str, pathlib.Path],
                 model_path,
                 kvqparams_fpath: str = "",
                 memopt_mode: int = 0,
                 quant_data_type: str = "int8",
                 inference_data_type: str = "fp16",
                 weights_data_type: typing.Union[str, np.dtype] = np.float32,
                 kv_head_num=0,
                 rope_theta=10000.0):
        super().__init__()
        self.head_num = head_num
        self.kv_head_num = kv_head_num
        self.size_per_head = size_per_head
        self.inter_size = inter_size
        self.vocab_size = vocab_size
        self.rotary_embedding_dim = rotary_embedding_dim
        self.start_id = start_id
        self.end_id = end_id
        self.max_seq_len = max_seq_len
        self.layer_num = layer_num
        self.use_gptj_residual = use_gptj_residual
        self.layernorm_eps = layernorm_eps
        self.memopt_mode = memopt_mode
        self.quant_data_type = quant_data_type
        self.rope_theta = rope_theta

        # multi-gpu params
        self.tensor_para_size = tensor_para_size
        self.pipeline_para_size = pipeline_para_size
        self.build_model = False
        self.weights_data_type = weights_data_type
        self.inference_data_type = inference_data_type

        # queue for streaming
        self.que = Queue()
        self.threads = [None] * self.tensor_para_size

        assert torch.cuda.is_available(), "CUDA is required for this model."

        assert head_num % tensor_para_size == 0, "head_num must be a multiple of tensor_para_size."
        assert layer_num % pipeline_para_size == 0, "layer_num must be a multiple of pipeline_para_size."

        # Load the C++ model into Pytorch model.
        # torch.classes.load_library(os.path.abspath(lib_path))

        # Prepare for tensor/pipeline parallel
        try:
            dist.init_process_group(backend='mpi')
        except:
            print("[INFO] WARNING: Have initialized the process group")
        self.rank = dist.get_rank()
        self.device_count = torch.cuda.device_count()
        self.device = self.rank % self.device_count
        torch.cuda.set_device(self.device)

        world_size = dist.get_world_size()
        # print(tensor_para_size * pipeline_para_size)
        assert world_size == tensor_para_size * pipeline_para_size, "tensor_para_size * pipeline_para_size must be equal to world_size."

        self.tensor_para_rank = self.rank % self.tensor_para_size
        self.pipeline_para_rank = self.rank // self.tensor_para_size

        if self.kv_head_num == 0:
            self.kv_head_num = self.head_num

        self.model = lyraOp.LyraLlama(
            self.head_num, self.size_per_head, self.inter_size,
            self.layer_num,
            self.vocab_size,
            self.rotary_embedding_dim,
            self.layernorm_eps,
            self.start_id, self.end_id,
            self.tensor_para_size, self.pipeline_para_size,
            self.max_seq_len,
            self.use_gptj_residual,
            self.memopt_mode,
            self.quant_data_type,
            model_path,
            kvqparams_fpath,
            self.weights_data_type,
            self.inference_data_type,
            self.kv_head_num,
            self.rope_theta)

        self.build_model = True
        torch.cuda.empty_cache()

    def forward(self,
                start_ids: torch.Tensor,
                start_lengths: torch.Tensor,
                output_len,
                beam_width=1,
                top_k: torch.Tensor = None,
                top_p: torch.Tensor = None,
                beam_search_diversity_rate: torch.Tensor = None,
                temperature: torch.Tensor = None,
                len_penalty: torch.Tensor = None,
                repetition_penalty: torch.Tensor = None,
                random_seed: torch.Tensor = None,
                return_output_length=False,
                return_cum_log_probs=0):

        input_len = start_ids.size(1)
        assert input_len > 0, "input len must be larger than zero. For an unconditional case, use start_id as the first token."

        # Inputs to device
        input_ids = start_ids.cuda(self.device)
        input_lengths = start_lengths.cuda(self.device)
        # outputs: output_ids, output_lengths, output_cum_log_probs (optional)
        outputs = self.model.forward(input_ids,
                                     input_lengths,
                                     output_len,
                                     beam_width,  # optional, can be None
                                     top_k,  # optional, can be None
                                     top_p,  # optional, can be None
                                     beam_search_diversity_rate,  # optional, can be None
                                     temperature,  # optional, can be None
                                     len_penalty,  # optional, can be None
                                     repetition_penalty,  # optional, can be None
                                     random_seed,  # optional, can be None
                                     return_cum_log_probs)  # optional, can be None

        if return_cum_log_probs == 0:
            output_ids, output_lengths = outputs
        else:
            output_ids, output_lengths, output_cum_log_probs = outputs
        if return_output_length:
            if return_cum_log_probs > 0:
                return output_ids, output_lengths, output_cum_log_probs
            else:
                return output_ids, output_lengths
        else:
            return output_ids

    def set_input_tensor(self, input_tensor):
        """Set input tensor to be used instead of forward()'s input.

        When doing pipeline parallelism the input from the previous
        stage comes from communication, not from the input, so the
        model's forward_step_func won't have it. This function is thus
        used by internal code to bypass the input provided by the
        forward_step_func"""
        self.input_tensor = input_tensor

    def _forward_callback(self, output_ids, seq_lengths, ctx):
        self.que.put((False, (list(output_ids), list(seq_lengths))))

    def _tensormap_dict_to_py_dict(self, tensormap_dict: lyraOp.TensorMap):
        """map torch tensormap to py dict."""
        ret = dict()
        for k, v in tensormap_dict.items():
            ret[k] = v

        return ret

    def stream_forward(self,
                       start_ids: torch.Tensor,
                       start_lengths: torch.Tensor,
                       output_len,
                       beam_width=1,
                       top_k: torch.Tensor = None,
                       top_p: torch.Tensor = None,
                       beam_search_diversity_rate: torch.Tensor = None,
                       temperature: torch.Tensor = None,
                       len_penalty: torch.Tensor = None,
                       repetition_penalty: torch.Tensor = None,
                       random_seed: torch.Tensor = None,
                       return_output_length=False,
                       return_cum_log_probs=0):

        # Register callback func to model
        self.model.registerCallback(self._forward_callback)

        batch_size = start_ids.size(0)
        input_len = start_ids.size(1)
        assert input_len > 0, "input len must be larger than zero. For an unconditional case, use start_id as the first token."

        # Inputs to device
        input_ids = start_ids.cuda(self.device)
        input_lengths = start_lengths.cuda(self.device)
        # outputs: output_ids, output_lengths, output_cum_log_probs (optional)

        # Init thread of model inference
        def _func(enque_output):
            outputs = self.model.forward(input_ids,
                                         input_lengths,
                                         output_len,
                                         beam_width,  # optional, can be None
                                         top_k,  # optional, can be None
                                         top_p,  # optional, can be None
                                         beam_search_diversity_rate,  # optional, can be None
                                         temperature,  # optional, can be None
                                         len_penalty,  # optional, can be None
                                         repetition_penalty,  # optional, can be None
                                         random_seed,  # optional, can be None
                                         return_cum_log_probs)  # optional, can be None
            if enque_output:
                self.que.put((True, (outputs[0].tolist(), outputs[1].tolist())))

        # Start thread of model inference
        t = Thread(target=_func,
                   args=(True,),
                   daemon=True)
        t.start()
        self.threads[0] = t

        # Generate streaming output
        while True:
            while self.que.qsize() > 1:
                self.que.get()

            finish, outputs = self.que.get()

            output_ids, sequence_length = outputs
            output_ids_tensor = torch.tensor(output_ids).view(batch_size, beam_width, -1)
            sequence_length_tensor = torch.tensor(sequence_length).view(batch_size, beam_width)

            if return_output_length:
                if return_cum_log_probs > 0:
                    yield finish, output_ids_tensor, sequence_length_tensor, None
                else:
                    yield finish, output_ids_tensor, sequence_length_tensor, None
            else:
                yield finish, output_ids_tensor, None, None

            if finish:
                for t in self.threads:
                    t.join()
                while self.que.qsize() > 0:
                    self.que.get()
                break

        self.model.unRegisterCallback()
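For orientation, the sketch below shows how a caller might consume the streaming generator defined above. It is not part of the committed code: the `model` object is an assumption (any instance exposing the `stream_forward` signature shown in this file), and the helper name `consume_stream` is hypothetical.

```python
import torch

def consume_stream(model, start_ids: torch.Tensor, start_lengths: torch.Tensor, output_len: int = 64):
    # Assumes `model` exposes stream_forward() as defined above; with
    # return_output_length=True each step yields
    # (finish, output_ids, sequence_lengths, cum_log_probs).
    for finish, output_ids, seq_lens, _ in model.stream_forward(
            start_ids, start_lengths, output_len,
            beam_width=1, return_output_length=True):
        # output_ids has shape (batch, beam, current_seq_len); decode or
        # display the partial sequences here.
        print("finish" if finish else "partial", tuple(output_ids.shape))
        if finish:
            break
```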
@@ -0,0 +1,27 @@
## `lyrallms` capability matrix

| Model | Attention: Unfused | Attention: FlashAttn2 | MEMOPT: W4A16 | MEMOPT: W8A16 | KV cache: FP16 | KV cache: INT8 |
|:----|:----|:----|:----|:----|:----|:----|
| LLaMA | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| XVERSE | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| Baichuan 1/2 (7B and 13B) | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ |
| ChatGLM | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ |
| BELLE | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ |

## Using `lyrallms`

### Calibration

See the [README.md](./calibration/README.md) in the `calibration` folder.

### Converting and running accelerated models from Python

#### LLaMA

See the [README.md](./LyraLlamaPy/README.md) in the `LyraLlamaPy` folder.

#### Baichuan

See the [README.md](./LyraBaichuanPy/README.md) in the `LyraBaichuanPy` folder.

File without changes
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cee68cc4fc1b5d25cd39f3bd64ae7ee25f15035892cbbedb10e0b980d9afd87f
size 27793336320
@@ -0,0 +1,14 @@
[baichuan]
model_name = Baichuan2_13B_base
head_num = 40
size_per_head = 128
inter_size = 13696
num_layer = 40
rotary_embedding = 128
layernorm_eps = 1e-06
vocab_size = 125696
start_id = 1
end_id = 2
tensor_para_size = 1
weight_data_type = fp16
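The `config.ini` above is a standard INI file, so it can be read with Python's built-in `configparser`. A minimal sketch follows; the path is an assumption based on this commit's directory layout.

```python
import configparser

# Path assumed from the repository layout shown in this commit.
cfg = configparser.ConfigParser()
cfg.read("models/Baichuan/Baichuan2_13B_Base/config.ini")

section = cfg["baichuan"]
head_num = section.getint("head_num")              # 40
inter_size = section.getint("inter_size")          # 13696
layernorm_eps = section.getfloat("layernorm_eps")  # 1e-06
weight_dtype = section.get("weight_data_type")     # "fp16"
print(head_num, inter_size, layernorm_eps, weight_dtype)
```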
@@ -0,0 +1,28 @@
{
  "_from_model_config": true,
  "architectures": [
    "BaichuanForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_baichuan.BaichuanConfig",
    "AutoModelForCausalLM": "modeling_baichuan.BaichuanForCausalLM"
  },
  "bos_token_id": 1,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 13696,
  "model_max_length": 4096,
  "model_type": "baichuan",
  "num_attention_heads": 40,
  "num_hidden_layers": 40,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.29.2",
  "use_cache": true,
  "vocab_size": 125696
}
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "pad_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  }
}
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:79452955be6b419a65984273a9f08af86042e1c2a75ee3ba989cbf620a133cc2
size 2001107
@@ -0,0 +1,46 @@
{
  "add_bos_token": false,
  "add_eos_token": false,
  "auto_map": {
    "AutoTokenizer": [
      "tokenization_baichuan.BaichuanTokenizer",
      null
    ]
  },
  "bos_token": {
    "__type": "AddedToken",
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "clean_up_tokenization_spaces": false,
  "eos_token": {
    "__type": "AddedToken",
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "model_max_length": 4096,
  "pad_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "sp_model_kwargs": {},
  "tokenizer_class": "BaichuanTokenizer",
  "unk_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  }
}
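The `tokenizer_config.json` above wires the custom tokenizer in through `auto_map`, so the standard `transformers` loader can resolve it when remote code is trusted. A minimal sketch, assuming `transformers` is installed and `tokenization_baichuan.py` (shipped in `lyrallms/LyraBaichuanPy/`) has been placed alongside these files; the path is taken from this commit's layout.

```python
from transformers import AutoTokenizer

# trust_remote_code lets AutoTokenizer follow the auto_map entry and load
# tokenization_baichuan.BaichuanTokenizer from the model directory.
tokenizer = AutoTokenizer.from_pretrained(
    "models/Baichuan/Baichuan2_13B_Base",
    trust_remote_code=True,
)
print(tokenizer.encode("hello"))
```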
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:182aeae174da2d23af945c93ab92a6ba48ccf9bbc02474096ba950dd7e17bdd2
size 27793336320
@@ -0,0 +1,14 @@
[baichuan]
model_name = Baichuan2_13B_chat
head_num = 40
size_per_head = 128
inter_size = 13696
num_layer = 40
rotary_embedding = 128
layernorm_eps = 1e-06
vocab_size = 125696
start_id = 1
end_id = 2
tensor_para_size = 1
weight_data_type = fp16
@@ -0,0 +1,29 @@
{
  "_from_model_config": true,
  "architectures": [
    "BaichuanForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_baichuan.BaichuanConfig",
    "AutoModelForCausalLM": "modeling_baichuan.BaichuanForCausalLM"
  },
  "tokenizer_class": "BaichuanTokenizer",
  "bos_token_id": 1,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 13696,
  "model_max_length": 4096,
  "model_type": "baichuan",
  "num_attention_heads": 40,
  "num_hidden_layers": 40,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.29.2",
  "use_cache": true,
  "vocab_size": 125696
}
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "pad_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  }
}
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:79452955be6b419a65984273a9f08af86042e1c2a75ee3ba989cbf620a133cc2
size 2001107
@@ -0,0 +1,46 @@
{
  "add_bos_token": false,
  "add_eos_token": false,
  "auto_map": {
    "AutoTokenizer": [
      "tokenization_baichuan.BaichuanTokenizer",
      null
    ]
  },
  "bos_token": {
    "__type": "AddedToken",
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "clean_up_tokenization_spaces": false,
  "eos_token": {
    "__type": "AddedToken",
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "model_max_length": 4096,
  "pad_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "sp_model_kwargs": {},
  "tokenizer_class": "BaichuanTokenizer",
  "unk_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  }
}
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1f83ab15fe6cd2d93be29248e87051db3b62921d3093922d7e73c9817bc0409b
size 15011946496
@@ -0,0 +1,14 @@
[baichuan]
model_name = Baichuan2_7B_base
head_num = 32
size_per_head = 128
inter_size = 11008
num_layer = 32
rotary_embedding = 128
layernorm_eps = 1e-06
vocab_size = 125696
start_id = 1
end_id = 2
tensor_para_size = 1
weight_data_type = fp16
@@ -0,0 +1,28 @@
{
  "architectures": [
    "BaichuanForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_baichuan.BaichuanConfig",
    "AutoModelForCausalLM": "modeling_baichuan.BaichuanForCausalLM"
  },
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 4096,
  "model_max_length": 4096,
  "model_type": "baichuan",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "_from_model_config": true,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.29.2",
  "use_cache": true,
  "vocab_size": 125696
}