carsonhxsu commited on
Commit
8453337
1 Parent(s): 2ebc29a

# This is a combination of 22 commits.

Browse files

# This is the 1st commit message:

Init

# This is the commit message #2:

[Enhancement] Update README

# This is the commit message #3:

Upload SM80 so files

# This is the commit message #4:

Track model files

# This is the commit message #5:

Update gitignore

# This is the commit message #6:

Upload converted XVERSE 13B Chat model files

# This is the commit message #7:

Upload converted Baichuan2 13B Chat model files

# This is the commit message #8:

Upload converted Baichuan2 7B model files

# This is the commit message #9:

Upload converted Baichuan2 13B Base model files

# This is the commit message #10:

Upload converted Baichuan 7B Base model files

# This is the commit message #11:

Upload converted Baichuan 13B Chat model files

# This is the commit message #12:

Upload converted Baichuan 13B Base model files

# This is the commit message #13:

Upload converted LLaMA Ziya 13B model files

# This is the commit message #14:

Upload converted Yi 6B model files

# This is the commit message #15:

Update README and .gitattributes

# This is the commit message #16:

Remove SM70 so files

# This is the commit message #17:

Update README

# This is the commit message #18:

Update SM80 Cuda11 so file

# This is the commit message #19:

Update README

# This is the commit message #20:

Update Python codes

# This is the commit message #21:

Update Python codes

# This is the commit message #22:

Update gitattributes

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. .gitignore +8 -0
  3. README.md +157 -0
  4. lyralib/.gitattributes +3 -0
  5. lyralib/.gitkeep +0 -0
  6. lyralib/sm80/cuda11/lyraOp.cpython-38-x86_64-linux-gnu.so +3 -0
  7. lyralib/sm80/cuda12/lyraOp.cpython-38-x86_64-linux-gnu.so +3 -0
  8. lyrallms/LyraBaichuanPy/README.md +88 -0
  9. lyrallms/LyraBaichuanPy/configuration_baichuan.py +69 -0
  10. lyrallms/LyraBaichuanPy/examples/README.md +105 -0
  11. lyrallms/LyraBaichuanPy/examples/batch_demo.py +103 -0
  12. lyrallms/LyraBaichuanPy/examples/batch_stream_demo.py +101 -0
  13. lyrallms/LyraBaichuanPy/examples/random_batch_demo.py +116 -0
  14. lyrallms/LyraBaichuanPy/examples/varlen_prompts.json +6 -0
  15. lyrallms/LyraBaichuanPy/generation_utils.py +83 -0
  16. lyrallms/LyraBaichuanPy/lyra_baichuan/__init__.py +1 -0
  17. lyrallms/LyraBaichuanPy/lyra_baichuan/config.py +34 -0
  18. lyrallms/LyraBaichuanPy/lyra_baichuan/lyra_baichuan.py +391 -0
  19. lyrallms/LyraBaichuanPy/lyra_baichuan/model.py +258 -0
  20. lyrallms/LyraBaichuanPy/lyra_baichuan/tokenization_baichuan.py +232 -0
  21. lyrallms/LyraLlamaPy/README.md +75 -0
  22. lyrallms/LyraLlamaPy/examples/README.md +114 -0
  23. lyrallms/LyraLlamaPy/examples/batch_demo.py +109 -0
  24. lyrallms/LyraLlamaPy/examples/batch_stream_demo.py +135 -0
  25. lyrallms/LyraLlamaPy/examples/random_batch_demo.py +123 -0
  26. lyrallms/LyraLlamaPy/examples/test.sh +20 -0
  27. lyrallms/LyraLlamaPy/examples/test_stream.sh +21 -0
  28. lyrallms/LyraLlamaPy/examples/torch_benchmark.py +111 -0
  29. lyrallms/LyraLlamaPy/examples/varlen_prompts.json +6 -0
  30. lyrallms/LyraLlamaPy/lyra_llama/__init__.py +1 -0
  31. lyrallms/LyraLlamaPy/lyra_llama/config.py +34 -0
  32. lyrallms/LyraLlamaPy/lyra_llama/lyra_llama.py +232 -0
  33. lyrallms/LyraLlamaPy/lyra_llama/model.py +270 -0
  34. lyrallms/README.md +27 -0
  35. models/.gitkeep +0 -0
  36. models/Baichuan/Baichuan2_13B_Base/1-gpu-fp16.bin +3 -0
  37. models/Baichuan/Baichuan2_13B_Base/config.ini +14 -0
  38. models/Baichuan/Baichuan2_13B_Base/config.json +28 -0
  39. models/Baichuan/Baichuan2_13B_Base/special_tokens_map.json +30 -0
  40. models/Baichuan/Baichuan2_13B_Base/tokenizer.model +3 -0
  41. models/Baichuan/Baichuan2_13B_Base/tokenizer_config.json +46 -0
  42. models/Baichuan/Baichuan2_13B_Chat/1-gpu-fp16.bin +3 -0
  43. models/Baichuan/Baichuan2_13B_Chat/config.ini +14 -0
  44. models/Baichuan/Baichuan2_13B_Chat/config.json +29 -0
  45. models/Baichuan/Baichuan2_13B_Chat/special_tokens_map.json +30 -0
  46. models/Baichuan/Baichuan2_13B_Chat/tokenizer.model +3 -0
  47. models/Baichuan/Baichuan2_13B_Chat/tokenizer_config.json +46 -0
  48. models/Baichuan/Baichuan2_7B_Base/1-gpu-fp16.bin +3 -0
  49. models/Baichuan/Baichuan2_7B_Base/config.ini +14 -0
  50. models/Baichuan/Baichuan2_7B_Base/config.json +28 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ models/* filter=lfs diff=lfs merge=lfs -text
37
+ lyralib/* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ *~
2
+ *.o
3
+ *build*/
4
+ __pycache__/
5
+ .vscode
6
+ .idea
7
+ .cache
8
+ **/.ipynb_checkpoints/
README.md CHANGED
@@ -1,3 +1,160 @@
1
  ---
2
  license: mit
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: mit
3
+ language: en
4
+ tags:
5
+ - LLM
6
+ - LLaMA
7
+ - Baichuan
8
+ - Baichuan2
9
+ - XVERSE
10
  ---
11
+ # Model Card for lyraLLMs
12
+
13
+ ## Introduction
14
+
15
+ We have released **lyraLLMs**, a highly optimized and easy-to-use inference engine for LLMs.
16
+
17
+ **lyraLLMs** is suitable for NVIDIA GPUs:
18
+ - Volta (V100)
19
+ - Turing (T4)
20
+ - Ampere (A100/A10)
21
+ - Ada Lovelace (RTX 4090, etc.)
22
+
23
+ **lyraLLMs** supports many popular HuggingFace models as follows:
24
+ - [BELLE](https://huggingface.co/TMElyralab/lyraBELLE)
25
+ - [ChatGLM](https://huggingface.co/TMElyralab/lyraChatGLM)
26
+ - LLaMA
27
+ - LLaMA 2
28
+ - XVERSE
29
+ - Baichuan 1 & 2
30
+
31
+ **lyraLLMs** is fast, memory-efficient & easy to use with:
32
+ - State-of-the-art throughput (up to 7K tokens/s for LLaMA 13B)
33
+ - Efficient memory usage of attention with FlashAttention2
34
+ - Quantization: MEMOPT mode (W8A16, W4A16), KVCache Int8
35
+ - Easy-to-use Python API to serve LLMs
36
+ - Streaming outputs
37
+
38
+ If you like our work and consider to join us, feel free to drop a line at [email protected]
39
+
40
+ ## Speed
41
+
42
+ ### Settings
43
+ * Evaluated at tokens/s (input + output)
44
+ * Test on A100 40G, CUDA 12.0
45
+ * Enable the use of MEMOPT mode and KVCache Int8
46
+
47
+ ### Throughputs
48
+
49
+ ### XVERSE-13B-Chat
50
+
51
+ #### Input
52
+ 北京的景点:故宫、天坛、万里长城等。\n深圳的景点:
53
+
54
+ | Version | Batch Size 1 | Batch Size 64 | Batch Size 128 | Batch Size 256 | Batch Size 512 |
55
+ | --- | --- | --- | --- | --- | --- |
56
+ | Torch 2.1.0 | 52.9 | 2308.1 | OOM | | |
57
+ | lyraXVERSE | 200.4 | 4624.8 | 5759.7 | 6075.6 | 5733 |
58
+
59
+ ### Baichuan2-7B-Base
60
+
61
+ #### Input
62
+ 北京的景点:登鹳雀楼->王之涣\n夜雨寄北->
63
+
64
+ | Version | Batch Size 1 | Batch Size 8 | Batch Size 16 | Batch Size 32 | Batch Size 64 |
65
+ | --- | --- | --- | --- | --- | --- |
66
+ | Torch 2.0.1 | 41.2 | 323.2 | 640.0 | 1256.8 | 2231.0 |
67
+ | lyraBaichuan | 125.9 | 948.1 | 1749.3 | 2974.0 | 4370.1 |
68
+
69
+ ### Baichuan2-13B-Base
70
+
71
+ #### Input
72
+ 北京的景点:登鹳雀楼->王之涣\n夜雨寄北->
73
+
74
+ | Version | Batch Size 1 | Batch Size 8 | Batch Size 16 | Batch Size 32 | Batch Size 64 |
75
+ | --- | --- | --- | --- | --- | --- |
76
+ | Torch 2.0.1 | 40.9 | 307.9 | 555.6 | 1010.4 | 1601.0 |
77
+ | lyraBaichuan | 80.0 | 568.2 | 1124.4 | 1942.6 | 2828.0 |
78
+
79
+ ### Yi-6B
80
+
81
+ #### Input
82
+ \# write the quick sort algorithm
83
+
84
+ | Version | Batch Size 1 | Batch Size 8 | Batch Size 16 | Batch Size 32 | Batch Size 64 |
85
+ | --- | --- | --- | --- | --- | --- |
86
+ | Torch 2.1.0 | 31.4 | 247.5 | 490.4 | 987.2 | 1796.3 |
87
+ | lyraLLaMA | 93.8 | 735.6 | 2339.8 | 3020.9 | 4630.8 |
88
+
89
+ ### Yi-34B
90
+
91
+ Due to limitation of VRAM, we cannot profile the throughputs of Yi-34B on A100 40G using Torch.
92
+
93
+ #### Input
94
+ Let me tell you an interesting story about cat Tom and mouse Jerry,
95
+
96
+ | Version | Batch Size 1 | Batch Size 8 | Batch Size 16 | Batch Size 32 | Batch Size 64 |
97
+ | --- | --- | --- | --- | --- | --- |
98
+ | lyraLLaMA | 52.5 | 399.4 | 753.0 | 1138.2 | 1926.2 |
99
+
100
+ ## Usage
101
+
102
+ ### Environment (Docker recommended)
103
+
104
+ - For Cuda 11.X: we recommend ```nvcr.io/nvidia/pytorch:22.12-py3```
105
+ - For Cuda 12.0: we recommend ```nvcr.io/nvidia/pytorch:23.02-py3```
106
+
107
+ ```bash
108
+ docker pull nvcr.io/nvidia/pytorch:23.02-py3
109
+ docker run --rm -it --gpus all -v ./:/lyraLLMs nvcr.io/nvidia/pytorch:23.02-py3
110
+
111
+ pip install -r requirements.txt
112
+ ```
113
+
114
+ ### Convert Models
115
+
116
+ We have released multiple optimized models converted from original HuggingFace ones:
117
+ - ChatGLM-6B
118
+ - XVERSE-13B-Chat
119
+ - LLaMA-Ziya-13B
120
+ - Baichuan-7B, Baichuan-13B-Base, Baichuan-13B-Chat, Baichuan2-7B-Base, Baichuan2-7B-Chat, Baichuan2-13B-Base and lyraBaichuan2-13B-Chat
121
+ - Yi-6B
122
+
123
+ Feel free to contact us if you would like to convert a finetuned version of LLMs.
124
+
125
+ ### Inference
126
+
127
+ Refer to [README.md](./lyrallms/README.md) for inference of converted models with **lyraLLMs**.
128
+
129
+ ### Python Demo
130
+
131
+ ```python
132
+ from lyra_llama import lyraLlama
133
+
134
+ model_path = 'XXX' # 包含转换后的模型参数,配置,tokenizer文件目录
135
+ data_type = 'fp16'
136
+ memopt_mode = 0 # 如需使用MEMOPT模式推理, memopt_mode=1
137
+
138
+ model = lyraLlama(model_path, data_type, memopt_mode)
139
+
140
+ prompts = '列出3个不同的机器学习算法,并说明它们的适用范围.'
141
+ prompts = [prompts,] * 64
142
+
143
+ output_texts = model.generate(prompts, output_length=150, do_sample=False, top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0)
144
+ print(output_texts)
145
+
146
+ ```
147
+
148
+ ## Citation
149
+ ``` bibtex
150
+ @Misc{lyraLLMs2024,
151
+   author =       {Kangjian Wu, Zhengtao Wang, Yibo Lu, Haoxiong Su, Bin Wu},
152
+   title =        {lyraLLMs: A highly optimized and easy-to-use inference engine for LLMs},
153
+   howpublished = {\url{https://huggingface.co/TMElyralab/lyraLLMs}},
154
+   year =         {2024}
155
+ }
156
+ ```
157
+
158
+ ## Report bug
159
+ - start a discussion to report any bugs!--> https://huggingface.co/TMElyralab/lyraLLMs/discussions
160
+ - report bug with a `[bug]` mark in the title.
lyralib/.gitattributes ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:213543e928e2727580c3f1dcbddfaf56b7a778ec7dfb29f4b3b66ab0009bfd0b
3
+ size 41
lyralib/.gitkeep ADDED
File without changes
lyralib/sm80/cuda11/lyraOp.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f50ec3dbe390bffc052e754a294614025fb423b23c6bc8a26a8dadf52d1b29c2
3
+ size 233586480
lyralib/sm80/cuda12/lyraOp.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b8189f3321cd0578da920f15d4b74e7a96be7556731e1de3cb313b8700e3c45
3
+ size 234352496
lyrallms/LyraBaichuanPy/README.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 模型和环境
2
+
3
+ ### 构建环境
4
+ ```shell
5
+ # 本地获取ngc pytorch原生镜像
6
+ docker pull nvcr.io/nvidia/pytorch:23.02-py3
7
+
8
+ # 启动容器
9
+ docker run --gpus all -itd --rm --name lyrallms_cu12 nvcr.io/nvidia/pytorch:23.02-py3
10
+ docker exec -it lyrallms_cu12 bash
11
+ ```
12
+
13
+ 获取代码后安装依赖
14
+ ```shell
15
+ pip install -r requirements.txt
16
+ ```
17
+
18
+ 将`lyralib`下对应cuda版本的[so文件](../../lyralib/sm80) 复制到`/usr/lib/lyralib`下。
19
+
20
+ ## 推理使用
21
+
22
+ ### 使用核心片段
23
+
24
+ ```python
25
+ from lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B
26
+
27
+ model_path = 'XXX' # 包含转换后的模型参数,配置,tokenizer文件目录
28
+ tokenizer_path = 'XXX'
29
+ data_type = 'fp16' # 推理精度
30
+ memopt_mode = 1
31
+
32
+ # 加载加速后的模型,C++ 底层已经掩盖,依赖加速编译的 /usr/lib/lyralib 下的 so 库,已经打在镜像中
33
+ # 模型加载需要花一些时间,建议把下载的模型参数解压到本地磁盘
34
+ # 如需使用Baichuan1/2-7B模型,下方更换为:lyraBaichuan7B(model_path, tokenizer_path, data_type, memopt_mode)
35
+ model = lyraBaichuan13B(model_path, tokenizer_path, data_type, memopt_mode)
36
+
37
+ # 输入, 若有多个输入,可batch 推理,prompts 支持列表,这里为模拟多个输入,直接复制 32 分,batchsize 达到32
38
+ prompts = "登鹳雀楼->王之涣\n夜雨寄北->"
39
+ prompts = [prompts,]*32
40
+
41
+ # 生成, 最大长度可自行调整,这里设置 64,模型遇到 end token 或者达到最大计算长度时会停止当前批次计算.
42
+ output_texts = model.generate(prompts, output_length=64, do_sample=False, top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0)
43
+
44
+ # 输出查看, 虽然 输入字符串也是参与计算,用于评估模型吞吐量和计算速度。
45
+ # 这里为了显示应用方便, output_texts 中每个样本的输出已经去掉了输入字符串
46
+ print(output_texts)
47
+
48
+ # 输出示例
49
+ >>> Inputs: 登鹳雀楼->王之涣
50
+ 夜雨寄北->
51
+ >>> Outputs:
52
+ 李商隐
53
+ 望洞庭->刘禹锡
54
+ 黄鹤楼送孟浩然之广陵->李白
55
+ 登岳阳楼->杜甫
56
+ 秋词->刘禹锡
57
+ 枫桥夜泊->张继
58
+ 饮湖上初晴后雨->苏轼
59
+ 浪淘沙->刘禹锡
60
+ ```
61
+
62
+ ### demo 脚本
63
+
64
+ `examples/batch_demo.py` 中有类似上面的使用示例,做了简单的跑速测试,考虑大家对 token 的理解各有不同,我们这里直接按字符数来评估,不同 token 的理解可以自行根据生成结果字符数去观测。
65
+
66
+ 更多测试脚本及用法详见参考 `examples` 下的 [README.md](./examples/README.md) ,如:
67
+ - Batch推理
68
+ - 不等长Batch推理
69
+ - Batch流式推理
70
+
71
+ ## 自定义模型参数
72
+
73
+ 已提供转换脚本 `parse_model_params.py` 可以将 Baichuan1/2 模型的 HuggingFace 格式参数,转换为加速版本下各层模型需要的模型参数。这里我们提供一个模型名字 `-model_name` 的转换参数,可以自行填入,以便生成可区分的 config.in 文件。
74
+
75
+ ```shell
76
+ python parse_model_params.py -i your_model_dir -o output_dir -t_g 1 -i_g 1 -weight_data_type "fp16" -model_name "baichuan2-13b"
77
+ ```
78
+
79
+ 该转换脚本还会将同目录下 tokenizer_source 里的 `tokenizer.model` `special_tokens_map.json` `tokenizer_config.json` 三个文件拷贝到 output_dir 下,以便后续使用加速模型时直接能初始化对应的 加速后的 Baichuan 的 tokenizer.
80
+
81
+ 转换后的模型参数将以每个参数一个文件的形式存放在 `output_dir/{i_g}-gpu-{weight_data_type}` 下,需要使用`merge_bin.py`将多个bin文件合并为一个。
82
+
83
+ ```shell
84
+ layer_num=40 # 13B->40, 7B->32
85
+ python merge_bin.py -i model_dir/{i_g}-gpu-{weight_data_type} -o output_dir -l ${layer_num}
86
+ ```
87
+
88
+ 将上述 `config.ini` `config.json` `tokenizer.model` `special_tokens_map.json` `tokenizer_config.json` 五个文件拷贝到 output_dir 下。
lyrallms/LyraBaichuanPy/configuration_baichuan.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2023 Baichuan Inc. All Rights Reserved.
2
+
3
+ # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
6
+ # and OPT implementations in this library. It has been modified from its
7
+ # original forms to accommodate minor architectural differences compared
8
+ # to GPT-NeoX and OPT used by the Meta AI team that trained the model.
9
+ #
10
+ # Licensed under the Apache License, Version 2.0 (the "License");
11
+ # you may not use this file except in compliance with the License.
12
+ # You may obtain a copy of the License at
13
+ #
14
+ # http://www.apache.org/licenses/LICENSE-2.0
15
+ #
16
+ # Unless required by applicable law or agreed to in writing, software
17
+ # distributed under the License is distributed on an "AS IS" BASIS,
18
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ # See the License for the specific language governing permissions and
20
+ # limitations under the License.
21
+
22
+ from transformers.configuration_utils import PretrainedConfig
23
+ from transformers.utils import logging
24
+
25
+
26
+ logger = logging.get_logger(__name__)
27
+
28
+
29
+ class BaichuanConfig(PretrainedConfig):
30
+ model_type = "baichuan"
31
+ keys_to_ignore_at_inference = ["past_key_values"]
32
+
33
+ def __init__(
34
+ self,
35
+ vocab_size=125696,
36
+ hidden_size=4096,
37
+ intermediate_size=11008,
38
+ num_hidden_layers=32,
39
+ num_attention_heads=32,
40
+ hidden_act="silu",
41
+ max_position_embeddings=4096,
42
+ initializer_range=0.02,
43
+ rms_norm_eps=1e-6,
44
+ use_cache=True,
45
+ pad_token_id=0,
46
+ bos_token_id=1,
47
+ eos_token_id=2,
48
+ tie_word_embeddings=False,
49
+ z_loss_weight=0,
50
+ **kwargs,
51
+ ):
52
+ self.vocab_size = vocab_size
53
+ self.max_position_embeddings = max_position_embeddings
54
+ self.hidden_size = hidden_size
55
+ self.intermediate_size = intermediate_size
56
+ self.num_hidden_layers = num_hidden_layers
57
+ self.num_attention_heads = num_attention_heads
58
+ self.hidden_act = hidden_act
59
+ self.initializer_range = initializer_range
60
+ self.rms_norm_eps = rms_norm_eps
61
+ self.use_cache = use_cache
62
+ self.z_loss_weight = z_loss_weight
63
+ super().__init__(
64
+ pad_token_id=pad_token_id,
65
+ bos_token_id=bos_token_id,
66
+ eos_token_id=eos_token_id,
67
+ tie_word_embeddings=tie_word_embeddings,
68
+ **kwargs,
69
+ )
lyrallms/LyraBaichuanPy/examples/README.md ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 测试脚本
2
+
3
+ ### batch推理
4
+
5
+ ```sh
6
+ export FMHA_VERSION=OFF
7
+ export KV_CACHE_DTYPE=DEFAULT # 如使用KVCache Int8,设置 KV_CACHE_DTYPE=INT8
8
+
9
+ model_path=ModelPath # 转换后模型所处文件夹路径
10
+ data_type=fp16 # 权重保存精度
11
+ memopt_mode=0 # MEMOPT模式: 0/1
12
+ quant_type="int8" # 量化精度: int4/int8
13
+ max_output_length=256
14
+ warmups=1
15
+ avgnums=1
16
+
17
+ python batch_demo.py --model-path $model_path\
18
+ --tokenizer-path $model_path\
19
+ --data-type $data_type\
20
+ --memopt_mode $memopt_mode\
21
+ --quant-type ${quant_type}\
22
+ --max-output-length $max_output_length\
23
+ --warmups $warmups\
24
+ --avgnums $avgnums
25
+ ```
26
+
27
+ ### batch流式推理
28
+
29
+ ```sh
30
+ export FMHA_VERSION=OFF
31
+ export KV_CACHE_DTYPE=DEFAULT # 如使用KVCache Int8,设置 KV_CACHE_DTYPE=INT8
32
+ export LYRA_STREAM_CB_STEP=30 # 回调函数间隔步数
33
+
34
+ model_path=ModelPath # 转换后模型所处文件夹路径
35
+ data_type=fp16 # 权重保存精度
36
+ memopt_mode=0 # MEMOPT模式: 0/1
37
+ quant_type="int8" # 量化精度: int4/int8
38
+ max_output_length=256
39
+ warmups=1
40
+ avgnums=1
41
+
42
+ python batch_stream_demo.py --model-path $model_path\
43
+ --tokenizer-path $model_path\
44
+ --data-type $data_type\
45
+ --memopt_mode $memopt_mode\
46
+ --quant-type ${quant_type}\
47
+ --max-output-length $max_output_length\
48
+ --warmups $warmups\
49
+ --avgnums $avgnums
50
+ ```
51
+ ### 不等长batch推理
52
+
53
+ ```sh
54
+ export FMHA_VERSION=OFF
55
+ export KV_CACHE_DTYPE=DEFAULT # 如使用KVCache Int8,设置 KV_CACHE_DTYPE=INT8
56
+
57
+ model_path=ModelPath # 转换后模型所处文件夹路径
58
+ prompt_filepath=varlen_prompts.json # 用于测试的不等长prompts文件,从中采样
59
+ data_type=fp16 # 权重保存精度
60
+ memopt_mode=0 # MEMOPT模式: 0/1
61
+ quant_type="int8" # 量化精度: int4/int8
62
+ max_output_length=256
63
+ warmups=1
64
+ avgnums=1
65
+
66
+ python random_batch_demo.py --model-path $model_path\
67
+ --tokenizer-path $model_path\
68
+ --data-type $data_type\
69
+ --memopt_mode $memopt_mode\
70
+ --quant-type ${quant_type}\
71
+ --prompt_filepath $prompt_filepath\
72
+ --max-output-length $max_output_length\
73
+ --warmups $warmups\
74
+ --avgnums $avgnums
75
+ ```
76
+
77
+ ## Prompt例子
78
+
79
+ ### 短序列
80
+ ```
81
+ 北京的景点:故宫、天坛、万里长城等。\n深圳的景点:
82
+ ```
83
+ ```
84
+ 今天天气大概 25度,有点小雨,吹着风,我想去户外散步,应该穿什么样的衣服 裤子鞋子搭配
85
+ ```
86
+
87
+ ### 1K序列
88
+ ```
89
+ 《Bela Lugosi's Dead 》是英国后朋克乐队Bauhaus的首张单曲,于 1979 年 8 月 6 日在Small Wonder厂牌上发行。[4]它通常被认为是第一张哥特式摇滚唱片。\n1979 年 1 月 26 日,“Bela Lugosi's Dead”在威灵伯勒的贝克录音室进行了六个小时的“录音室现场”录制。这是他们在乐队成立六周后一起录制的第一首歌曲。[6]所有四位乐队成员都被认为是这首歌的作者:主唱彼得·墨菲、吉他手丹尼尔·阿什、鼓手凯文·哈斯金斯和贝斯手大卫·J (大卫·哈斯金斯)。David J 声称这首歌的歌词是他写的。[5] “Bela Lugosi's Dead”的替代版本还包括他们下一首单曲“ Dark Entries ”的早期演示录音的一部分。\n\n在同一场会议中还录制了另外四首歌曲:“Boys”;“咬我的臀部”;“Some Faces”和斯卡雷鬼曲调“Harry”,这是关于Blondie主唱Deborah Harry的。[7] [8]关于这次会议,凯文·哈斯金斯 (Kevin Haskins) 说,“那里有力量流行音乐,还有斯卡。我们试图找到我们的声音。” [9]\n\n在那次录制期间录制的歌曲中(除了“Bela Lugosi's Dead”),只有“Harry”获得了官方发行;1982年作为单曲“ Kick in the Eye ”的B面。1979 年晚些时候在 Beck Studios 录制的《Boys》版本被用作原版单曲《Bela Lugosi's Dead》的 B 面。[10]其余曲目,包括“Boys”的原始录音,一直未发行,直到 2018 年The Bela Session以黑胶唱片和CD 形式发行,并可供乐队数字下载。[11]在额外的曲目中,《经典摇滚》杂志写道:“其余的材料发现乐队正在摸索方向,甚至触及了斯卡。”\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:
90
+ ```
91
+
92
+ ### 2K序列
93
+ ```
94
+ 根据所给刑事法律文书中的案情描述,预测被告人被判的罪名。你需要从这些罪名中选择最恰当的一项:妨害公务,寻衅滋事,盗窃、侮辱尸体,危险物品肇事,非法采矿,组织、强迫、引诱、容留、介绍卖淫,开设赌场,聚众斗殴,绑架,非法持有毒品,销售假冒注册商标的商品,容留他人吸毒,假冒注册商标,交通肇事,破坏电力设备,组织卖淫,合同诈骗,走私武器、弹药,抢劫,非法处置查封、扣押、冻结的财产,以危险方法危害公共安全,过失投放危险物质,非法制造、买卖、运输、邮寄、储存枪支、弹药、爆炸物,伪造、变造、买卖武装部队公文、证件、印章,持有、使用假币,重婚,聚众冲击国家机关,生产、销售伪劣农药、兽药、化肥、种子,收买被拐卖的妇女、儿童,聚众哄抢,重大劳动安全事故,侵占,包庇毒品犯罪分子,虚报注册资本,违法发放贷款,制造、贩卖、传播淫秽物品,窝藏、包庇,帮助毁灭、伪造证据,放火,强奸,非法携带枪支、弹药、管制刀具、危险物品危及公共安全,伪造、变造金融票证,爆炸,玩忽职守,对非国家工作人员行贿,伪造、倒卖伪造的有价票证,私分国有资产,非法收购、运输、加工、出售国家重点保护植物、国家重点保护植物制品,生产、销售假药,挪用特定款物,过失致人死亡,走私国家禁止进出口的货物、物品,非法制造、买卖、运输、储存危险物质,洗钱,骗取贷款、票据承兑、金融票证,非法买卖制毒物品,非法买卖、运输、携带、持有毒品原植物种子、幼苗,生产、销售有毒、有害食品,滥用职权,招收公务员、学生徇私舞弊,诬告陷害,非法获取国家秘密,非法行医,非法收购、运输、出售珍贵、濒危野生动物、珍贵、濒危野生动物制品,非法出售发票,行贿,高利转贷,非法吸收公众存款,传播淫秽物品,非法进行节育手术,盗伐林木,聚众扰乱社会秩序,走私、贩卖、运输、制造毒品,滥伐林木,赌博,非法经营,生产、销售不符合安全标准的食品,提供侵入、非法控制计算机信息系统程序、工具,倒卖文物,窃取、收买、非法提供信用卡信息,盗掘古文化遗址、古墓葬,协助组织卖淫,破坏广播电视设施、公用电信设施,走私普通货物、物品,逃税,破坏监管秩序,失火,受贿,组织、领导、参加黑社会性质组织,票据诈骗,非法制造、销售非法制造的注册商标标识,侵犯著作权,伪造、变造、买卖国家机关公文、证件、印章,徇私舞弊不征、少征税款,强迫劳动,贷款诈骗,劫持船只、汽车,诈骗,非法种植毒品原植物,非法狩猎,挪用资金,非法收购、运输盗伐、滥伐的林木,出售、购买、运输假币,抢夺,虐待被监管人,窝藏、转移、收购、销售赃物,破坏计算机信息系统,制作、复制、出版、贩卖、传播淫秽物品牟利,拒不支付劳动报酬,盗窃、抢夺枪支、弹药、爆炸物,强迫他人吸毒,走私珍贵动物、珍贵动物制品,虐待,非法获取公民个人信息,破坏交通设施,非法转让、倒卖土地使用权,非法捕捞水产品,非法占用农用地,非法制造、出售非法制造的发票,非法持有、私藏枪支、弹药,集资诈骗,强迫卖淫,伪造公司、企业、事业单位、人民团体印章,利用影响力受贿,编造、故意传播虚假恐怖信息,介绍贿赂,传播性病,拐卖妇女、儿童,倒卖车票、船票,窝藏、转移、隐瞒毒品、毒赃,徇私舞弊不移交刑事案件,过失损坏广播电视设施、公用电信设施,动植物检疫徇私舞弊,破坏交通工具,猥亵儿童,挪用公款,伪造货币,冒充军人招摇撞骗,非法采伐、毁坏国家重点保护植物,故意毁坏财物,非法拘禁,招摇撞骗,伪造、变造居民身份证,徇私枉法,非法生产、买卖警用装备,掩饰、隐瞒犯罪所得、犯罪所得收益,生产、销售伪劣产品,破坏生产经营,帮助犯罪分子逃避处罚,贪污,投放危险物质,持有伪造的发票,危险驾驶,妨害作证,非法猎捕、杀害珍贵、濒危野生动物,重大责任事故,诽谤,虚开发票,引诱、教唆、欺骗他人吸毒,脱逃,扰乱无线电通讯管理秩序,保险诈骗,非法生产、销售间谍专用器材,非法组织卖血,强迫交易,串通投标,破坏易燃易爆设备,传授犯罪方法,妨害信用卡管理,拐骗儿童,单位行贿,打击报复证人,拒不执行判决、裁定,经济犯,金融凭证诈骗,虚开增值税专用发票、用于骗取出口退税、抵扣税款发票,走私废物,组织、领导传销活动,单位受贿,盗窃、抢夺枪支、弹药、爆炸物、危险物质,过失以危险方法危害公共安全,过失致人重伤,引诱、容留、介绍卖淫,遗弃,走私,信用卡诈骗,对单位行贿,故意杀人,聚众扰乱公共场��秩序、交通秩序,盗窃,故意伤害,非法侵入住宅,强制猥亵、侮辱妇女,伪证,污染环境,巨额财产来源不明,非国家工作人员受贿,侮辱,隐匿、故意销毁会计凭证、会计帐簿、财务会计报告,过失损坏武器装备、军事设施、军事通信,敲诈勒索,职务侵占。\n经审理查明:2013年9月底的一天晚上,被告人陆某德酒后经过沭阳县某镇某村张某荣家时,发现张某荣家没有人,即用石头砸破张某荣家房门玻璃,打开房门进入张某荣家中。因进入张某荣时被房门
遗留的玻璃划伤,被告人陆某德在张某荣家北屋门和北屋东首间墙面遗留两处血迹。2014年1月7日,被告人陆某德被公安民警从其家中传唤到案,并如实供述自己的罪行。上述事实,有公诉机关提交的,经过庭审质证的,且均具有证据证明效力的以下证据予以证明:被告人陆某德供述其非法侵入他人住宅的时间、地点、经过等事实。该供述得到了被害人张某荣的陈述、证人周某花、李某华等人的证言、法庭科学DNA检验鉴定书、现场勘验检查笔录、现场图、现场照片等证据予以证实,足以认定。刑事判决书证明证明了被告人陆某德有前科;公安机关出具的“发破案经过”及“抓获经过”证明了本案案发及被告人陆某德的归案情况。\n
95
+ ```
96
+
97
+ ### 4K序列
98
+ ```
99
+ <context>/*\n * Implement the \"Falling Rocks\" game in the text console. \n * A small dwarf stays at the bottom of the screen and can \n * move left and right (by the arrows keys). A number of rocks \n * of different sizes and forms constantly fall down and you \n * need to avoid a crash.\n * Rocks are the symbols ^, @, *, &, +, %, $, #, !, ., ;, - distributed \n * with appropriate density. The dwarf is (O). \n * Ensure a constant game speed by Thread.Sleep(150).\n * Implement collision detection and scoring system.\n*/\n\nusing System;\nusing System.Threading;\nusing System.Collections.Generic;\nusing System.Threading.Tasks;\n\nclass FallingRocks\n{\n struct Position\n {\n public int X, Y;\n public string symbol;\n public ConsoleColor color;\n\n public Position(int x, int y, string symbol, ConsoleColor color)\n {\n this.X = x;\n this.Y = y;\n this.symbol = symbol;\n this.color = color;\n }\n }\n\n static void Main()\n {\n Thread oThread = new Thread(new ThreadStart(Mainn));\n Thread aThread = new Thread(new ThreadStart(Clr));\n \n aThread.Start();\n oThread.Start();\n oThread.Join();\n aThread.Join();\n }\n\n static void Clr()\n {\n while (true)\n {\n Thread.Sleep(10);\n Console.Clear();\n }\n }\n static void Mainn()\n {\n //Random generator for rocks color, position and symbol\n Random randomGenerator = new Random();\n \n //Sleep time for the game loop\n double sleepTime = 150;\n //Console settings\n Console.CursorVisible = false;\n Console.BufferHeight = Console.WindowHeight;\n \n //number of rocks in the Array rocks\n int rocksCount = 0;\n\n //array with the symbols of the rocks\n string[] symbols = new string[] { \"^\", \"@\", \"*\", \"&\", \"+\", \"%\", \"$\", \"#\", \"!\", \".\", \";\" };\n \n //array with colors for the rocks\n ConsoleColor[] colors = new ConsoleColor[] {ConsoleColor.Yellow, ConsoleColor.White, ConsoleColor.Gray};\n \n //array with rocks\n Position[] rocks = new Position[200];\n \n //position for the dwarf\n Position dwarf = new 
Position(10, Console.WindowHeight - 1,\"(0)\",ConsoleColor.Red);\n \n //bool variable to say when the game loop to be over\n bool gameLoop = true;\n\n //variable keeping the score\n ulong score = 0;\n\n //the game loop\n while (gameLoop)\n {\n //score is growing as the cycle runs\n score++;\n\n //setting the Y component for all the rocks in the array to grow with 2\n for (int i = 0; i <= rocks.Length - 1; i++)\n {\n rocks[i].Y = rocks[i].Y + 2;\n }\n\n //generating rocks\n for (int x = 0; x <= randomGenerator.Next(2, 4); x++)\n {\n rocks[rocksCount] = new Position(randomGenerator.Next(x * 15, x * 15 + 20), 0\n , symbols[randomGenerator.Next(0, symbols.Length - 1)]\n , colors[randomGenerator.Next(0, colors.Length - 1)]);\n if (rocksCount >= 199) rocksCount = 0;\n rocksCount++;\n }\n\n //printing the rocks and other stuff\n foreach (var item in rocks)\n {\n foreach (var rock in rocks)\n {\n //checking for colision\n if ((rock.X >= dwarf.X) && (rock.X <= (dwarf.X + 2)) && (rock.Y == dwarf.Y))\n {\n gameLoop = false;\n break;\n }\n } \n\n //printing the rocks\n if (item.Y < Console.WindowHeight)\n { \n Console.SetCursorPosition(item.X, item.Y);\n Console.ForegroundColor = item.color;\n Console.Write(item.symbol);\n }\n\n //checking for key pressed\n if (Console.KeyAvailable)\n {\n ConsoleKeyInfo pressedKey = Console.ReadKey();\n if (pressedKey.Key == ConsoleKey.RightArrow)\n {\n if(dwarf.X < Console.WindowWidth - 20)\n {\n //removing the old positions of the dwarf and increasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.Write(\" \");\n dwarf.X++;\n }\n }\n if (pressedKey.Key == ConsoleKey.LeftArrow) \n {\n if(dwarf.X >= 1)\n {\n //removing the old positions of the dwarf and decreasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.Write(\" \");\n dwarf.X--;\n }\n }\n }\n }\n \n //printing the dwarf\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.ForegroundColor = dwarf.color;\n Console.Write(dwarf.symbol); \n \n 
//sleeping the loop for sometime\n //Thread.Sleep((int)sleepTime);\n\n //reducing the sleep time of the loop\n sleepTime -= 0.5;\n\n \n //removing the rocks \n //foreach (var item in rocks)\n //{\n // if (item.Y < Console.WindowHeight)\n // {\n // Console.SetCursorPosition(item.X, item.Y);\n // Console.Write(\" \");\n // }\n //} \n }\n //Printing the score after the game is over\n Console.Clear();\n Console.WriteLine(\"Game over! Your score is: \" + score);\n\n }\n}\n</context>\n\n这个\"Falling Rocks\"游戏是如何工作的呢?可以详细解释一下代码的运作机制吗? \n\n\n\n
100
+ ```
101
+
102
+ ### 8K序列
103
+ ```
104
+ <context># -*- coding: utf-8 -*-\n# This code is part of Amoco\n# Copyright (C) 2021 Axel Tillequin ([email protected])\n# published under GPLv2 license\nfrom amoco.arch.tricore import env\nfrom amoco.arch.core import *\n# -------------------------------------------------------\n# from TriCore TC1.6.2 core architecture manual V1.2.2\n# (32-bit Unified Processor Core), 2020-01-15\n# define all except FPU instructions\n# -------------------------------------------------------\nISPECS = []\n@ispec("32<[ disp1(16) disp2(8) {6d} ]", mnemonic="CALL")\n@ispec("32<[ disp1(16) disp2(8) {61} ]", mnemonic="FCALL")\n@ispec("32<[ disp1(16) disp2(8) {1d} ]", mnemonic="J")\n@ispec("32<[ disp1(16) disp2(8) {5d} ]", mnemonic="JL")\ndef tricore_branch(obj, disp1, disp2):\n v = env.cst(((disp2<<16)+disp1)<<1,24)\n obj.operands = [disp.signextend(32)]\n obj.type = type_control_flow\n@ispec("32<[ disp1(16) disp2(8) {ed} ]", mnemonic="CALLA")\n@ispec("32<[ disp1(16) disp2(8) {e1} ]", mnemonic="FCALLA")\n@ispec("32<[ disp1(16) disp2(8) {9d} ]", mnemonic="JA")\n@ispec("32<[ disp1(16) disp2(8) {dd} ]", mnemonic="JLA")\ndef tricore_branch(obj, disp1, disp2):\n v = env.cst((disp2<<16)+disp1,24)\n addr = composer([env.bit0,v[0:20],env.cst(0,7),v[20:24]])\n obj.operands = [addr]\n obj.type = type_control_flow\n@ispec("32<[ ---- {00} ---- ---- a(4) {2d} ]", mnemonic="CALLI")\n@ispec("32<[ ---- {01} ---- ---- a(4) {2d} ]", mnemonic="FCALLI")\n@ispec("32<[ ---- {03} ---- ---- a(4) {2d} ]", mnemonic="JI")\n@ispec("32<[ ---- {02} ---- ---- a(4) {2d} ]", mnemonic="JLI")\ndef tricore_branchI(obj, a):\n src = env.A[a]\n obj.operands = [src]\n obj.type = type_control_flow\n@ispec("16<[ disp(8) {5c} ]", mnemonic="CALL")\n@ispec("16<[ disp(8) {3c} ]", mnemonic="J")\n@ispec("16<[ disp(8) {ee} ]", mnemonic="JNZ")\n@ispec("16<[ disp(8) {6e} ]", mnemonic="JZ")\ndef tricore_branch(obj, disp):\n disp = env.cst(disp<<1,8)\n obj.operands = [disp.signextend(32)]\n obj.type = type_control_flow\n@ispec("32<[ ---- 
0000000 const9(9) ---- {ad} ]", mnemonic="BISR")\n@ispec("32<[ ---- 0000100 const9(9) ---- {ad} ]", mnemonic="SYSCALL")\ndef tricore_system(obj, const9):\n obj.operands = [env.cst(const9,9)]\n obj.type = type_system\n@ispec("32<[ c(4) {1c} ---- b(4) ---- {0b} ]", mnemonic="ABS")\n@ispec("32<[ c(4) {5c} ---- b(4) ---- {0b} ]", mnemonic="ABS_B")\n@ispec("32<[ c(4) {7c} ---- b(4) ---- {0b} ]", mnemonic="ABS_H")\n@ispec("32<[ c(4) {1d} ---- b(4) ---- {0b} ]", mnemonic="ABSS")\n@ispec("32<[ c(4) {7d} ---- b(4) ---- {0b} ]", mnemonic="ABSS_H")\n@ispec("32<[ c(4) {1f} ---- b(4) ---- {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b):\n src = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {80} ---- b(4) ---- {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b):\n src = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, src.signextend(64)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {81} ---- b(4) a(4) {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b, a):\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, composer([src2,src1])]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {0e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF")\n@ispec("32<[ c(4) {4e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF_B")\n@ispec("32<[ c(4) {6e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF_H")\n@ispec("32<[ c(4) {0f} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIFS")\n@ispec("32<[ c(4) {6f} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIFS_H")\n@ispec("32<[ c(4) {00} ---- b(4) a(4) {0b} ]", mnemonic="ADD")\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {0b} ]", mnemonic="ADD_B")\n@ispec("32<[ c(4) {60} ---- b(4) a(4) {0b} ]", mnemonic="ADD_H")\n@ispec("32<[ c(4) {05} ---- b(4) a(4) {0b} ]", mnemonic="ADDC")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {0b} ]", mnemonic="ADDS")\n@ispec("32<[ c(4) {62} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_H")\n@ispec("32<[ c(4) {63} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_HU")\n@ispec("32<[ 
c(4) {03} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_U")\n@ispec("32<[ c(4) {04} ---- b(4) a(4) {0b} ]", mnemonic="ADDX")\n@ispec("32<[ c(4) {08} ---- b(4) a(4) {0f} ]", mnemonic="AND")\n@ispec("32<[ c(4) {20} ---- b(4) a(4) {0b} ]", mnemonic="AND_EQ")\n@ispec("32<[ c(4) {24} ---- b(4) a(4) {0b} ]", mnemonic="AND_GE")\n@ispec("32<[ c(4) {25} ---- b(4) a(4) {0b} ]", mnemonic="AND_GE_U")\n@ispec("32<[ c(4) {22} ---- b(4) a(4) {0b} ]", mnemonic="AND_LT")\n@ispec("32<[ c(4) {23} ---- b(4) a(4) {0b} ]", mnemonic="AND_LT_U")\n@ispec("32<[ c(4) {21} ---- b(4) a(4) {0b} ]", mnemonic="AND_NE")\n@ispec("32<[ c(4) {0e} ---- b(4) a(4) {0f} ]", mnemonic="ANDN")\n@ispec("32<[ c(4) {10} ---- b(4) a(4) {0b} ]", mnemonic="EQ")\n@ispec("32<[ c(4) {50} ---- b(4) a(4) {0b} ]", mnemonic="EQ_B")\n@ispec("32<[ c(4) {70} ---- b(4) a(4) {0b} ]", mnemonic="EQ_H")\n@ispec("32<[ c(4) {90} ---- b(4) a(4) {0b} ]", mnemonic="EQ_W")\n@ispec("32<[ c(4) {56} ---- b(4) a(4) {0b} ]", mnemonic="EQANY_B")\n@ispec("32<[ c(4) {76} ---- b(4) a(4) {0b} ]", mnemonic="EQANY_H")\n@ispec("32<[ c(4) {14} ---- b(4) a(4) {0b} ]", mnemonic="GE")\n@ispec("32<[ c(4) {15} ---- b(4) a(4) {0b} ]", mnemonic="GE_U")\n@ispec("32<[ c(4) {12} ---- b(4) a(4) {0b} ]", mnemonic="LT")\n@ispec("32<[ c(4) {13} ---- b(4) a(4) {0b} ]", mnemonic="LT_U")\n@ispec("32<[ c(4) {52} ---- b(4) a(4) {0b} ]", mnemonic="LT_B")\n@ispec("32<[ c(4) {53} ---- b(4) a(4) {0b} ]", mnemonic="LT_BU")\n@ispec("32<[ c(4) {72} ---- b(4) a(4) {0b} ]", mnemonic="LT_H")\n@ispec("32<[ c(4) {73} ---- b(4) a(4) {0b} ]", mnemonic="LT_HU")\n@ispec("32<[ c(4) {92} ---- b(4) a(4) {0b} ]", mnemonic="LT_W")\n@ispec("32<[ c(4) {93} ---- b(4) a(4) {0b} ]", mnemonic="LT_WU")\n@ispec("32<[ c(4) {1a} ---- b(4) a(4) {0b} ]", mnemonic="MAX")\n@ispec("32<[ c(4) {1b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_U")\n@ispec("32<[ c(4) {5a} ---- b(4) a(4) {0b} ]", mnemonic="MAX_B")\n@ispec("32<[ c(4) {5b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_BU")\n@ispec("32<[ c(4) {7a} ---- b(4) a(4) 
{0b} ]", mnemonic="MAX_H")\n@ispec("32<[ c(4) {7b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_HU")\n@ispec("32<[ c(4) {18} ---- b(4) a(4) {0b} ]", mnemonic="MIN")\n@ispec("32<[ c(4) {19} ---- b(4) a(4) {0b} ]", mnemonic="MIN_U")\n@ispec("32<[ c(4) {58} ---- b(4) a(4) {0b} ]", mnemonic="MIN_B")\n@ispec("32<[ c(4) {59} ---- b(4) a(4) {0b} ]", mnemonic="MIN_BU")\n@ispec("32<[ c(4) {78} ---- b(4) a(4) {0b} ]", mnemonic="MIN_H")\n@ispec("32<[ c(4) {79} ---- b(4) a(4) {0b} ]", mnemonic="MIN_HU")\n@ispec("32<[ c(4) {09} ---- b(4) a(4) {0f} ]", mnemonic="NAND")\n@ispec("32<[ c(4) {11} ---- b(4) a(4) {0b} ]", mnemonic="NE")\n@ispec("32<[ c(4) {0b} ---- b(4) a(4) {0f} ]", mnemonic="NOR")\n@ispec("32<[ c(4) {0a} ---- b(4) a(4) {0f} ]", mnemonic="OR")\n@ispec("32<[ c(4) {27} ---- b(4) a(4) {0b} ]", mnemonic="OR_EQ")\n@ispec("32<[ c(4) {2b} ---- b(4) a(4) {0b} ]", mnemonic="OR_GE")\n@ispec("32<[ c(4) {2c} ---- b(4) a(4) {0b} ]", mnemonic="OR_GE_U")\n@ispec("32<[ c(4) {29} ---- b(4) a(4) {0b} ]", mnemonic="OR_LT")\n@ispec("32<[ c(4) {2a} ---- b(4) a(4) {0b} ]", mnemonic="OR_LT_U")\n@ispec("32<[ c(4) {28} ---- b(4) a(4) {0b} ]", mnemonic="OR_NE")\n@ispec("32<[ c(4) {0f} ---- b(4) a(4) {0f} ]", mnemonic="ORN")\n@ispec("32<[ c(4) {00} ---- b(4) a(4) {0f} ]", mnemonic="SH")\n@ispec("32<[ c(4) {37} ---- b(4) a(4) {0b} ]", mnemonic="SH_EQ")\n@ispec("32<[ c(4) {3b} ---- b(4) a(4) {0b} ]", mnemonic="SH_GE")\n@ispec("32<[ c(4) {3c} ---- b(4) a(4) {0b} ]", mnemonic="SH_GE_U")\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {0f} ]", mnemonic="SH_H")\n@ispec("32<[ c(4) {39} ---- b(4) a(4) {0b} ]", mnemonic="SH_LT")\n@ispec("32<[ c(4) {3a} ---- b(4) a(4) {0b} ]", mnemonic="SH_LT_U")\n@ispec("32<[ c(4) {38} ---- b(4) a(4) {0b} ]", mnemonic="SH_NE")\n@ispec("32<[ c(4) {01} ---- b(4) a(4) {0f} ]", mnemonic="SHA")\n@ispec("32<[ c(4) {41} ---- b(4) a(4) {0f} ]", mnemonic="SHA_H")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {0f} ]", mnemonic="SHAS")\n@ispec("32<[ c(4) {08} ---- b(4) a(4) {0b} ]", 
mnemonic="SUB")\n@ispec("32<[ c(4) {48} ---- b(4) a(4) {0b} ]", mnemonic="SUB_B")\n@ispec("32<[ c(4) {68} ---- b(4) a(4) {0b} ]", mnemonic="SUB_H")\n@ispec("32<[ c(4) {0d} ---- b(4) a(4) {0b} ]", mnemonic="SUBC")\n@ispec("32<[ c(4) {0a} ---- b(4) a(4) {0b} ]", mnemonic="SUBS")\n@ispec("32<[ c(4) {0b} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_U")\n@ispec("32<[ c(4) {6a} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_H")\n@ispec("32<[ c(4) {6b} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_HU")\n@ispec("32<[ c(4) {0c} ---- b(4) a(4) {0b} ]", mnemonic="SUBX")\n@ispec("32<[ c(4) {0d} ---- b(4) a(4) {0f} ]", mnemonic="XNOR")\n@ispec("32<[ c(4) {0c} ---- b(4) a(4) {0f} ]", mnemonic="XOR")\n@ispec("32<[ c(4) {2f} ---- b(4) a(4) {0b} ]", mnemonic="XOR_EQ")\n@ispec("32<[ c(4) {30} ---- b(4) a(4) {0b} ]", mnemonic="XOR_NE")\ndef tricore_ddd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {01} ]", mnemonic="EQ_A")\n@ispec("32<[ c(4) {43} ---- b(4) a(4) {01} ]", mnemonic="GE_A")\n@ispec("32<[ c(4) {42} ---- b(4) a(4) {01} ]", mnemonic="LT_A")\n@ispec("32<[ c(4) {41} ---- b(4) a(4) {01} ]", mnemonic="NE_A")\ndef tricore_daa_arithmetic(obj, c, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {63} ---- b(4) ---- {01} ]", mnemonic="MOV_A", _dst=env.A, _src=env.D)\n@ispec("32<[ c(4) {00} ---- b(4) ---- {01} ]", mnemonic="MOV_AA", _dst=env.A, _src=env.A)\n@ispec("32<[ c(4) {4c} ---- b(4) ---- {01} ]", mnemonic="MOV_D", _dst=env.D, _src=env.A)\ndef tricore_daa_arithmetic(obj, c, b, _dst, _src):\n dst = _dst[c]\n src = _src[b]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {48} ---- ---- a(4) {01} ]", mnemonic="EQZ_A")\n@ispec("32<[ c(4) {49} ---- ---- a(4) {01} ]", mnemonic="NEZ_A")\ndef tricore_da_arithmetic(obj, c, a):\n src1 = 
env.A[a]\n dst = env.D[c]\n obj.operands = [dst, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {01} --00 b(4) a(4) {4b} ]", mnemonic="BMERGE")\ndef tricore_ddd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {06} --00 b(4) a(4) {4b} ]", mnemonic="CRC32_B")\n@ispec("32<[ c(4) {03} --00 b(4) a(4) {4b} ]", mnemonic="CRC32B_W")\n@ispec("32<[ c(4) {03} --00 b(4) a(4) {4b} ]", mnemonic="CRC32L_W")\ndef tricore_crc32(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src2, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {20} --01 b(4) a(4) {4b} ]", mnemonic="DIV")\n@ispec("32<[ c(4) {21} --01 b(4) a(4) {4b} ]", mnemonic="DIV_U")\n@ispec("32<[ c(4) {5a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_B")\n@ispec("32<[ c(4) {4a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_BU")\n@ispec("32<[ c(4) {3a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_H")\n@ispec("32<[ c(4) {2a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_HU")\n@ispec("32<[ c(4) {1a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT")\n@ispec("32<[ c(4) {0a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_U")\ndef tricore_edd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 100 ----- b(4) a(4) {17} ]", mnemonic="DEXTR")\ndef tricore_dddc(obj, c, d, b, a):\n shift = env.D[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, shift]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 010 ----- ---- a(4) {17} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) d(4) 011 ----- ---- a(4) {17} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, d, a):\n if d%2:\n raise InstructionError(obj)\n width = env.E[d][32:37]\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, 
width]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 0--00 ---- a(4) {6b} ]", mnemonic="PACK")\ndef tricore_extr(obj, c, d, a):\n if d%2:\n raise InstructionError(obj)\n src1 = env.E[d]\n src2 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {08} -- 00 ---- a(4) {4b} ]", mnemonic="UNPACK")\ndef tricore_extr(obj, c, d, a):\n src = env.D[a]\n dst = env.E[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {02} -- 00 ---- a(4) {4b} ]", mnemonic="PARITY")\n@ispec("32<[ c(4) {22} -- 00 ---- a(4) {4b} ]", mnemonic="POPCNT_W")\ndef tricore_extr(obj, c, d, a):\n src = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 ----- b(4) a(4) {77} ]", mnemonic="DEXTR")\ndef tricore_dextr(obj, c, pos, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, env.cst(pos,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 10 width(5) ---- a(4) {37} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) pos(5) 11 width(5) ---- a(4) {37} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, pos, width, a):\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 01 width(5) const(4) ---- {b7} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, pos, width, const):\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, env.cst(const,4), env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 001 width(5) const(4) ---- {d7} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, d, width, const):\n src2 = env.D[d]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, env.cst(const,4), src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 01 width(5) b(4) ---- {37} ]", 
mnemonic="IMASK")\ndef tricore_imask(obj, c, pos, width, b):\n src1 = env.D[b]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 001 width(5) b(4) ---- {57} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, d, width, b):\n src1 = env.D[b]\n src2 = env.D[d]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 width(5) const(4) a(4) {b7} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, pos, width, const, a):\n dst = env.D[c]\n src1 = env.D[a]\n obj.operands = [dst, src1, env.cst(const,4), env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ----- const(4) a(4) {97} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, const, a):\n src1 = env.D[a]\n if d%2:\n raise InstructionError(obj)\n src3 = env.E[d]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(const,4), src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 width(5) const(4) a(4) {d7} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, width, const, a):\n src1 = env.D[a]\n src3 = env.D[d]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(const,4), src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 width(5) b(4) a(4) {37} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, pos, width, b, a):\n dst = env.D[c]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ----- b(4) a(4) {17} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n if d%2:\n raise InstructionError(obj)\n src3 = env.E[d]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 width(5) b(4) a(4) {57} 
]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, width, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n src3 = env.D[d]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, src3, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 010 width(5) ---- a(4) {57} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) d(4) 011 width(5) ---- a(4) {57} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, d, width, a):\n src2 = env.D[d]\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {09} --00 ---- a(4) {4b} ]", mnemonic="BSPLIT")\ndef tricore_edd_arithmetic(obj, c, a):\n src1 = env.D[a]\n dst = env.E[c]\n obj.operands = [dst, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) 0001110 ~const9(9) a(4) {8b} ]", mnemonic="ABSDIF")\n@ispec("32<[ c(4) 0001111 ~const9(9) a(4) {8b} ]", mnemonic="ABSDIFS")\n@ispec("32<[ c(4) 0000000 ~const9(9) a(4) {8b} ]", mnemonic="ADD")\n@ispec("32<[ c(4) 0000101 ~const9(9) a(4) {8b} ]", mnemonic="ADDC")\n@ispec("32<[ c(4) 0000010 ~const9(9) a(4) {8b} ]", mnemonic="ADDS")\n@ispec("32<[ c(4) 0000011 ~const9(9) a(4) {8b} ]", mnemonic="ADDS_U") #const9 is signed\n@ispec("32<[ c(4) 0000100 ~const9(9) a(4) {8b} ]", mnemonic="ADDX")\n@ispec("32<[ c(4) 0100000 ~const9(9) a(4) {8b} ]", mnemonic="AND_EQ")\n@ispec("32<[ c(4) 0100100 ~const9(9) a(4) {8b} ]", mnemonic="AND_GE")\n@ispec("32<[ c(4) 0100010 ~const9(9) a(4) {8b} ]", mnemonic="AND_LT")\n@ispec("32<[ c(4) 0100001 ~const9(9) a(4) {8b} ]", mnemonic="AND_NE")\n@ispec("32<[ c(4) 0010000 ~const9(9) a(4) {8b} ]", mnemonic="EQ")\n@ispec("32<[ c(4) 1010110 ~const9(9) a(4) {8b} ]", mnemonic="EQANY_B")\n@ispec("32<[ c(4) 1110110 ~const9(9) a(4) {8b} ]", mnemonic="EQANY_H")\n@ispec("32<[ c(4) 0010100 ~const9(9) a(4) {8b} ]", mnemonic="GE")\n@ispec("32<[ c(4) 0010010 ~const9(9) a(4) {8b} ]", mnemonic="LT")\n@ispec("32<[ c(4) 0011010 ~const9(9) a(4) {8b} ]", mnemonic="MAX")\n@ispec("32<[ c(4) 0010001 
~const9(9) a(4) {8b} ]", mnemonic="NE")\n@ispec("32<[ c(4) 0100111 ~const9(9) a(4) {8b} ]", mnemonic="OR_EQ")\n@ispec("32<[ c(4) 0101011 ~const9(9) a(4) {8b} ]", mnemonic="OR_GE")\n@ispec("32<[ c(4) 0101001 ~const9(9) a(4) {8b} ]", mnemonic="OR_LT")\n@ispec("32<[ c(4) 0001000 ~const9(9) a(4) {8b} ]", mnemonic="RSUB")\n@ispec("32<[ c(4) 0001001 ~const9(9) a(4) {8b} ]", mnemonic="RSUBS")\n@ispec("32<[ c(4) 0001011 ~const9(9) a(4) {8b} ]", mnemonic="RSUBS_U") #const9 is signed\n@ispec("32<[ c(4) 0000000 ~const9(9) a(4) {8f} ]", mnemonic="SH")\n@ispec("32<[ c(4) 1000000 ~const9(9) a(4) {8f} ]", mnemonic="SH_H")\n@ispec("32<[ c(4) 0110111 ~const9(9) a(4) {8b} ]", mnemonic="SH_EQ")\n@ispec("32<[ c(4) 0111011 ~const9(9) a(4) {8b} ]", mnemonic="SH_GE")\n@ispec("32<[ c(4) 0111001 ~const9(9) a(4) {8b} ]", mnemonic="SH_LT")\n@ispec("32<[ c(4) 0111000 ~const9(9) a(4) {8b} ]", mnemonic="SH_NE")\n@ispec("32<[ c(4) 0000001 ~const9(9) a(4) {8f} ]", mnemonic="SHA")\n@ispec("32<[ c(4) 1000001 ~const9(9) a(4) {8f} ]", mnemonic="SHA_H")\n@ispec("32<[ c(4) 0000010 ~const9(9) a(4) {8f} ]", mnemonic="SHAS")\n@ispec("32<[ c(4) 0101111 ~const9(9) a(4) {8b} ]", mnemonic="XOR_EQ")\n@ispec("32<[ c(4) 0110011 ~const9(9) a(4) {8b} ]", mnemonic="XOR_GE")\n@ispec("32<[ c(4) 0110001 ~const9(9) a(4) {8b} ]", mnemonic="XOR_LT")\n@ispec("32<[ c(4) 0110000 ~const9(9) a(4) {8b} ]", mnemonic="XOR_NE")\ndef tricore_ddc_arithmetic(obj, c, const9, a):\n src1 = env.D[a]\n if obj.mnemonic in ("SH","SHA","SHAS"):\n const9 = const9[0:6]\n elif obj.mnemonic in ("SH_H","SHA_H"):\n const9 = const9[0:5]\n src2 = env.cst(const9.int(-1),32)\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {47} ]", 
mnemonic="AND_OR_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {87} ]", mnemonic="AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {87} ]", mnemonic="ANDN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {67} ]", mnemonic="INS_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {67} ]", mnemonic="INSN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {07} ]", mnemonic="NAND_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {87} ]", mnemonic="NOR_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {87} ]", mnemonic="OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {07} ]", mnemonic="ORN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_NAND_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_ORN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_XNOR_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_XOR_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {07} ]", mnemonic="XNOR_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {07} ]", mnemonic="XOR_T")\ndef tricore_ddd_arithmetic(obj, c, pos2, pos1, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1[pos1:pos1+1], src2[pos2:pos2+1]]\n obj.type = type_data_processing\n@ispec("32<[ c(4) 0001000 const9(9) a(4) {8f} ]", mnemonic="AND")\n@ispec("32<[ 
c(4) 0100101 const9(9) a(4) {8b} ]", mnemonic="AND_GE_U")\n@ispec("32<[ c(4) 0100011 const9(9) a(4) {8b} ]", mnemonic="AND_LT_U")\n@ispec("32<[ c(4) 0001110 const9(9) a(4) {8f} ]", mnemonic="ANDN")\n@ispec("32<[ c(4) 0001001 const9(9) a(4) {8f} ]", mnemonic="NAND")\n@ispec("32<[ c(4) 0001011 const9(9) a(4) {8f} ]", mnemonic="NOR")\n@ispec("32<[ c(4) 0010101 const9(9) a(4) {8b} ]", mnemonic="GE_U")\n@ispec("32<[ c(4) 0001010 const9(9) a(4) {8f} ]", mnemonic="OR")\n@ispec("32<[ c(4) 0101100 const9(9) a(4) {8b} ]", mnemonic="OR_GE_U")\n@ispec("32<[ c(4) 0101010 const9(9) a(4) {8b} ]", mnemonic="OR_LT_U")\n@ispec("32<[ c(4) 0101000 const9(9) a(4) {8b} ]", mnemonic="OR_NE")\n@ispec("32<[ c(4) 0001111 const9(9) a(4) {8f} ]", mnemonic="ORN")\n@ispec("32<[ c(4) 0000111 const9(9) a(4) {8f} ]", mnemonic="SHUFFLE")\n@ispec("32<[ c(4) 0001101 const9(9) a(4) {8f} ]", mnemonic="XNOR")\n@ispec("32<[ c(4) 0001100 const9(9) a(4) {8f} ]", mnemonic="XOR")\n@ispec("32<[ c(4) 0111100 const9(9) a(4) {8b} ]", mnemonic="SH_GE_U")\n@ispec("32<[ c(4) 0111010 const9(9) a(4) {8b} ]", mnemonic="SH_LT_U")\n@ispec("32<[ c(4) 0110100 const9(9) a(4) {8b} ]", mnemonic="XOR_GE_U")\n@ispec("32<[ c(4) 0110011 const9(9) a(4) {8b} ]", mnemonic="XOR_LT_U")\n@ispec("32<[ c(4) 0011011 const9(9) a(4) {8b} ]", mnemonic="MAX_U")\n@ispec("32<[ c(4) 0010011 const9(9) a(4) {8b} ]", mnemonic="LT_U")\ndef tricore_ddc_arithmetic(obj, c, const9, a):\n src1 = env.D[a]\n src2 = env.cst(const9,32)\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {c2} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {06} ]", mnemonic="SH")\n@ispec("16<[ ~const4(4) a(4) {86} ]", mnemonic="SHA")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[a]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {92} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {8a} ]", 
mnemonic="CADD")\n@ispec("16<[ ~const4(4) a(4) {ca} ]", mnemonic="CADDN")\n@ispec("16<[ ~const4(4) a(4) {aa} ]", mnemonic="CMOV")\n@ispec("16<[ ~const4(4) a(4) {ea} ]", mnemonic="CMOVN")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[15]\n obj.operands = [dst, src1, src2]\n if "CADD" in obj.mnemonic:\n obj.operands = [dst, src1, dst, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {9a} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {ba} ]", mnemonic="EQ")\n@ispec("16<[ ~const4(4) a(4) {fa} ]", mnemonic="LT")\n@ispec("16<[ ~const4(4) a(4) {82} ]", mnemonic="MOV")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[15]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[a]\n obj.operands = [dst, src1, src2]\n if obj.mnemonic=="MOV":\n obj.operands = [src1,src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {d2} ]", mnemonic="MOV")\ndef tricore_ec_arithmetic(obj, const4, a):\n dst = env.E[a]\n src = env.cst(const4.int(-1),64)\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ const4(4) a(4) {a0} ]", mnemonic="MOV_A")\ndef tricore_ec_arithmetic(obj, const4, a):\n dst = env.A[a]\n src = env.cst(const4,32)\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ const8(8) {16} ]", mnemonic="AND")\n@ispec("16<[ const8(8) {da} ]", mnemonic="MOV")\n@ispec("16<[ const8(8) {96} ]", mnemonic="OR")\ndef tricore_ddc_arithmetic(obj, const8):\n dst = env.D[15]\n src2 = env.cst(const8,32)\n src1 = env.D[15]\n obj.operands = [dst, src1, src2]\n if obj.mnemonic=="MOV":\n obj.operands = [src1,src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {42} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {26} ]", mnemonic="AND")\n@ispec("16<[ b(4) a(4) {a6} ]", mnemonic="OR")\n@ispec("16<[ b(4) a(4) {a2} ]", mnemonic="SUB")\n@ispec("16<[ b(4) a(4) {62} ]", mnemonic="SUBS")\n@ispec("16<[ b(4) a(4) {c6} ]", mnemonic="XOR")\ndef 
tricore_dd_arithmetic(obj, b, a):\n dst = env.D[a]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {02} ]", mnemonic="MOV" , _dst=env.D, _src=env.D)\n@ispec("16<[ b(4) a(4) {60} ]", mnemonic="MOV_A" , _dst=env.A, _src=env.D)\n@ispec("16<[ b(4) a(4) {40} ]", mnemonic="MOV_AA" , _dst=env.A, _src=env.A)\n@ispec("16<[ b(4) a(4) {80} ]", mnemonic="MOV_D" , _dst=env.D, _src=env.A)\ndef tricore_mov(obj, b, a, _dst, _src):\n dst = _dst[a]\n src = _src[b]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {12} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {2a} ]", mnemonic="CMOV")\n@ispec("16<[ b(4) a(4) {6a} ]", mnemonic="CMOVN")\n@ispec("16<[ b(4) a(4) {52} ]", mnemonic="SUB")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[a]\n src1 = env.D[15]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {1a} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {22} ]", mnemonic="ADDS")\n@ispec("16<[ b(4) a(4) {3a} ]", mnemonic="EQ")\n@ispec("16<[ b(4) a(4) {7a} ]", mnemonic="LT")\n@ispec("16<[ b(4) a(4) {5a} ]", mnemonic="SUB")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[15]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {01} ---- b(4) a(4) {01} ]", mnemonic="ADD_A")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {01} ]", mnemonic="SUB_A")\ndef tricore_aaa_arithmetic(obj, c, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {b0} ]", mnemonic="ADD_A")\ndef tricore_aac_arithmetic(obj, const4, a):\n dst = env.A[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.A[a]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ const8(8) {20} ]", mnemonic="SUB_A")\ndef tricore_aac_arithmetic(obj, const8, a):\n 
dst = env.A[10]\n src2 = env.cst(const8,32)\n src1 = env.A[10]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {30} ]", mnemonic="ADD_A")\ndef tricore_aa_arithmetic(obj, b, a):\n dst = env.A[a]\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) ~const16(16) a(4) {1b} ]", mnemonic="ADDI")\n@ispec("32<[ c(4) ~const16(16) a(4) {9b} ]", mnemonic="ADDIH")\ndef tricore_di_arithmetic(obj, c, const16, a):\n src1 = env.D[a]\n src2 = env.cst(const16.int(-1),32)\n if self.mnemonic=="ADDIH": src2=src2<<16\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) ~const16(16) a(4) {11} ]", mnemonic="ADDIH_A")\ndef tricore_ai_arithmetic(obj, c, const16, a):\n src1 = env.A[a]\n src2 = env.cst(const16.int(-1),32)<<16\n dst = env.A[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {60} -- n(2) b(4) a(4) {01} ]", mnemonic="ADDSC_A")\ndef tricore_aaa_arithmetic(obj, c, n, b, a):\n src1 = env.D[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src2, src1, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {62} ---- b(4) a(4) {01} ]", mnemonic="ADDSC_AT")\ndef tricore_aaa_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src2, src1]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) n(2) 010000 ]", mnemonic="ADDSC_A")\ndef tricore_aa_arithmetic(obj, b, a, n):\n dst = env.A[a]\n src1 = env.D[15]\n src2 = env.A[b]\n obj.operands = [dst, src2, src1, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ off2(4) 10 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1110 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_I", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1110 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_I", mode="Circular")\n@ispec("32<[ 
off2(4) 00 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1100 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1100 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_W", mode="Circular")\n@ispec("32<[ off2(4) 00 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1101 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_WI", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1101 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_WI", mode="Circular")\n@ispec("32<[ off2(4) 00 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Post-increment")\n@ispec("32<[ 
off2(4) 01 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Pre-increment")\ndef tricore_cache(obj, off2, off1, b):\n src2 = env.A[b]\n src1 = env.cst((off2<<6)+off1,10)\n obj.operands = [src2, src1]\n obj.type = type_system\n@ispec("32<[ off2(4) 10 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 0011 off1(6) b(4) a(4) {69} ]", mnemonic="CMPSWAP_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 0011 off1(6) b(4) a(4) {69} ]", mnemonic="CMPSWAP_W", mode="Circular")\n@ispec("32<[ off2(4) 00 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 0010 off1(6) b(4) a(4) {69} ]", mnemonic="SWAPMSK_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 0010 off1(6) b(4) a(4) {69} ]", mnemonic="SWAPMSK_W", mode="Circular")\n@ispec("32<[ off2(4) 00 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Pre-increment")\ndef tricore_swap(obj, off2, off1, b, a):\n if a%2:\n raise InstructionError(obj)\n dst = env.D[a]\n src1 = env.A[b]\n src2 = env.cst((off2<<6)+off1,10)\n src3 = env.E[a]\n obj.operands = [dst, src1, src2, src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ~const9(9) a(4) {ab} ]", mnemonic="CADD")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {ab} ]", mnemonic="CADDN")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {13} ]", mnemonic="MADD", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {13} ]", mnemonic="MADDS", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {13} ]", mnemonic="MADDS_U", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {33} ]", mnemonic="MSUB", 
opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {33} ]", mnemonic="MSUBS", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {33} ]", mnemonic="MSUBS_U", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {ab} ]", mnemonic="SEL")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {ab} ]", mnemonic="SELN")\ndef tricore_cond_ddc(obj, c, d, const9, a):\n cond = env.D[d]\n src1 = env.D[a]\n src2 = env.cst(const9.int(-1),32)\n dst = env.D[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 011 ~const9(9) a(4) {13} ]", mnemonic="MADD", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {13} ]", mnemonic="MADDS", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 010 ~const9(9) a(4) {13} ]", mnemonic="MADD_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {13} ]", mnemonic="MADDS_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 011 ~const9(9) a(4) {33} ]", mnemonic="MSUB", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {33} ]", mnemonic="MSUBS", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 010 ~const9(9) a(4) {33} ]", mnemonic="MSUB_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {33} ]", mnemonic="MSUBS_U", opt4="64+(32+K9)->64")\ndef tricore_cond_eec(obj, c, d, const9, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.cst(const9.int(-1),32)\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 011010 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="LL")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="LU")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="UL")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="UU")\n@ispec("32<[ c(4) d(4) 111010 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="LL")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {83} 
]", mnemonic="MADDS_H", op4="LU")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="UL")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="UU")\n@ispec("32<[ c(4) d(4) 000010 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 000001 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 000000 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 000101 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 011101 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 000100 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 011100 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 100010 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 100001 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 100000 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 100101 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 111101 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 
100100 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 111100 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 011010 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="LL")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="LU")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="UL")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="UU")\n@ispec("32<[ c(4) d(4) 111010 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="LL")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="LU")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="UL")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="UU")\n@ispec("32<[ c(4) d(4) 000010 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 000001 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 000000 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 000101 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 011101 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 000100 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 011100 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 100010 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {63} ]", 
mnemonic="MSUBS_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 100001 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 100000 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 100101 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 111101 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 100100 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 111100 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16U*16U)->64")\ndef tricore_cond_eec(obj, c, d, n, b, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 0000 ---- b(4) a(4) {2b} ]", mnemonic="CADD")\n@ispec("32<[ c(4) d(4) 0001 ---- b(4) a(4) {2b} ]", mnemonic="CADDN")\n@ispec("32<[ c(4) d(4) 0010 ---- b(4) a(4) {2b} ]", mnemonic="CSUB")\n@ispec("32<[ c(4) d(4) 0011 ---- b(4) a(4) {2b} ]", mnemonic="CSUBN")\n@ispec("32<[ c(4) d(4) {0a} b(4) a(4) {03} ]", mnemonic="MADD", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) {8a} b(4) a(4) {03} ]", mnemonic="MADDS", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) {88} b(4) a(4) {03} ]", mnemonic="MADDS_U", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) 0100 ---- b(4) a(4) {2b} ]", mnemonic="SEL")\n@ispec("32<[ c(4) d(4) 0101 ---- b(4) a(4) {2b} ]", mnemonic="SELN")\ndef tricore_cond_ddd(obj, c, d, b, a):\n cond = env.D[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) {6a} b(4) a(4) {03} ]", mnemonic="MADD", 
opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {ea} b(4) a(4) {03} ]", mnemonic="MADDS", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {68} b(4) a(4) {03} ]", mnemonic="MADD_U", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {e8} b(4) a(4) {03} ]", mnemonic="MADDS_U", opt4="64+(32*32)->64")\ndef tricore_cond_ddd(obj, c, d, b, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {1c} ---- ---- a(4) {0f} ]", mnemonic="CLO")\n@ispec("32<[ c(4) {7d} ---- ---- a(4) {0f} ]", mnemonic="CLO_H")\n@ispec("32<[ c(4) {1d} ---- ---- a(4) {0f} ]", mnemonic="CLS")\n@ispec("32<[ c(4) {7e} ---- ---- a(4) {0f} ]", mnemonic="CLS_H")\n@ispec("32<[ c(4) {1b} ---- ---- a(4) {0f} ]", mnemonic="CLZ")\n@ispec("32<[ c(4) {7c} ---- ---- a(4) {0f} ]", mnemonic="CLZ_H")\n@ispec("32<[ c(4) {5e} ---- ---- a(4) {0b} ]", mnemonic="SAT_B")\n@ispec("32<[ c(4) {5f} ---- ---- a(4) {0b} ]", mnemonic="SAT_BU")\n@ispec("32<[ c(4) {7e} ---- ---- a(4) {0b} ]", mnemonic="SAT_H")\n@ispec("32<[ c(4) {7f} ---- ---- a(4) {0b} ]", mnemonic="SAT_HU")\ndef tricore_dd_arithmetic(obj, c, a):\n src = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ 1010 ---- {00} ]", mnemonic="DEBUG")\n@ispec("16<[ 0000 ---- {00} ]", mnemonic="NOP")\ndef tricore_system(obj):\n obj.operands = []\n obj.type = type_system\n@ispec("16<[ 0111 ---- {00} ]", mnemonic="FRET")\n@ispec("16<[ 1001 ---- {00} ]", mnemonic="RET")\n@ispec("16<[ 1000 ---- {00} ]", mnemonic="RFE")\ndef tricore_ret(obj):\n obj.operands = []\n obj.type = type_control_flow\n@ispec("32<[ ---- 000100 ---------- ---- {0d} ]", mnemonic="DEBUG")\n@ispec("32<[ ---- 001101 ---------- ---- {0d} ]", mnemonic="DISABLE")\n@ispec("32<[ ---- 010010 ---------- ---- {0d} ]", mnemonic="DSYNC")\n@ispec("32<[ ---- 001100 ---------- ---- {0d} ]", mnemonic="ENABLE")\n@ispec("32<[ ---- 010011 ---------- ---- {0d} ]", 
mnemonic="ISYNC")\n@ispec("32<[ ---- 010101 ---------- ---- {0d} ]", mnemonic="TRAPSV")\n@ispec("32<[ ---- 010100 ---------- ---- {0d} ]", mnemonic="TRAPV")\n@ispec("32<[ ---- 000000 ---------- ---- {0d} ]", mnemonic="NOP")\n@ispec("32<[ ---- 001001 ---------- ---- {0d} ]", mnemonic="RSLCX")\n@ispec("32<[ ---- 000000 ---------- ---- {2f} ]", mnemonic="RSTV")\n@ispec("32<[ ---- 001000 ---------- ---- {0d} ]", mnemonic="SVLCX")\n@ispec("32<[ ---- 010110 ---------- ---- {0d} ]", mnemonic="WAIT")\ndef tricore_system(obj):\n obj.operands = []\n obj.type = type_system\n@ispec("32<[ ---- 000011 ---------- ---- {0d} ]", mnemonic="FRET")\n@ispec("32<[ ---- 000110 ---------- ---- {0d} ]", mnemonic="RET")\n@ispec("32<[ ---- 000111 ---------- ---- {0d} ]", mnemonic="RFE")\n@ispec("32<[ ---- 000101 ---------- ---- {0d} ]", mnemonic="RFM")\ndef tricore_ret(obj):\n obj.operands = []\n obj.type = type_control_flow\n@ispec("32<[ ---- 001111 ---------- a(4) {0d} ]", mnemonic="DISABLE")\n@ispec("32<[ ---- 001110 ---------- a(4) {0d} ]", mnemonic="RESTORE")\ndef tricore_system(obj, a):\n obj.operands = [env.D[a]]\n obj.type = type_system\n@ispec("32<[ c(4) d(4) 1101 -- 00 b(4) ---- {6b} ]", mnemonic="DVADJ")\n@ispec("32<[ c(4) d(4) 1111 -- 00 b(4) ---- {6b} ]", mnemonic="DVSTEP")\n@ispec("32<[ c(4) d(4) 1110 -- 00 b(4) ---- {6b} ]", mnemonic="DVSTEP_U")\n@ispec("32<[ c(4) d(4) 1010 -- 00 b(4) ---- {6b} ]", mnemonic="IXMAX")\n@ispec("32<[ c(4) d(4) 1011 -- 00 b(4) ---- {6b} ]", mnemonic="IXMAX_U")\n@ispec("32<[ c(4) d(4) 1000 -- 00 b(4) ---- {6b} ]", mnemonic="IXMIN")\n@ispec("32<[ c(4) d(4) 1001 -- 00 b(4) ---- {6b} ]", mnemonic="IXMIN_U")\ndef tricore_eee(obj, c, d, b):\n if d%2 or b%2 or c%2:\n raise InstructionError(obj)\n src1 = env.E[d]\n src2 = env.E[b]\n dst = env.E[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) disp(4) {1e} ]", mnemonic="JEQ", _off=0)\n@ispec("16<[ ~const4(4) disp(4) {9e} ]", mnemonic="JEQ", 
_off=16)\n@ispec("16<[ ~const4(4) disp(4) {5e} ]", mnemonic="JNE", _off=0)\n@ispec("16<[ ~const4(4) disp(4) {de} ]", mnemonic="JNE", _off=16)\ndef tricore_jcc(obj, const4, disp, _off):\n dst = env.D[15]\n src1 = env.cst(const4.int(-1),32)\n src2 = env.cst(disp,32)+_off\n obj.operands = [dst, src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {3e} ]", mnemonic="JEQ", _off=0)\n@ispec("16<[ b(4) disp(4) {be} ]", mnemonic="JEQ", _off=16)\n@ispec("16<[ b(4) disp(4) {7e} ]", mnemonic="JNE", _off=0)\n@ispec("16<[ b(4) disp(4) {fe} ]", mnemonic="JNE", _off=16)\ndef tricore_jcc(obj, b, disp, _off):\n dst = env.D[15]\n src1 = env.D[b]\n src2 = env.cst(disp,32)+_off\n obj.operands = [dst, src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {ce} ]", mnemonic="JGEZ")\n@ispec("16<[ b(4) disp(4) {4e} ]", mnemonic="JGTZ")\n@ispec("16<[ b(4) disp(4) {8e} ]", mnemonic="JLEZ")\n@ispec("16<[ b(4) disp(4) {0e} ]", mnemonic="JLTZ")\n@ispec("16<[ b(4) disp(4) {f6} ]", mnemonic="JNZ")\n@ispec("16<[ b(4) disp(4) {76} ]", mnemonic="JZ")\ndef tricore_jcc(obj, b, disp):\n src1 = env.D[b]\n src2 = env.cst(disp,32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {df} ]", mnemonic="JEQ")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {df} ]", mnemonic="JNE")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {ff} ]", mnemonic="JGE")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {ff} ]", mnemonic="JGE_U")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {bf} ]", mnemonic="JLT")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {bf} ]", mnemonic="JLT_U")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {9f} ]", mnemonic="JNED")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {9f} ]", mnemonic="JNEI")\ndef tricore_jcc(obj, disp, const, a):\n src1 = env.D[a]\n src2 = env.cst(const,4)\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {5f} ]", mnemonic="JEQ")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {5f} 
]", mnemonic="JNE")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {7f} ]", mnemonic="JGE")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {7f} ]", mnemonic="JGE_U")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {3f} ]", mnemonic="JLT")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {3f} ]", mnemonic="JLT_U")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {1f} ]", mnemonic="JNED")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {1f} ]", mnemonic="JNEI")\ndef tricore_jcc(obj, disp, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {7d} ]", mnemonic="JEQ_A")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {7d} ]", mnemonic="JNE_A")\ndef tricore_jcc(obj, disp, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 1 ~disp(15) ---- a(4) {bd} ]", mnemonic="JNZ_A")\n@ispec("32<[ 0 ~disp(15) ---- a(4) {bd} ]", mnemonic="JZ_A")\ndef tricore_jcc(obj, disp, a):\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) ---- {fd} ]", mnemonic="LOOP")\n@ispec("32<[ 1 ~disp(15) b(4) ---- {fd} ]", mnemonic="LOOPU")\ndef tricore_jcc(obj, disp, b):\n src1 = env.A[b]\n src2 = env.cst(disp.int(-1)*2,32)\n obj.operands = [src1, src2]\n if obj.mnemonic=="LOOPU":\n obj.operands = [src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {7c} ]", mnemonic="JNZ_A")\n@ispec("16<[ b(4) disp(4) {bc} ]", mnemonic="JZ_A")\ndef tricore_jcc(obj, b, disp):\n src1 = env.A[b]\n src2 = env.cst(disp,32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) #disp(4) {fc} ]", mnemonic="LOOP")\ndef tricore_jcc(obj, b, disp):\n src1 = env.A[b]\n src2 = env.cst(int(("1"*27)+disp+"0",2),32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ 0000 a(4) {dc} ]", mnemonic="JI")\ndef tricore_ji(obj, a):\n src = env.A[a]\n obj.operands = 
[src]\n obj.type = type_control_flow\n@ispec("16<[ 0000 a(4) {46} ]", mnemonic="NOT")\n@ispec("16<[ 0101 a(4) {32} ]", mnemonic="RSUB")\n@ispec("16<[ 0000 a(4) {32} ]", mnemonic="SAT_B")\n@ispec("16<[ 0001 a(4) {32} ]", mnemonic="SAT_BU")\n@ispec("16<[ 0010 a(4) {32} ]", mnemonic="SAT_H")\n@ispec("16<[ 0011 a(4) {32} ]", mnemonic="SAT_HU")\ndef tricore_a(obj, a):\n src = env.D[a]\n obj.operands = [src]\n obj.type = type_data_processing\n@ispec("16<[ n(4) disp(4) {ae} ]", mnemonic="JNZ_T")\n@ispec("16<[ n(4) disp(4) {2e} ]", mnemonic="JZ_T")\ndef tricore_ji(obj, n, disp):\n obj.operands = [env.D[15][n:n+1], env.cst(disp,32)]\n obj.type = type_control_flow\n@ispec("32<[ 1 ~disp(15) n(4) a(4) h 1101111 ]", mnemonic="JNZ_T")\n@ispec("32<[ 0 ~disp(15) n(4) a(4) h 1101111 ]", mnemonic="JZ_T")\ndef tricore_jcc(obj, disp, n, a, h):\n i = n+(h<<4)\n src = env.D[a][i:i+1]\n obj.operands = [src, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_A", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_B", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_BU", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_D", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_DA", mode="Absolute")\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_H", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_HU", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {45} ]", mnemonic="LD_Q", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {c5} ]", mnemonic="LEA", mode="Absolute")\ndef tricore_ld(obj, off2, off3, 
off1, off4, a):\n dst = env.D[a]\n if obj.mnemonic in ("LD_A", "LEA") : dst = env.A[a]\n if obj.mnemonic in ("LD_D","LDMST") : dst = env.E[a]\n if obj.mnemonic=="LD_DA": dst = env.P[a]\n src = off1//off2//off3\n obj.operands = [dst, composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {c5} ]", mnemonic="LHA", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n dst = env.A[a]\n src = off1//off2//off3//off4\n obj.operands = [dst, composer([env.cst(0,14),env.cst(src.int(),18)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_A", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {25} ]", mnemonic="ST_B", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_D", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_DA", mode="Absolute")\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {25} ]", mnemonic="ST_H", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {65} ]", mnemonic="ST_Q", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {e5} ]", mnemonic="SWAP_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {e5} ]", mnemonic="LDMST", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4, a):\n src = env.D[a]\n if obj.mnemonic in ("ST_A",) : src = env.A[a]\n if obj.mnemonic in ("ST_D","LDMST") : src = env.E[a]\n if obj.mnemonic=="ST_DA": src = env.P[a]\n addr = off1//off2//off3\n obj.operands = [composer([env.cst(addr.int(),28),env.cst(off4,4)]), src]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) b bpos(3) {d5} ]", mnemonic="ST_T", mode="Absolute")\ndef 
tricore_st(obj, off2, off3, off1, off4, b, bpos):\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)]), env.cst(bpos,3), env.cst(b,1)]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) ---- {15} ]", mnemonic="STLCX", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4):\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {15} ]", mnemonic="LDLCX", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {15} ]", mnemonic="LDUCX", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n src = off1//off2//off3\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_A", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_A", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_B", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_B", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_BU", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 
01 0001 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_BU", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_D", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_D", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_DA", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_DA", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_H", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_H", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0011 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_HU", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0011 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_HU", 
mode="Circular")\n@ispec("32<[ ~off2(4) 00 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_Q", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_Q", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="LEA", mode="Short-offset")\ndef tricore_ld(obj, off2, off1, b, a):\n dst = env.D[a]\n if obj.mnemonic=="LD_A" : dst = env.A[a]\n elif obj.mnemonic=="LEA" : dst = env.A[a]\n elif obj.mnemonic=="LD_D" : dst = env.E[a]\n elif obj.mnemonic=="LDMST" : dst = env.E[a]\n elif obj.mnemonic=="LD_DA" : dst = env.P[a]\n obj.b = b\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [dst, src1, src2]\n if obj.mode == "Bit-Reverse":\n obj.operands.pop()\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_A", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) 
b(4) a(4) {a9} ]", mnemonic="ST_A", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_B", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_B", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_D", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_D", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_DA", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_DA", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_H", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_H", 
mode="Circular")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_Q", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_Q", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {69} ]", mnemonic="LDMST", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {69} ]", mnemonic="LDMST", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Pre-increment")\ndef tricore_st(obj, off2, off1, b, a):\n dst = env.D[a]\n if obj.mnemonic=="ST_A" : dst = env.A[a]\n elif obj.mnemonic=="ST_D" : dst = env.E[a]\n elif obj.mnemonic=="ST_DA" : dst = env.P[a]\n elif obj.mnemonic=="LDMST" : dst = env.E[a]\n obj.b = b\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n 
obj.operands = [src1, src2, dst]\n if obj.mode == "Bit-Reverse":\n obj.operands.pop()\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {69} ]", mnemonic="SWAP_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {69} ]", mnemonic="SWAP_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Pre-increment")\ndef tricore_ld(obj, off2, off1, b, a):\n dst = env.D[a]\n src1 = env.P[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [src1, src2, dst]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) ---- {49} ]", mnemonic="LDLCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) ---- {49} ]", mnemonic="LDUCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) ---- {49} ]", mnemonic="STLCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) ---- {49} ]", mnemonic="STUCX", mode="Short-offset")\ndef tricore_ld(obj, off2, off1, b):\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {99} ]", mnemonic="LD_A", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {79} ]", mnemonic="LD_B", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {39} ]", mnemonic="LD_BU", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {b9} ]", mnemonic="LD_HU", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {19} ]", mnemonic="LD_W", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) 
~off1(6) b(4) a(4) {d9} ]", mnemonic="LEA", mode="Long-offset")\ndef tricore_ld(obj, off2, off3, off1, b, a):\n dst = env.D[a]\n\n</context>\n\n假设一个实际的场景,我们需要采用这段代码来实现工作流程,可以给我解释一下这段代码的作用吗? \n\n\n\n
105
+ ```
lyrallms/LyraBaichuanPy/examples/batch_demo.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from time import perf_counter
3
+
4
+ import sys
5
+ sys.path.append('../')
6
+
7
+ from lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B
8
+
9
+
10
def get_args():
    """Parse the benchmark's command-line options and echo every value.

    Returns:
        argparse.Namespace with model_path, tokenizer_path, data_type,
        memopt_mode, quant_type, prompt, max_output_length, warmups, avgnums.
    """
    arg_parser = argparse.ArgumentParser(description="Faster Baichuan Demo")

    arg_parser.add_argument('--model-path', type=str, required=True,
                            help='Model Path, include config.ini and tokenizer files')
    # parser.add_argument('--tokenizer-path', type=str, default='/group/30063/users/vanewu/LocalModels/ChatGLM6B-Torch/chatglm-6b')
    arg_parser.add_argument('--tokenizer-path', type=str, default=None)

    arg_parser.add_argument(
        '--data-type', type=str, metavar='TYPE', default='fp16',
        choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
        help='The data type to inference. If None, the data type follows the '
             'checkpoint data type.')

    arg_parser.add_argument(
        '--memopt_mode', type=int, default=0, choices=[0, 1],
        help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
             ' 0: FP16 mode'
             ' 1: Use MEMOPT mode')

    arg_parser.add_argument(
        '--quant-type', type=str, metavar='TYPE', default='int8',
        choices=['int4', 'int8'],
        help='The data type of quantization. Only used in MEMOPT.')

    arg_parser.add_argument("--prompt", type=str, required=False)
    arg_parser.add_argument("--max-output-length", type=int, default=512)
    arg_parser.add_argument("--warmups", type=int, default=10)
    arg_parser.add_argument("--avgnums", type=int, default=10)
    parsed = arg_parser.parse_args()

    # Echo all parsed values so benchmark logs are self-describing.
    print('\n=================== Arguments ===================')
    for name, value in vars(parsed).items():
        print(' - {}: {}'.format(name.ljust(25, "."), value))
    print('=================================================')

    return parsed
47
+
48
+
49
def main():
    """Benchmark batched generation throughput of the converted Baichuan model.

    For each batch size: replicates ``--prompt`` into a batch, warms up the
    GPU with ``--warmups`` untimed generate() calls, then times ``--avgnums``
    generate() calls and reports average tokens/s and characters/s.
    """
    args = get_args()

    # model = lyraBaichuan7B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode, args.quant_type)
    model = lyraBaichuan13B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode, args.quant_type)

    # prompt_template = "<reserved_106>{}\n<reserved_107>" # baichuan chat
    prompt_template = "{}" # baichuan

    prompt = prompt_template.format(args.prompt)

    test_batch_size = [1, 2, 4] # 8, 16, 32, 64
    print("test_batch_size: ", test_batch_size)

    for i, bs in enumerate(test_batch_size):
        # The same prompt is duplicated to build the batch.
        prompts = [prompt, ]*bs

        # warmup gpu
        # NOTE(review): warmup uses repetition_penalty=1.1 while the timed
        # loop uses 1.0 — presumably intentional, but worth confirming.
        for _ in range(args.warmups):
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.1, do_sample=False)

        start = perf_counter()
        for _ in range(args.avgnums):
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
        end = perf_counter()
        # Average wall-clock seconds per timed generate() call.
        cost = (end - start) / args.avgnums

        # Token counts below include both prompt and generated tokens, since
        # prompt and output are concatenated before re-encoding.
        input_output_texts = [prompt+' ' + gtext for prompt,
                              gtext in zip(prompts, output_texts)]
        tokens = 0
        input_tokens = len(model.tokenizer.encode(prompt))
        words = 0
        for text in input_output_texts:
            tokens += len(model.tokenizer.encode(text))
            words += len(text)
        print(
            f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Input tokens: {input_tokens}. Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
        )
        print(
            f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
        )

        # Print a few input/output samples only for the first batch size.
        if i == 0:
            for k in range(bs):
                print(
                    f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
                if k>2:
                    break

if __name__ == "__main__":
    main()
lyrallms/LyraBaichuanPy/examples/batch_stream_demo.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from time import perf_counter
3
+
4
+ import sys
5
+ sys.path.append('../')
6
+
7
+ from lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B
8
+
9
+
10
def print_list(lines):
    """Clear the terminal, then print each string in *lines* on its own line."""
    # "\033c" is the ANSI full-reset escape sequence; emitting it clears
    # the terminal before repainting.
    print("\033c", end="")

    # Print the string list line by line.
    joined = '\n'.join(lines)
    print(joined)
16
+
17
def get_args():
    """Parse command-line options for the streaming benchmark and print them.

    Returns:
        argparse.Namespace with model_path, tokenizer_path, data_type,
        memopt_mode, prompt, max_output_length, warmups, avgnums.
    """
    cli = argparse.ArgumentParser(description="Faster Baichuan Demo")

    cli.add_argument('--model-path', type=str, required=True,
                     help='Model Path, include config.ini and tokenizer files')
    cli.add_argument('--tokenizer-path', type=str, default=None)

    cli.add_argument(
        '--data-type', type=str, metavar='TYPE', default='fp16',
        choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
        help='The data type to inference. If None, the data type follows the '
             'checkpoint data type.')

    cli.add_argument(
        '--memopt_mode', type=int, default=0, choices=[0, 1],
        help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
             ' 0: FP16 mode'
             ' 1: Use MEMOPT mode')

    cli.add_argument("--prompt", type=str, required=False)
    cli.add_argument("--max-output-length", type=int, default=512)
    cli.add_argument("--warmups", type=int, default=10)
    cli.add_argument("--avgnums", type=int, default=10)
    ns = cli.parse_args()

    # Dump the parsed configuration so runs are reproducible from logs.
    print('\n=================== Arguments ===================')
    for key, val in vars(ns).items():
        print(f' - {key.ljust(25, ".")}: {val}')
    print('=================================================')

    return ns
48
+
49
+
50
def main():
    """Benchmark streaming generation of the converted Baichuan model.

    For each batch size: warms up the GPU with regular generate() calls,
    then times stream_generate() over ``--avgnums`` repetitions, repainting
    the terminal with every partial decode, and reports throughput computed
    from the final completed outputs.
    """
    args = get_args()

    # model = lyraBaichuan7B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode)
    model = lyraBaichuan13B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode)

    # prompt_template = "<reserved_106>{}\n<reserved_107>" # baichuan chat
    prompt_template = "{}" # baichuan

    prompt = prompt_template.format(args.prompt)

    test_batch_size = [1, 2, 4] # 8, 16, 32, 64
    print("test_batch_size: ", test_batch_size)

    for i, bs in enumerate(test_batch_size):
        # The same prompt is duplicated to build the batch.
        prompts = [prompt, ]*bs

        # warmup gpu
        for _ in range(args.warmups):
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.1, do_sample=False)

        # NOTE(review): the timed section below includes terminal repaint
        # time from print_list(), so the reported speed understates pure
        # generation throughput.
        start = perf_counter()
        for _ in range(args.avgnums):
            # stream_generate is iterated as (finish, partial_texts) pairs;
            # keep repainting until the model signals completion.
            for finish, output_texts in model.stream_generate(prompts,
                                                              output_length=args.max_output_length,
                                                              top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False):
                print_list(output_texts)

                if finish:
                    break
        end = perf_counter()
        # Average wall-clock seconds per timed streaming pass.
        cost = (end - start) / args.avgnums

        # Token counts include prompt + generated text, since the two are
        # concatenated before re-encoding.
        input_output_texts = [prompt+' ' + gtext for prompt,
                              gtext in zip(prompts, output_texts)]
        tokens = 0
        input_tokens = len(model.tokenizer.encode(prompt))
        words = 0
        for text in input_output_texts:
            tokens += len(model.tokenizer.encode(text))
            words += len(text)
        print(
            f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Input tokens: {input_tokens}. Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
        )
        print(
            f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
        )

if __name__ == "__main__":
    main()
lyrallms/LyraBaichuanPy/examples/random_batch_demo.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import json
3
+ import random
4
+ import numpy as np
5
+
6
+ from time import perf_counter
7
+
8
+ import sys
9
+ sys.path.append('../')
10
+ from lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B
11
+
12
+
13
def get_args():
    """Parse command-line options for the randomized-prompt benchmark.

    Returns:
        argparse.Namespace with model_path, tokenizer_path, data_type,
        memopt_mode, prompt_filepath, max_output_length, warmups, avgnums.
    """
    p = argparse.ArgumentParser(description="Faster Baichuan Demo")

    p.add_argument('--model-path', type=str, required=True,
                   help='Model Path, include config.ini and tokenizer files')
    # parser.add_argument('--tokenizer-path', type=str, default='/group/30063/users/vanewu/LocalModels/ChatGLM6B-Torch/chatglm-6b')
    p.add_argument('--tokenizer-path', type=str, default=None)

    p.add_argument(
        '--data-type', type=str, metavar='TYPE', default='fp16',
        choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
        help='The data type to inference. If None, the data type follows the '
             'checkpoint data type.')

    p.add_argument(
        '--memopt_mode', type=int, default=0, choices=[0, 1],
        help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
             ' 0: FP16 mode'
             ' 1: Use MEMOPT mode')

    p.add_argument("--prompt_filepath", type=str, required=True)
    p.add_argument("--max-output-length", type=int, default=512)
    p.add_argument("--warmups", type=int, default=10)
    p.add_argument("--avgnums", type=int, default=10)
    result = p.parse_args()

    # Echo the run configuration for the benchmark log.
    print('\n=================== Arguments ===================')
    for option, setting in vars(result).items():
        print(f' - {option.ljust(25, ".")}: {setting}')
    print('=================================================')

    return result
45
+
46
+
47
def main():
    """Benchmark batched generation with randomized, variable-length prompts.

    Prompts are drawn from a JSON file (``--prompt_filepath``). The first
    entry is expected to provide a 'prompts' format string and a 'contents'
    list of argument tuples — presumably matching varlen_prompts.json; TODO
    confirm against the actual file schema. For each batch size, the GPU is
    warmed up, then ``--avgnums`` timed generate() calls run with freshly
    sampled prompts, and average token/word throughput is reported.
    """
    args = get_args()

    # model = lyraBaichuan7B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode)
    model = lyraBaichuan13B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode)

    with open(args.prompt_filepath, "rb") as f:
        input_datas = json.loads(f.read())

    used_input_data = input_datas[0]

    # prompt_template = "<reserved_106>{}\n<reserved_107>" # baichuan chat
    prompt_template = "{}" # baichuan

    test_batch_size = [1, 2, 4,] # 8, 16, 32, 64
    print("test_batch_size: ", test_batch_size)

    for i, bs in enumerate(test_batch_size):
        all_use_prompts = []
        all_output_texts = []

        # warmup gpu
        # BUG FIX: random.choices takes the sample size through the
        # keyword-only parameter `k`; passing `bs` positionally bound it to
        # `weights` and raised TypeError at runtime.
        for _ in range(args.warmups):
            prompts = [prompt_template.format( used_input_data['prompts'].format(*x) ) for x in random.choices(used_input_data['contents'], k=bs)]
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)

        all_cost_s = 0.0

        for _ in range(args.avgnums):
            # Sample a fresh batch each repetition so lengths vary.
            prompts = [prompt_template.format( used_input_data['prompts'].format(*x) ) for x in random.choices(used_input_data['contents'], k=bs)]
            all_use_prompts.extend(prompts)

            start = perf_counter()
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
            all_cost_s += perf_counter() - start

            all_output_texts.extend(output_texts)

        # Average wall-clock seconds per timed generate() call.
        cost = all_cost_s / args.avgnums

        # Token counts include prompt + generated text, since the two are
        # concatenated before re-encoding.
        input_output_texts = [prompt + ' ' + gtext for prompt,gtext in zip(all_use_prompts, all_output_texts)]

        tokens = 0
        avg_input_tokens = np.mean([len(model.tokenizer.encode(prompt)) for prompt in all_use_prompts])

        words = 0
        for text in input_output_texts:
            tokens += len(model.tokenizer.encode(text))
            words += len(text)
        print(
            f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Avg Input tokens: {avg_input_tokens}. Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
        )
        print(
            f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
        )

        # Print a few samples from the last batch, first batch size only.
        if i == 0:
            for k in range(bs):
                print(
                    f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
                if k>2:
                    break

if __name__ == "__main__":
    main()
116
+
lyrallms/LyraBaichuanPy/examples/varlen_prompts.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [
2
+ "歌曲名:《幸福万年长》;歌手名:汤灿;歌曲描述:汤灿的幸福万年长创作背景:2001年,汤灿决定推出一首能够贴近听众和潮流的民歌。为此,她邀请了创作过歌曲《为你》《快乐老家》的音乐人浮克合作,邀其担任该曲的制作工作。虽然浮克此前一直从事流行歌曲的工作,但他其实也是一位衷情民歌风格的音乐人,于是两人一拍即合,合作了该曲。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
3
+ "歌曲名:《小丑面具》;歌手名:韩庚;歌曲描述:韩庚的小丑面具的歌曲鉴赏:韩庚在这首歌化身为“小丑”,带上面具调侃这社会上的表面功夫,用幽默又神经质的方式批判愈形冷酷的人心。在这首独特的电子舞曲当中,韩庚尝试了各种不同的发声方式,冷笑、哭喊、啜泣……甚至用声乐融合鬼魅的方法演唱,让人不禁陷入他建构的虚幻氛围而随之起舞。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
4
+ "《Bela Lugosi's Dead 》是英国后朋克乐队Bauhaus的首张单曲,于 1979 年 8 月 6 日在Small Wonder厂牌上发行。[4]它通常被认为是第一张哥特式摇滚唱片。\n1979 年 1 月 26 日,“Bela Lugosi's Dead”在威灵伯勒的贝克录音室进行了六个小时的“录音室现场”录制。这是他们在乐队成立六周后一起录制的第一首歌曲。[6]所有四位乐队成员都被认为是这首歌的作者:主唱彼得·墨菲、吉他手丹尼尔·阿什、鼓手凯文·哈斯金斯和贝斯手大卫·J (大卫·哈斯金斯)。David J 声称这首歌的歌词是他写的。[5] “Bela Lugosi's Dead”的替代版本还包括他们下一首单曲“ Dark Entries ”的早期演示录音的一部分。\n\n在同一场会议中还录制了另外四首歌曲:“Boys”;“咬我的臀部”;“Some Faces”和斯卡雷鬼曲调“Harry”,这是关于Blondie主唱Deborah Harry的。[7] [8]关于这次会议,凯文·哈斯金斯 (Kevin Haskins) 说,“那里有力量流行音乐,还有斯卡。我们试图找到我们的声音。” [9]\n\n在那次录制期间录制的歌曲中(除了“Bela Lugosi's Dead”),只有“Harry”获得了官方发行;1982年作为单曲“ Kick in the Eye ”的B面。1979 年晚些时候在 Beck Studios 录制的《Boys》版本被用作原版单曲《Bela Lugosi's Dead》的 B 面。[10]其余曲目,包括“Boys”的原始录音,一直未发行,直到 2018 年The Bela Session以黑胶唱片和CD 形式发行,并可供乐队数字下载。[11]在额外的曲目中,《经典摇滚》杂志写道:“其余的材料发现乐队正在摸索方向,甚至触及了斯卡。”\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
5
+ "歌曲名:《仓颉》;歌手名:五月天;歌曲描述:五月天的仓颉的歌曲鉴赏:五月天 仓颉(2张)《仓颉》是一首写在文明即将消失前的情诗,陈信宏的词写得颇有味道。《仓颉》这样淡淡的歌曲,或许不够大气,但是陈信宏真诚的演唱足以令人感动,而且《仓颉》的歌词也写得很有哲理。这首歌曲朗朗上口的旋律和诗意的文字使得它很适合在KTV演唱。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:"
6
+ ]
lyrallms/LyraBaichuanPy/generation_utils.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from queue import Queue
3
+
4
+ import torch
5
+
6
+
7
def build_chat_input(model, tokenizer, messages: List[dict], max_new_tokens: int=0):
    """Build a left-truncated input tensor for multi-turn Baichuan chat.

    messages is a list of {"role": ..., "content": ...} dicts; an optional
    "system" message must come first. Conversation rounds are kept newest
    first until the token budget (model.config.model_max_length minus
    max_new_tokens) is exhausted.

    Returns a (1, seq_len) torch.LongTensor on model.device.
    """
    def _parse_messages(messages, split_role="user"):
        # Split the flat message list into (system_text, rounds), where each
        # round starts at a message whose role == split_role.
        system, rounds = "", []
        round = []
        for i, message in enumerate(messages):
            if message["role"] == "system":
                assert i == 0
                system = message["content"]
                continue
            if message["role"] == split_role and round:
                rounds.append(round)
                round = []
            round.append(message)
        if round:
            rounds.append(round)
        return system, rounds

    # 0 means "use the model's configured default".
    max_new_tokens = max_new_tokens or model.generation_config.max_new_tokens
    max_input_tokens = model.config.model_max_length - max_new_tokens
    system, rounds = _parse_messages(messages, split_role="user")
    system_tokens = tokenizer.encode(system)
    max_history_tokens = max_input_tokens - len(system_tokens)

    # Walk rounds from newest to oldest, prepending each round until the
    # history budget is exceeded. The newest round is always kept, even if
    # it alone overflows the budget (the final truncation handles that).
    history_tokens = []
    for round in rounds[::-1]:
        round_tokens = []
        for message in round:
            # Each message is prefixed by a special role token id taken from
            # the model's generation_config.
            if message["role"] == "user":
                round_tokens.append(model.generation_config.user_token_id)
            else:
                round_tokens.append(model.generation_config.assistant_token_id)
            round_tokens.extend(tokenizer.encode(message["content"]))
        if len(history_tokens) == 0 or len(history_tokens) + len(round_tokens) <= max_history_tokens:
            history_tokens = round_tokens + history_tokens # concat left
            if len(history_tokens) < max_history_tokens:
                continue
        break

    input_tokens = system_tokens + history_tokens
    # Append the assistant role token so the model continues as assistant.
    if messages[-1]["role"] != "assistant":
        input_tokens.append(model.generation_config.assistant_token_id)
    input_tokens = input_tokens[-max_input_tokens:] # truncate left
    return torch.LongTensor([input_tokens]).to(model.device)
50
+
51
+
52
class TextIterStreamer:
    """Collects generated token ids and exposes the growing decoded text as a
    blocking iterator: each put() pushes one full re-decode of all tokens so
    far, and end() terminates iteration."""

    def __init__(self, tokenizer, skip_prompt=False, skip_special_tokens=False):
        self.tokenizer = tokenizer
        self.skip_prompt = skip_prompt
        self.skip_special_tokens = skip_special_tokens
        self.tokens = []
        self.text_queue = Queue()
        self.next_tokens_are_prompt = True

    def put(self, value):
        # The very first chunk a generator pushes is the prompt itself;
        # drop it when skip_prompt is enabled.
        if self.skip_prompt and self.next_tokens_are_prompt:
            self.next_tokens_are_prompt = False
            return
        # Batched tensors: keep only the first row.
        if len(value.shape) > 1:
            value = value[0]
        self.tokens.extend(value.tolist())
        decoded = self.tokenizer.decode(
            self.tokens, skip_special_tokens=self.skip_special_tokens)
        self.text_queue.put(decoded)

    def end(self):
        # None is the sentinel that stops iteration.
        self.text_queue.put(None)

    def __iter__(self):
        return self

    def __next__(self):
        item = self.text_queue.get()
        if item is None:
            raise StopIteration()
        return item
83
+
lyrallms/LyraBaichuanPy/lyra_baichuan/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B
lyrallms/LyraBaichuanPy/lyra_baichuan/config.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dataclasses
2
+ from typing import Optional
3
+
4
+
5
@dataclasses.dataclass
class LyraBaichuanParam:
    """Fallback model hyper-parameters, used when no config.ini is found next
    to the checkpoint. Defaults correspond to a 40-layer, 40-head model."""
    num_heads: int = 40              # attention heads per layer
    size_per_head: int = 128         # per-head hidden dimension
    inter_size: int = 13824          # feed-forward hidden size
    num_layers: int = 40             # number of transformer layers
    vocab_size: int = 39424
    start_id: Optional[int] = 1      # BOS token id
    end_id: Optional[int] = 2        # EOS token id
    tensor_para_size: int = 1
    pipeline_para_size: int = 1
    remove_padding: bool = True
    shared_contexts_ratio: float = 1.0  # must lie in [0.0, 1.0]
    layernorm_eps: float = 1e-6
    weights_data_type: str = "fp16"
    rotary_embedding: int = 128
    use_gptj_residual: bool = False

    def __post_init__(self):
        # Validate the only field with a constrained numeric range.
        # Fixed: the error message previously misspelled the field name
        # as 'shared_context_ratio'.
        if not 0.0 <= self.shared_contexts_ratio <= 1.0:
            raise ValueError(
                f'Got an invalid value of shared_contexts_ratio '
                f'{self.shared_contexts_ratio} - range: [0.0, 1.0]')

    def asdict(self):
        """Return all parameters as a plain dict."""
        return dataclasses.asdict(self)
31
+
32
+
33
+ LYRA_BAICHUAN_PARAM = LyraBaichuanParam()
34
+ LIB_SO_PATH = '/usr/lib/ftlib/libth_lyrallms.so'
lyrallms/LyraBaichuanPy/lyra_baichuan/lyra_baichuan.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import configparser
4
+ import pathlib
5
+ import typing
6
+ import os
7
+
8
+ import torch
9
+ import transformers
10
+ from torch.nn.utils.rnn import pad_sequence
11
+
12
+ from .config import LYRA_BAICHUAN_PARAM, LIB_SO_PATH
13
+ from .model import BaichuanModel
14
+ from .tokenization_baichuan import BaichuanTokenizer
15
+
16
class lyraBaichuan7B:
    """Wrapper around the FasterTransformer-style optimized Baichuan-7B:
    loads tokenizer + converted weights and exposes batch generate()."""

    def __init__(self, model_path, tokenizer_path=None, dtype='fp16', memopt_mode=0, quant_dtype="int4") -> None:
        # model_path: directory holding config.ini (optional), tokenizer files
        #   (when tokenizer_path is None) and the converted weight file.
        # dtype: inference data type; None means "follow checkpoint dtype".
        # memopt_mode: 0 = plain FP16, 1 = MEMOPT (quantized) mode.
        # quant_dtype: quantization type used only when memopt_mode == 1.
        self.model_path = model_path
        self.tokenizer_path = tokenizer_path
        self.dtype = dtype

        self.memopt_mode = memopt_mode
        self.quant_data_type = quant_dtype

        self.model, self.tokenizer = self.load_model_and_tokenizer()
        print("Got model and tokenizer")

    def load_model_and_tokenizer(self):
        """Load the Baichuan tokenizer and build the optimized model.

        Model hyper-parameters come from <model_path>/config.ini when it
        exists, otherwise from the LYRA_BAICHUAN_PARAM defaults.
        Returns (model, tokenizer).
        """
        if self.tokenizer_path is None:
            tokenizer_path = self.model_path
        else:
            tokenizer_path = self.tokenizer_path

        print(f'Loading tokenizer from {tokenizer_path}')
        tokenizer = BaichuanTokenizer.from_pretrained(tokenizer_path)

        checkpoint_path = pathlib.Path(self.model_path)
        config_path = checkpoint_path / 'config.ini'

        if config_path.exists():
            # Read model params from config.
            cfg = configparser.ConfigParser()
            cfg.read(config_path)
            model_name = 'baichuan'
            inference_data_type = self.dtype
            if inference_data_type == None:
                inference_data_type = cfg.get(model_name, "weight_data_type")
            model_args = dict(
                head_num=cfg.getint(model_name, 'head_num'),
                size_per_head=cfg.getint(model_name, "size_per_head"),
                inter_size=cfg.getint(model_name, 'inter_size'),
                layer_num=cfg.getint(model_name, "num_layer"),
                rotary_embedding_dim=cfg.getint(model_name, 'rotary_embedding'),
                layernorm_eps=cfg.getfloat(model_name, 'layernorm_eps'),
                vocab_size=cfg.getint(model_name, "vocab_size"),
                start_id=cfg.getint(model_name, "start_id"),
                end_id=cfg.getint(model_name, "end_id"),
                weights_data_type=cfg.get(model_name, "weight_data_type"),
                tensor_para_size=cfg.getint(model_name, "tensor_para_size"),
                inference_data_type=inference_data_type)
        else:
            # No config.ini: fall back to compiled-in defaults.
            inference_data_type = self.dtype
            if inference_data_type == None:
                inference_data_type = LYRA_BAICHUAN_PARAM.weights_data_type
            model_args = dict(head_num=LYRA_BAICHUAN_PARAM.num_heads,
                              size_per_head=LYRA_BAICHUAN_PARAM.size_per_head,
                              inter_size=LYRA_BAICHUAN_PARAM.inter_size,
                              layer_num=LYRA_BAICHUAN_PARAM.num_layers,
                              rotary_embedding_dim=LYRA_BAICHUAN_PARAM.rotary_embedding,
                              layernorm_eps=LYRA_BAICHUAN_PARAM.layernorm_eps,
                              vocab_size=LYRA_BAICHUAN_PARAM.vocab_size,
                              start_id=LYRA_BAICHUAN_PARAM.start_id or tokenizer.bos_token_id,
                              end_id=LYRA_BAICHUAN_PARAM.end_id or tokenizer.eos_token_id,
                              weights_data_type=LYRA_BAICHUAN_PARAM.weights_data_type,
                              tensor_para_size=LYRA_BAICHUAN_PARAM.tensor_para_size,
                              inference_data_type=inference_data_type)

        # update common parameters
        # NOTE(review): the weight file name is hard-coded to 1-gpu-fp16.bin
        # regardless of dtype/tensor_para_size — confirm converted checkpoints
        # always use this name.
        model_args.update(dict(
            lib_path=LIB_SO_PATH,
            model_path=os.path.join(self.model_path, "1-gpu-fp16.bin"),
            max_seq_len=0, # for position seq embedding
            pipeline_para_size=LYRA_BAICHUAN_PARAM.pipeline_para_size,
            use_gptj_residual=LYRA_BAICHUAN_PARAM.use_gptj_residual,
            memopt_mode=self.memopt_mode,
            quant_data_type=self.quant_data_type
        ))

        print('[FT][INFO] Load Our FT Highly Optimized Baichuan-7B model')
        for k, v in model_args.items():
            print(f' - {k.ljust(25, ".")}: {v}')

        # Check sanity and consistency between the model and tokenizer.
        # NOTE(review): 'tensor_para_size' appears twice in this checklist;
        # one entry was likely meant to be 'pipeline_para_size'.
        checklist = ['head_num', 'size_per_head', 'vocab_size', 'layer_num',
                     'tensor_para_size', 'tensor_para_size', 'weights_data_type']
        if None in [model_args[k] for k in checklist]:
            none_params = [p for p in checklist if model_args[p] is None]
            print(f'[FT][WARNING] Found None parameters {none_params}. They must '
                  f'be provided either by config file or CLI arguments.')
        if model_args['start_id'] != tokenizer.bos_token_id:
            print('[FT][WARNING] Given start_id is not matched with the bos token '
                  'id of the pretrained tokenizer.')
        if model_args['end_id'] not in (tokenizer.pad_token_id, tokenizer.eos_token_id):
            print('[FT][WARNING] Given end_id is not matched with neither pad '
                  'token id nor eos token id of the pretrained tokenizer.')

        print(f'Loading model from {self.model_path}')
        model = BaichuanModel(**model_args)
        return model, tokenizer

    def generate(self, prompts: typing.List[str] | str,
                 output_length: int = 512,
                 beam_width: int = 1,
                 top_k: typing.Optional[torch.IntTensor] = 1,
                 top_p: typing.Optional[torch.FloatTensor] = 1.0,
                 beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
                 temperature: typing.Optional[torch.FloatTensor] = 1.0,
                 len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
                 repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
                 presence_penalty: typing.Optional[torch.FloatTensor] = None,
                 min_length: typing.Optional[torch.IntTensor] = None,
                 bad_words_list: typing.Optional[torch.IntTensor] = None,
                 do_sample: bool = False,
                 return_output_length: bool = False,
                 return_cum_log_probs: int = 0):
        """Generate continuations for one prompt or a batch of prompts.

        Scalar sampling knobs are broadcast to per-sample tensors before
        being passed to the underlying model. Returns a list of generated
        strings (prompt text excluded).

        NOTE(review): presence_penalty, min_length and bad_words_list are
        accepted but never forwarded to self.model here.
        """
        #
        if isinstance(prompts, str):
            prompts = [prompts, ]

        inputs = prompts

        batch_size = len(inputs)
        # Per-sample broadcast helpers for the scalar sampling parameters.
        ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
        ones_float = torch.ones(size=[batch_size], dtype=torch.float32)

        # we must encode the raw prompt text one by one in order to compute the length of the original text.
        input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
        input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
        # after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
        input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)

        random_seed = None
        if do_sample:
            random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)

        outputs = self.model(start_ids=input_token_ids,
                             start_lengths=input_lengths,
                             output_len=output_length,
                             beam_width=beam_width,
                             top_k=top_k * ones_int,
                             top_p=top_p * ones_float,
                             beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
                             temperature=temperature * ones_float,
                             len_penalty=len_penalty * ones_float,
                             repetition_penalty=repetition_penalty * ones_float,
                             random_seed=random_seed,
                             return_output_length=return_output_length,
                             return_cum_log_probs=return_cum_log_probs)

        if return_cum_log_probs > 0:
            outputs = outputs[0] # output_token_ids.

        # Slice the generated token ids of the 1st beam result.
        # output = input tokens + generated tokens.
        output_token_ids = [out[0, length:].cpu()
                            for out, length in zip(outputs, input_lengths)]

        output_texts = self.tokenizer.batch_decode(
            output_token_ids, skip_special_tokens=True)

        return output_texts
172
+
173
+ class lyraBaichuan13B:
174
+ def __init__(self, model_path, tokenizer_path=None, dtype='fp16', memopt_mode=0, quant_dtype="int4") -> None:
175
+ self.model_path = model_path
176
+ self.tokenizer_path = tokenizer_path
177
+ self.dtype = dtype
178
+
179
+ self.memopt_mode = memopt_mode
180
+ self.quant_data_type = quant_dtype
181
+
182
+ self.model, self.tokenizer = self.load_model_and_tokenizer()
183
+ print("Got model and tokenizer")
184
+
185
+ def load_model_and_tokenizer(self):
186
+ if self.tokenizer_path is None:
187
+ tokenizer_path = self.model_path
188
+ else:
189
+ tokenizer_path = self.tokenizer_path
190
+
191
+ print(f'Loading tokenizer from {tokenizer_path}')
192
+ tokenizer = BaichuanTokenizer.from_pretrained(tokenizer_path)
193
+
194
+ checkpoint_path = pathlib.Path(self.model_path)
195
+ config_path = checkpoint_path / 'config.ini'
196
+
197
+ if config_path.exists():
198
+ # Read model params from config.
199
+ cfg = configparser.ConfigParser()
200
+ cfg.read(config_path)
201
+ model_name = 'baichuan'
202
+ inference_data_type = self.dtype
203
+ if inference_data_type == None:
204
+ inference_data_type = cfg.get(model_name, "weight_data_type")
205
+ model_args = dict(
206
+ head_num=cfg.getint(model_name, 'head_num'),
207
+ size_per_head=cfg.getint(model_name, "size_per_head"),
208
+ inter_size=cfg.getint(model_name, 'inter_size'),
209
+ layer_num=cfg.getint(model_name, "num_layer"),
210
+ rotary_embedding_dim=0,
211
+ layernorm_eps=cfg.getfloat(model_name, 'layernorm_eps'),
212
+ vocab_size=cfg.getint(model_name, "vocab_size"),
213
+ start_id=cfg.getint(model_name, "start_id"),
214
+ end_id=cfg.getint(model_name, "end_id"),
215
+ weights_data_type=cfg.get(model_name, "weight_data_type"),
216
+ tensor_para_size=cfg.getint(model_name, "tensor_para_size"),
217
+ inference_data_type=inference_data_type)
218
+ else:
219
+ inference_data_type = self.dtype
220
+ if inference_data_type == None:
221
+ inference_data_type = LYRA_BAICHUAN_PARAM.weights_data_type
222
+ model_args = dict(head_num=LYRA_BAICHUAN_PARAM.num_heads,
223
+ size_per_head=LYRA_BAICHUAN_PARAM.size_per_head,
224
+ inter_size=LYRA_BAICHUAN_PARAM.inter_size,
225
+ layer_num=LYRA_BAICHUAN_PARAM.num_layers,
226
+ rotary_embedding_dim=0,
227
+ layernorm_eps=LYRA_BAICHUAN_PARAM.layernorm_eps,
228
+ vocab_size=LYRA_BAICHUAN_PARAM.vocab_size,
229
+ start_id=LYRA_BAICHUAN_PARAM.start_id or tokenizer.bos_token_id,
230
+ end_id=LYRA_BAICHUAN_PARAM.end_id or tokenizer.eos_token_id,
231
+ weights_data_type=LYRA_BAICHUAN_PARAM.weights_data_type,
232
+ tensor_para_size=LYRA_BAICHUAN_PARAM.tensor_para_size,
233
+ inference_data_type=inference_data_type)
234
+
235
+ # update common parameters
236
+ model_args.update(dict(
237
+ lib_path=LIB_SO_PATH,
238
+ model_path=os.path.join(self.model_path, "1-gpu-fp16.bin"),
239
+ max_seq_len=0, # for position seq embedding
240
+ pipeline_para_size=LYRA_BAICHUAN_PARAM.pipeline_para_size,
241
+ use_gptj_residual=LYRA_BAICHUAN_PARAM.use_gptj_residual,
242
+ memopt_mode=self.memopt_mode,
243
+ quant_data_type=self.quant_data_type
244
+ ))
245
+
246
+ print('[FT][INFO] Load Our FT Highly Optimized Baichuan-13B model')
247
+ for k, v in model_args.items():
248
+ print(f' - {k.ljust(25, ".")}: {v}')
249
+
250
+ # Check sanity and consistency between the model and tokenizer.
251
+ checklist = ['head_num', 'size_per_head', 'vocab_size', 'layer_num',
252
+ 'tensor_para_size', 'tensor_para_size', 'weights_data_type']
253
+ if None in [model_args[k] for k in checklist]:
254
+ none_params = [p for p in checklist if model_args[p] is None]
255
+ print(f'[FT][WARNING] Found None parameters {none_params}. They must '
256
+ f'be provided either by config file or CLI arguments.')
257
+ if model_args['start_id'] != tokenizer.bos_token_id:
258
+ print('[FT][WARNING] Given start_id is not matched with the bos token '
259
+ 'id of the pretrained tokenizer.')
260
+ if model_args['end_id'] not in (tokenizer.pad_token_id, tokenizer.eos_token_id):
261
+ print('[FT][WARNING] Given end_id is not matched with neither pad '
262
+ 'token id nor eos token id of the pretrained tokenizer.')
263
+
264
+ print(f'Loading model from {self.model_path}')
265
+ model = BaichuanModel(**model_args)
266
+ return model, tokenizer
267
+
268
    def generate(self, prompts: typing.List[str] | str,
                 output_length: int = 512,
                 beam_width: int = 1,
                 top_k: typing.Optional[torch.IntTensor] = 1,
                 top_p: typing.Optional[torch.FloatTensor] = 1.0,
                 beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
                 temperature: typing.Optional[torch.FloatTensor] = 1.0,
                 len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
                 repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
                 presence_penalty: typing.Optional[torch.FloatTensor] = None,
                 min_length: typing.Optional[torch.IntTensor] = None,
                 bad_words_list: typing.Optional[torch.IntTensor] = None,
                 do_sample: bool = False,
                 return_output_length: bool = False,
                 return_cum_log_probs: int = 0):
        """Generate continuations for one prompt or a batch of prompts.

        Scalar sampling knobs are broadcast to per-sample tensors before
        being passed to the underlying model. Returns a list of generated
        strings (prompt text excluded).

        NOTE(review): presence_penalty, min_length and bad_words_list are
        accepted but never forwarded to self.model here.
        """
        #
        if isinstance(prompts, str):
            prompts = [prompts, ]

        inputs = prompts

        batch_size = len(inputs)
        # Per-sample broadcast helpers for the scalar sampling parameters.
        ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
        ones_float = torch.ones(size=[batch_size], dtype=torch.float32)

        # we must encode the raw prompt text one by one in order to compute the length of the original text.
        input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
        input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
        # after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
        input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)

        random_seed = None
        if do_sample:
            random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)

        outputs = self.model(start_ids=input_token_ids,
                             start_lengths=input_lengths,
                             output_len=output_length,
                             beam_width=beam_width,
                             top_k=top_k * ones_int,
                             top_p=top_p * ones_float,
                             beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
                             temperature=temperature * ones_float,
                             len_penalty=len_penalty * ones_float,
                             repetition_penalty=repetition_penalty * ones_float,
                             random_seed=random_seed,
                             return_output_length=return_output_length,
                             return_cum_log_probs=return_cum_log_probs)

        if return_cum_log_probs > 0:
            outputs = outputs[0] # output_token_ids.

        # Slice the generated token ids of the 1st beam result.
        # output = input tokens + generated tokens.
        output_token_ids = [out[0, length:].cpu()
                            for out, length in zip(outputs, input_lengths)]

        output_texts = self.tokenizer.batch_decode(
            output_token_ids, skip_special_tokens=True)

        return output_texts
329
+
330
+ def stream_generate(self, prompts: typing.List[str] | str,
331
+ output_length: int = 512,
332
+ beam_width: int = 1,
333
+ top_k: typing.Optional[torch.IntTensor] = 1,
334
+ top_p: typing.Optional[torch.FloatTensor] = 1.0,
335
+ beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
336
+ temperature: typing.Optional[torch.FloatTensor] = 1.0,
337
+ len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
338
+ repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
339
+ presence_penalty: typing.Optional[torch.FloatTensor] = None,
340
+ min_length: typing.Optional[torch.IntTensor] = None,
341
+ bad_words_list: typing.Optional[torch.IntTensor] = None,
342
+ do_sample: bool = False,
343
+ return_output_length: bool = False,
344
+ return_cum_log_probs: int = 0):
345
+ if isinstance(prompts, str):
346
+ prompts = [prompts, ]
347
+
348
+ inputs = prompts
349
+
350
+ batch_size = len(inputs)
351
+ ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
352
+ ones_float = torch.ones(size=[batch_size], dtype=torch.float32)
353
+
354
+ # we must encode the raw prompt text one by one in order to compute the length of the original text.
355
+ input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
356
+ input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
357
+ # after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
358
+ input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)
359
+
360
+ random_seed = None
361
+ if do_sample:
362
+ random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)
363
+
364
+ for finish, output_ids, sequence_length, output_cum_log_probs in self.model.stream_forward(start_ids=input_token_ids,
365
+ start_lengths=input_lengths,
366
+ output_len=output_length,
367
+ beam_width=beam_width,
368
+ top_k=top_k * ones_int,
369
+ top_p=top_p * ones_float,
370
+ beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
371
+ temperature=temperature * ones_float,
372
+ len_penalty=len_penalty * ones_float,
373
+ repetition_penalty=repetition_penalty * ones_float,
374
+ random_seed=random_seed,
375
+ return_output_length=return_output_length,
376
+ return_cum_log_probs=return_cum_log_probs):
377
+
378
+ # Slice the generated token ids of the 1st beam result.
379
+ # output = input tokens + generated tokens.
380
+ output_token_ids = [out[0, length:].cpu()
381
+ for out, length in zip(output_ids, input_lengths)]
382
+
383
+ output_texts = self.tokenizer.batch_decode(
384
+ output_token_ids, skip_special_tokens=True)
385
+
386
+ if finish:
387
+ break
388
+
389
+ yield finish, output_texts
390
+
391
+ return finish, output_texts
lyrallms/LyraBaichuanPy/lyra_baichuan/model.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import print_function
2
+
3
+ import copy
4
+ import os
5
+ import pathlib
6
+ import typing
7
+
8
+ import numpy as np
9
+ import torch
10
+ import torch.distributed as dist
11
+ import torch.nn as nn
12
+
13
+ from queue import Queue
14
+ from threading import Thread
15
+
16
+ import sys
17
+ sys.path.append('/usr/lib/lyralib')
18
+ import lyraOp
19
+
20
+ str_type_map = {"fp32": torch.float32, "fp16": torch.float16, "bf16": torch.bfloat16}
21
+
22
+ class BaichuanModel(nn.Module):
23
+ def __init__(self,
24
+ head_num,
25
+ size_per_head,
26
+ inter_size,
27
+ vocab_size,
28
+ rotary_embedding_dim,
29
+ start_id, end_id, layer_num,
30
+ max_seq_len: int,
31
+ layernorm_eps,
32
+ tensor_para_size: int,
33
+ pipeline_para_size: int,
34
+ use_gptj_residual,
35
+ lib_path: typing.Union[str, pathlib.Path],
36
+ model_path,
37
+ memopt_mode: int = 0,
38
+ quant_data_type: str = "int8",
39
+ inference_data_type: str = "fp16",
40
+ weights_data_type: typing.Union[str, np.dtype] = np.float32):
41
+ super().__init__()
42
+ self.head_num = head_num
43
+ self.size_per_head = size_per_head
44
+ self.inter_size = inter_size
45
+ self.vocab_size = vocab_size
46
+ self.rotary_embedding_dim = rotary_embedding_dim
47
+ self.start_id = start_id
48
+ self.end_id = end_id
49
+ self.max_seq_len = max_seq_len
50
+ self.layer_num = layer_num
51
+ self.use_gptj_residual = use_gptj_residual
52
+ self.layernorm_eps = layernorm_eps
53
+ self.memopt_mode = memopt_mode
54
+ self.quant_data_type = quant_data_type
55
+
56
+ # multi-gpu params
57
+ self.tensor_para_size = tensor_para_size
58
+ self.pipeline_para_size = pipeline_para_size
59
+ self.build_model = False
60
+ self.weights_data_type = weights_data_type
61
+ self.inference_data_type = inference_data_type
62
+
63
+ assert torch.cuda.is_available(), "CUDA is required for this model."
64
+
65
+ assert head_num % tensor_para_size == 0, "head_num must be a multiple of tensor_para_size."
66
+ assert layer_num % pipeline_para_size == 0, "layer_num must be a multiple of pipeline_para_size."
67
+
68
+ # queue for streaming
69
+ self.que = Queue()
70
+ self.threads = [None] * self.tensor_para_size
71
+
72
+ # Load the C++ model into Pytorch model.
73
+ # torch.classes.load_library(os.path.abspath(lib_path))
74
+
75
+ # Prepare for tensor/pipeline parallel
76
+ try:
77
+ dist.init_process_group(backend='mpi')
78
+ except:
79
+ print("[INFO] WARNING: Have initialized the process group")
80
+ self.rank = dist.get_rank()
81
+ self.device_count = torch.cuda.device_count()
82
+ self.device = self.rank % self.device_count
83
+ torch.cuda.set_device(self.device)
84
+
85
+ world_size = dist.get_world_size()
86
+ # print(tensor_para_size * pipeline_para_size)
87
+ assert world_size == tensor_para_size * pipeline_para_size, "tensor_para_size * pipeline_para_size must be equal to world_size."
88
+
89
+ self.tensor_para_rank = self.rank % self.tensor_para_size
90
+ self.pipeline_para_rank = self.rank // self.tensor_para_size
91
+
92
+ self.model = lyraOp.LyraBaichuan(
93
+ self.head_num, self.size_per_head, self.inter_size,
94
+ self.layer_num,
95
+ self.vocab_size,
96
+ self.rotary_embedding_dim,
97
+ self.layernorm_eps,
98
+ self.start_id, self.end_id,
99
+ self.tensor_para_size, self.pipeline_para_size,
100
+ self.max_seq_len,
101
+ self.use_gptj_residual,
102
+ self.memopt_mode,
103
+ self.quant_data_type,
104
+ model_path,
105
+ self.weights_data_type,
106
+ self.inference_data_type)
107
+
108
+ self.build_model = True
109
+ torch.cuda.empty_cache()
110
+
111
+ def forward(self,
112
+ start_ids: torch.Tensor,
113
+ start_lengths: torch.Tensor,
114
+ output_len,
115
+ beam_width=1,
116
+ top_k: torch.Tensor = None,
117
+ top_p: torch.Tensor = None,
118
+ beam_search_diversity_rate: torch.Tensor = None,
119
+ temperature: torch.Tensor = None,
120
+ len_penalty: torch.Tensor = None,
121
+ repetition_penalty: torch.Tensor = None,
122
+ random_seed: torch.Tensor = None,
123
+ return_output_length=False,
124
+ return_cum_log_probs=0):
125
+
126
+ input_len = start_ids.size(1)
127
+ assert input_len > 0, "input len must be larger than zero. For an unconditional case, use start_id as the first token."
128
+
129
+ # Inputs to device
130
+ input_ids = start_ids.cuda(self.device)
131
+ input_lengths = start_lengths.cuda(self.device)
132
+ # outputs: output_ids, output_lengths, output_cum_log_probs (optional)
133
+ outputs = self.model.forward(input_ids,
134
+ input_lengths,
135
+ output_len,
136
+ beam_width, # optional, can be None
137
+ top_k, # optional, can be None
138
+ top_p, # optional, can be None
139
+ beam_search_diversity_rate, # optional, can be None
140
+ temperature, # optional, can be None
141
+ len_penalty, # optional, can be None
142
+ repetition_penalty, # optional, can be None
143
+ random_seed, # optional, can be None
144
+ return_cum_log_probs) # optional, can be None
145
+
146
+ if return_cum_log_probs == 0:
147
+ output_ids, output_lengths = outputs
148
+ else:
149
+ output_ids, output_lengths, output_cum_log_probs = outputs
150
+ if return_output_length:
151
+ if return_cum_log_probs > 0:
152
+ return output_ids, output_lengths, output_cum_log_probs
153
+ else:
154
+ return output_ids, output_lengths
155
+ else:
156
+ return output_ids
157
+
158
+ def set_input_tensor(self, input_tensor):
159
+ """Set input tensor to be used instead of forward()'s input.
160
+
161
+ When doing pipeline parallelism the input from the previous
162
+ stage comes from communication, not from the input, so the
163
+ model's forward_step_func won't have it. This function is thus
164
+ used by internal code to bypass the input provided by the
165
+ forward_step_func"""
166
+ self.input_tensor = input_tensor
167
+
168
+ def _forward_callback(self, output_ids, seq_lengths, ctx):
169
+ self.que.put((False, (list(output_ids), list(seq_lengths))))
170
+
171
+
172
+ def _tensormap_dict_to_py_dict(self, tensormap_dict: lyraOp.TensorMap):
173
+ """map torch tensormap to py dict."""
174
+ ret = dict()
175
+ for k, v in tensormap_dict.items():
176
+ ret[k] = v
177
+
178
+ return ret
179
+
180
+ def stream_forward(self,
181
+ start_ids: torch.Tensor,
182
+ start_lengths: torch.Tensor,
183
+ output_len,
184
+ beam_width=1,
185
+ top_k: torch.Tensor = None,
186
+ top_p: torch.Tensor = None,
187
+ beam_search_diversity_rate: torch.Tensor = None,
188
+ temperature: torch.Tensor = None,
189
+ len_penalty: torch.Tensor = None,
190
+ repetition_penalty: torch.Tensor = None,
191
+ random_seed: torch.Tensor = None,
192
+ return_output_length=False,
193
+ return_cum_log_probs=0):
194
+
195
+ # Register callback func to model
196
+ self.model.registerCallback(self._forward_callback)
197
+
198
+ batch_size = start_ids.size(0)
199
+ input_len = start_ids.size(1)
200
+ assert input_len > 0, "input len must be larger than zero. For an unconditional case, use start_id as the first token."
201
+
202
+ # Inputs to device
203
+ input_ids = start_ids.cuda(self.device)
204
+ input_lengths = start_lengths.cuda(self.device)
205
+ # outputs: output_ids, output_lengths, output_cum_log_probs (optional)
206
+
207
+ # Init thread of model inference
208
+ def _func(enque_output):
209
+ outputs = self.model.forward(input_ids,
210
+ input_lengths,
211
+ output_len,
212
+ beam_width, # optional, can be None
213
+ top_k, # optional, can be None
214
+ top_p, # optional, can be None
215
+ beam_search_diversity_rate, # optional, can be None
216
+ temperature, # optional, can be None
217
+ len_penalty, # optional, can be None
218
+ repetition_penalty, # optional, can be None
219
+ random_seed, # optional, can be None
220
+ return_cum_log_probs) # optional, can be None
221
+ if enque_output:
222
+ self.que.put((True, (outputs[0].tolist(), outputs[1].tolist())))
223
+
224
+ # Start thread of model inference
225
+ t = Thread(target=_func,
226
+ args=(True,),
227
+ daemon=True)
228
+ t.start()
229
+ self.threads[0] = t
230
+
231
+ # Generate streaming output
232
+ while True:
233
+ # while self.que.qsize() > 1:
234
+ # self.que.get()
235
+
236
+ finish, outputs = self.que.get()
237
+
238
+ output_ids, sequence_length = outputs
239
+ output_ids = torch.tensor(output_ids).view(batch_size, beam_width, -1)
240
+ sequence_length = torch.tensor(sequence_length).view(batch_size, beam_width)
241
+
242
+ if return_output_length:
243
+ if return_cum_log_probs > 0:
244
+ yield finish, output_ids, sequence_length, None
245
+ else:
246
+ yield finish, output_ids, sequence_length, None
247
+ else:
248
+ yield finish, output_ids, None, None
249
+
250
+ if finish:
251
+ for t in self.threads:
252
+ t.join()
253
+ while self.que.qsize() > 0:
254
+ self.que.get()
255
+ break
256
+
257
+ self.model.unRegisterCallback()
258
+ return finish, output_ids, None, None
lyrallms/LyraBaichuanPy/lyra_baichuan/tokenization_baichuan.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.
2
+
3
+ import os
4
+ from shutil import copyfile
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ import sentencepiece as spm
8
+ from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
9
+ from transformers.utils import logging
10
+
11
+
12
+ logger = logging.get_logger(__name__)
13
+
14
+ VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}
15
+
16
+ PRETRAINED_VOCAB_FILES_MAP = {
17
+ "vocab_file": {},
18
+ "tokenizer_file": {},
19
+ }
20
+ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {}
21
+
22
+
23
+ class BaichuanTokenizer(PreTrainedTokenizer):
24
+ """
25
+ Construct a Baichuan tokenizer. Based on byte-level Byte-Pair-Encoding.
26
+
27
+ Args:
28
+ vocab_file (`str`):
29
+ Path to the vocabulary file.
30
+ """
31
+
32
+ vocab_files_names = VOCAB_FILES_NAMES
33
+ pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
34
+ max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
35
+ model_input_names = ["input_ids", "attention_mask"]
36
+
37
+ def __init__(
38
+ self,
39
+ vocab_file,
40
+ unk_token="<unk>",
41
+ bos_token="<s>",
42
+ eos_token="</s>",
43
+ pad_token=None,
44
+ sp_model_kwargs: Optional[Dict[str, Any]] = None,
45
+ add_bos_token=True,
46
+ add_eos_token=False,
47
+ clean_up_tokenization_spaces=False,
48
+ **kwargs,
49
+ ):
50
+ self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
51
+ bos_token = AddedToken(bos_token, lstrip=False, rstrip=False) if isinstance(bos_token, str) else bos_token
52
+ eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
53
+ unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
54
+ pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
55
+ super().__init__(
56
+ bos_token=bos_token,
57
+ eos_token=eos_token,
58
+ unk_token=unk_token,
59
+ pad_token=pad_token,
60
+ add_bos_token=add_bos_token,
61
+ add_eos_token=add_eos_token,
62
+ sp_model_kwargs=self.sp_model_kwargs,
63
+ clean_up_tokenization_spaces=clean_up_tokenization_spaces,
64
+ **kwargs,
65
+ )
66
+ self.vocab_file = vocab_file
67
+ self.add_bos_token = add_bos_token
68
+ self.add_eos_token = add_eos_token
69
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
70
+ self.sp_model.Load(vocab_file)
71
+
72
+ def __getstate__(self):
73
+ state = self.__dict__.copy()
74
+ state["sp_model"] = None
75
+ return state
76
+
77
+ def __setstate__(self, d):
78
+ self.__dict__ = d
79
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
80
+ self.sp_model.Load(self.vocab_file)
81
+
82
+ @property
83
+ def vocab_size(self):
84
+ """Returns vocab size"""
85
+ return self.sp_model.get_piece_size()
86
+
87
+ def get_vocab(self):
88
+ """Returns vocab as a dict"""
89
+ vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
90
+ vocab.update(self.added_tokens_encoder)
91
+ return vocab
92
+
93
+ def _tokenize(self, text):
94
+ """Returns a tokenized string."""
95
+ return self.sp_model.encode(text, out_type=str)
96
+
97
+ def _convert_token_to_id(self, token):
98
+ """Converts a token (str) in an id using the vocab."""
99
+ return self.sp_model.piece_to_id(token)
100
+
101
+ def _convert_id_to_token(self, index):
102
+ """Converts an index (integer) in a token (str) using the vocab."""
103
+ token = self.sp_model.IdToPiece(index)
104
+ return token
105
+
106
+ def convert_tokens_to_string(self, tokens):
107
+ """Converts a sequence of tokens (string) in a single string."""
108
+ current_sub_tokens = []
109
+ out_string = ""
110
+ prev_is_special = False
111
+ for i, token in enumerate(tokens):
112
+ # make sure that special tokens are not decoded using sentencepiece model
113
+ if token in self.all_special_tokens:
114
+ if not prev_is_special and i != 0:
115
+ out_string += " "
116
+ out_string += self.sp_model.decode(current_sub_tokens) + token
117
+ prev_is_special = True
118
+ current_sub_tokens = []
119
+ else:
120
+ current_sub_tokens.append(token)
121
+ prev_is_special = False
122
+ out_string += self.sp_model.decode(current_sub_tokens)
123
+ return out_string
124
+
125
+ def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
126
+ """
127
+ Save the vocabulary and special tokens file to a directory.
128
+
129
+ Args:
130
+ save_directory (`str`):
131
+ The directory in which to save the vocabulary.
132
+
133
+ Returns:
134
+ `Tuple(str)`: Paths to the files saved.
135
+ """
136
+ if not os.path.isdir(save_directory):
137
+ logger.error(f"Vocabulary path ({save_directory}) should be a directory")
138
+ return
139
+ out_vocab_file = os.path.join(
140
+ save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
141
+ )
142
+
143
+ if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
144
+ copyfile(self.vocab_file, out_vocab_file)
145
+ elif not os.path.isfile(self.vocab_file):
146
+ with open(out_vocab_file, "wb") as fi:
147
+ content_spiece_model = self.sp_model.serialized_model_proto()
148
+ fi.write(content_spiece_model)
149
+
150
+ return (out_vocab_file,)
151
+
152
+ def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
153
+ bos_token_id = [self.bos_token_id] if self.add_bos_token else []
154
+ eos_token_id = [self.eos_token_id] if self.add_eos_token else []
155
+
156
+ output = bos_token_id + token_ids_0 + eos_token_id
157
+
158
+ if token_ids_1 is not None:
159
+ output = output + bos_token_id + token_ids_1 + eos_token_id
160
+
161
+ return output
162
+
163
+ def get_special_tokens_mask(
164
+ self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
165
+ ) -> List[int]:
166
+ """
167
+ Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
168
+ special tokens using the tokenizer `prepare_for_model` method.
169
+
170
+ Args:
171
+ token_ids_0 (`List[int]`):
172
+ List of IDs.
173
+ token_ids_1 (`List[int]`, *optional*):
174
+ Optional second list of IDs for sequence pairs.
175
+ already_has_special_tokens (`bool`, *optional*, defaults to `False`):
176
+ Whether or not the token list is already formatted with special tokens for the model.
177
+
178
+ Returns:
179
+ `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
180
+ """
181
+ if already_has_special_tokens:
182
+ return super().get_special_tokens_mask(
183
+ token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
184
+ )
185
+
186
+ bos_token_id = [1] if self.add_bos_token else []
187
+ eos_token_id = [1] if self.add_eos_token else []
188
+
189
+ if token_ids_1 is None:
190
+ return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
191
+ return (
192
+ bos_token_id
193
+ + ([0] * len(token_ids_0))
194
+ + eos_token_id
195
+ + bos_token_id
196
+ + ([0] * len(token_ids_1))
197
+ + eos_token_id
198
+ )
199
+
200
+ def create_token_type_ids_from_sequences(
201
+ self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
202
+ ) -> List[int]:
203
+ """
204
+ Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
205
+ sequence pair mask has the following format:
206
+
207
+ ```
208
+ 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
209
+ | first sequence | second sequence |
210
+ ```
211
+
212
+ if token_ids_1 is None, only returns the first portion of the mask (0s).
213
+
214
+ Args:
215
+ token_ids_0 (`List[int]`):
216
+ List of ids.
217
+ token_ids_1 (`List[int]`, *optional*):
218
+ Optional second list of IDs for sequence pairs.
219
+
220
+ Returns:
221
+ `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
222
+ """
223
+ bos_token_id = [self.bos_token_id] if self.add_bos_token else []
224
+ eos_token_id = [self.eos_token_id] if self.add_eos_token else []
225
+
226
+ output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)
227
+
228
+ if token_ids_1 is not None:
229
+ output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)
230
+
231
+ return output
232
+
lyrallms/LyraLlamaPy/README.md ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 模型和环境
2
+
3
+ ### 构建环境
4
+ ```shell
5
+ # 本地获取ngc pytorch cuda12原生镜像
6
+ docker pull nvcr.io/nvidia/pytorch:23.02-py3
7
+
8
+ # 启动容器
9
+ docker run --gpus all -itd --rm --name lyrallms_cu12 nvcr.io/nvidia/pytorch:23.02-py3
10
+ docker exec -it lyrallms_cu12 bash
11
+ ```
12
+
13
+ 获取代码后安装依赖
14
+ ```shell
15
+ pip install -r requirements.txt
16
+ ```
17
+
18
+ 将`lyralib`下对应cuda版本的[so文件](../../lyralib/sm80) 复制到`/usr/lib/lyralib`下。
19
+
20
+ ## 推理使用
21
+
22
+ ### 使用核心片段
23
+
24
+ ```python
25
+ from lyra_llama import lyraLlama
26
+
27
+ model_path = 'XXX' # 包含转换后的模型参数,配置,tokenizer文件目录
28
+ data_type = 'fp16'
29
+ memopt_mode = 0 # 如需使用MEMOPT模式推理, memopt_mode=1
30
+
31
+ # 加载加速后的模型,C++ 底层已经掩盖,依赖加速编译的 /usr/lib/ftlib 下的 so 库,已经打在镜像中
32
+ # 模型加载需要花一些时间,因为现在 IO 参数是多个小文件,建议把下载的模型参数解压到本地磁盘
33
+ model = lyraLlama(model_path, data_type, memopt_mode)
34
+
35
+ # 输入, 若有多个输入,可batch 推理,prompts 支持列表,这里为模拟多个输入,直接复制 32 分,batchsize 达到32
36
+ prompts = '列出3个不同的机器学习算法,并说明它们的适用范围.'
37
+ prompts = [prompts,]*64
38
+
39
+ # 生成, 最大长度可自行调整,这里设置 150,模型遇到 end token 或者达到最大计算长度时会停止当前批次计算.
40
+ # 因为 LLaMA-ZIYA 词表是按字切分,导致存储和计算量非常大,若是长序列生成情况,请自行缩小 batch_size
41
+ output_texts = model.generate(prompts, output_length=150, do_sample=False, top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0)
42
+
43
+ # 输出查看, 虽然 输入字符串也是参与计算,用于评估模型吞吐量和计算速度。
44
+ # 这里为了显示应用方便, output_texts 中每个样本的输出已经去掉了输入字符串
45
+ print(output_texts)
46
+
47
+ # 输出示例
48
+ >>> Inputs: 列出3个不同的机器学习算法,并说明它们的适用范围.
49
+ >>> Outputs:
50
+ 1. 线性回归(Linear Regression):适用于解决两个变量之间的关系问题,例如预测房价或销售额。它可以用于回归分析和回归聚类分析。
51
+ 2. 决策树(Decision Tree):适用于解决分类和回归问题。它可以用于分类、回归、异常检测和聚类分析。
52
+ 3. 神经网络(Neural Network):适用于解决分类、回归和聚类问题。它可以用于图像识别、语音识别
53
+ ```
54
+
55
+ ### demo 脚本
56
+
57
+ `examples/batch_demo.py` 中有类似上面的使用示例,做了简单的跑速测试,考虑大家对 token 的理解各有不同,我们这里直接按字符数来评估,不同 token 的理解可以自行根据生成结果字符数去观测。注意:在 `LLaMA-ZIYA` 中,tokenizer 对中文的切分,约等于一个字是一个 token.
58
+
59
+ 更多测试脚本及用法详见参考 `examples` 下的 [README.md](./examples/README.md) ,如:
60
+ - Batch推理
61
+ - 不等长Batch推理
62
+ - Batch流式推理
63
+
64
+ ## 自定义模型参数
65
+
66
+ 已提供转换脚本 `parse_model_params.py` 可以将 LLaMa 模型的 HuggingFace 格式参数,转换为加速版本下各层模型需要的模型参数。因为 LLaMa 有很多变体,所以这里我们提供一个模型名字 `-model_name` 的转换参数,可以自行填入,以便生成可区分的 config.in 文件。
67
+
68
+ ```shell
69
+ python parse_model_params.py -i your_model_dir -o output_dir -t_g 1 -i_g 1 -weight_data_type "fp16" -model_name "llama"
70
+ ```
71
+
72
+ 转换后的模型参数将以每个参数一个文件的形式存放在 `output_dir/{i_g}-gpu-{weight_data_type}` 下,分割的形式有助于并发 IO,但缺陷是不便捷。
73
+
74
+ 同时该转换脚本还会将同目录下 tokenizer_source 里的 `tokenizer.model` `tokenizer.json` `special_tokens_map.json` `tokenizer_config.json` 四个文件拷贝到 output_dir 下,以便后续使用加速模型时直接能初始化对应的 加速后的 LLaMa 的 tokenizer.
75
+
lyrallms/LyraLlamaPy/examples/README.md ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 测试脚本
2
+
3
+ ### batch推理
4
+
5
+ ```sh
6
+ export FMHA_VERSION=V2 # 如使用旧版本Attn,设置 FMHA_VERSION=OFF
7
+ export KV_CACHE_DTYPE=DEFAULT # 如使用KVCache Int8,设置 KV_CACHE_DTYPE=INT8
8
+
9
+ model_path=ModelPath # 转换后模型所处文件夹路径 (1-gpu-fp16.bin等文件所在目录)
10
+ kv_qparams_fpath=KVScalesPath # (可选) 校准后的KVCache量化Scales文件路径 (past_kv_scale.bin)
11
+
12
+ data_type=fp16 # 权重保存精度
13
+ memopt_mode=0 # MEMOPT模式: 0/1
14
+ quant_type="int8" # 量化精度: int4/int8
15
+ max_output_length=256
16
+ warmups=1
17
+ avgnums=1
18
+
19
+ python batch_demo.py --model-path $model_path\
20
+ --tokenizer-path $model_path\
21
+ --data-type $data_type\
22
+ --memopt_mode $memopt_mode\
23
+ --quant-type ${quant_type}\
24
+ --max-output-length $max_output_length\
25
+ --warmups $warmups\
26
+ --avgnums $avgnums\
27
+ --kvqparams-fpath $kv_qparams_fpath
28
+ ```
29
+
30
+ ### batch流式推理
31
+
32
+ ```sh
33
+ export FMHA_VERSION=V2 # 如使用旧版本Attn,设置为OFF
34
+ export KV_CACHE_DTYPE=DEFAULT # 如使用KVCache Int8,设置 KV_CACHE_DTYPE=INT8
35
+ export LYRA_STREAM_CB_STEP=30 # 回调函数间隔步数
36
+
37
+ model_path=ModelPath # 转换后模型所处文件夹路径 (1-gpu-fp16.bin等文件所在目录)
38
+ kv_qparams_fpath=KVScalesPath # (可选) 校准后的KVCache量化Scales文件路径 (past_kv_scale.bin)
39
+
40
+ data_type=fp16 # 权重保存精度
41
+ memopt_mode=0 # MEMOPT模式: 0/1
42
+ quant_type="int8" # 量化精度: int4/int8
43
+ max_output_length=256
44
+ warmups=1
45
+ avgnums=1
46
+
47
+ python batch_stream_demo.py --model-path $model_path\
48
+ --tokenizer-path $model_path\
49
+ --data-type $data_type\
50
+ --memopt_mode $memopt_mode\
51
+ --quant-type ${quant_type}\
52
+ --max-output-length $max_output_length\
53
+ --warmups $warmups\
54
+ --avgnums $avgnums\
55
+ --kvqparams-fpath $kv_qparams_fpath
56
+ ```
57
+ ### 不等长batch推理
58
+
59
+ ```sh
60
+ export FMHA_VERSION=V2 # 如使用旧版本Attn,设置为OFF
61
+ export KV_CACHE_DTYPE=DEFAULT # 如使用KVCache Int8,设置 KV_CACHE_DTYPE=INT8
62
+
63
+ model_path=ModelPath # 转换后模型所处文件夹路径 (1-gpu-fp16.bin等文件所在目录)
64
+ kv_qparams_fpath=KVScalesPath # (可选) 校准后的KVCache量化Scales文件路径 (past_kv_scale.bin)
65
+
66
+ prompt_filepath=valen_prompts.json # 用于测试的不等长prompts文件,从中采样
67
+ data_type=fp16 # 权重保存精度
68
+ memopt_mode=0 # MEMOPT模式: 0/1
69
+ quant_type="int8" # 量化精度: int4/int8
70
+ max_output_length=256
71
+ warmups=1
72
+ avgnums=1
73
+
74
+ python random_batch_demo.py --model-path $model_path\
75
+ --tokenizer-path $model_path\
76
+ --data-type $data_type\
77
+ --memopt_mode $memopt_mode\
78
+ --quant-type ${quant_type}\
79
+ --prompt_filepath $prompt_filepath\
80
+ --max-output-length $max_output_length\
81
+ --warmups $warmups\
82
+ --avgnums $avgnums\
83
+ --kvqparams-fpath $kv_qparams_fpath
84
+ ```
85
+
86
+ ## Prompt例子
87
+
88
+ ### 短序列
89
+ ```
90
+ 北京的景点:故宫、天坛、万里长城等。\n深圳的景点:
91
+ ```
92
+ ```
93
+ 今天天气大概 25度,有点小雨,吹着风,我想去户外散步,应该穿什么样的衣服 裤子鞋子搭配
94
+ ```
95
+
96
+ ### 1K序列
97
+ ```
98
+ 《Bela Lugosi's Dead 》是英国后朋克乐队Bauhaus的首张单曲,于 1979 年 8 月 6 日在Small Wonder厂牌上发行。[4]它通常被认为是第一张哥特式摇滚唱片。\n1979 年 1 月 26 日,“Bela Lugosi's Dead”在威灵伯勒的贝克录音室进行了六个小时的“录音室现场”录制。这是他们在乐队成立六周后一起录制的第一首歌曲。[6]所有四位乐队成员都被认为是这首歌的作者:主唱彼得·墨菲、吉他手丹尼尔·阿什、鼓手凯文·哈斯金斯和贝斯手大卫·J (大卫·哈斯金斯)。David J 声称这首歌的歌词是他写的。[5] “Bela Lugosi's Dead”的替代版本还包括他们下一首单曲“ Dark Entries ”的早期演示录音的一部分。\n\n在同一场会议中还录制了另外四首歌曲:“Boys”;“咬我的臀部”;“Some Faces”和斯卡雷鬼曲调“Harry”,这是关于Blondie主唱Deborah Harry的。[7] [8]关于这次会议,凯文·哈斯金斯 (Kevin Haskins) 说,“那里有力量流行音乐,还有斯卡。我们试图找到我们的声音。” [9]\n\n在那次录制期间录制的歌曲中(除了“Bela Lugosi's Dead”),只有“Harry”获得了官方发行;1982年作为单曲“ Kick in the Eye ”的B面。1979 年晚些时候在 Beck Studios 录制的《Boys》版本被用作原版单曲《Bela Lugosi's Dead》的 B 面。[10]其余曲目,��括“Boys”的原始录音,一直未发行,直到 2018 年The Bela Session以黑胶唱片和CD 形式发行,并可供乐队数字下载。[11]在额外的曲目中,《经典摇滚》杂志写道:“其余的材料发现乐队正在摸索方向,甚至触及了斯卡。”\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:
99
+ ```
100
+
101
+ ### 2K序列
102
+ ```
103
+ 根据所给刑事法律文书中的案情描述,预测被告人被判的罪名。你需要从这些罪名中选择最恰当的一项:妨害公务,寻衅滋事,盗窃、侮辱尸体,危险物品肇事,非法采矿,组织、强迫、引诱、容留、介绍卖淫,开设赌场,聚众斗殴,绑架,非法持有毒品,销售假冒注册商标的商品,容留他人吸毒,假冒注册商标,交通肇事,破坏电力设备,组织卖淫,合同诈骗,走私武器、弹药,抢劫,非法处置查封、扣押、冻结的财产,以危险方法危害公共安全,过失投放危险物质,非法制造、买卖、运输、邮寄、储存枪支、弹药、爆炸物,伪造、变造、买卖武装部队公文、证件、印章,持有、使用假币,重婚,聚众冲击国家机关,生产、销售伪劣农药、兽药、化肥、种子,收买被拐卖的妇女、儿童,聚众哄抢,重大劳动安全事故,侵占,包庇毒品犯罪分子,虚报注册资本,违法发放贷款,制造、贩卖、传播淫秽物品,窝藏、包庇,帮助毁灭、伪造证据,放火,强奸,非法携带枪支、弹药、管制刀具、危险物品危及公共安全,伪造、变造金融票证,爆炸,玩忽职守,对非国家工作人员行贿,伪造、倒卖伪造的有价票证,私分国有资产,非法收购、运输、加工、出售国家重点保护植物、国家重点保护植物制品,生产、销售假药,挪用特定款物,过失致人死亡,走私国家禁止进出口的货物、物品,非法制造、买卖、运输、储存危险物质,洗钱,骗取贷款、票据承兑、金融票证,非法买卖制毒物品,非法买卖、运输、携带、持有毒品原植物种子、幼苗,生产、销售有毒、有害食品,滥用职权,招收公务员、学生徇私舞弊,诬告陷害,非法获取国家秘密,非法行医,非法收购、运输、出售珍贵、濒危野生动物、珍贵、濒危野生动物制品,非法出售发票,行贿,高利转贷,非法吸收公众存款,传播淫秽物品,非法进行节育手术,盗伐林木,聚众扰乱社会秩序,走私、贩卖、运输、制造毒品,滥伐林木,赌博,非法经营,生产、销售不符合安全标准的食品,提供侵入、非法控制计算机信息系统程序、工具,倒卖文物,窃取、收买、非法提供信用卡信息,盗掘古文化遗址、古墓葬,协助组织卖淫,破坏广播电视设施、公用电信设施,走私普通货物、物品,逃税,破坏监管秩序,失火,受贿,组织、领导、参加黑社会性质组织,票据诈骗,非法制造、销售非法制造的注册商标标识,侵犯著作权,伪造、变造、买卖国家机关公文、证件、印章,徇私舞弊不征、少征税款,强迫劳动,贷款诈骗,劫持船只、汽车,诈骗,非法种植毒品原植物,非法狩猎,挪用资金,非法收购、运输盗伐、滥伐的林木,出售、购买、运输假币,抢夺,虐待被监管人,窝藏、转移、收购、销售赃物,破坏计算机信息系统,制作、复制、出版、贩卖、传播淫秽物品牟利,拒不支付劳动报酬,盗窃、抢夺枪支、弹药、爆炸物,强迫他人吸毒,走私珍贵动物、珍贵动物制品,虐待,非法获取公民个人信息,破坏交通设施,非法转让、倒卖土地使用权,非法捕捞水产品,非法占用农用地,非法制造、出售非法制造的发票,非法持有、私藏枪支、弹药,集资诈骗,强迫卖淫,伪造公司、企业、事业单位、人民团体印章,利用影响力受贿,编造、故意传播虚假恐怖信息,介绍贿赂,传播性病,拐卖妇女、儿童,倒卖车票、船票,窝藏、转移、隐瞒毒品、毒赃,徇私舞弊不移交刑事案件,过失损坏广播电视设施、公用电信设施,动植物检疫徇私舞弊,破坏交通工具,猥亵儿童,挪用公款,伪造货币,冒充军人招摇撞骗,非法采伐、毁坏国家重点保护植物,故意毁坏财物,非法拘禁,招摇撞骗,伪造、变造居民身份证,徇私枉法,非法生产、买卖警用装备,掩饰、隐瞒犯罪所得、犯罪所得收益,生产、销售伪劣产品,破坏生产经营,帮助犯罪分子逃避处罚,贪污,投放危险物质,持有伪造的发票,危险驾驶,妨害作证,非法猎捕、杀害珍贵、濒危野生动物,重大责任事故,诽谤,虚开发票,引诱���教唆、欺骗他人吸毒,脱逃,扰乱无线电通讯管理秩序,保险诈骗,非法生产、销售间谍专用器材,非法组织卖血,强迫交易,串通投标,破坏易燃易爆设备,传授犯罪方法,妨害信用卡管理,拐骗儿童,单位行贿,打击报复证人,拒不执行判决、裁定,经济犯,金融凭证诈骗,虚开增值税专用发票、用于骗取出口退税、抵扣税款发票,走私废物,组织、领导传销活动,单位受贿,盗窃、抢夺枪支、弹药、爆炸物、危险物质,过失以危险方法危害公共安全,过失致人重伤,引诱、容留、介绍卖淫,遗弃,走私,信用卡诈骗,对单位行贿,故意杀人,聚众扰乱公共场所秩序、交通秩序,盗窃,故意伤害,非法侵入住宅,强制猥亵、侮辱妇女,伪证,污染环境,巨额财产来源不明,非国家工作人员受贿,侮辱,隐匿、故意销毁会计凭证、会计帐簿、财务会计报告,过失损坏武器装备、军事设施、军事通信,敲诈勒索,职务侵占。\n经审理查明:2013年9月底的一天晚上,被告人陆某德酒后经过沭阳县某镇某村张某荣家时,发现张某荣家没有人,即用石头砸破张某荣家房门玻璃,打开房门进入张某荣家中。因进入张某荣时被房
门遗留的玻璃划伤,被告人陆某德在张某荣家北屋门和北屋东首间墙面遗留两处血迹。2014年1月7日,被告人陆某德被公安民警从其家中传唤到案,并如实供述自己的罪行。上述事实,有公诉机关提交的,经过庭审质证的,且均具有证据证明效力的以下证据予以证明:被告人陆某德供述其非法侵入他人住宅的时间、地点、经过等事实。该供述得到了被害人张某荣的陈述、证人周某花、李某华等人的证言、法庭科学DNA检验鉴定书、现场勘验检查笔录、现场图、现场照片等证据予以证实,足以认定。刑事判决书证明证明了被告人陆某德有前科;公安机关出具的“发破案经过”及“抓获经过”证明了本案案发及被告人陆某德的归案情况。\n
104
+ ```
105
+
106
+ ### 4K序列
107
+ ```
108
+ <context>/*\n * Implement the \"Falling Rocks\" game in the text console. \n * A small dwarf stays at the bottom of the screen and can \n * move left and right (by the arrows keys). A number of rocks \n * of different sizes and forms constantly fall down and you \n * need to avoid a crash.\n * Rocks are the symbols ^, @, *, &, +, %, $, #, !, ., ;, - distributed \n * with appropriate density. The dwarf is (O). \n * Ensure a constant game speed by Thread.Sleep(150).\n * Implement collision detection and scoring system.\n*/\n\nusing System;\nusing System.Threading;\nusing System.Collections.Generic;\nusing System.Threading.Tasks;\n\nclass FallingRocks\n{\n struct Position\n {\n public int X, Y;\n public string symbol;\n public ConsoleColor color;\n\n public Position(int x, int y, string symbol, ConsoleColor color)\n {\n this.X = x;\n this.Y = y;\n this.symbol = symbol;\n this.color = color;\n }\n }\n\n static void Main()\n {\n Thread oThread = new Thread(new ThreadStart(Mainn));\n Thread aThread = new Thread(new ThreadStart(Clr));\n \n aThread.Start();\n oThread.Start();\n oThread.Join();\n aThread.Join();\n }\n\n static void Clr()\n {\n while (true)\n {\n Thread.Sleep(10);\n Console.Clear();\n }\n }\n static void Mainn()\n {\n //Random generator for rocks color, position and symbol\n Random randomGenerator = new Random();\n \n //Sleep time for the game loop\n double sleepTime = 150;\n //Console settings\n Console.CursorVisible = false;\n Console.BufferHeight = Console.WindowHeight;\n \n //number of rocks in the Array rocks\n int rocksCount = 0;\n\n //array with the symbols of the rocks\n string[] symbols = new string[] { \"^\", \"@\", \"*\", \"&\", \"+\", \"%\", \"$\", \"#\", \"!\", \".\", \";\" };\n \n //array with colors for the rocks\n ConsoleColor[] colors = new ConsoleColor[] {ConsoleColor.Yellow, ConsoleColor.White, ConsoleColor.Gray};\n \n //array with rocks\n Position[] rocks = new Position[200];\n \n //position for the dwarf\n Position dwarf = new 
Position(10, Console.WindowHeight - 1,\"(0)\",ConsoleColor.Red);\n \n //bool variable to say when the game loop to be over\n bool gameLoop = true;\n\n //variable keeping the score\n ulong score = 0;\n\n //the game loop\n while (gameLoop)\n {\n //score is growing as the cycle runs\n score++;\n\n //setting the Y component for all the rocks in the array to grow with 2\n for (int i = 0; i <= rocks.Length - 1; i++)\n {\n rocks[i].Y = rocks[i].Y + 2;\n }\n\n //generating rocks\n for (int x = 0; x <= randomGenerator.Next(2, 4); x++)\n {\n rocks[rocksCount] = new Position(randomGenerator.Next(x * 15, x * 15 + 20), 0\n , symbols[randomGenerator.Next(0, symbols.Length - 1)]\n , colors[randomGenerator.Next(0, colors.Length - 1)]);\n if (rocksCount >= 199) rocksCount = 0;\n rocksCount++;\n }\n\n //printing the rocks and other stuff\n foreach (var item in rocks)\n {\n foreach (var rock in rocks)\n {\n //checking for colision\n if ((rock.X >= dwarf.X) && (rock.X <= (dwarf.X + 2)) && (rock.Y == dwarf.Y))\n {\n gameLoop = false;\n break;\n }\n } \n\n //printing the rocks\n if (item.Y < Console.WindowHeight)\n { \n Console.SetCursorPosition(item.X, item.Y);\n Console.ForegroundColor = item.color;\n Console.Write(item.symbol);\n }\n\n //checking for key pressed\n if (Console.KeyAvailable)\n {\n ConsoleKeyInfo pressedKey = Console.ReadKey();\n if (pressedKey.Key == ConsoleKey.RightArrow)\n {\n if(dwarf.X < Console.WindowWidth - 20)\n {\n //removing the old positions of the dwarf and increasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.Write(\" \");\n dwarf.X++;\n }\n }\n if (pressedKey.Key == ConsoleKey.LeftArrow) \n {\n if(dwarf.X >= 1)\n {\n //removing the old positions of the dwarf and decreasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.Write(\" \");\n dwarf.X--;\n }\n }\n }\n }\n \n //printing the dwarf\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.ForegroundColor = dwarf.color;\n Console.Write(dwarf.symbol); \n \n 
//sleeping the loop for sometime\n //Thread.Sleep((int)sleepTime);\n\n //reducing the sleep time of the loop\n sleepTime -= 0.5;\n\n \n //removing the rocks \n //foreach (var item in rocks)\n //{\n // if (item.Y < Console.WindowHeight)\n // {\n // Console.SetCursorPosition(item.X, item.Y);\n // Console.Write(\" \");\n // }\n //} \n }\n //Printing the score after the game is over\n Console.Clear();\n Console.WriteLine(\"Game over! Your score is: \" + score);\n\n }\n}\n</context>\n\n这个\"Falling Rocks\"游戏是如何工作的呢?可以详细解释一下代码的运作机制吗? \n\n\n\n
109
+ ```
110
+
111
+ ### 8K序列
112
+ ```
113
+ <context># -*- coding: utf-8 -*-\n# This code is part of Amoco\n# Copyright (C) 2021 Axel Tillequin ([email protected])\n# published under GPLv2 license\nfrom amoco.arch.tricore import env\nfrom amoco.arch.core import *\n# -------------------------------------------------------\n# from TriCore TC1.6.2 core architecture manual V1.2.2\n# (32-bit Unified Processor Core), 2020-01-15\n# define all except FPU instructions\n# -------------------------------------------------------\nISPECS = []\n@ispec("32<[ disp1(16) disp2(8) {6d} ]", mnemonic="CALL")\n@ispec("32<[ disp1(16) disp2(8) {61} ]", mnemonic="FCALL")\n@ispec("32<[ disp1(16) disp2(8) {1d} ]", mnemonic="J")\n@ispec("32<[ disp1(16) disp2(8) {5d} ]", mnemonic="JL")\ndef tricore_branch(obj, disp1, disp2):\n v = env.cst(((disp2<<16)+disp1)<<1,24)\n obj.operands = [disp.signextend(32)]\n obj.type = type_control_flow\n@ispec("32<[ disp1(16) disp2(8) {ed} ]", mnemonic="CALLA")\n@ispec("32<[ disp1(16) disp2(8) {e1} ]", mnemonic="FCALLA")\n@ispec("32<[ disp1(16) disp2(8) {9d} ]", mnemonic="JA")\n@ispec("32<[ disp1(16) disp2(8) {dd} ]", mnemonic="JLA")\ndef tricore_branch(obj, disp1, disp2):\n v = env.cst((disp2<<16)+disp1,24)\n addr = composer([env.bit0,v[0:20],env.cst(0,7),v[20:24]])\n obj.operands = [addr]\n obj.type = type_control_flow\n@ispec("32<[ ---- {00} ---- ---- a(4) {2d} ]", mnemonic="CALLI")\n@ispec("32<[ ---- {01} ---- ---- a(4) {2d} ]", mnemonic="FCALLI")\n@ispec("32<[ ---- {03} ---- ---- a(4) {2d} ]", mnemonic="JI")\n@ispec("32<[ ---- {02} ---- ---- a(4) {2d} ]", mnemonic="JLI")\ndef tricore_branchI(obj, a):\n src = env.A[a]\n obj.operands = [src]\n obj.type = type_control_flow\n@ispec("16<[ disp(8) {5c} ]", mnemonic="CALL")\n@ispec("16<[ disp(8) {3c} ]", mnemonic="J")\n@ispec("16<[ disp(8) {ee} ]", mnemonic="JNZ")\n@ispec("16<[ disp(8) {6e} ]", mnemonic="JZ")\ndef tricore_branch(obj, disp):\n disp = env.cst(disp<<1,8)\n obj.operands = [disp.signextend(32)]\n obj.type = type_control_flow\n@ispec("32<[ ---- 
0000000 const9(9) ---- {ad} ]", mnemonic="BISR")\n@ispec("32<[ ---- 0000100 const9(9) ---- {ad} ]", mnemonic="SYSCALL")\ndef tricore_system(obj, const9):\n obj.operands = [env.cst(const9,9)]\n obj.type = type_system\n@ispec("32<[ c(4) {1c} ---- b(4) ---- {0b} ]", mnemonic="ABS")\n@ispec("32<[ c(4) {5c} ---- b(4) ---- {0b} ]", mnemonic="ABS_B")\n@ispec("32<[ c(4) {7c} ---- b(4) ---- {0b} ]", mnemonic="ABS_H")\n@ispec("32<[ c(4) {1d} ---- b(4) ---- {0b} ]", mnemonic="ABSS")\n@ispec("32<[ c(4) {7d} ---- b(4) ---- {0b} ]", mnemonic="ABSS_H")\n@ispec("32<[ c(4) {1f} ---- b(4) ---- {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b):\n src = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {80} ---- b(4) ---- {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b):\n src = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, src.signextend(64)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {81} ---- b(4) a(4) {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b, a):\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, composer([src2,src1])]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {0e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF")\n@ispec("32<[ c(4) {4e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF_B")\n@ispec("32<[ c(4) {6e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF_H")\n@ispec("32<[ c(4) {0f} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIFS")\n@ispec("32<[ c(4) {6f} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIFS_H")\n@ispec("32<[ c(4) {00} ---- b(4) a(4) {0b} ]", mnemonic="ADD")\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {0b} ]", mnemonic="ADD_B")\n@ispec("32<[ c(4) {60} ---- b(4) a(4) {0b} ]", mnemonic="ADD_H")\n@ispec("32<[ c(4) {05} ---- b(4) a(4) {0b} ]", mnemonic="ADDC")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {0b} ]", mnemonic="ADDS")\n@ispec("32<[ c(4) {62} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_H")\n@ispec("32<[ c(4) {63} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_HU")\n@ispec("32<[ 
c(4) {03} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_U")\n@ispec("32<[ c(4) {04} ---- b(4) a(4) {0b} ]", mnemonic="ADDX")\n@ispec("32<[ c(4) {08} ---- b(4) a(4) {0f} ]", mnemonic="AND")\n@ispec("32<[ c(4) {20} ---- b(4) a(4) {0b} ]", mnemonic="AND_EQ")\n@ispec("32<[ c(4) {24} ---- b(4) a(4) {0b} ]", mnemonic="AND_GE")\n@ispec("32<[ c(4) {25} ---- b(4) a(4) {0b} ]", mnemonic="AND_GE_U")\n@ispec("32<[ c(4) {22} ---- b(4) a(4) {0b} ]", mnemonic="AND_LT")\n@ispec("32<[ c(4) {23} ---- b(4) a(4) {0b} ]", mnemonic="AND_LT_U")\n@ispec("32<[ c(4) {21} ---- b(4) a(4) {0b} ]", mnemonic="AND_NE")\n@ispec("32<[ c(4) {0e} ---- b(4) a(4) {0f} ]", mnemonic="ANDN")\n@ispec("32<[ c(4) {10} ---- b(4) a(4) {0b} ]", mnemonic="EQ")\n@ispec("32<[ c(4) {50} ---- b(4) a(4) {0b} ]", mnemonic="EQ_B")\n@ispec("32<[ c(4) {70} ---- b(4) a(4) {0b} ]", mnemonic="EQ_H")\n@ispec("32<[ c(4) {90} ---- b(4) a(4) {0b} ]", mnemonic="EQ_W")\n@ispec("32<[ c(4) {56} ---- b(4) a(4) {0b} ]", mnemonic="EQANY_B")\n@ispec("32<[ c(4) {76} ---- b(4) a(4) {0b} ]", mnemonic="EQANY_H")\n@ispec("32<[ c(4) {14} ---- b(4) a(4) {0b} ]", mnemonic="GE")\n@ispec("32<[ c(4) {15} ---- b(4) a(4) {0b} ]", mnemonic="GE_U")\n@ispec("32<[ c(4) {12} ---- b(4) a(4) {0b} ]", mnemonic="LT")\n@ispec("32<[ c(4) {13} ---- b(4) a(4) {0b} ]", mnemonic="LT_U")\n@ispec("32<[ c(4) {52} ---- b(4) a(4) {0b} ]", mnemonic="LT_B")\n@ispec("32<[ c(4) {53} ---- b(4) a(4) {0b} ]", mnemonic="LT_BU")\n@ispec("32<[ c(4) {72} ---- b(4) a(4) {0b} ]", mnemonic="LT_H")\n@ispec("32<[ c(4) {73} ---- b(4) a(4) {0b} ]", mnemonic="LT_HU")\n@ispec("32<[ c(4) {92} ---- b(4) a(4) {0b} ]", mnemonic="LT_W")\n@ispec("32<[ c(4) {93} ---- b(4) a(4) {0b} ]", mnemonic="LT_WU")\n@ispec("32<[ c(4) {1a} ---- b(4) a(4) {0b} ]", mnemonic="MAX")\n@ispec("32<[ c(4) {1b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_U")\n@ispec("32<[ c(4) {5a} ---- b(4) a(4) {0b} ]", mnemonic="MAX_B")\n@ispec("32<[ c(4) {5b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_BU")\n@ispec("32<[ c(4) {7a} ---- b(4) a(4) 
{0b} ]", mnemonic="MAX_H")\n@ispec("32<[ c(4) {7b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_HU")\n@ispec("32<[ c(4) {18} ---- b(4) a(4) {0b} ]", mnemonic="MIN")\n@ispec("32<[ c(4) {19} ---- b(4) a(4) {0b} ]", mnemonic="MIN_U")\n@ispec("32<[ c(4) {58} ---- b(4) a(4) {0b} ]", mnemonic="MIN_B")\n@ispec("32<[ c(4) {59} ---- b(4) a(4) {0b} ]", mnemonic="MIN_BU")\n@ispec("32<[ c(4) {78} ---- b(4) a(4) {0b} ]", mnemonic="MIN_H")\n@ispec("32<[ c(4) {79} ---- b(4) a(4) {0b} ]", mnemonic="MIN_HU")\n@ispec("32<[ c(4) {09} ---- b(4) a(4) {0f} ]", mnemonic="NAND")\n@ispec("32<[ c(4) {11} ---- b(4) a(4) {0b} ]", mnemonic="NE")\n@ispec("32<[ c(4) {0b} ---- b(4) a(4) {0f} ]", mnemonic="NOR")\n@ispec("32<[ c(4) {0a} ---- b(4) a(4) {0f} ]", mnemonic="OR")\n@ispec("32<[ c(4) {27} ---- b(4) a(4) {0b} ]", mnemonic="OR_EQ")\n@ispec("32<[ c(4) {2b} ---- b(4) a(4) {0b} ]", mnemonic="OR_GE")\n@ispec("32<[ c(4) {2c} ---- b(4) a(4) {0b} ]", mnemonic="OR_GE_U")\n@ispec("32<[ c(4) {29} ---- b(4) a(4) {0b} ]", mnemonic="OR_LT")\n@ispec("32<[ c(4) {2a} ---- b(4) a(4) {0b} ]", mnemonic="OR_LT_U")\n@ispec("32<[ c(4) {28} ---- b(4) a(4) {0b} ]", mnemonic="OR_NE")\n@ispec("32<[ c(4) {0f} ---- b(4) a(4) {0f} ]", mnemonic="ORN")\n@ispec("32<[ c(4) {00} ---- b(4) a(4) {0f} ]", mnemonic="SH")\n@ispec("32<[ c(4) {37} ---- b(4) a(4) {0b} ]", mnemonic="SH_EQ")\n@ispec("32<[ c(4) {3b} ---- b(4) a(4) {0b} ]", mnemonic="SH_GE")\n@ispec("32<[ c(4) {3c} ---- b(4) a(4) {0b} ]", mnemonic="SH_GE_U")\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {0f} ]", mnemonic="SH_H")\n@ispec("32<[ c(4) {39} ---- b(4) a(4) {0b} ]", mnemonic="SH_LT")\n@ispec("32<[ c(4) {3a} ---- b(4) a(4) {0b} ]", mnemonic="SH_LT_U")\n@ispec("32<[ c(4) {38} ---- b(4) a(4) {0b} ]", mnemonic="SH_NE")\n@ispec("32<[ c(4) {01} ---- b(4) a(4) {0f} ]", mnemonic="SHA")\n@ispec("32<[ c(4) {41} ---- b(4) a(4) {0f} ]", mnemonic="SHA_H")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {0f} ]", mnemonic="SHAS")\n@ispec("32<[ c(4) {08} ---- b(4) a(4) {0b} ]", 
mnemonic="SUB")\n@ispec("32<[ c(4) {48} ---- b(4) a(4) {0b} ]", mnemonic="SUB_B")\n@ispec("32<[ c(4) {68} ---- b(4) a(4) {0b} ]", mnemonic="SUB_H")\n@ispec("32<[ c(4) {0d} ---- b(4) a(4) {0b} ]", mnemonic="SUBC")\n@ispec("32<[ c(4) {0a} ---- b(4) a(4) {0b} ]", mnemonic="SUBS")\n@ispec("32<[ c(4) {0b} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_U")\n@ispec("32<[ c(4) {6a} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_H")\n@ispec("32<[ c(4) {6b} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_HU")\n@ispec("32<[ c(4) {0c} ---- b(4) a(4) {0b} ]", mnemonic="SUBX")\n@ispec("32<[ c(4) {0d} ---- b(4) a(4) {0f} ]", mnemonic="XNOR")\n@ispec("32<[ c(4) {0c} ---- b(4) a(4) {0f} ]", mnemonic="XOR")\n@ispec("32<[ c(4) {2f} ---- b(4) a(4) {0b} ]", mnemonic="XOR_EQ")\n@ispec("32<[ c(4) {30} ---- b(4) a(4) {0b} ]", mnemonic="XOR_NE")\ndef tricore_ddd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {01} ]", mnemonic="EQ_A")\n@ispec("32<[ c(4) {43} ---- b(4) a(4) {01} ]", mnemonic="GE_A")\n@ispec("32<[ c(4) {42} ---- b(4) a(4) {01} ]", mnemonic="LT_A")\n@ispec("32<[ c(4) {41} ---- b(4) a(4) {01} ]", mnemonic="NE_A")\ndef tricore_daa_arithmetic(obj, c, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {63} ---- b(4) ---- {01} ]", mnemonic="MOV_A", _dst=env.A, _src=env.D)\n@ispec("32<[ c(4) {00} ---- b(4) ---- {01} ]", mnemonic="MOV_AA", _dst=env.A, _src=env.A)\n@ispec("32<[ c(4) {4c} ---- b(4) ---- {01} ]", mnemonic="MOV_D", _dst=env.D, _src=env.A)\ndef tricore_daa_arithmetic(obj, c, b, _dst, _src):\n dst = _dst[c]\n src = _src[b]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {48} ---- ---- a(4) {01} ]", mnemonic="EQZ_A")\n@ispec("32<[ c(4) {49} ---- ---- a(4) {01} ]", mnemonic="NEZ_A")\ndef tricore_da_arithmetic(obj, c, a):\n src1 = 
env.A[a]\n dst = env.D[c]\n obj.operands = [dst, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {01} --00 b(4) a(4) {4b} ]", mnemonic="BMERGE")\ndef tricore_ddd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {06} --00 b(4) a(4) {4b} ]", mnemonic="CRC32_B")\n@ispec("32<[ c(4) {03} --00 b(4) a(4) {4b} ]", mnemonic="CRC32B_W")\n@ispec("32<[ c(4) {03} --00 b(4) a(4) {4b} ]", mnemonic="CRC32L_W")\ndef tricore_crc32(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src2, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {20} --01 b(4) a(4) {4b} ]", mnemonic="DIV")\n@ispec("32<[ c(4) {21} --01 b(4) a(4) {4b} ]", mnemonic="DIV_U")\n@ispec("32<[ c(4) {5a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_B")\n@ispec("32<[ c(4) {4a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_BU")\n@ispec("32<[ c(4) {3a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_H")\n@ispec("32<[ c(4) {2a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_HU")\n@ispec("32<[ c(4) {1a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT")\n@ispec("32<[ c(4) {0a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_U")\ndef tricore_edd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 100 ----- b(4) a(4) {17} ]", mnemonic="DEXTR")\ndef tricore_dddc(obj, c, d, b, a):\n shift = env.D[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, shift]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 010 ----- ---- a(4) {17} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) d(4) 011 ----- ---- a(4) {17} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, d, a):\n if d%2:\n raise InstructionError(obj)\n width = env.E[d][32:37]\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, 
width]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 0--00 ---- a(4) {6b} ]", mnemonic="PACK")\ndef tricore_extr(obj, c, d, a):\n if d%2:\n raise InstructionError(obj)\n src1 = env.E[d]\n src2 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {08} -- 00 ---- a(4) {4b} ]", mnemonic="UNPACK")\ndef tricore_extr(obj, c, d, a):\n src = env.D[a]\n dst = env.E[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {02} -- 00 ---- a(4) {4b} ]", mnemonic="PARITY")\n@ispec("32<[ c(4) {22} -- 00 ---- a(4) {4b} ]", mnemonic="POPCNT_W")\ndef tricore_extr(obj, c, d, a):\n src = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 ----- b(4) a(4) {77} ]", mnemonic="DEXTR")\ndef tricore_dextr(obj, c, pos, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, env.cst(pos,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 10 width(5) ---- a(4) {37} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) pos(5) 11 width(5) ---- a(4) {37} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, pos, width, a):\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 01 width(5) const(4) ---- {b7} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, pos, width, const):\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, env.cst(const,4), env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 001 width(5) const(4) ---- {d7} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, d, width, const):\n src2 = env.D[d]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, env.cst(const,4), src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 01 width(5) b(4) ---- {37} ]", 
mnemonic="IMASK")\ndef tricore_imask(obj, c, pos, width, b):\n src1 = env.D[b]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 001 width(5) b(4) ---- {57} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, d, width, b):\n src1 = env.D[b]\n src2 = env.D[d]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 width(5) const(4) a(4) {b7} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, pos, width, const, a):\n dst = env.D[c]\n src1 = env.D[a]\n obj.operands = [dst, src1, env.cst(const,4), env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ----- const(4) a(4) {97} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, const, a):\n src1 = env.D[a]\n if d%2:\n raise InstructionError(obj)\n src3 = env.E[d]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(const,4), src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 width(5) const(4) a(4) {d7} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, width, const, a):\n src1 = env.D[a]\n src3 = env.D[d]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(const,4), src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 width(5) b(4) a(4) {37} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, pos, width, b, a):\n dst = env.D[c]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ----- b(4) a(4) {17} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n if d%2:\n raise InstructionError(obj)\n src3 = env.E[d]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 width(5) b(4) a(4) {57} 
]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, width, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n src3 = env.D[d]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, src3, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 010 width(5) ---- a(4) {57} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) d(4) 011 width(5) ---- a(4) {57} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, d, width, a):\n src2 = env.D[d]\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {09} --00 ---- a(4) {4b} ]", mnemonic="BSPLIT")\ndef tricore_edd_arithmetic(obj, c, a):\n src1 = env.D[a]\n dst = env.E[c]\n obj.operands = [dst, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) 0001110 ~const9(9) a(4) {8b} ]", mnemonic="ABSDIF")\n@ispec("32<[ c(4) 0001111 ~const9(9) a(4) {8b} ]", mnemonic="ABSDIFS")\n@ispec("32<[ c(4) 0000000 ~const9(9) a(4) {8b} ]", mnemonic="ADD")\n@ispec("32<[ c(4) 0000101 ~const9(9) a(4) {8b} ]", mnemonic="ADDC")\n@ispec("32<[ c(4) 0000010 ~const9(9) a(4) {8b} ]", mnemonic="ADDS")\n@ispec("32<[ c(4) 0000011 ~const9(9) a(4) {8b} ]", mnemonic="ADDS_U") #const9 is signed\n@ispec("32<[ c(4) 0000100 ~const9(9) a(4) {8b} ]", mnemonic="ADDX")\n@ispec("32<[ c(4) 0100000 ~const9(9) a(4) {8b} ]", mnemonic="AND_EQ")\n@ispec("32<[ c(4) 0100100 ~const9(9) a(4) {8b} ]", mnemonic="AND_GE")\n@ispec("32<[ c(4) 0100010 ~const9(9) a(4) {8b} ]", mnemonic="AND_LT")\n@ispec("32<[ c(4) 0100001 ~const9(9) a(4) {8b} ]", mnemonic="AND_NE")\n@ispec("32<[ c(4) 0010000 ~const9(9) a(4) {8b} ]", mnemonic="EQ")\n@ispec("32<[ c(4) 1010110 ~const9(9) a(4) {8b} ]", mnemonic="EQANY_B")\n@ispec("32<[ c(4) 1110110 ~const9(9) a(4) {8b} ]", mnemonic="EQANY_H")\n@ispec("32<[ c(4) 0010100 ~const9(9) a(4) {8b} ]", mnemonic="GE")\n@ispec("32<[ c(4) 0010010 ~const9(9) a(4) {8b} ]", mnemonic="LT")\n@ispec("32<[ c(4) 0011010 ~const9(9) a(4) {8b} ]", mnemonic="MAX")\n@ispec("32<[ c(4) 0010001 
~const9(9) a(4) {8b} ]", mnemonic="NE")\n@ispec("32<[ c(4) 0100111 ~const9(9) a(4) {8b} ]", mnemonic="OR_EQ")\n@ispec("32<[ c(4) 0101011 ~const9(9) a(4) {8b} ]", mnemonic="OR_GE")\n@ispec("32<[ c(4) 0101001 ~const9(9) a(4) {8b} ]", mnemonic="OR_LT")\n@ispec("32<[ c(4) 0001000 ~const9(9) a(4) {8b} ]", mnemonic="RSUB")\n@ispec("32<[ c(4) 0001001 ~const9(9) a(4) {8b} ]", mnemonic="RSUBS")\n@ispec("32<[ c(4) 0001011 ~const9(9) a(4) {8b} ]", mnemonic="RSUBS_U") #const9 is signed\n@ispec("32<[ c(4) 0000000 ~const9(9) a(4) {8f} ]", mnemonic="SH")\n@ispec("32<[ c(4) 1000000 ~const9(9) a(4) {8f} ]", mnemonic="SH_H")\n@ispec("32<[ c(4) 0110111 ~const9(9) a(4) {8b} ]", mnemonic="SH_EQ")\n@ispec("32<[ c(4) 0111011 ~const9(9) a(4) {8b} ]", mnemonic="SH_GE")\n@ispec("32<[ c(4) 0111001 ~const9(9) a(4) {8b} ]", mnemonic="SH_LT")\n@ispec("32<[ c(4) 0111000 ~const9(9) a(4) {8b} ]", mnemonic="SH_NE")\n@ispec("32<[ c(4) 0000001 ~const9(9) a(4) {8f} ]", mnemonic="SHA")\n@ispec("32<[ c(4) 1000001 ~const9(9) a(4) {8f} ]", mnemonic="SHA_H")\n@ispec("32<[ c(4) 0000010 ~const9(9) a(4) {8f} ]", mnemonic="SHAS")\n@ispec("32<[ c(4) 0101111 ~const9(9) a(4) {8b} ]", mnemonic="XOR_EQ")\n@ispec("32<[ c(4) 0110011 ~const9(9) a(4) {8b} ]", mnemonic="XOR_GE")\n@ispec("32<[ c(4) 0110001 ~const9(9) a(4) {8b} ]", mnemonic="XOR_LT")\n@ispec("32<[ c(4) 0110000 ~const9(9) a(4) {8b} ]", mnemonic="XOR_NE")\ndef tricore_ddc_arithmetic(obj, c, const9, a):\n src1 = env.D[a]\n if obj.mnemonic in ("SH","SHA","SHAS"):\n const9 = const9[0:6]\n elif obj.mnemonic in ("SH_H","SHA_H"):\n const9 = const9[0:5]\n src2 = env.cst(const9.int(-1),32)\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {47} ]", 
mnemonic="AND_OR_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {87} ]", mnemonic="AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {87} ]", mnemonic="ANDN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {67} ]", mnemonic="INS_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {67} ]", mnemonic="INSN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {07} ]", mnemonic="NAND_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {87} ]", mnemonic="NOR_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {87} ]", mnemonic="OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {07} ]", mnemonic="ORN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_NAND_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_ORN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_XNOR_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_XOR_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {07} ]", mnemonic="XNOR_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {07} ]", mnemonic="XOR_T")\ndef tricore_ddd_arithmetic(obj, c, pos2, pos1, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1[pos1:pos1+1], src2[pos2:pos2+1]]\n obj.type = type_data_processing\n@ispec("32<[ c(4) 0001000 const9(9) a(4) {8f} ]", mnemonic="AND")\n@ispec("32<[ 
c(4) 0100101 const9(9) a(4) {8b} ]", mnemonic="AND_GE_U")\n@ispec("32<[ c(4) 0100011 const9(9) a(4) {8b} ]", mnemonic="AND_LT_U")\n@ispec("32<[ c(4) 0001110 const9(9) a(4) {8f} ]", mnemonic="ANDN")\n@ispec("32<[ c(4) 0001001 const9(9) a(4) {8f} ]", mnemonic="NAND")\n@ispec("32<[ c(4) 0001011 const9(9) a(4) {8f} ]", mnemonic="NOR")\n@ispec("32<[ c(4) 0010101 const9(9) a(4) {8b} ]", mnemonic="GE_U")\n@ispec("32<[ c(4) 0001010 const9(9) a(4) {8f} ]", mnemonic="OR")\n@ispec("32<[ c(4) 0101100 const9(9) a(4) {8b} ]", mnemonic="OR_GE_U")\n@ispec("32<[ c(4) 0101010 const9(9) a(4) {8b} ]", mnemonic="OR_LT_U")\n@ispec("32<[ c(4) 0101000 const9(9) a(4) {8b} ]", mnemonic="OR_NE")\n@ispec("32<[ c(4) 0001111 const9(9) a(4) {8f} ]", mnemonic="ORN")\n@ispec("32<[ c(4) 0000111 const9(9) a(4) {8f} ]", mnemonic="SHUFFLE")\n@ispec("32<[ c(4) 0001101 const9(9) a(4) {8f} ]", mnemonic="XNOR")\n@ispec("32<[ c(4) 0001100 const9(9) a(4) {8f} ]", mnemonic="XOR")\n@ispec("32<[ c(4) 0111100 const9(9) a(4) {8b} ]", mnemonic="SH_GE_U")\n@ispec("32<[ c(4) 0111010 const9(9) a(4) {8b} ]", mnemonic="SH_LT_U")\n@ispec("32<[ c(4) 0110100 const9(9) a(4) {8b} ]", mnemonic="XOR_GE_U")\n@ispec("32<[ c(4) 0110011 const9(9) a(4) {8b} ]", mnemonic="XOR_LT_U")\n@ispec("32<[ c(4) 0011011 const9(9) a(4) {8b} ]", mnemonic="MAX_U")\n@ispec("32<[ c(4) 0010011 const9(9) a(4) {8b} ]", mnemonic="LT_U")\ndef tricore_ddc_arithmetic(obj, c, const9, a):\n src1 = env.D[a]\n src2 = env.cst(const9,32)\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {c2} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {06} ]", mnemonic="SH")\n@ispec("16<[ ~const4(4) a(4) {86} ]", mnemonic="SHA")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[a]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {92} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {8a} ]", 
mnemonic="CADD")\n@ispec("16<[ ~const4(4) a(4) {ca} ]", mnemonic="CADDN")\n@ispec("16<[ ~const4(4) a(4) {aa} ]", mnemonic="CMOV")\n@ispec("16<[ ~const4(4) a(4) {ea} ]", mnemonic="CMOVN")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[15]\n obj.operands = [dst, src1, src2]\n if "CADD" in obj.mnemonic:\n obj.operands = [dst, src1, dst, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {9a} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {ba} ]", mnemonic="EQ")\n@ispec("16<[ ~const4(4) a(4) {fa} ]", mnemonic="LT")\n@ispec("16<[ ~const4(4) a(4) {82} ]", mnemonic="MOV")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[15]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[a]\n obj.operands = [dst, src1, src2]\n if obj.mnemonic=="MOV":\n obj.operands = [src1,src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {d2} ]", mnemonic="MOV")\ndef tricore_ec_arithmetic(obj, const4, a):\n dst = env.E[a]\n src = env.cst(const4.int(-1),64)\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ const4(4) a(4) {a0} ]", mnemonic="MOV_A")\ndef tricore_ec_arithmetic(obj, const4, a):\n dst = env.A[a]\n src = env.cst(const4,32)\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ const8(8) {16} ]", mnemonic="AND")\n@ispec("16<[ const8(8) {da} ]", mnemonic="MOV")\n@ispec("16<[ const8(8) {96} ]", mnemonic="OR")\ndef tricore_ddc_arithmetic(obj, const8):\n dst = env.D[15]\n src2 = env.cst(const8,32)\n src1 = env.D[15]\n obj.operands = [dst, src1, src2]\n if obj.mnemonic=="MOV":\n obj.operands = [src1,src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {42} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {26} ]", mnemonic="AND")\n@ispec("16<[ b(4) a(4) {a6} ]", mnemonic="OR")\n@ispec("16<[ b(4) a(4) {a2} ]", mnemonic="SUB")\n@ispec("16<[ b(4) a(4) {62} ]", mnemonic="SUBS")\n@ispec("16<[ b(4) a(4) {c6} ]", mnemonic="XOR")\ndef 
tricore_dd_arithmetic(obj, b, a):\n dst = env.D[a]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {02} ]", mnemonic="MOV" , _dst=env.D, _src=env.D)\n@ispec("16<[ b(4) a(4) {60} ]", mnemonic="MOV_A" , _dst=env.A, _src=env.D)\n@ispec("16<[ b(4) a(4) {40} ]", mnemonic="MOV_AA" , _dst=env.A, _src=env.A)\n@ispec("16<[ b(4) a(4) {80} ]", mnemonic="MOV_D" , _dst=env.D, _src=env.A)\ndef tricore_mov(obj, b, a, _dst, _src):\n dst = _dst[a]\n src = _src[b]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {12} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {2a} ]", mnemonic="CMOV")\n@ispec("16<[ b(4) a(4) {6a} ]", mnemonic="CMOVN")\n@ispec("16<[ b(4) a(4) {52} ]", mnemonic="SUB")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[a]\n src1 = env.D[15]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {1a} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {22} ]", mnemonic="ADDS")\n@ispec("16<[ b(4) a(4) {3a} ]", mnemonic="EQ")\n@ispec("16<[ b(4) a(4) {7a} ]", mnemonic="LT")\n@ispec("16<[ b(4) a(4) {5a} ]", mnemonic="SUB")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[15]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {01} ---- b(4) a(4) {01} ]", mnemonic="ADD_A")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {01} ]", mnemonic="SUB_A")\ndef tricore_aaa_arithmetic(obj, c, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {b0} ]", mnemonic="ADD_A")\ndef tricore_aac_arithmetic(obj, const4, a):\n dst = env.A[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.A[a]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ const8(8) {20} ]", mnemonic="SUB_A")\ndef tricore_aac_arithmetic(obj, const8, a):\n 
dst = env.A[10]\n src2 = env.cst(const8,32)\n src1 = env.A[10]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {30} ]", mnemonic="ADD_A")\ndef tricore_aa_arithmetic(obj, b, a):\n dst = env.A[a]\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) ~const16(16) a(4) {1b} ]", mnemonic="ADDI")\n@ispec("32<[ c(4) ~const16(16) a(4) {9b} ]", mnemonic="ADDIH")\ndef tricore_di_arithmetic(obj, c, const16, a):\n src1 = env.D[a]\n src2 = env.cst(const16.int(-1),32)\n if self.mnemonic=="ADDIH": src2=src2<<16\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) ~const16(16) a(4) {11} ]", mnemonic="ADDIH_A")\ndef tricore_ai_arithmetic(obj, c, const16, a):\n src1 = env.A[a]\n src2 = env.cst(const16.int(-1),32)<<16\n dst = env.A[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {60} -- n(2) b(4) a(4) {01} ]", mnemonic="ADDSC_A")\ndef tricore_aaa_arithmetic(obj, c, n, b, a):\n src1 = env.D[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src2, src1, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {62} ---- b(4) a(4) {01} ]", mnemonic="ADDSC_AT")\ndef tricore_aaa_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src2, src1]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) n(2) 010000 ]", mnemonic="ADDSC_A")\ndef tricore_aa_arithmetic(obj, b, a, n):\n dst = env.A[a]\n src1 = env.D[15]\n src2 = env.A[b]\n obj.operands = [dst, src2, src1, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ off2(4) 10 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1110 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_I", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1110 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_I", mode="Circular")\n@ispec("32<[ 
off2(4) 00 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1100 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1100 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_W", mode="Circular")\n@ispec("32<[ off2(4) 00 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1101 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_WI", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1101 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_WI", mode="Circular")\n@ispec("32<[ off2(4) 00 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Post-increment")\n@ispec("32<[ 
off2(4) 01 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Pre-increment")\ndef tricore_cache(obj, off2, off1, b):\n src2 = env.A[b]\n src1 = env.cst((off2<<6)+off1,10)\n obj.operands = [src2, src1]\n obj.type = type_system\n@ispec("32<[ off2(4) 10 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 0011 off1(6) b(4) a(4) {69} ]", mnemonic="CMPSWAP_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 0011 off1(6) b(4) a(4) {69} ]", mnemonic="CMPSWAP_W", mode="Circular")\n@ispec("32<[ off2(4) 00 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 0010 off1(6) b(4) a(4) {69} ]", mnemonic="SWAPMSK_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 0010 off1(6) b(4) a(4) {69} ]", mnemonic="SWAPMSK_W", mode="Circular")\n@ispec("32<[ off2(4) 00 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Pre-increment")\ndef tricore_swap(obj, off2, off1, b, a):\n if a%2:\n raise InstructionError(obj)\n dst = env.D[a]\n src1 = env.A[b]\n src2 = env.cst((off2<<6)+off1,10)\n src3 = env.E[a]\n obj.operands = [dst, src1, src2, src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ~const9(9) a(4) {ab} ]", mnemonic="CADD")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {ab} ]", mnemonic="CADDN")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {13} ]", mnemonic="MADD", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {13} ]", mnemonic="MADDS", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {13} ]", mnemonic="MADDS_U", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {33} ]", mnemonic="MSUB", 
opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {33} ]", mnemonic="MSUBS", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {33} ]", mnemonic="MSUBS_U", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {ab} ]", mnemonic="SEL")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {ab} ]", mnemonic="SELN")\ndef tricore_cond_ddc(obj, c, d, const9, a):\n cond = env.D[d]\n src1 = env.D[a]\n src2 = env.cst(const9.int(-1),32)\n dst = env.D[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 011 ~const9(9) a(4) {13} ]", mnemonic="MADD", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {13} ]", mnemonic="MADDS", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 010 ~const9(9) a(4) {13} ]", mnemonic="MADD_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {13} ]", mnemonic="MADDS_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 011 ~const9(9) a(4) {33} ]", mnemonic="MSUB", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {33} ]", mnemonic="MSUBS", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 010 ~const9(9) a(4) {33} ]", mnemonic="MSUB_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {33} ]", mnemonic="MSUBS_U", opt4="64+(32+K9)->64")\ndef tricore_cond_eec(obj, c, d, const9, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.cst(const9.int(-1),32)\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 011010 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="LL")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="LU")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="UL")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="UU")\n@ispec("32<[ c(4) d(4) 111010 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="LL")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {83} 
]", mnemonic="MADDS_H", op4="LU")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="UL")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="UU")\n@ispec("32<[ c(4) d(4) 000010 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 000001 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 000000 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 000101 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 011101 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 000100 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 011100 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 100010 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 100001 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 100000 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 100101 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 111101 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 
100100 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 111100 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 011010 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="LL")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="LU")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="UL")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="UU")\n@ispec("32<[ c(4) d(4) 111010 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="LL")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="LU")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="UL")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="UU")\n@ispec("32<[ c(4) d(4) 000010 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 000001 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 000000 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 000101 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 011101 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 000100 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 011100 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 100010 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {63} ]", 
mnemonic="MSUBS_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 100001 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 100000 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 100101 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 111101 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 100100 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 111100 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16U*16U)->64")\ndef tricore_cond_eec(obj, c, d, n, b, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 0000 ---- b(4) a(4) {2b} ]", mnemonic="CADD")\n@ispec("32<[ c(4) d(4) 0001 ---- b(4) a(4) {2b} ]", mnemonic="CADDN")\n@ispec("32<[ c(4) d(4) 0010 ---- b(4) a(4) {2b} ]", mnemonic="CSUB")\n@ispec("32<[ c(4) d(4) 0011 ---- b(4) a(4) {2b} ]", mnemonic="CSUBN")\n@ispec("32<[ c(4) d(4) {0a} b(4) a(4) {03} ]", mnemonic="MADD", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) {8a} b(4) a(4) {03} ]", mnemonic="MADDS", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) {88} b(4) a(4) {03} ]", mnemonic="MADDS_U", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) 0100 ---- b(4) a(4) {2b} ]", mnemonic="SEL")\n@ispec("32<[ c(4) d(4) 0101 ---- b(4) a(4) {2b} ]", mnemonic="SELN")\ndef tricore_cond_ddd(obj, c, d, b, a):\n cond = env.D[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) {6a} b(4) a(4) {03} ]", mnemonic="MADD", 
opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {ea} b(4) a(4) {03} ]", mnemonic="MADDS", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {68} b(4) a(4) {03} ]", mnemonic="MADD_U", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {e8} b(4) a(4) {03} ]", mnemonic="MADDS_U", opt4="64+(32*32)->64")\ndef tricore_cond_ddd(obj, c, d, b, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {1c} ---- ---- a(4) {0f} ]", mnemonic="CLO")\n@ispec("32<[ c(4) {7d} ---- ---- a(4) {0f} ]", mnemonic="CLO_H")\n@ispec("32<[ c(4) {1d} ---- ---- a(4) {0f} ]", mnemonic="CLS")\n@ispec("32<[ c(4) {7e} ---- ---- a(4) {0f} ]", mnemonic="CLS_H")\n@ispec("32<[ c(4) {1b} ---- ---- a(4) {0f} ]", mnemonic="CLZ")\n@ispec("32<[ c(4) {7c} ---- ---- a(4) {0f} ]", mnemonic="CLZ_H")\n@ispec("32<[ c(4) {5e} ---- ---- a(4) {0b} ]", mnemonic="SAT_B")\n@ispec("32<[ c(4) {5f} ---- ---- a(4) {0b} ]", mnemonic="SAT_BU")\n@ispec("32<[ c(4) {7e} ---- ---- a(4) {0b} ]", mnemonic="SAT_H")\n@ispec("32<[ c(4) {7f} ---- ---- a(4) {0b} ]", mnemonic="SAT_HU")\ndef tricore_dd_arithmetic(obj, c, a):\n src = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ 1010 ---- {00} ]", mnemonic="DEBUG")\n@ispec("16<[ 0000 ---- {00} ]", mnemonic="NOP")\ndef tricore_system(obj):\n obj.operands = []\n obj.type = type_system\n@ispec("16<[ 0111 ---- {00} ]", mnemonic="FRET")\n@ispec("16<[ 1001 ---- {00} ]", mnemonic="RET")\n@ispec("16<[ 1000 ---- {00} ]", mnemonic="RFE")\ndef tricore_ret(obj):\n obj.operands = []\n obj.type = type_control_flow\n@ispec("32<[ ---- 000100 ---------- ---- {0d} ]", mnemonic="DEBUG")\n@ispec("32<[ ---- 001101 ---------- ---- {0d} ]", mnemonic="DISABLE")\n@ispec("32<[ ---- 010010 ---------- ---- {0d} ]", mnemonic="DSYNC")\n@ispec("32<[ ---- 001100 ---------- ---- {0d} ]", mnemonic="ENABLE")\n@ispec("32<[ ---- 010011 ---------- ---- {0d} ]", 
mnemonic="ISYNC")\n@ispec("32<[ ---- 010101 ---------- ---- {0d} ]", mnemonic="TRAPSV")\n@ispec("32<[ ---- 010100 ---------- ---- {0d} ]", mnemonic="TRAPV")\n@ispec("32<[ ---- 000000 ---------- ---- {0d} ]", mnemonic="NOP")\n@ispec("32<[ ---- 001001 ---------- ---- {0d} ]", mnemonic="RSLCX")\n@ispec("32<[ ---- 000000 ---------- ---- {2f} ]", mnemonic="RSTV")\n@ispec("32<[ ---- 001000 ---------- ---- {0d} ]", mnemonic="SVLCX")\n@ispec("32<[ ---- 010110 ---------- ---- {0d} ]", mnemonic="WAIT")\ndef tricore_system(obj):\n obj.operands = []\n obj.type = type_system\n@ispec("32<[ ---- 000011 ---------- ---- {0d} ]", mnemonic="FRET")\n@ispec("32<[ ---- 000110 ---------- ---- {0d} ]", mnemonic="RET")\n@ispec("32<[ ---- 000111 ---------- ---- {0d} ]", mnemonic="RFE")\n@ispec("32<[ ---- 000101 ---------- ---- {0d} ]", mnemonic="RFM")\ndef tricore_ret(obj):\n obj.operands = []\n obj.type = type_control_flow\n@ispec("32<[ ---- 001111 ---------- a(4) {0d} ]", mnemonic="DISABLE")\n@ispec("32<[ ---- 001110 ---------- a(4) {0d} ]", mnemonic="RESTORE")\ndef tricore_system(obj, a):\n obj.operands = [env.D[a]]\n obj.type = type_system\n@ispec("32<[ c(4) d(4) 1101 -- 00 b(4) ---- {6b} ]", mnemonic="DVADJ")\n@ispec("32<[ c(4) d(4) 1111 -- 00 b(4) ---- {6b} ]", mnemonic="DVSTEP")\n@ispec("32<[ c(4) d(4) 1110 -- 00 b(4) ---- {6b} ]", mnemonic="DVSTEP_U")\n@ispec("32<[ c(4) d(4) 1010 -- 00 b(4) ---- {6b} ]", mnemonic="IXMAX")\n@ispec("32<[ c(4) d(4) 1011 -- 00 b(4) ---- {6b} ]", mnemonic="IXMAX_U")\n@ispec("32<[ c(4) d(4) 1000 -- 00 b(4) ---- {6b} ]", mnemonic="IXMIN")\n@ispec("32<[ c(4) d(4) 1001 -- 00 b(4) ---- {6b} ]", mnemonic="IXMIN_U")\ndef tricore_eee(obj, c, d, b):\n if d%2 or b%2 or c%2:\n raise InstructionError(obj)\n src1 = env.E[d]\n src2 = env.E[b]\n dst = env.E[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) disp(4) {1e} ]", mnemonic="JEQ", _off=0)\n@ispec("16<[ ~const4(4) disp(4) {9e} ]", mnemonic="JEQ", 
_off=16)\n@ispec("16<[ ~const4(4) disp(4) {5e} ]", mnemonic="JNE", _off=0)\n@ispec("16<[ ~const4(4) disp(4) {de} ]", mnemonic="JNE", _off=16)\ndef tricore_jcc(obj, const4, disp, _off):\n dst = env.D[15]\n src1 = env.cst(const4.int(-1),32)\n src2 = env.cst(disp,32)+_off\n obj.operands = [dst, src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {3e} ]", mnemonic="JEQ", _off=0)\n@ispec("16<[ b(4) disp(4) {be} ]", mnemonic="JEQ", _off=16)\n@ispec("16<[ b(4) disp(4) {7e} ]", mnemonic="JNE", _off=0)\n@ispec("16<[ b(4) disp(4) {fe} ]", mnemonic="JNE", _off=16)\ndef tricore_jcc(obj, b, disp, _off):\n dst = env.D[15]\n src1 = env.D[b]\n src2 = env.cst(disp,32)+_off\n obj.operands = [dst, src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {ce} ]", mnemonic="JGEZ")\n@ispec("16<[ b(4) disp(4) {4e} ]", mnemonic="JGTZ")\n@ispec("16<[ b(4) disp(4) {8e} ]", mnemonic="JLEZ")\n@ispec("16<[ b(4) disp(4) {0e} ]", mnemonic="JLTZ")\n@ispec("16<[ b(4) disp(4) {f6} ]", mnemonic="JNZ")\n@ispec("16<[ b(4) disp(4) {76} ]", mnemonic="JZ")\ndef tricore_jcc(obj, b, disp):\n src1 = env.D[b]\n src2 = env.cst(disp,32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {df} ]", mnemonic="JEQ")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {df} ]", mnemonic="JNE")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {ff} ]", mnemonic="JGE")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {ff} ]", mnemonic="JGE_U")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {bf} ]", mnemonic="JLT")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {bf} ]", mnemonic="JLT_U")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {9f} ]", mnemonic="JNED")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {9f} ]", mnemonic="JNEI")\ndef tricore_jcc(obj, disp, const, a):\n src1 = env.D[a]\n src2 = env.cst(const,4)\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {5f} ]", mnemonic="JEQ")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {5f} 
]", mnemonic="JNE")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {7f} ]", mnemonic="JGE")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {7f} ]", mnemonic="JGE_U")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {3f} ]", mnemonic="JLT")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {3f} ]", mnemonic="JLT_U")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {1f} ]", mnemonic="JNED")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {1f} ]", mnemonic="JNEI")\ndef tricore_jcc(obj, disp, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {7d} ]", mnemonic="JEQ_A")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {7d} ]", mnemonic="JNE_A")\ndef tricore_jcc(obj, disp, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 1 ~disp(15) ---- a(4) {bd} ]", mnemonic="JNZ_A")\n@ispec("32<[ 0 ~disp(15) ---- a(4) {bd} ]", mnemonic="JZ_A")\ndef tricore_jcc(obj, disp, a):\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) ---- {fd} ]", mnemonic="LOOP")\n@ispec("32<[ 1 ~disp(15) b(4) ---- {fd} ]", mnemonic="LOOPU")\ndef tricore_jcc(obj, disp, b):\n src1 = env.A[b]\n src2 = env.cst(disp.int(-1)*2,32)\n obj.operands = [src1, src2]\n if obj.mnemonic=="LOOPU":\n obj.operands = [src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {7c} ]", mnemonic="JNZ_A")\n@ispec("16<[ b(4) disp(4) {bc} ]", mnemonic="JZ_A")\ndef tricore_jcc(obj, b, disp):\n src1 = env.A[b]\n src2 = env.cst(disp,32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) #disp(4) {fc} ]", mnemonic="LOOP")\ndef tricore_jcc(obj, b, disp):\n src1 = env.A[b]\n src2 = env.cst(int(("1"*27)+disp+"0",2),32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ 0000 a(4) {dc} ]", mnemonic="JI")\ndef tricore_ji(obj, a):\n src = env.A[a]\n obj.operands = 
[src]\n obj.type = type_control_flow\n@ispec("16<[ 0000 a(4) {46} ]", mnemonic="NOT")\n@ispec("16<[ 0101 a(4) {32} ]", mnemonic="RSUB")\n@ispec("16<[ 0000 a(4) {32} ]", mnemonic="SAT_B")\n@ispec("16<[ 0001 a(4) {32} ]", mnemonic="SAT_BU")\n@ispec("16<[ 0010 a(4) {32} ]", mnemonic="SAT_H")\n@ispec("16<[ 0011 a(4) {32} ]", mnemonic="SAT_HU")\ndef tricore_a(obj, a):\n src = env.D[a]\n obj.operands = [src]\n obj.type = type_data_processing\n@ispec("16<[ n(4) disp(4) {ae} ]", mnemonic="JNZ_T")\n@ispec("16<[ n(4) disp(4) {2e} ]", mnemonic="JZ_T")\ndef tricore_ji(obj, n, disp):\n obj.operands = [env.D[15][n:n+1], env.cst(disp,32)]\n obj.type = type_control_flow\n@ispec("32<[ 1 ~disp(15) n(4) a(4) h 1101111 ]", mnemonic="JNZ_T")\n@ispec("32<[ 0 ~disp(15) n(4) a(4) h 1101111 ]", mnemonic="JZ_T")\ndef tricore_jcc(obj, disp, n, a, h):\n i = n+(h<<4)\n src = env.D[a][i:i+1]\n obj.operands = [src, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_A", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_B", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_BU", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_D", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_DA", mode="Absolute")\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_H", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_HU", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {45} ]", mnemonic="LD_Q", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {c5} ]", mnemonic="LEA", mode="Absolute")\ndef tricore_ld(obj, off2, off3, 
off1, off4, a):\n dst = env.D[a]\n if obj.mnemonic in ("LD_A", "LEA") : dst = env.A[a]\n if obj.mnemonic in ("LD_D","LDMST") : dst = env.E[a]\n if obj.mnemonic=="LD_DA": dst = env.P[a]\n src = off1//off2//off3\n obj.operands = [dst, composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {c5} ]", mnemonic="LHA", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n dst = env.A[a]\n src = off1//off2//off3//off4\n obj.operands = [dst, composer([env.cst(0,14),env.cst(src.int(),18)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_A", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {25} ]", mnemonic="ST_B", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_D", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_DA", mode="Absolute")\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {25} ]", mnemonic="ST_H", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {65} ]", mnemonic="ST_Q", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {e5} ]", mnemonic="SWAP_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {e5} ]", mnemonic="LDMST", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4, a):\n src = env.D[a]\n if obj.mnemonic in ("ST_A",) : src = env.A[a]\n if obj.mnemonic in ("ST_D","LDMST") : src = env.E[a]\n if obj.mnemonic=="ST_DA": src = env.P[a]\n addr = off1//off2//off3\n obj.operands = [composer([env.cst(addr.int(),28),env.cst(off4,4)]), src]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) b bpos(3) {d5} ]", mnemonic="ST_T", mode="Absolute")\ndef 
tricore_st(obj, off2, off3, off1, off4, b, bpos):\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)]), env.cst(bpos,3), env.cst(b,1)]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) ---- {15} ]", mnemonic="STLCX", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4):\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {15} ]", mnemonic="LDLCX", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {15} ]", mnemonic="LDUCX", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n src = off1//off2//off3\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_A", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_A", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_B", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_B", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_BU", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 
01 0001 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_BU", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_D", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_D", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_DA", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_DA", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_H", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_H", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0011 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_HU", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0011 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_HU", 
mode="Circular")\n@ispec("32<[ ~off2(4) 00 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_Q", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_Q", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="LEA", mode="Short-offset")\ndef tricore_ld(obj, off2, off1, b, a):\n dst = env.D[a]\n if obj.mnemonic=="LD_A" : dst = env.A[a]\n elif obj.mnemonic=="LEA" : dst = env.A[a]\n elif obj.mnemonic=="LD_D" : dst = env.E[a]\n elif obj.mnemonic=="LDMST" : dst = env.E[a]\n elif obj.mnemonic=="LD_DA" : dst = env.P[a]\n obj.b = b\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [dst, src1, src2]\n if obj.mode == "Bit-Reverse":\n obj.operands.pop()\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_A", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) 
b(4) a(4) {a9} ]", mnemonic="ST_A", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_B", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_B", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_D", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_D", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_DA", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_DA", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_H", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_H", 
mode="Circular")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_Q", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_Q", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {69} ]", mnemonic="LDMST", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {69} ]", mnemonic="LDMST", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Pre-increment")\ndef tricore_st(obj, off2, off1, b, a):\n dst = env.D[a]\n if obj.mnemonic=="ST_A" : dst = env.A[a]\n elif obj.mnemonic=="ST_D" : dst = env.E[a]\n elif obj.mnemonic=="ST_DA" : dst = env.P[a]\n elif obj.mnemonic=="LDMST" : dst = env.E[a]\n obj.b = b\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n 
obj.operands = [src1, src2, dst]\n if obj.mode == "Bit-Reverse":\n obj.operands.pop()\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {69} ]", mnemonic="SWAP_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {69} ]", mnemonic="SWAP_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Pre-increment")\ndef tricore_ld(obj, off2, off1, b, a):\n dst = env.D[a]\n src1 = env.P[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [src1, src2, dst]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) ---- {49} ]", mnemonic="LDLCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) ---- {49} ]", mnemonic="LDUCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) ---- {49} ]", mnemonic="STLCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) ---- {49} ]", mnemonic="STUCX", mode="Short-offset")\ndef tricore_ld(obj, off2, off1, b):\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {99} ]", mnemonic="LD_A", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {79} ]", mnemonic="LD_B", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {39} ]", mnemonic="LD_BU", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {b9} ]", mnemonic="LD_HU", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {19} ]", mnemonic="LD_W", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) 
~off1(6) b(4) a(4) {d9} ]", mnemonic="LEA", mode="Long-offset")\ndef tricore_ld(obj, off2, off3, off1, b, a):\n dst = env.D[a]\n\n</context>\n\n假设一个实际的场景,我们需要采用这段代码来实现工作流程,可以给我解释一下这段代码的作用吗? \n\n\n\n
114
+ ```
lyrallms/LyraLlamaPy/examples/batch_demo.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import sys
3
+ sys.path.append('../')
4
+ from time import perf_counter
5
+
6
+ from lyra_llama import lyraLlama
7
+
8
+
9
def get_args():
    """Parse command-line options for the lyraLlama batch benchmark demo.

    Returns:
        argparse.Namespace carrying model/tokenizer paths, precision and
        quantization options, and benchmark controls (prompt, output length,
        warmup and averaging counts).
    """
    # Description previously said "Faster ChatGLM6B Demo" — a copy-paste
    # leftover from another demo; this script drives lyraLlama.
    parser = argparse.ArgumentParser(description="Faster lyraLlama Demo")

    parser.add_argument('--model-path', type=str, required=True,
                        help='Model Path, include config.ini and tokenizer files')
    parser.add_argument('--tokenizer-path', type=str, default=None)

    parser.add_argument(
        '--data-type', type=str, metavar='TYPE', default='fp16',
        choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
        help='The data type to inference. If None, the data type follows the '
        'checkpoint data type.')

    # Accept both spellings: examples/test.sh invokes this script with
    # --memopt_mode, while only --memopt-mode was declared originally.
    parser.add_argument(
        '--memopt-mode', '--memopt_mode', dest='memopt_mode',
        type=int, default=0, choices=[0, 1],
        help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
        ' 0: FP16 mode'
        ' 1: Use MEMOPT mode')

    parser.add_argument(
        '--quant-type', type=str, metavar='TYPE', default='int8',
        choices=['int4', 'int8'],
        help='The data type of quantization. Only used in MEMOPT.')

    parser.add_argument(
        '--kvqparams-fpath', type=str, required=False, default="",
        help='File path of kv quantized params.')

    parser.add_argument("--prompt", type=str, required=False)
    parser.add_argument("--max-output-length", type=int, default=512)
    parser.add_argument("--warmups", type=int, default=10)
    parser.add_argument("--avgnums", type=int, default=10)
    args = parser.parse_args()

    # Echo the parsed configuration for the benchmark log.
    print('\n=================== Arguments ===================')
    for k, v in vars(args).items():
        print(f' - {k.ljust(25, ".")}: {v}')
    print('=================================================')

    return args
49
+
50
+
51
def main():
    """Benchmark lyraLlama batch generation across several batch sizes."""
    args = get_args()

    # Build the inference engine from the CLI options.
    model = lyraLlama(args.model_path, args.tokenizer_path, args.data_type,
                      args.memopt_mode, args.quant_type, args.kvqparams_fpath)

    prompt_template = "Human: {}\n\nAssistant:"  # xverse
    # prompt_template = "<human>:{}\n<bot>:" # llama-ziya 13b

    prompt = prompt_template.format(args.prompt)

    test_batch_size = [1, 8, 16, 32, 64]  # 8, 16, 32, 64
    print("test_batch_size: ", test_batch_size)

    for i, bs in enumerate(test_batch_size):
        prompts = [prompt] * bs

        # Warm up the GPU so the timed runs are stable.
        for _ in range(args.warmups):
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0,
                repetition_penalty=1.0, do_sample=False)

        # Timed runs: average the wall-clock cost over args.avgnums iterations.
        start = perf_counter()
        for _ in range(args.avgnums):
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0,
                repetition_penalty=1.0, do_sample=False)
        cost = (perf_counter() - start) / args.avgnums

        # Throughput accounting over prompt + completion text.
        input_output_texts = [p + ' ' + g for p, g in zip(prompts, output_texts)]
        input_tokens = len(model.tokenizer.encode(prompt))
        tokens = sum(len(model.tokenizer.encode(t)) for t in input_output_texts)
        words = sum(len(t) for t in input_output_texts)

        avg_output_tokens = tokens / len(input_output_texts) - input_tokens
        print(
            f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Input tokens: {input_tokens}. Output tokens: {avg_output_tokens} Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
        )
        print(
            f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
        )

        # Show a handful of samples for the first batch size only.
        if i == 0:
            for k in range(bs):
                print(
                    f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
                if k > 2:
                    break


if __name__ == "__main__":
    main()
lyrallms/LyraLlamaPy/examples/batch_stream_demo.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import sys
3
+ from time import perf_counter
4
+
5
+ import sys
6
+ # import ipdb
7
+ sys.path.append('../')
8
+ import threading
9
+ import time
10
+
11
+ from lyra_llama import lyraLlama
12
+
13
+
14
def print_string(string, prev_seq_length=None, finish=False):
    """Stream `string` to the terminal character by character.

    The terminal is cleared first; the prefix already shown by the previous
    callback (length `prev_seq_length`) is re-emitted instantly and only the
    new suffix is animated. With `finish=True` the whole string is printed
    at once via print_list.
    """
    if finish:
        print_list([string])
        return

    # Reset the terminal before redrawing.
    print("\033c", end="")

    if prev_seq_length:
        # Re-print the previously shown part without any delay.
        print(string[:prev_seq_length], end='', flush=True)
        string = string[prev_seq_length:]

    for ch in string:
        print(ch, end='', flush=True)
        time.sleep(0.025)  # per-character pacing; tune as needed


def print_list(lines):
    """Clear the terminal, then print the given strings one per line."""
    print("\033c", end="")
    print('\n'.join(lines))
36
+
37
+
38
def get_args():
    """Collect CLI options for the streaming benchmark and echo them."""
    ap = argparse.ArgumentParser(description="Faster ChatGLM6B Demo")

    ap.add_argument('--model-path', type=str, required=True,
                    help='Model Path, include config.ini and tokenizer files')
    ap.add_argument('--tokenizer-path', type=str, default=None)
    ap.add_argument(
        '--data-type', type=str, metavar='TYPE', default='fp16',
        choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
        help='The data type to inference. If None, the data type follows the '
        'checkpoint data type.')
    ap.add_argument(
        '--memopt_mode', type=int, default=0, choices=[0, 1],
        help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
        ' 0: FP16 mode'
        ' 1: Use MEMOPT mode')
    ap.add_argument(
        '--quant-type', type=str, metavar='TYPE', default='int8',
        choices=['int4', 'int8'],
        help='The data type of quantization. Only used in MEMOPT.')
    ap.add_argument(
        '--kvqparams-fpath', type=str, required=False, default="",
        help='File path of kv quantized params.')
    ap.add_argument("--prompt", type=str, required=False)
    ap.add_argument("--max-output-length", type=int, default=512)
    ap.add_argument("--warmups", type=int, default=10)
    ap.add_argument("--avgnums", type=int, default=10)

    args = ap.parse_args()

    # Echo the parsed configuration.
    print('\n=================== Arguments ===================')
    for key, val in vars(args).items():
        print(f' - {key.ljust(25, ".")}: {val}')
    print('=================================================')

    return args
78
+
79
+
80
def main():
    """Drive lyraLlama streaming generation and render tokens as they arrive."""
    args = get_args()

    model = lyraLlama(args.model_path, args.tokenizer_path, args.data_type,
                      args.memopt_mode, args.quant_type, args.kvqparams_fpath)

    prompt_template = "Human: {}\n\nAssistant:"  # xverse
    # prompt_template = "<human>:{}\n<bot>:" # llama-ziya 13b

    prompt = prompt_template.format(args.prompt)

    test_batch_size = [1]  # 8, 16, 32, 64
    print("test_batch_size: ", test_batch_size)

    for bs in test_batch_size:
        prompts = [prompt] * bs

        # Warm up: run the stream to completion, discarding the output.
        for _ in range(args.warmups):
            for finish, output_texts in model.stream_generate(
                    prompts,
                    output_length=args.max_output_length,
                    top_k=30, top_p=0.85, temperature=1.0,
                    repetition_penalty=1.0, do_sample=False):
                pass

        start = perf_counter()
        for _ in range(args.avgnums):
            prev_sequence_lengths = None
            stream_counter = 0
            for finish, output_texts in model.stream_generate(
                    prompts,
                    output_length=args.max_output_length,
                    top_k=30, top_p=0.85, temperature=1.0,
                    repetition_penalty=1.0, do_sample=False):
                if len(output_texts) == 1:
                    # Single sequence: animate only the newly generated suffix.
                    print_string(output_texts[0], prev_sequence_lengths, finish)
                    prev_sequence_lengths = len(output_texts[0])
                else:
                    # Batched sequences: redraw all of them line by line.
                    print_list(output_texts)
                stream_counter += 1
        cost = (perf_counter() - start) / args.avgnums

        # Token/character accounting over prompt + completion.
        input_output_texts = [p + ' ' + g for p, g in zip(prompts, output_texts)]
        input_tokens = len(model.tokenizer.encode(prompt))
        tokens = sum(len(model.tokenizer.encode(t)) for t in input_output_texts)
        words = sum(len(t) for t in input_output_texts)

        # NOTE(review): computed but never reported, mirroring the original.
        avg_output_tokens = tokens / len(input_output_texts) - input_tokens


if __name__ == "__main__":
    main()
lyrallms/LyraLlamaPy/examples/random_batch_demo.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import json
3
+ import random
4
+ import numpy as np
5
+
6
+ from time import perf_counter
7
+
8
+ import sys
9
+ sys.path.append('../')
10
+ from lyra_llama import lyraLlama
11
+
12
+
13
def get_args():
    """Collect CLI options for the random-prompt benchmark and echo them."""
    ap = argparse.ArgumentParser(description="Faster ChatGLM6B Demo")

    ap.add_argument('--model-path', type=str, required=True,
                    help='Model Path, include config.ini and tokenizer files')
    ap.add_argument('--tokenizer-path', type=str, default=None)
    ap.add_argument(
        '--data-type', type=str, metavar='TYPE', default='fp16',
        choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
        help='The data type to inference. If None, the data type follows the '
        'checkpoint data type.')
    ap.add_argument(
        '--memopt_mode', type=int, default=0, choices=[0, 1],
        help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
        ' 0: FP16 mode'
        ' 1: Use MEMOPT mode')
    ap.add_argument(
        '--quant-type', type=str, metavar='TYPE', default='int8',
        choices=['int4', 'int8'],
        help='The data type of quantization. Only used in MEMOPT.')
    ap.add_argument(
        '--kvqparams-fpath', type=str, required=False, default="",
        help='File path of kv quantized params.')
    ap.add_argument("--prompt_filepath", type=str, required=True)
    ap.add_argument("--max-output-length", type=int, default=512)
    ap.add_argument("--warmups", type=int, default=10)
    ap.add_argument("--avgnums", type=int, default=10)

    args = ap.parse_args()

    # Echo the parsed configuration.
    print('\n=================== Arguments ===================')
    for key, val in vars(args).items():
        print(f' - {key.ljust(25, ".")}: {val}')
    print('=================================================')

    return args
53
+
54
+
55
def main():
    """Benchmark lyraLlama on randomly sampled prompts of varying length.

    Reads a JSON file whose first element holds a prompt template
    (`prompts`) and a list of content tuples (`contents`); samples `bs`
    tuples per run and reports token/word throughput per batch size.
    """
    args = get_args()

    model = lyraLlama(args.model_path, args.tokenizer_path, args.data_type,
                      args.memopt_mode, args.quant_type, args.kvqparams_fpath)

    with open(args.prompt_filepath, "rb") as f:
        input_datas = json.loads(f.read())

    used_input_data = input_datas[0]

    prompt_template = "Human: {}\n\nAssistant:"  # xverse
    # prompt_template = "<human>:{}\n<bot>:" # llama-ziya 13b

    test_batch_size = [1, 2, 4,]  # 8, 16, 32, 64
    print("test_batch_size: ", test_batch_size)

    for i, bs in enumerate(test_batch_size):
        all_use_prompts = []
        all_output_texts = []

        # warmup gpu
        for _ in range(args.warmups):
            # BUG FIX: random.choices takes the sample size as keyword `k`;
            # passing `bs` positionally supplied it as `weights` and raised
            # a TypeError at runtime.
            prompts = [prompt_template.format(used_input_data['prompts'].format(*x))
                       for x in random.choices(used_input_data['contents'], k=bs)]
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)

        all_cost_s = 0.0

        for _ in range(args.avgnums):
            prompts = [prompt_template.format(used_input_data['prompts'].format(*x))
                       for x in random.choices(used_input_data['contents'], k=bs)]
            all_use_prompts.extend(prompts)

            # Time only the generate() call, not the prompt sampling.
            start = perf_counter()
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
            all_cost_s += perf_counter() - start

            all_output_texts.extend(output_texts)

        cost = all_cost_s / args.avgnums

        # Throughput accounting over prompt + completion text.
        input_output_texts = [prompt + ' ' + gtext for prompt, gtext in zip(all_use_prompts, all_output_texts)]

        tokens = 0
        avg_input_tokens = np.mean([len(model.tokenizer.encode(prompt)) for prompt in all_use_prompts])

        words = 0
        for text in input_output_texts:
            tokens += len(model.tokenizer.encode(text))
            words += len(text)
        print(
            f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Avg Input tokens: {avg_input_tokens}. Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
        )
        print(
            f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
        )

        # Show a handful of samples for the first batch size only.
        if i == 0:
            for k in range(bs):
                print(
                    f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
                if k > 2:
                    break


if __name__ == "__main__":
    main()
123
+
lyrallms/LyraLlamaPy/examples/test.sh ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
export FMHA_VERSION=V2        # Set FMHA_VERSION=OFF to fall back to the legacy attention kernels
export KV_CACHE_DTYPE=DEFAULT # Set KV_CACHE_DTYPE=INT8 to enable KV-cache int8

model_path=ModelPath # Directory of the converted model (holds 1-gpu-fp16.bin etc.)

data_type=fp16       # Weight precision
memopt_mode=0        # MEMOPT mode: 0/1
quant_type="int8"    # Quantization precision: int4/int8
max_output_length=256
warmups=1
avgnums=1

# FIX: batch_demo.py declares --memopt-mode (hyphenated); the previous
# --memopt_mode spelling was rejected by argparse as an unrecognized argument.
python batch_demo.py --model-path $model_path\
    --tokenizer-path $model_path\
    --data-type $data_type\
    --memopt-mode $memopt_mode\
    --quant-type ${quant_type}\
    --max-output-length $max_output_length\
    --warmups $warmups\
    --avgnums $avgnums
lyrallms/LyraLlamaPy/examples/test_stream.sh ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
export FMHA_VERSION=V2        # Set to OFF to fall back to the legacy attention kernels
export KV_CACHE_DTYPE=DEFAULT # Set KV_CACHE_DTYPE=INT8 to enable KV-cache int8
export LYRA_STREAM_CB_STEP=30 # Number of steps between streaming callbacks

model_path=ModelPath # Directory of the converted model (holds 1-gpu-fp16.bin etc.)

data_type=fp16       # Weight precision
memopt_mode=0        # MEMOPT mode: 0/1
quant_type="int8"    # Quantization precision: int4/int8
max_output_length=256
warmups=1
avgnums=1

python batch_stream_demo.py --model-path $model_path\
    --tokenizer-path $model_path\
    --data-type $data_type\
    --memopt_mode $memopt_mode\
    --quant-type ${quant_type}\
    --max-output-length $max_output_length\
    --warmups $warmups\
    --avgnums $avgnums
lyrallms/LyraLlamaPy/examples/torch_benchmark.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer
2
+ from transformers import LlamaForCausalLM, AutoModelForCausalLM
3
+ from time import perf_counter
4
+ import torch
5
+ import argparse
6
+
7
+ def get_args():
8
+ parser = argparse.ArgumentParser(description="Torch model Demo")
9
+
10
+ parser.add_argument('--model-path', type=str, required=True,
11
+ help='Model Path, include config.ini and tokenizer files')
12
+ parser.add_argument('--tokenizer-path', type=str, default=None)
13
+
14
+ parser.add_argument("--prompt", type=str, required=False)
15
+ parser.add_argument("--max-output-length", type=int, default=512)
16
+ parser.add_argument("--warmups", type=int, default=10)
17
+ parser.add_argument("--avgnums", type=int, default=10)
18
+ args = parser.parse_args()
19
+
20
+ print('\n=================== Arguments ===================')
21
+ for k, v in vars(args).items():
22
+ print(f' - {k.ljust(25, ".")}: {v}')
23
+ print('=================================================')
24
+
25
+ return args
26
+
27
+ def main():
28
+ args = get_args()
29
+ device = torch.device("cuda")
30
+
31
+ prompt_template = "Human: {}\n\nAssistant:" # xverse
32
+ # prompt_template = "<human>:{}\n<bot>:" # llama-ziya 13b
33
+
34
+ prompt = prompt_template.format(args.prompt)
35
+
36
+ model = AutoModelForCausalLM.from_pretrained(args.model_path, torch_dtype=torch.float16, trust_remote_code=True).eval().to(device)
37
+ tokenizer = AutoTokenizer.from_pretrained(args.model_path, use_fast=False, trust_remote_code=True)
38
+
39
+ test_batch_size = [1, 8, 16, 32, 64]
40
+ print("test_batch_size: ", test_batch_size)
41
+
42
+ for i, bs in enumerate(test_batch_size):
43
+
44
+ prompts = [prompt] * bs
45
+
46
+ # warmup gpu
47
+ for _ in range(args.warmups):
48
+ input_ids = tokenizer(prompts, return_tensors="pt").input_ids.to(device)
49
+ generate_ids = model.generate(
50
+ input_ids,
51
+ max_new_tokens=args.max_output_length,
52
+ do_sample = False,
53
+ top_k = 30,
54
+ top_p = 0.85,
55
+ temperature = 1.0,
56
+ repetition_penalty=1.,
57
+ eos_token_id=2,
58
+ bos_token_id=1,
59
+ pad_token_id=0)
60
+
61
+ generate_ids = [output_ids[len(single_input_id):] for single_input_id, output_ids in zip(input_ids, generate_ids)]
62
+ outputs = tokenizer.batch_decode(generate_ids)
63
+
64
+ # test
65
+ start = perf_counter()
66
+ for _ in range(args.avgnums):
67
+ input_ids = tokenizer(prompts, return_tensors="pt").input_ids.to(device)
68
+ generate_ids = model.generate(
69
+ input_ids,
70
+ max_new_tokens=args.max_output_length,
71
+ do_sample = False,
72
+ top_k = 30,
73
+ top_p = 0.85,
74
+ temperature = 1.0,
75
+ repetition_penalty=1.,
76
+ eos_token_id=2,
77
+ bos_token_id=1,
78
+ pad_token_id=0)
79
+
80
+ generate_ids = [output_ids[len(single_input_id):] for single_input_id, output_ids in zip(input_ids, generate_ids)]
81
+ output_texts = tokenizer.batch_decode(generate_ids)
82
+
83
+ end = perf_counter()
84
+ cost = (end - start) / args.avgnums
85
+
86
+ # 计算吞吐量
87
+ input_output_texts = [prompt + ' ' + gtext for prompt, gtext in zip(prompts, output_texts)]
88
+ tokens = 0
89
+ input_tokens = len(tokenizer.encode(prompt))
90
+ words = 0
91
+ for text in input_output_texts:
92
+ tokens += len(tokenizer.encode(text))
93
+ words += len(text)
94
+
95
+ avg_output_tokens = tokens / len(input_output_texts) - input_tokens
96
+ print(
97
+ f"\nBatch Size: {bs}, All tokens: {tokens}. Input tokens: {input_tokens}. Output tokens: {avg_output_tokens} Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
98
+ )
99
+ print(
100
+ f"Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
101
+ )
102
+
103
+ if i == 0:
104
+ for k in range(bs):
105
+ print(
106
+ f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
107
+ if k > 2:
108
+ break
109
+
110
+ if __name__ == "__main__":
111
+ main()
lyrallms/LyraLlamaPy/examples/varlen_prompts.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [
2
+ "歌曲名:《幸福万年长》;歌手名:汤灿;歌曲描述:汤灿的幸福万年长创作背景:2001年,汤灿决定推出一首能够贴近听众和潮流的民歌。为此,她邀请了创作过歌曲《为你》《快乐老家》的音乐人浮克合作,邀其担任该曲的制作工作。虽然浮克此前一直从事流行歌曲的工作,但他其实也是一位衷情民歌风格的音乐人,于是两人一拍即合,合作了该曲。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
3
+ "歌曲名:《小丑面具》;歌手名:韩庚;歌曲描述:韩庚的小丑面具的歌曲鉴赏:韩庚在这首歌化身为“小丑”,带上面具调侃这社会上的表面功夫,用幽默又神经质的方式批判愈形冷酷的人心。在这首独特的电子舞曲当中,韩庚尝试了各种不同的发声方式,冷笑、哭喊、啜泣……甚至用声乐融合鬼魅的方法演唱,让人不禁陷入他建构的虚幻氛围而随之起舞。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
4
+ "《Bela Lugosi's Dead 》是英国后朋克乐队Bauhaus的首张单曲,于 1979 年 8 月 6 日在Small Wonder厂牌上发行。[4]它通常被认为是第一张哥特式摇滚唱片。\n1979 年 1 月 26 日,“Bela Lugosi's Dead”在威灵伯勒的贝克录音室进行了六个小时的“录音室现场”录制。这是他们在乐队成立六周后一起录制的第一首歌曲。[6]所有四位乐队成员都被认为是这首歌的作者:主唱彼得·墨菲、吉他手丹尼尔·阿什、鼓手凯文·哈斯金斯和贝斯手大卫·J (大卫·哈斯金斯)。David J 声称这首歌的歌词是他写的。[5] “Bela Lugosi's Dead”的替代版本还包括他们下一首单曲“ Dark Entries ”的早期演示录音的一部分。\n\n在同一场会议中还录制了另外四首歌曲:“Boys”;“咬我的臀部”;“Some Faces”和斯卡雷鬼曲调“Harry”,这是关于Blondie主唱Deborah Harry的。[7] [8]关于这次会议,凯文·哈斯金斯 (Kevin Haskins) 说,“那里有力量流行音乐,还有斯卡。我们试图找到我们的声音。” [9]\n\n在那次录制期间录制的歌曲中(除了“Bela Lugosi's Dead”),只有“Harry”获得了官方发行;1982年作为单曲“ Kick in the Eye ”的B面。1979 年晚些时候在 Beck Studios 录制的《Boys》版本被用作原版单曲《Bela Lugosi's Dead》的 B 面。[10]其余曲目,包括“Boys”的原始录音,一直未发行,直到 2018 年The Bela Session以黑胶唱片和CD 形式发行,并可供乐队数字下载。[11]在额外的曲目中,《经典摇滚》杂志写道:“其余的材料发现乐队正在摸索方向,甚至触及了斯卡。”\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
5
+ "歌曲名:《仓颉》;歌手名:五月天;歌曲描述:五月天的仓颉的歌曲鉴赏:五月天 仓颉(2张)《仓颉》是一首写在文明即将消失前的情诗,陈信宏的词写得颇有味道。《仓颉》这样淡淡的歌曲,或许不够大气,但是陈信宏真诚的演唱足以令人感动,而且《仓颉》的歌词也写得很有哲理。这首歌曲朗朗上口的旋律和诗意的文字使得它很适合在KTV演唱。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:"
6
+ ]
lyrallms/LyraLlamaPy/lyra_llama/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .lyra_llama import lyraLlama
lyrallms/LyraLlamaPy/lyra_llama/config.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dataclasses
2
+ from typing import Optional
3
+
4
+
5
@dataclasses.dataclass
class LyraLlamaParam:
    """Default hyper-parameters for the lyraLlama engine.

    Defaults correspond to a 13B-class Llama configuration; override any
    field at construction time. Raises ValueError for an out-of-range
    shared_contexts_ratio.
    """
    num_heads: int = 40
    size_per_head: int = 128
    inter_size: int = 13824
    num_layers: int = 40
    vocab_size: int = 39424
    start_id: Optional[int] = 1   # BOS token id
    end_id: Optional[int] = 2     # EOS token id
    tensor_para_size: int = 1
    pipeline_para_size: int = 1
    remove_padding: bool = True
    shared_contexts_ratio: float = 1.0  # must lie in [0.0, 1.0]
    layernorm_eps: float = 1e-6
    weights_data_type: str = "fp16"
    rotary_embedding: int = 128
    use_gptj_residual: bool = False

    def __post_init__(self):
        # Reject ratios outside the valid range early.
        # FIX: error message previously said "shared_context_ratio",
        # which does not match the actual field name.
        if not 0.0 <= self.shared_contexts_ratio <= 1.0:
            raise ValueError(
                f'Got an invalid value of shared_contexts_ratio '
                f'{self.shared_contexts_ratio} - range: [0.0, 1.0]')

    def asdict(self):
        """Return the parameters as a plain dict."""
        return dataclasses.asdict(self)


# Module-level defaults and the path of the compiled lyraOp extension.
LYRA_LLAMA_PARAM = LyraLlamaParam()
LIB_SO_PATH = '/usr/lib/ftlib/lyraOp.cpython-38-x86_64-linux-gnu.so'
lyrallms/LyraLlamaPy/lyra_llama/lyra_llama.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import configparser
4
+ import pathlib
5
+ import typing
6
+ import os
7
+
8
+ import torch
9
+ import transformers
10
+ from torch.nn.utils.rnn import pad_sequence
11
+
12
+ from .config import LYRA_LLAMA_PARAM, LIB_SO_PATH
13
+ from .model import LlamaModel
14
+
15
+
16
class lyraLlama:
    """High-level front end for the LYRA highly-optimized LLaMA runtime.

    Loads a converted checkpoint (``1-gpu-fp16.bin``) together with its
    HuggingFace tokenizer and exposes batch (:meth:`generate`) and streaming
    (:meth:`stream_generate`) text generation.
    """

    def __init__(self, model_path, tokenizer_path=None, dtype='fp16', memopt_mode=0, quant_dtype="int4", kvqparams_fpath="") -> None:
        """
        Args:
            model_path: directory with the converted weights and, optionally,
                a ``config.ini`` describing the architecture.
            tokenizer_path: tokenizer directory; defaults to ``model_path``.
            dtype: inference data type; ``None`` means "read from config".
            memopt_mode: memory-optimization mode flag forwarded to the runtime.
            quant_dtype: weight quantization type used by MEMOPT mode.
            kvqparams_fpath: path to calibrated KV-cache quantization scales.
        """
        self.model_path = model_path
        self.tokenizer_path = tokenizer_path
        self.kvqparams_fpath = kvqparams_fpath

        self.dtype = dtype

        self.memopt_mode = memopt_mode
        self.quant_data_type = quant_dtype

        self.model, self.tokenizer = self.load_model_and_tokenizer()
        print("Got model and tokenizer")

    def load_model_and_tokenizer(self):
        """Build the (model, tokenizer) pair.

        Model hyper-parameters come from ``config.ini`` when present;
        otherwise the defaults in ``LYRA_LLAMA_PARAM`` are used.

        Returns:
            tuple: ``(LlamaModel, tokenizer)``.
        """
        if self.tokenizer_path is None:
            tokenizer_path = self.model_path
        else:
            tokenizer_path = self.tokenizer_path

        print(f'Loading tokenizer from {tokenizer_path}')
        tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_path)

        checkpoint_path = pathlib.Path(self.model_path)
        config_path = checkpoint_path / 'config.ini'

        if config_path.exists():
            # Read model params from config.
            cfg = configparser.ConfigParser()
            cfg.read(config_path)
            model_name = 'llama'
            inference_data_type = self.dtype
            if inference_data_type is None:  # fixed: identity check, not '== None'
                inference_data_type = cfg.get(model_name, "weight_data_type")
            model_args = dict(
                head_num=cfg.getint(model_name, 'head_num'),
                # kv_head_num fallback 0 means "same as head_num" downstream.
                kv_head_num=cfg.getint(model_name, 'kv_head_num', fallback=0),
                size_per_head=cfg.getint(model_name, "size_per_head"),
                inter_size=cfg.getint(model_name, 'inter_size'),
                layer_num=cfg.getint(model_name, "num_layer"),
                rotary_embedding_dim=cfg.getint(model_name, 'rotary_embedding'),
                layernorm_eps=cfg.getfloat(model_name, 'layernorm_eps'),
                vocab_size=cfg.getint(model_name, "vocab_size"),
                start_id=cfg.getint(model_name, "start_id"),
                end_id=cfg.getint(model_name, "end_id"),
                weights_data_type=cfg.get(model_name, "weight_data_type"),
                tensor_para_size=cfg.getint(model_name, "tensor_para_size"),
                inference_data_type=inference_data_type,
                rope_theta=cfg.getfloat(model_name, "rope_theta", fallback=10000.0))
        else:
            # No config.ini: use compiled-in defaults.
            inference_data_type = self.dtype
            if inference_data_type is None:
                inference_data_type = LYRA_LLAMA_PARAM.weights_data_type
            model_args = dict(head_num=LYRA_LLAMA_PARAM.num_heads,
                              size_per_head=LYRA_LLAMA_PARAM.size_per_head,
                              inter_size=LYRA_LLAMA_PARAM.inter_size,
                              layer_num=LYRA_LLAMA_PARAM.num_layers,
                              rotary_embedding_dim=LYRA_LLAMA_PARAM.rotary_embedding,
                              layernorm_eps=LYRA_LLAMA_PARAM.layernorm_eps,
                              vocab_size=LYRA_LLAMA_PARAM.vocab_size,
                              start_id=LYRA_LLAMA_PARAM.start_id or tokenizer.bos_token_id,
                              end_id=LYRA_LLAMA_PARAM.end_id or tokenizer.eos_token_id,
                              weights_data_type=LYRA_LLAMA_PARAM.weights_data_type,
                              tensor_para_size=LYRA_LLAMA_PARAM.tensor_para_size,
                              inference_data_type=inference_data_type)

        # update common parameters
        model_args.update(dict(
            lib_path=LIB_SO_PATH,
            model_path=os.path.join(self.model_path, "1-gpu-fp16.bin"),
            kvqparams_fpath=self.kvqparams_fpath,  # kv quantized scales (calibrated)
            max_seq_len=0,  # for position seq embedding
            pipeline_para_size=LYRA_LLAMA_PARAM.pipeline_para_size,
            use_gptj_residual=LYRA_LLAMA_PARAM.use_gptj_residual,
            memopt_mode=self.memopt_mode,
            quant_data_type=self.quant_data_type
            # shared_contexts_ratio=LYRA_LLAMA_PARAM.shared_contexts_ratio,
        ))

        print('[LYRA][INFO] Load Our LYRA Highly Optimized LLaMA model')
        for k, v in model_args.items():
            print(f' - {k.ljust(25, ".")}: {v}')

        # Check sanity and consistency between the model and tokenizer.
        # Fixed: 'tensor_para_size' was listed twice in this checklist.
        checklist = ['head_num', 'size_per_head', 'vocab_size', 'layer_num',
                     'tensor_para_size', 'weights_data_type']
        if None in [model_args[k] for k in checklist]:
            none_params = [p for p in checklist if model_args[p] is None]
            print(f'[LYRA][WARNING] Found None parameters {none_params}. They must '
                  f'be provided either by config file or CLI arguments.')
        if model_args['start_id'] != tokenizer.bos_token_id:
            print('[LYRA][WARNING] Given start_id is not matched with the bos token '
                  'id of the pretrained tokenizer.')
        if model_args['end_id'] not in (tokenizer.pad_token_id, tokenizer.eos_token_id):
            print('[LYRA][WARNING] Given end_id is not matched with neither pad '
                  'token id nor eos token id of the pretrained tokenizer.')

        print(f'Loading model from {self.model_path}')
        model = LlamaModel(**model_args)
        return model, tokenizer

    def generate(self, prompts: typing.List[str] | str,
                 output_length: int = 512,
                 beam_width: int = 1,
                 top_k: typing.Optional[torch.IntTensor] = 1,
                 top_p: typing.Optional[torch.FloatTensor] = 1.0,
                 beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
                 temperature: typing.Optional[torch.FloatTensor] = 1.0,
                 len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
                 repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
                 presence_penalty: typing.Optional[torch.FloatTensor] = None,
                 min_length: typing.Optional[torch.IntTensor] = None,
                 bad_words_list: typing.Optional[torch.IntTensor] = None,
                 do_sample: bool = False,
                 return_output_length: bool = False,
                 return_cum_log_probs: int = 0):
        """Generate completions for one prompt or a batch of prompts.

        Returns the decoded generated text only (input prompt stripped).

        NOTE(review): ``presence_penalty``, ``min_length`` and
        ``bad_words_list`` are accepted for interface compatibility but are
        currently NOT forwarded to the model.
        """
        if isinstance(prompts, str):
            prompts = [prompts, ]
        inputs = prompts

        batch_size = len(inputs)
        ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
        ones_float = torch.ones(size=[batch_size], dtype=torch.float32)

        # we must encode the raw prompt text one by one in order to compute the length of the original text.
        input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
        input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
        # after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
        input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)

        random_seed = None
        if do_sample:
            random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)

        outputs = self.model(start_ids=input_token_ids,
                             start_lengths=input_lengths,
                             output_len=output_length,
                             beam_width=beam_width,
                             top_k=top_k * ones_int,
                             top_p=top_p * ones_float,
                             beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
                             temperature=temperature * ones_float,
                             len_penalty=len_penalty * ones_float,
                             repetition_penalty=repetition_penalty * ones_float,
                             random_seed=random_seed,
                             return_output_length=return_output_length,
                             return_cum_log_probs=return_cum_log_probs)

        if return_cum_log_probs > 0:
            outputs = outputs[0]  # output_token_ids.

        # Slice the generated token ids of the 1st beam result.
        # output = input tokens + generated tokens.
        output_token_ids = [out[0, length:].cpu()
                            for out, length in zip(outputs, input_lengths)]

        output_texts = self.tokenizer.batch_decode(
            output_token_ids, skip_special_tokens=True)

        return output_texts

    def stream_generate(self, prompts: typing.List[str] | str,
                        output_length: int = 512,
                        beam_width: int = 1,
                        top_k: typing.Optional[torch.IntTensor] = 1,
                        top_p: typing.Optional[torch.FloatTensor] = 1.0,
                        beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
                        temperature: typing.Optional[torch.FloatTensor] = 1.0,
                        len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
                        repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
                        presence_penalty: typing.Optional[torch.FloatTensor] = None,
                        min_length: typing.Optional[torch.IntTensor] = None,
                        bad_words_list: typing.Optional[torch.IntTensor] = None,
                        do_sample: bool = False,
                        return_output_length: bool = False,
                        return_cum_log_probs: int = 0):
        """Stream completions: yields ``(finish, output_texts)`` tuples.

        ``finish`` is True on the final yield. See :meth:`generate` for
        parameter semantics; the same unused-parameter caveat applies.
        """
        if isinstance(prompts, str):
            prompts = [prompts, ]

        inputs = prompts

        batch_size = len(inputs)
        ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
        ones_float = torch.ones(size=[batch_size], dtype=torch.float32)

        # we must encode the raw prompt text one by one in order to compute the length of the original text.
        input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
        input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
        # after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
        input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)

        random_seed = None
        if do_sample:
            random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)

        for finish, output_ids, sequence_length, output_cum_log_probs in self.model.stream_forward(start_ids=input_token_ids,
                                                                                                   start_lengths=input_lengths,
                                                                                                   output_len=output_length,
                                                                                                   beam_width=beam_width,
                                                                                                   top_k=top_k * ones_int,
                                                                                                   top_p=top_p * ones_float,
                                                                                                   beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
                                                                                                   temperature=temperature * ones_float,
                                                                                                   len_penalty=len_penalty * ones_float,
                                                                                                   repetition_penalty=repetition_penalty * ones_float,
                                                                                                   random_seed=random_seed,
                                                                                                   return_output_length=return_output_length,
                                                                                                   return_cum_log_probs=return_cum_log_probs):

            # Slice the generated token ids of the 1st beam result.
            # output = input tokens + generated tokens.
            output_token_ids = [out[0, length:].cpu()
                                for out, length in zip(output_ids, input_lengths)]
            output_texts = self.tokenizer.batch_decode(
                output_token_ids, skip_special_tokens=True)

            yield finish, output_texts
lyrallms/LyraLlamaPy/lyra_llama/model.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import print_function
2
+
3
+ import copy
4
+ import os
5
+ import pathlib
6
+ import typing
7
+
8
+ import numpy as np
9
+ import torch
10
+ import torch.distributed as dist
11
+ import torch.nn as nn
12
+
13
+ import time
14
+ from queue import Queue
15
+ from threading import Thread
16
+
17
+ import sys
18
+ sys.path.append('/usr/lib/lyralib')
19
+ import lyraOp
20
+
21
+ str_type_map = {"fp32": torch.float32, "fp16": torch.float16, "bf16": torch.bfloat16}
22
+
23
class LlamaModel(nn.Module):
    """PyTorch wrapper around the C++ ``lyraOp.LyraLlama`` inference engine.

    Owns the native model handle, the MPI-based tensor/pipeline-parallel
    setup, and a queue/thread based streaming protocol driven by a callback
    registered with the native runtime.
    """

    def __init__(self,
                 head_num,
                 size_per_head,
                 inter_size,
                 vocab_size,
                 rotary_embedding_dim,
                 start_id, end_id, layer_num,
                 max_seq_len: int,
                 layernorm_eps,
                 tensor_para_size: int,
                 pipeline_para_size: int,
                 use_gptj_residual,
                 lib_path: typing.Union[str, pathlib.Path],
                 model_path,
                 kvqparams_fpath: str = "",
                 memopt_mode: int = 0,
                 quant_data_type: str = "int8",
                 inference_data_type: str = "fp16",
                 weights_data_type: typing.Union[str, np.dtype] = np.float32,
                 kv_head_num = 0,
                 rope_theta = 10000.0):
        """Construct the native model and move it onto this rank's GPU.

        Note: ``lib_path`` is currently unused here; the ``lyraOp`` extension
        is imported at module scope via the hard-coded ``sys.path`` entry.
        """
        super().__init__()
        self.head_num = head_num
        self.kv_head_num = kv_head_num
        self.size_per_head = size_per_head
        self.inter_size = inter_size
        self.vocab_size = vocab_size
        self.rotary_embedding_dim = rotary_embedding_dim
        self.start_id = start_id
        self.end_id = end_id
        self.max_seq_len = max_seq_len
        self.layer_num = layer_num
        self.use_gptj_residual = use_gptj_residual
        self.layernorm_eps = layernorm_eps
        self.memopt_mode = memopt_mode
        self.quant_data_type = quant_data_type
        self.rope_theta = rope_theta

        # multi-gpu params
        self.tensor_para_size = tensor_para_size
        self.pipeline_para_size = pipeline_para_size
        self.build_model = False
        self.weights_data_type = weights_data_type
        self.inference_data_type = inference_data_type

        # queue for streaming
        self.que = Queue()
        self.threads = [None] * self.tensor_para_size

        assert torch.cuda.is_available(), "CUDA is required for this model."

        assert head_num % tensor_para_size == 0, "head_num must be a multiple of tensor_para_size."
        assert layer_num % pipeline_para_size == 0, "layer_num must be a multiple of pipeline_para_size."

        # Load the C++ model into Pytorch model.
        # torch.classes.load_library(os.path.abspath(lib_path))

        # Prepare for tensor/pipeline parallel
        # NOTE(review): bare except silently tolerates an already-initialized
        # (or failed) process group — confirm this is intentional.
        try:
            dist.init_process_group(backend='mpi')
        except:
            print("[INFO] WARNING: Have initialized the process group")
        self.rank = dist.get_rank()
        self.device_count = torch.cuda.device_count()
        # Map ranks round-robin onto the local GPUs.
        self.device = self.rank % self.device_count
        torch.cuda.set_device(self.device)

        world_size = dist.get_world_size()
        # print(tensor_para_size * pipeline_para_size)
        assert world_size == tensor_para_size * pipeline_para_size, "tensor_para_size * pipeline_para_size must be equal to world_size."

        self.tensor_para_rank = self.rank % self.tensor_para_size
        self.pipeline_para_rank = self.rank // self.tensor_para_size

        # kv_head_num == 0 means "no grouped KV heads configured": use MHA.
        if self.kv_head_num == 0:
            self.kv_head_num = self.head_num

        # Instantiate the native engine; argument order must match the
        # lyraOp.LyraLlama C++ binding exactly.
        self.model = lyraOp.LyraLlama(
            self.head_num, self.size_per_head, self.inter_size,
            self.layer_num,
            self.vocab_size,
            self.rotary_embedding_dim,
            self.layernorm_eps,
            self.start_id, self.end_id,
            self.tensor_para_size, self.pipeline_para_size,
            self.max_seq_len,
            self.use_gptj_residual,
            self.memopt_mode,
            self.quant_data_type,
            model_path,
            kvqparams_fpath,
            self.weights_data_type,
            self.inference_data_type,
            self.kv_head_num,
            self.rope_theta)

        self.build_model = True
        torch.cuda.empty_cache()

    def forward(self,
                start_ids: torch.Tensor,
                start_lengths: torch.Tensor,
                output_len,
                beam_width=1,
                top_k: torch.Tensor = None,
                top_p: torch.Tensor = None,
                beam_search_diversity_rate: torch.Tensor = None,
                temperature: torch.Tensor = None,
                len_penalty: torch.Tensor = None,
                repetition_penalty: torch.Tensor = None,
                random_seed: torch.Tensor = None,
                return_output_length=False,
                return_cum_log_probs=0):
        """Run one blocking generation pass through the native engine.

        Returns ``output_ids`` by default; optionally also output lengths
        and cumulative log-probs depending on the flags.
        """

        input_len = start_ids.size(1)
        assert input_len > 0, "input len must be larger than zero. For an unconditional case, use start_id as the first token."

        # Inputs to device
        input_ids = start_ids.cuda(self.device)
        input_lengths = start_lengths.cuda(self.device)
        # outputs: output_ids, output_lengths, output_cum_log_probs (optional)
        outputs = self.model.forward(input_ids,
                                     input_lengths,
                                     output_len,
                                     beam_width, # optional, can be None
                                     top_k, # optional, can be None
                                     top_p, # optional, can be None
                                     beam_search_diversity_rate, # optional, can be None
                                     temperature, # optional, can be None
                                     len_penalty, # optional, can be None
                                     repetition_penalty, # optional, can be None
                                     random_seed, # optional, can be None
                                     return_cum_log_probs) # optional, can be None

        if return_cum_log_probs == 0:
            output_ids, output_lengths = outputs
        else:
            output_ids, output_lengths, output_cum_log_probs = outputs
        if return_output_length:
            if return_cum_log_probs > 0:
                return output_ids, output_lengths, output_cum_log_probs
            else:
                return output_ids, output_lengths
        else:
            return output_ids

    def set_input_tensor(self, input_tensor):
        """Set input tensor to be used instead of forward()'s input.

        When doing pipeline parallelism the input from the previous
        stage comes from communication, not from the input, so the
        model's forward_step_func won't have it. This function is thus
        used by internal code to bypass the input provided by the
        forward_step_func"""
        self.input_tensor = input_tensor


    def _forward_callback(self, output_ids, seq_lengths, ctx):
        # Called by the native runtime after each decode step; enqueue a
        # partial (not-finished) snapshot for the streaming consumer.
        self.que.put((False, (list(output_ids), list(seq_lengths))))

    def _tensormap_dict_to_py_dict(self, tensormap_dict: lyraOp.TensorMap):
        """map torch tensormap to py dict."""
        ret = dict()
        for k, v in tensormap_dict.items():
            ret[k] = v

        return ret


    def stream_forward(self,
                       start_ids: torch.Tensor,
                       start_lengths: torch.Tensor,
                       output_len,
                       beam_width=1,
                       top_k: torch.Tensor = None,
                       top_p: torch.Tensor = None,
                       beam_search_diversity_rate: torch.Tensor = None,
                       temperature: torch.Tensor = None,
                       len_penalty: torch.Tensor = None,
                       repetition_penalty: torch.Tensor = None,
                       random_seed: torch.Tensor = None,
                       return_output_length=False,
                       return_cum_log_probs=0):
        """Generator variant of :meth:`forward`.

        Runs the native forward pass on a daemon thread; the per-step
        callback pushes partial outputs into ``self.que``, which this
        generator drains and yields as
        ``(finish, output_ids, sequence_length_or_None, None)`` tuples.
        Cumulative log-probs are never produced here (always None).
        """

        # Register callback func to model
        self.model.registerCallback(self._forward_callback)

        batch_size = start_ids.size(0)
        input_len = start_ids.size(1)
        assert input_len > 0, "input len must be larger than zero. For an unconditional case, use start_id as the first token."

        # Inputs to device
        input_ids = start_ids.cuda(self.device)
        input_lengths = start_lengths.cuda(self.device)
        # outputs: output_ids, output_lengths, output_cum_log_probs (optional)

        # Init thread of model inference
        def _func(enque_output):
            # Blocking native call; intermediate steps arrive via the
            # registered callback, the final result is enqueued here with
            # finish=True.
            outputs = self.model.forward(input_ids,
                                         input_lengths,
                                         output_len,
                                         beam_width, # optional, can be None
                                         top_k, # optional, can be None
                                         top_p, # optional, can be None
                                         beam_search_diversity_rate, # optional, can be None
                                         temperature, # optional, can be None
                                         len_penalty, # optional, can be None
                                         repetition_penalty, # optional, can be None
                                         random_seed, # optional, can be None
                                         return_cum_log_probs) # optional, can be None
            if enque_output:
                self.que.put((True, (outputs[0].tolist(), outputs[1].tolist())))

        # Start thread of model inference
        t = Thread(target=_func,
                   args=(True,),
                   daemon=True)
        t.start()
        self.threads[0] = t

        # Generate streaming output
        while True:
            # Keep only the most recent snapshot to avoid yielding stale steps.
            while self.que.qsize() > 1:
                self.que.get()

            finish, outputs = self.que.get()

            output_ids, sequence_length = outputs
            output_ids_tensor = torch.tensor(output_ids).view(batch_size, beam_width, -1)
            sequence_length_tensor = torch.tensor(sequence_length).view(batch_size, beam_width)

            if return_output_length:
                # NOTE(review): both branches below yield the same tuple —
                # cumulative log-probs are not supported in streaming mode.
                if return_cum_log_probs > 0:
                    yield finish, output_ids_tensor, sequence_length_tensor, None
                else:
                    yield finish, output_ids_tensor, sequence_length_tensor, None
            else:
                yield finish, output_ids_tensor, None, None

            if finish:
                # Join worker thread(s) and drain any leftover snapshots.
                for t in self.threads:
                    t.join()
                while self.que.qsize() > 0:
                    self.que.get()
                break

        self.model.unRegisterCallback()
lyrallms/README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## `lyrallms` 能力矩阵
2
+
3
+ | |Attn方法| |MEMOPT模式| |KVCache精度| |
4
+ |:----|:----|:----|:----|:----|:----|:----|
5
+ | |Unfused|FlashAttn2|W4A16|W8A16|FP16|INT8|
6
+ |LLaMA|✅|✅|✅|✅|✅|✅|
7
+ |XVERSE|✅|✅|✅|✅|✅|✅|
8
+ |Baichuan 1/2 (7B及13B)|✅|❌|✅|✅|✅|❌|
9
+ |ChatGLM|✅|❌|❌|✅|✅|❌|
10
+ |BELLE|✅|❌|❌|✅|✅|❌|
11
+
12
+ ## `lyrallms` 使用
13
+
14
+ ### 校准 (Calibration)
15
+
16
+ 参考`calibration`文件夹下的[README.md](./calibration/README.md) 。
17
+
18
+ ### Python转换及调用加速模型
19
+
20
+ #### LLaMA
21
+
22
+ 参考`LyraLlamaPy`文件夹下的[README.md](./LyraLlamaPy/README.md) 。
23
+
24
+ #### Baichuan
25
+
26
+ 参考`LyraBaichuanPy`文件夹下的[README.md](./LyraBaichuanPy/README.md) 。
27
+
models/.gitkeep ADDED
File without changes
models/Baichuan/Baichuan2_13B_Base/1-gpu-fp16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cee68cc4fc1b5d25cd39f3bd64ae7ee25f15035892cbbedb10e0b980d9afd87f
3
+ size 27793336320
models/Baichuan/Baichuan2_13B_Base/config.ini ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [baichuan]
2
+ model_name = Baichuan2_13B_base
3
+ head_num = 40
4
+ size_per_head = 128
5
+ inter_size = 13696
6
+ num_layer = 40
7
+ rotary_embedding = 128
8
+ layernorm_eps = 1e-06
9
+ vocab_size = 125696
10
+ start_id = 1
11
+ end_id = 2
12
+ tensor_para_size = 1
13
+ weight_data_type = fp16
14
+
models/Baichuan/Baichuan2_13B_Base/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "architectures": [
4
+ "BaichuanForCausalLM"
5
+ ],
6
+ "auto_map": {
7
+ "AutoConfig": "configuration_baichuan.BaichuanConfig",
8
+ "AutoModelForCausalLM": "modeling_baichuan.BaichuanForCausalLM"
9
+ },
10
+ "bos_token_id": 1,
11
+ "eos_token_id": 2,
12
+ "gradient_checkpointing": false,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 5120,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 13696,
17
+ "model_max_length": 4096,
18
+ "model_type": "baichuan",
19
+ "num_attention_heads": 40,
20
+ "num_hidden_layers": 40,
21
+ "pad_token_id": 0,
22
+ "rms_norm_eps": 1e-06,
23
+ "tie_word_embeddings": false,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.29.2",
26
+ "use_cache": true,
27
+ "vocab_size": 125696
28
+ }
models/Baichuan/Baichuan2_13B_Base/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": true
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": true
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": true
22
+ },
23
+ "pad_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": true
29
+ }
30
+ }
models/Baichuan/Baichuan2_13B_Base/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79452955be6b419a65984273a9f08af86042e1c2a75ee3ba989cbf620a133cc2
3
+ size 2001107
models/Baichuan/Baichuan2_13B_Base/tokenizer_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "auto_map": {
5
+ "AutoTokenizer": [
6
+ "tokenization_baichuan.BaichuanTokenizer",
7
+ null
8
+ ]
9
+ },
10
+ "bos_token": {
11
+ "__type": "AddedToken",
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": true,
15
+ "rstrip": false,
16
+ "single_word": true
17
+ },
18
+ "clean_up_tokenization_spaces": false,
19
+ "eos_token": {
20
+ "__type": "AddedToken",
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": true
26
+ },
27
+ "model_max_length": 4096,
28
+ "pad_token": {
29
+ "__type": "AddedToken",
30
+ "content": "<unk>",
31
+ "lstrip": false,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": true
35
+ },
36
+ "sp_model_kwargs": {},
37
+ "tokenizer_class": "BaichuanTokenizer",
38
+ "unk_token": {
39
+ "__type": "AddedToken",
40
+ "content": "<unk>",
41
+ "lstrip": false,
42
+ "normalized": true,
43
+ "rstrip": false,
44
+ "single_word": true
45
+ }
46
+ }
models/Baichuan/Baichuan2_13B_Chat/1-gpu-fp16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:182aeae174da2d23af945c93ab92a6ba48ccf9bbc02474096ba950dd7e17bdd2
3
+ size 27793336320
models/Baichuan/Baichuan2_13B_Chat/config.ini ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [baichuan]
2
+ model_name = Baichuan2_13B_chat
3
+ head_num = 40
4
+ size_per_head = 128
5
+ inter_size = 13696
6
+ num_layer = 40
7
+ rotary_embedding = 128
8
+ layernorm_eps = 1e-06
9
+ vocab_size = 125696
10
+ start_id = 1
11
+ end_id = 2
12
+ tensor_para_size = 1
13
+ weight_data_type = fp16
14
+
models/Baichuan/Baichuan2_13B_Chat/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "architectures": [
4
+ "BaichuanForCausalLM"
5
+ ],
6
+ "auto_map": {
7
+ "AutoConfig": "configuration_baichuan.BaichuanConfig",
8
+ "AutoModelForCausalLM": "modeling_baichuan.BaichuanForCausalLM"
9
+ },
10
+ "tokenizer_class": "BaichuanTokenizer",
11
+ "bos_token_id": 1,
12
+ "eos_token_id": 2,
13
+ "gradient_checkpointing": false,
14
+ "hidden_act": "silu",
15
+ "hidden_size": 5120,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 13696,
18
+ "model_max_length": 4096,
19
+ "model_type": "baichuan",
20
+ "num_attention_heads": 40,
21
+ "num_hidden_layers": 40,
22
+ "pad_token_id": 0,
23
+ "rms_norm_eps": 1e-06,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.29.2",
27
+ "use_cache": true,
28
+ "vocab_size": 125696
29
+ }
models/Baichuan/Baichuan2_13B_Chat/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": true
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": true
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": true
22
+ },
23
+ "pad_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": true
29
+ }
30
+ }
models/Baichuan/Baichuan2_13B_Chat/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79452955be6b419a65984273a9f08af86042e1c2a75ee3ba989cbf620a133cc2
3
+ size 2001107
models/Baichuan/Baichuan2_13B_Chat/tokenizer_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "auto_map": {
5
+ "AutoTokenizer": [
6
+ "tokenization_baichuan.BaichuanTokenizer",
7
+ null
8
+ ]
9
+ },
10
+ "bos_token": {
11
+ "__type": "AddedToken",
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": true,
15
+ "rstrip": false,
16
+ "single_word": true
17
+ },
18
+ "clean_up_tokenization_spaces": false,
19
+ "eos_token": {
20
+ "__type": "AddedToken",
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": true
26
+ },
27
+ "model_max_length": 4096,
28
+ "pad_token": {
29
+ "__type": "AddedToken",
30
+ "content": "<unk>",
31
+ "lstrip": false,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": true
35
+ },
36
+ "sp_model_kwargs": {},
37
+ "tokenizer_class": "BaichuanTokenizer",
38
+ "unk_token": {
39
+ "__type": "AddedToken",
40
+ "content": "<unk>",
41
+ "lstrip": false,
42
+ "normalized": true,
43
+ "rstrip": false,
44
+ "single_word": true
45
+ }
46
+ }
models/Baichuan/Baichuan2_7B_Base/1-gpu-fp16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f83ab15fe6cd2d93be29248e87051db3b62921d3093922d7e73c9817bc0409b
3
+ size 15011946496
models/Baichuan/Baichuan2_7B_Base/config.ini ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [baichuan]
2
+ model_name = Baichuan2_7B_base
3
+ head_num = 32
4
+ size_per_head = 128
5
+ inter_size = 11008
6
+ num_layer = 32
7
+ rotary_embedding = 128
8
+ layernorm_eps = 1e-06
9
+ vocab_size = 125696
10
+ start_id = 1
11
+ end_id = 2
12
+ tensor_para_size = 1
13
+ weight_data_type = fp16
14
+
models/Baichuan/Baichuan2_7B_Base/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BaichuanForCausalLM"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_baichuan.BaichuanConfig",
7
+ "AutoModelForCausalLM": "modeling_baichuan.BaichuanForCausalLM"
8
+ },
9
+ "bos_token_id": 1,
10
+ "eos_token_id": 2,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 11008,
15
+ "max_position_embeddings": 4096,
16
+ "model_max_length": 4096,
17
+ "model_type": "baichuan",
18
+ "num_attention_heads": 32,
19
+ "num_hidden_layers": 32,
20
+ "pad_token_id": 0,
21
+ "rms_norm_eps": 1e-06,
22
+ "_from_model_config": true,
23
+ "tie_word_embeddings": false,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.29.2",
26
+ "use_cache": true,
27
+ "vocab_size": 125696
28
+ }