# This is a combination of 22 commits.
# This is the 1st commit message:
Init
# This is the commit message #2:
[Enhancement] Update README
# This is the commit message #3:
Upload SM80 so files
# This is the commit message #4:
Track model files
# This is the commit message #5:
Update gitignore
# This is the commit message #6:
Upload converted XVERSE 13B Chat model files
# This is the commit message #7:
Upload converted Baichuan2 13B Chat model files
# This is the commit message #8:
Upload converted Baichuan2 7B model files
# This is the commit message #9:
Upload converted Baichuan2 13B Base model files
# This is the commit message #10:
Upload converted Baichuan 7B Base model files
# This is the commit message #11:
Upload converted Baichuan 13B Chat model files
# This is the commit message #12:
Upload converted Baichuan 13B Base model files
# This is the commit message #13:
Upload converted LLaMA Ziya 13B model files
# This is the commit message #14:
Upload converted Yi 6B model files
# This is the commit message #15:
Update README and .gitattributes
# This is the commit message #16:
Remove SM70 so files
# This is the commit message #17:
Update README
# This is the commit message #18:
Update SM80 Cuda11 so file
# This is the commit message #19:
Update README
# This is the commit message #20:
Update Python codes
# This is the commit message #21:
Update Python codes
# This is the commit message #22:
Update gitattributes
- .gitattributes +2 -0
- .gitignore +8 -0
- README.md +157 -0
- lyralib/.gitattributes +3 -0
- lyralib/.gitkeep +0 -0
- lyralib/sm80/cuda11/lyraOp.cpython-38-x86_64-linux-gnu.so +3 -0
- lyralib/sm80/cuda12/lyraOp.cpython-38-x86_64-linux-gnu.so +3 -0
- lyrallms/LyraBaichuanPy/README.md +88 -0
- lyrallms/LyraBaichuanPy/configuration_baichuan.py +69 -0
- lyrallms/LyraBaichuanPy/examples/README.md +105 -0
- lyrallms/LyraBaichuanPy/examples/batch_demo.py +103 -0
- lyrallms/LyraBaichuanPy/examples/batch_stream_demo.py +101 -0
- lyrallms/LyraBaichuanPy/examples/random_batch_demo.py +116 -0
- lyrallms/LyraBaichuanPy/examples/varlen_prompts.json +6 -0
- lyrallms/LyraBaichuanPy/generation_utils.py +83 -0
- lyrallms/LyraBaichuanPy/lyra_baichuan/__init__.py +1 -0
- lyrallms/LyraBaichuanPy/lyra_baichuan/config.py +34 -0
- lyrallms/LyraBaichuanPy/lyra_baichuan/lyra_baichuan.py +391 -0
- lyrallms/LyraBaichuanPy/lyra_baichuan/model.py +258 -0
- lyrallms/LyraBaichuanPy/lyra_baichuan/tokenization_baichuan.py +232 -0
- lyrallms/LyraLlamaPy/README.md +75 -0
- lyrallms/LyraLlamaPy/examples/README.md +114 -0
- lyrallms/LyraLlamaPy/examples/batch_demo.py +109 -0
- lyrallms/LyraLlamaPy/examples/batch_stream_demo.py +135 -0
- lyrallms/LyraLlamaPy/examples/random_batch_demo.py +123 -0
- lyrallms/LyraLlamaPy/examples/test.sh +20 -0
- lyrallms/LyraLlamaPy/examples/test_stream.sh +21 -0
- lyrallms/LyraLlamaPy/examples/torch_benchmark.py +111 -0
- lyrallms/LyraLlamaPy/examples/varlen_prompts.json +6 -0
- lyrallms/LyraLlamaPy/lyra_llama/__init__.py +1 -0
- lyrallms/LyraLlamaPy/lyra_llama/config.py +34 -0
- lyrallms/LyraLlamaPy/lyra_llama/lyra_llama.py +232 -0
- lyrallms/LyraLlamaPy/lyra_llama/model.py +270 -0
- lyrallms/README.md +27 -0
- models/.gitkeep +0 -0
- models/Baichuan/Baichuan2_13B_Base/1-gpu-fp16.bin +3 -0
- models/Baichuan/Baichuan2_13B_Base/config.ini +14 -0
- models/Baichuan/Baichuan2_13B_Base/config.json +28 -0
- models/Baichuan/Baichuan2_13B_Base/special_tokens_map.json +30 -0
- models/Baichuan/Baichuan2_13B_Base/tokenizer.model +3 -0
- models/Baichuan/Baichuan2_13B_Base/tokenizer_config.json +46 -0
- models/Baichuan/Baichuan2_13B_Chat/1-gpu-fp16.bin +3 -0
- models/Baichuan/Baichuan2_13B_Chat/config.ini +14 -0
- models/Baichuan/Baichuan2_13B_Chat/config.json +29 -0
- models/Baichuan/Baichuan2_13B_Chat/special_tokens_map.json +30 -0
- models/Baichuan/Baichuan2_13B_Chat/tokenizer.model +3 -0
- models/Baichuan/Baichuan2_13B_Chat/tokenizer_config.json +46 -0
- models/Baichuan/Baichuan2_7B_Base/1-gpu-fp16.bin +3 -0
- models/Baichuan/Baichuan2_7B_Base/config.ini +14 -0
- models/Baichuan/Baichuan2_7B_Base/config.json +28 -0
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+models/* filter=lfs diff=lfs merge=lfs -text
+lyralib/* filter=lfs diff=lfs merge=lfs -text
@@ -0,0 +1,8 @@
*~
*.o
*build*/
__pycache__/
.vscode
.idea
.cache
**/.ipynb_checkpoints/
@@ -1,3 +1,160 @@
---
license: mit
language: en
tags:
- LLM
- LLaMA
- Baichuan
- Baichuan2
- XVERSE
---

# Model Card for lyraLLMs

## Introduction

We have released **lyraLLMs**, a highly optimized and easy-to-use inference engine for LLMs.

**lyraLLMs** runs on the following NVIDIA GPUs:
- Volta (V100)
- Turing (T4)
- Ampere (A100/A10)
- Ada Lovelace (RTX 4090, etc.)

**lyraLLMs** supports many popular HuggingFace models, including:
- [BELLE](https://huggingface.co/TMElyralab/lyraBELLE)
- [ChatGLM](https://huggingface.co/TMElyralab/lyraChatGLM)
- LLaMA
- LLaMA 2
- XVERSE
- Baichuan 1 & 2

**lyraLLMs** is fast, memory-efficient and easy to use, offering:
- State-of-the-art throughput (up to 7K tokens/s for LLaMA 13B)
- Memory-efficient attention with FlashAttention2
- Quantization: MEMOPT mode (W8A16, W4A16), KVCache Int8
- An easy-to-use Python API for serving LLMs
- Streaming outputs

If you like our work and would like to join us, feel free to drop a line at [email protected]

## Speed

### Settings
* Evaluated in tokens/s (input + output)
* Tested on A100 40G, CUDA 12.0
* MEMOPT mode and KVCache Int8 enabled

### Throughputs

### XVERSE-13B-Chat

#### Input
北京的景点:故宫、天坛、万里长城等。\n深圳的景点:

| Version | Batch Size 1 | Batch Size 64 | Batch Size 128 | Batch Size 256 | Batch Size 512 |
| --- | --- | --- | --- | --- | --- |
| Torch 2.1.0 | 52.9 | 2308.1 | OOM | | |
| lyraXVERSE | 200.4 | 4624.8 | 5759.7 | 6075.6 | 5733 |

### Baichuan2-7B-Base

#### Input
北京的景点:登鹳雀楼->王之涣\n夜雨寄北->

| Version | Batch Size 1 | Batch Size 8 | Batch Size 16 | Batch Size 32 | Batch Size 64 |
| --- | --- | --- | --- | --- | --- |
| Torch 2.0.1 | 41.2 | 323.2 | 640.0 | 1256.8 | 2231.0 |
| lyraBaichuan | 125.9 | 948.1 | 1749.3 | 2974.0 | 4370.1 |

### Baichuan2-13B-Base

#### Input
北京的景点:登鹳雀楼->王之涣\n夜雨寄北->

| Version | Batch Size 1 | Batch Size 8 | Batch Size 16 | Batch Size 32 | Batch Size 64 |
| --- | --- | --- | --- | --- | --- |
| Torch 2.0.1 | 40.9 | 307.9 | 555.6 | 1010.4 | 1601.0 |
| lyraBaichuan | 80.0 | 568.2 | 1124.4 | 1942.6 | 2828.0 |

### Yi-6B

#### Input
\# write the quick sort algorithm

| Version | Batch Size 1 | Batch Size 8 | Batch Size 16 | Batch Size 32 | Batch Size 64 |
| --- | --- | --- | --- | --- | --- |
| Torch 2.1.0 | 31.4 | 247.5 | 490.4 | 987.2 | 1796.3 |
| lyraLLaMA | 93.8 | 735.6 | 2339.8 | 3020.9 | 4630.8 |

### Yi-34B

Due to VRAM limitations, we cannot profile the throughput of Yi-34B on A100 40G using Torch.

#### Input
Let me tell you an interesting story about cat Tom and mouse Jerry,

| Version | Batch Size 1 | Batch Size 8 | Batch Size 16 | Batch Size 32 | Batch Size 64 |
| --- | --- | --- | --- | --- | --- |
| lyraLLaMA | 52.5 | 399.4 | 753.0 | 1138.2 | 1926.2 |

## Usage

### Environment (Docker recommended)

- For CUDA 11.X: we recommend `nvcr.io/nvidia/pytorch:22.12-py3`
- For CUDA 12.0: we recommend `nvcr.io/nvidia/pytorch:23.02-py3`

```bash
docker pull nvcr.io/nvidia/pytorch:23.02-py3
docker run --rm -it --gpus all -v ./:/lyraLLMs nvcr.io/nvidia/pytorch:23.02-py3

pip install -r requirements.txt
```

### Convert Models

We have released multiple optimized models converted from the original HuggingFace ones:
- ChatGLM-6B
- XVERSE-13B-Chat
- LLaMA-Ziya-13B
- Baichuan-7B, Baichuan-13B-Base, Baichuan-13B-Chat, Baichuan2-7B-Base, Baichuan2-7B-Chat, Baichuan2-13B-Base and lyraBaichuan2-13B-Chat
- Yi-6B

Feel free to contact us if you would like a finetuned version of these LLMs converted.

### Inference

Refer to [README.md](./lyrallms/README.md) for inference of converted models with **lyraLLMs**.

### Python Demo

```python
from lyra_llama import lyraLlama

model_path = 'XXX'  # directory containing the converted model weights, config and tokenizer files
data_type = 'fp16'
memopt_mode = 0     # set memopt_mode=1 to run inference in MEMOPT mode

model = lyraLlama(model_path, data_type, memopt_mode)

prompts = '列出3个不同的机器学习算法,并说明它们的适用范围.'
prompts = [prompts,] * 64

output_texts = model.generate(prompts, output_length=150, do_sample=False, top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0)
print(output_texts)
```
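The throughput figures in the Speed section are counted over input plus output tokens. As a rough, non-authoritative sketch of how such a figure could be reproduced with the API above (loading a HuggingFace tokenizer from the converted directory is an assumption for counting only, not part of lyraLLMs itself):

```python
import time

from transformers import AutoTokenizer  # assumption: an HF tokenizer for the same model loads from this directory
from lyra_llama import lyraLlama

model_path = 'XXX'  # converted model directory, as above
model = lyraLlama(model_path, 'fp16', 0)
tokenizer = AutoTokenizer.from_pretrained(model_path)

prompts = ['北京的景点:故宫、天坛、万里长城等。\n深圳的景点:'] * 64

start = time.time()
output_texts = model.generate(prompts, output_length=150, do_sample=False,
                              top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0)
elapsed = time.time() - start

# tokens/s over input + output, matching the Settings section above
n_tokens = sum(len(tokenizer.encode(t)) for t in prompts + list(output_texts))
print(f'{n_tokens / elapsed:.1f} tokens/s')
```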

## Citation
```bibtex
@Misc{lyraLLMs2024,
  author =       {Kangjian Wu, Zhengtao Wang, Yibo Lu, Haoxiong Su, Bin Wu},
  title =        {lyraLLMs: A highly optimized and easy-to-use inference engine for LLMs},
  howpublished = {\url{https://huggingface.co/TMElyralab/lyraLLMs}},
  year =         {2024}
}
```

## Report bug
- Start a discussion to report any bugs: https://huggingface.co/TMElyralab/lyraLLMs/discussions
- Report bugs with a `[bug]` mark in the title.
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:213543e928e2727580c3f1dcbddfaf56b7a778ec7dfb29f4b3b66ab0009bfd0b
size 41

File without changes

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f50ec3dbe390bffc052e754a294614025fb423b23c6bc8a26a8dadf52d1b29c2
size 233586480

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8b8189f3321cd0578da920f15d4b74e7a96be7556731e1de3cb313b8700e3c45
size 234352496
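These hunks are Git LFS pointer files: the actual `lyralib` blobs live in LFS storage, and each pointer records only the sha256 and size of the tracked file (which is why `lyralib/*` is listed in .gitattributes above). A minimal sketch of checking a downloaded blob against its pointer (file names are placeholders, not files in this repo):

```python
import hashlib
from pathlib import Path

def check_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    """Compare a downloaded blob against the oid/size recorded in its LFS pointer."""
    fields = dict(line.split(" ", 1)
                  for line in Path(pointer_path).read_text().splitlines() if line.strip())
    expected_oid = fields["oid"].strip().split(":", 1)[1]   # strip the "sha256:" prefix
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

# e.g. check_lfs_pointer("lyraOp.pointer", "lyraOp.cpython-38-x86_64-linux-gnu.so")
```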
@@ -0,0 +1,88 @@
## Model and environment

### Environment setup
```shell
# Pull the NGC PyTorch base image
docker pull nvcr.io/nvidia/pytorch:23.02-py3

# Start the container
docker run --gpus all -itd --rm --name lyrallms_cu12 nvcr.io/nvidia/pytorch:23.02-py3
docker exec -it lyrallms_cu12 bash
```

After fetching the code, install the dependencies:
```shell
pip install -r requirements.txt
```

Copy the [so file](../../lyralib/sm80) for your CUDA version from `lyralib` into `/usr/lib/lyralib`.

## Inference usage

### Core usage snippet

```python
from lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B

model_path = 'XXX'      # directory containing the converted model weights, config and tokenizer files
tokenizer_path = 'XXX'
data_type = 'fp16'      # inference precision
memopt_mode = 1

# Load the accelerated model. The C++ internals are hidden away; they rely on the precompiled
# .so libraries under /usr/lib/ftlib, which are already baked into the image.
# Loading takes a while, so unpack the downloaded weights onto a local disk.
# For a Baichuan1/2-7B model, use lyraBaichuan7B(model_path, tokenizer_path, data_type, memopt_mode) instead.
model = lyraBaichuan13B(model_path, tokenizer_path, data_type, memopt_mode)

# Input. Multiple inputs can be batched: prompts accepts a list. To simulate multiple inputs
# we simply replicate one prompt 32 times, giving a batch size of 32.
prompts = "登鹳雀楼->王之涣\n夜雨寄北->"
prompts = [prompts,]*32

# Generation. The maximum length is adjustable (64 here); the batch stops when the model
# emits the end token or reaches the maximum length.
output_texts = model.generate(prompts, output_length=64, do_sample=False, top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0)

# Inspect the outputs. The input string also takes part in the computation and is counted
# when evaluating throughput, but for readability each sample in output_texts has the
# input string stripped from it.
print(output_texts)

# Example output
>>> Inputs: 登鹳雀楼->王之涣
夜雨寄北->
>>> Outputs:
李商隐
望洞庭->刘禹锡
黄鹤楼送孟浩然之广陵->李白
登岳阳楼->杜甫
秋词->刘禹锡
枫桥夜泊->张继
饮湖上初晴后雨->苏轼
浪淘沙->刘禹锡
```

### Demo scripts

`examples/batch_demo.py` contains a usage example similar to the one above and runs a simple speed test. Since everyone counts tokens differently, we evaluate speed directly by character count; you can derive your own token-based figures from the number of characters generated.

More test scripts and usage details can be found in the [README.md](./examples/README.md) under `examples`, e.g.:
- Batch inference
- Variable-length batch inference
- Batch streaming inference

## Custom model weights

The conversion script `parse_model_params.py` converts HuggingFace-format Baichuan1/2 weights into the per-layer weights required by the accelerated model. A `-model_name` argument is provided so you can fill in a model name and generate a distinguishable config.ini.

```shell
python parse_model_params.py -i your_model_dir -o output_dir -t_g 1 -i_g 1 -weight_data_type "fp16" -model_name "baichuan2-13b"
```

The script also copies `tokenizer.model`, `special_tokens_map.json` and `tokenizer_config.json` from the adjacent tokenizer_source directory into output_dir, so that the tokenizer of the accelerated Baichuan model can be initialised directly later on.

The converted weights are written one file per parameter under `output_dir/{i_g}-gpu-{weight_data_type}`; use `merge_bin.py` to merge the many bin files into a single one.

```shell
layer_num=40 # 13B->40, 7B->32
python merge_bin.py -i model_dir/{i_g}-gpu-{weight_data_type} -o output_dir -l ${layer_num}
```

Finally, copy the five files `config.ini`, `config.json`, `tokenizer.model`, `special_tokens_map.json` and `tokenizer_config.json` into output_dir.
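The final copy step can be scripted if you prefer; the sketch below only assumes the directory layout described above, and the paths are placeholders rather than anything fixed by the converter:

```python
# A sketch of the final copy step above -- adjust src/dst to your actual layout.
import shutil
from pathlib import Path

src = Path("model_dir/1-gpu-fp16")   # wherever parse_model_params.py / merge_bin.py left these files
dst = Path("output_dir")
dst.mkdir(parents=True, exist_ok=True)
for name in ["config.ini", "config.json", "tokenizer.model",
             "special_tokens_map.json", "tokenizer_config.json"]:
    shutil.copy(src / name, dst / name)
```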
@@ -0,0 +1,69 @@
# Copyright 2023 Baichuan Inc. All Rights Reserved.

# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging


logger = logging.get_logger(__name__)


class BaichuanConfig(PretrainedConfig):
    model_type = "baichuan"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=125696,
        hidden_size=4096,
        intermediate_size=11008,
        num_hidden_layers=32,
        num_attention_heads=32,
        hidden_act="silu",
        max_position_embeddings=4096,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        tie_word_embeddings=False,
        z_loss_weight=0,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.z_loss_weight = z_loss_weight
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
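This is the standard HuggingFace-style config class the converter starts from; its defaults above are the 7B-style values. A minimal sketch of instantiating it for a larger model (the 13B-style numbers below are illustrative assumptions, not the official Baichuan-13B config, which should be read from that model's config.json):

```python
from configuration_baichuan import BaichuanConfig

# Illustrative, assumed hyperparameters only.
config = BaichuanConfig(
    vocab_size=125696,
    hidden_size=5120,
    intermediate_size=13696,
    num_hidden_layers=40,
    num_attention_heads=40,
)
print(config.model_type, config.num_hidden_layers, config.rms_norm_eps)
```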
@@ -0,0 +1,105 @@
## Test scripts

### Batch inference

```sh
export FMHA_VERSION=OFF
export KV_CACHE_DTYPE=DEFAULT # set KV_CACHE_DTYPE=INT8 to use KVCache Int8

model_path=ModelPath # folder containing the converted model
data_type=fp16       # precision the weights are saved in
memopt_mode=0        # MEMOPT mode: 0/1
quant_type="int8"    # quantization precision: int4/int8
max_output_length=256
warmups=1
avgnums=1

python batch_demo.py --model-path $model_path\
                     --tokenizer-path $model_path\
                     --data-type $data_type\
                     --memopt_mode $memopt_mode\
                     --quant-type ${quant_type}\
                     --max-output-length $max_output_length\
                     --warmups $warmups\
                     --avgnums $avgnums
```

### Batch streaming inference

```sh
export FMHA_VERSION=OFF
export KV_CACHE_DTYPE=DEFAULT # set KV_CACHE_DTYPE=INT8 to use KVCache Int8
export LYRA_STREAM_CB_STEP=30 # number of steps between streaming callbacks

model_path=ModelPath # folder containing the converted model
data_type=fp16       # precision the weights are saved in
memopt_mode=0        # MEMOPT mode: 0/1
quant_type="int8"    # quantization precision: int4/int8
max_output_length=256
warmups=1
avgnums=1

python batch_stream_demo.py --model-path $model_path\
                            --tokenizer-path $model_path\
                            --data-type $data_type\
                            --memopt_mode $memopt_mode\
                            --quant-type ${quant_type}\
                            --max-output-length $max_output_length\
                            --warmups $warmups\
                            --avgnums $avgnums
```

### Variable-length batch inference

```sh
export FMHA_VERSION=OFF
export KV_CACHE_DTYPE=DEFAULT # set KV_CACHE_DTYPE=INT8 to use KVCache Int8

model_path=ModelPath # folder containing the converted model
prompt_filepath=varlen_prompts.json # file of variable-length prompts to sample from
data_type=fp16       # precision the weights are saved in
memopt_mode=0        # MEMOPT mode: 0/1
quant_type="int8"    # quantization precision: int4/int8
max_output_length=256
warmups=1
avgnums=1

python random_batch_demo.py --model-path $model_path\
                            --tokenizer-path $model_path\
                            --data-type $data_type\
                            --memopt_mode $memopt_mode\
                            --quant-type ${quant_type}\
                            --prompt_filepath $prompt_filepath\
                            --max-output-length $max_output_length\
                            --warmups $warmups\
                            --avgnums $avgnums
```
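The same switches can also be set from Python before the lyra modules are used, which is convenient in notebooks. A minimal sketch follows; the environment-variable names are the ones used by the scripts above, everything else mirrors the core usage snippet, and since it is not documented here whether the variables are read at import time or at model-load time, we set them before both to be safe:

```python
import os

# Same switches as the shell scripts above; set them before the model is loaded.
os.environ["FMHA_VERSION"] = "OFF"
os.environ["KV_CACHE_DTYPE"] = "INT8"   # or "DEFAULT"

from lyra_baichuan import lyraBaichuan13B

# (model_path, tokenizer_path, data_type, memopt_mode) -- placeholders as in the scripts above
model = lyraBaichuan13B("ModelPath", "ModelPath", "fp16", 1)
print(model.generate(["登鹳雀楼->王之涣\n夜雨寄北->"], output_length=64, do_sample=False,
                     top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0))
```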

## Prompt examples

### Short sequences
```
北京的景点:故宫、天坛、万里长城等。\n深圳的景点:
```
```
今天天气大概 25度,有点小雨,吹着风,我想去户外散步,应该穿什么样的衣服 裤子鞋子搭配
```

### 1K sequence
```
《Bela Lugosi's Dead 》是英国后朋克乐队Bauhaus的首张单曲,于 1979 年 8 月 6 日在Small Wonder厂牌上发行。[4]它通常被认为是第一张哥特式摇滚唱片。\n1979 年 1 月 26 日,“Bela Lugosi's Dead”在威灵伯勒的贝克录音室进行了六个小时的“录音室现场”录制。这是他们在乐队成立六周后一起录制的第一首歌曲。[6]所有四位乐队成员都被认为是这首歌的作者:主唱彼得·墨菲、吉他手丹尼尔·阿什、鼓手凯文·哈斯金斯和贝斯手大卫·J (大卫·哈斯金斯)。David J 声称这首歌的歌词是他写的。[5] “Bela Lugosi's Dead”的替代版本还包括他们下一首单曲“ Dark Entries ”的早期演示录音的一部分。\n\n在同一场会议中还录制了另外四首歌曲:“Boys”;“咬我的臀部”;“Some Faces”和斯卡雷鬼曲调“Harry”,这是关于Blondie主唱Deborah Harry的。[7] [8]关于这次会议,凯文·哈斯金斯 (Kevin Haskins) 说,“那里有力量流行音乐,还有斯卡。我们试图找到我们的声音。” [9]\n\n在那次录制期间录制的歌曲中(除了“Bela Lugosi's Dead”),只有“Harry”获得了官方发行;1982年作为单曲“ Kick in the Eye ”的B面。1979 年晚些时候在 Beck Studios 录制的《Boys》版本被用作原版单曲《Bela Lugosi's Dead》的 B 面。[10]其余曲目,包括“Boys”的原始录音,一直未发行,直到 2018 年The Bela Session以黑胶唱片和CD 形式发行,并可供乐队数字下载。[11]在额外的曲目中,《经典摇滚》杂志写道:“其余的材料发现乐队正在摸索方向,甚至触及了斯卡。”\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:
```

### 2K sequence
```
根据所给刑事法律文书中的案情描述,预测被告人被判的罪名。你需要从这些罪名中选择最恰当的一项:妨害公务,寻衅滋事,盗窃、侮辱尸体,危险物品肇事,非法采矿,组织、强迫、引诱、容留、介绍卖淫,开设赌场,聚众斗殴,绑架,非法持有毒品,销售假冒注册商标的商品,容留他人吸毒,假冒注册商标,交通肇事,破坏电力设备,组织卖淫,合同诈骗,走私武器、弹药,抢劫,非法处置查封、扣押、冻结的财产,以危险方法危害公共安全,过失投放危险物质,非法制造、买卖、运输、邮寄、储存枪支、弹药、爆炸物,伪造、变造、买卖武装部队公文、证件、印章,持有、使用假币,重婚,聚众冲击国家机关,生产、销售伪劣农药、兽药、化肥、种子,收买被拐卖的妇女、儿童,聚众哄抢,重大劳动安全事故,侵占,包庇毒品犯罪分子,虚报注册资本,违法发放贷款,制造、贩卖、传播淫秽物品,窝藏、包庇,帮助毁灭、伪造证据,放火,强奸,非法携带枪支、弹药、管制刀具、危险物品危及公共安全,伪造、变造金融票证,爆炸,玩忽职守,对非国家工作人员行贿,伪造、倒卖伪造的有价票证,私分国有资产,非法收购、运输、加工、出售国家重点保护植物、国家重点保护植物制品,生产、销售假药,挪用特定款物,过失致人死亡,走私国家禁止进出口的货物、物品,非法制造、买卖、运输、储存危险物质,洗钱,骗取贷款、票据承兑、金融票证,非法买卖制毒物品,非法买卖、运输、携带、持有毒品原植物种子、幼苗,生产、销售有毒、有害食品,滥用职权,招收公务员、学生徇私舞弊,诬告陷害,非法获取国家秘密,非法行医,非法收购、运输、出售珍贵、濒危野生动物、珍贵、濒危野生动物制品,非法出售发票,行贿,高利转贷,非法吸收公众存款,传播淫秽物品,非法进行节育手术,盗伐林木,聚众扰乱社会秩序,走私、贩卖、运输、制造毒品,滥伐林木,赌博,非法经营,生产、销售不符合安全标准的食品,提供侵入、非法控制计算机信息系统程序、工具,倒卖文物,窃取、收买、非法提供信用卡信息,盗掘古文化遗址、古墓葬,协助组织卖淫,破坏广播电视设施、公用电信设施,走私普通货物、物品,逃税,破坏监管秩序,失火,受贿,组织、领导、参加黑社会性质组织,票据诈骗,非法制造、销售非法制造的注册商标标识,侵犯著作权,伪造、变造、买卖国家机关公文、证件、印章,徇私舞弊不征、少征税款,强迫劳动,贷款诈骗,劫持船只、汽车,诈骗,非法种植毒品原植物,非法狩猎,挪用资金,非法收购、运输盗伐、滥伐的林木,出售、购买、运输假币,抢夺,虐待被监管人,窝藏、转移、收购、销售赃物,破坏计算机信息系统,制作、复制、出版、贩卖、传播淫秽物品牟利,拒不支付劳动报酬,盗窃、抢夺枪支、弹药、爆炸物,强迫他人吸毒,走私珍贵动物、珍贵动物制品,虐待,非法获取公民个人信息,破坏交通设施,非法转让、倒卖土地使用权,非法捕捞水产品,非法占用农用地,非法制造、出售非法制造的发票,非法持有、私藏枪支、弹药,集资诈骗,强迫卖淫,伪造公司、企业、事业单位、人民团体印章,利用影响力受贿,编造、故意传播虚假恐怖信息,介绍贿赂,传播性病,拐卖妇女、儿童,倒卖车票、船票,窝藏、转移、隐瞒毒品、毒赃,徇私舞弊不移交刑事案件,过失损坏广播电视设施、公用电信设施,动植物检疫徇私舞弊,破坏交通工具,猥亵儿童,挪用公款,伪造货币,冒充军人招摇撞骗,非法采伐、毁坏国家重点保护植物,故意毁坏财物,非法拘禁,招摇撞骗,伪造、变造居民身份证,徇私枉法,非法生产、买卖警用装备,掩饰、隐瞒犯罪所得、犯罪所得收益,生产、销售伪劣产品,破坏生产经营,帮助犯罪分子逃避处罚,贪污,投放危险物质,持有伪造的发票,危险驾驶,妨害作证,非法猎捕、杀害珍贵、濒危野生动物,重大责任事故,诽谤,虚开发票,引诱、教唆、欺骗他人吸毒,脱逃,扰乱无线电通讯管理秩序,保险诈骗,非法生产、销售间谍专用器材,非法组织卖血,强迫交易,串通投标,破坏易燃易爆设备,传授犯罪方法,妨害信用卡管理,拐骗儿童,单位行贿,打击报复证人,拒不执行判决、裁定,经济犯,金融凭证诈骗,虚开增值税专用发票、用于骗取出口退税、抵扣税款发票,走私废物,组织、领导传销活动,单位受贿,盗窃、抢夺枪支、弹药、爆炸物、危险物质,过失以危险方法危害公共安全,过失致人重伤,引诱、容留、介绍卖淫,遗弃,走私,信用卡诈骗,对单位行贿,故意杀人,聚众扰乱公共场��秩序、交通秩序,盗窃,故意伤害,非法侵入住宅,强制猥亵、侮辱妇女,伪证,污染环境,巨额财产来源不明,非国家工作人员受贿,侮辱,隐匿、故意销毁会计凭证、会计帐簿、财务会计报告,过失损坏武器装备、军事设施、军事通信,敲诈勒索,职务侵占。\n经审理查明:2013年9月底的一天晚上,被告人陆某德酒后经过沭阳县某镇某村张某荣家时,发现张某荣家没有人,即用石头砸破张某荣家房门玻璃,打开房门进入张某荣家中。因进入张某荣时被房门遗留的玻璃划伤,被告人陆某德在张某荣家北屋门和北屋东首间墙面遗留两处血迹。2014年1月7日,被告人陆某德被公安民警从其家中传唤到案,并如实供述自己的罪行。上述事实,有公诉机关提交的,经过庭审质证的,且均具有证据证明效力的以下证据予以证明:被告人陆某德供述其非法侵入他人住宅的时间、地点、经过等事实。该供述得到了被害人张某荣的陈述、证人周某花、李某华等人的证言、法庭科学DNA检验鉴定书、现场勘验检查笔录、现场图、现场照片等证据予以证实,足以认定。刑事判决书证明证明了被告人陆某德有前科;公安机关出具的“发破案经过”及“抓获经过”证明了本案案发及被告人陆某德的归案情况。\n
```

### 4K sequence
```
<context>/*\n * Implement the \"Falling Rocks\" game in the text console. \n * A small dwarf stays at the bottom of the screen and can \n * move left and right (by the arrows keys). A number of rocks \n * of different sizes and forms constantly fall down and you \n * need to avoid a crash.\n * Rocks are the symbols ^, @, *, &, +, %, $, #, !, ., ;, - distributed \n * with appropriate density. The dwarf is (O). \n * Ensure a constant game speed by Thread.Sleep(150).\n * Implement collision detection and scoring system.\n*/\n\nusing System;\nusing System.Threading;\nusing System.Collections.Generic;\nusing System.Threading.Tasks;\n\nclass FallingRocks\n{\n struct Position\n {\n public int X, Y;\n public string symbol;\n public ConsoleColor color;\n\n public Position(int x, int y, string symbol, ConsoleColor color)\n {\n this.X = x;\n this.Y = y;\n this.symbol = symbol;\n this.color = color;\n }\n }\n\n static void Main()\n {\n Thread oThread = new Thread(new ThreadStart(Mainn));\n Thread aThread = new Thread(new ThreadStart(Clr));\n \n aThread.Start();\n oThread.Start();\n oThread.Join();\n aThread.Join();\n }\n\n static void Clr()\n {\n while (true)\n {\n Thread.Sleep(10);\n Console.Clear();\n }\n }\n static void Mainn()\n {\n //Random generator for rocks color, position and symbol\n Random randomGenerator = new Random();\n \n //Sleep time for the game loop\n double sleepTime = 150;\n //Console settings\n Console.CursorVisible = false;\n Console.BufferHeight = Console.WindowHeight;\n \n //number of rocks in the Array rocks\n int rocksCount = 0;\n\n //array with the symbols of the rocks\n string[] symbols = new string[] { \"^\", \"@\", \"*\", \"&\", \"+\", \"%\", \"$\", \"#\", \"!\", \".\", \";\" };\n \n //array with colors for the rocks\n ConsoleColor[] colors = new ConsoleColor[] {ConsoleColor.Yellow, ConsoleColor.White, ConsoleColor.Gray};\n \n //array with rocks\n Position[] rocks = new Position[200];\n \n //position for the dwarf\n Position dwarf = new Position(10, Console.WindowHeight - 1,\"(0)\",ConsoleColor.Red);\n \n //bool variable to say when the game loop to be over\n bool gameLoop = true;\n\n //variable keeping the score\n ulong score = 0;\n\n //the game loop\n while (gameLoop)\n {\n //score is growing as the cycle runs\n score++;\n\n //setting the Y component for all the rocks in the array to grow with 2\n for (int i = 0; i <= rocks.Length - 1; i++)\n {\n rocks[i].Y = rocks[i].Y + 2;\n }\n\n //generating rocks\n for (int x = 0; x <= randomGenerator.Next(2, 4); x++)\n {\n rocks[rocksCount] = new Position(randomGenerator.Next(x * 15, x * 15 + 20), 0\n , symbols[randomGenerator.Next(0, symbols.Length - 1)]\n , colors[randomGenerator.Next(0, colors.Length - 1)]);\n if (rocksCount >= 199) rocksCount = 0;\n rocksCount++;\n }\n\n //printing the rocks and other stuff\n foreach (var item in rocks)\n {\n foreach (var rock in rocks)\n {\n //checking for colision\n if ((rock.X >= dwarf.X) && (rock.X <= (dwarf.X + 2)) && (rock.Y == dwarf.Y))\n {\n gameLoop = false;\n break;\n }\n } \n\n //printing the rocks\n if (item.Y < Console.WindowHeight)\n { \n Console.SetCursorPosition(item.X, item.Y);\n Console.ForegroundColor = item.color;\n Console.Write(item.symbol);\n }\n\n //checking for key pressed\n if (Console.KeyAvailable)\n {\n ConsoleKeyInfo pressedKey = Console.ReadKey();\n if (pressedKey.Key == ConsoleKey.RightArrow)\n {\n if(dwarf.X < Console.WindowWidth - 20)\n {\n //removing the old positions of the dwarf and increasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n 
Console.Write(\" \");\n dwarf.X++;\n }\n }\n if (pressedKey.Key == ConsoleKey.LeftArrow) \n {\n if(dwarf.X >= 1)\n {\n //removing the old positions of the dwarf and decreasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.Write(\" \");\n dwarf.X--;\n }\n }\n }\n }\n \n //printing the dwarf\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.ForegroundColor = dwarf.color;\n Console.Write(dwarf.symbol); \n \n //sleeping the loop for sometime\n //Thread.Sleep((int)sleepTime);\n\n //reducing the sleep time of the loop\n sleepTime -= 0.5;\n\n \n //removing the rocks \n //foreach (var item in rocks)\n //{\n // if (item.Y < Console.WindowHeight)\n // {\n // Console.SetCursorPosition(item.X, item.Y);\n // Console.Write(\" \");\n // }\n //} \n }\n //Printing the score after the game is over\n Console.Clear();\n Console.WriteLine(\"Game over! Your score is: \" + score);\n\n }\n}\n</context>\n\n这个\"Falling Rocks\"游戏是如何工作的呢?可以详细解释一下代码的运作机制吗? \n\n\n\n
```

### 8K sequence
```
<context># -*- coding: utf-8 -*-\n# This code is part of Amoco\n# Copyright (C) 2021 Axel Tillequin ([email protected])\n# published under GPLv2 license\nfrom amoco.arch.tricore import env\nfrom amoco.arch.core import *\n# -------------------------------------------------------\n# from TriCore TC1.6.2 core architecture manual V1.2.2\n# (32-bit Unified Processor Core), 2020-01-15\n# define all except FPU instructions\n# -------------------------------------------------------\nISPECS = []\n@ispec("32<[ disp1(16) disp2(8) {6d} ]", mnemonic="CALL")\n@ispec("32<[ disp1(16) disp2(8) {61} ]", mnemonic="FCALL")\n@ispec("32<[ disp1(16) disp2(8) {1d} ]", mnemonic="J")\n@ispec("32<[ disp1(16) disp2(8) {5d} ]", mnemonic="JL")\ndef tricore_branch(obj, disp1, disp2):\n v = env.cst(((disp2<<16)+disp1)<<1,24)\n obj.operands = [disp.signextend(32)]\n obj.type = type_control_flow\n@ispec("32<[ disp1(16) disp2(8) {ed} ]", mnemonic="CALLA")\n@ispec("32<[ disp1(16) disp2(8) {e1} ]", mnemonic="FCALLA")\n@ispec("32<[ disp1(16) disp2(8) {9d} ]", mnemonic="JA")\n@ispec("32<[ disp1(16) disp2(8) {dd} ]", mnemonic="JLA")\ndef tricore_branch(obj, disp1, disp2):\n v = env.cst((disp2<<16)+disp1,24)\n addr = composer([env.bit0,v[0:20],env.cst(0,7),v[20:24]])\n obj.operands = [addr]\n obj.type = type_control_flow\n@ispec("32<[ ---- {00} ---- ---- a(4) {2d} ]", mnemonic="CALLI")\n@ispec("32<[ ---- {01} ---- ---- a(4) {2d} ]", mnemonic="FCALLI")\n@ispec("32<[ ---- {03} ---- ---- a(4) {2d} ]", mnemonic="JI")\n@ispec("32<[ ---- {02} ---- ---- a(4) {2d} ]", mnemonic="JLI")\ndef tricore_branchI(obj, a):\n src = env.A[a]\n obj.operands = [src]\n obj.type = type_control_flow\n@ispec("16<[ disp(8) {5c} ]", mnemonic="CALL")\n@ispec("16<[ disp(8) {3c} ]", mnemonic="J")\n@ispec("16<[ disp(8) {ee} ]", mnemonic="JNZ")\n@ispec("16<[ disp(8) {6e} ]", mnemonic="JZ")\ndef tricore_branch(obj, disp):\n disp = env.cst(disp<<1,8)\n obj.operands = [disp.signextend(32)]\n obj.type = type_control_flow\n@ispec("32<[ ---- 0000000 const9(9) ---- {ad} ]", mnemonic="BISR")\n@ispec("32<[ ---- 0000100 const9(9) ---- {ad} ]", mnemonic="SYSCALL")\ndef tricore_system(obj, const9):\n obj.operands = [env.cst(const9,9)]\n obj.type = type_system\n@ispec("32<[ c(4) {1c} ---- b(4) ---- {0b} ]", mnemonic="ABS")\n@ispec("32<[ c(4) {5c} ---- b(4) ---- {0b} ]", mnemonic="ABS_B")\n@ispec("32<[ c(4) {7c} ---- b(4) ---- {0b} ]", mnemonic="ABS_H")\n@ispec("32<[ c(4) {1d} ---- b(4) ---- {0b} ]", mnemonic="ABSS")\n@ispec("32<[ c(4) {7d} ---- b(4) ---- {0b} ]", mnemonic="ABSS_H")\n@ispec("32<[ c(4) {1f} ---- b(4) ---- {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b):\n src = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {80} ---- b(4) ---- {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b):\n src = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, src.signextend(64)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {81} ---- b(4) a(4) {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b, a):\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, composer([src2,src1])]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {0e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF")\n@ispec("32<[ c(4) {4e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF_B")\n@ispec("32<[ c(4) {6e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF_H")\n@ispec("32<[ c(4) {0f} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIFS")\n@ispec("32<[ c(4) {6f} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIFS_H")\n@ispec("32<[ c(4) {00} ---- b(4) 
a(4) {0b} ]", mnemonic="ADD")\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {0b} ]", mnemonic="ADD_B")\n@ispec("32<[ c(4) {60} ---- b(4) a(4) {0b} ]", mnemonic="ADD_H")\n@ispec("32<[ c(4) {05} ---- b(4) a(4) {0b} ]", mnemonic="ADDC")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {0b} ]", mnemonic="ADDS")\n@ispec("32<[ c(4) {62} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_H")\n@ispec("32<[ c(4) {63} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_HU")\n@ispec("32<[ c(4) {03} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_U")\n@ispec("32<[ c(4) {04} ---- b(4) a(4) {0b} ]", mnemonic="ADDX")\n@ispec("32<[ c(4) {08} ---- b(4) a(4) {0f} ]", mnemonic="AND")\n@ispec("32<[ c(4) {20} ---- b(4) a(4) {0b} ]", mnemonic="AND_EQ")\n@ispec("32<[ c(4) {24} ---- b(4) a(4) {0b} ]", mnemonic="AND_GE")\n@ispec("32<[ c(4) {25} ---- b(4) a(4) {0b} ]", mnemonic="AND_GE_U")\n@ispec("32<[ c(4) {22} ---- b(4) a(4) {0b} ]", mnemonic="AND_LT")\n@ispec("32<[ c(4) {23} ---- b(4) a(4) {0b} ]", mnemonic="AND_LT_U")\n@ispec("32<[ c(4) {21} ---- b(4) a(4) {0b} ]", mnemonic="AND_NE")\n@ispec("32<[ c(4) {0e} ---- b(4) a(4) {0f} ]", mnemonic="ANDN")\n@ispec("32<[ c(4) {10} ---- b(4) a(4) {0b} ]", mnemonic="EQ")\n@ispec("32<[ c(4) {50} ---- b(4) a(4) {0b} ]", mnemonic="EQ_B")\n@ispec("32<[ c(4) {70} ---- b(4) a(4) {0b} ]", mnemonic="EQ_H")\n@ispec("32<[ c(4) {90} ---- b(4) a(4) {0b} ]", mnemonic="EQ_W")\n@ispec("32<[ c(4) {56} ---- b(4) a(4) {0b} ]", mnemonic="EQANY_B")\n@ispec("32<[ c(4) {76} ---- b(4) a(4) {0b} ]", mnemonic="EQANY_H")\n@ispec("32<[ c(4) {14} ---- b(4) a(4) {0b} ]", mnemonic="GE")\n@ispec("32<[ c(4) {15} ---- b(4) a(4) {0b} ]", mnemonic="GE_U")\n@ispec("32<[ c(4) {12} ---- b(4) a(4) {0b} ]", mnemonic="LT")\n@ispec("32<[ c(4) {13} ---- b(4) a(4) {0b} ]", mnemonic="LT_U")\n@ispec("32<[ c(4) {52} ---- b(4) a(4) {0b} ]", mnemonic="LT_B")\n@ispec("32<[ c(4) {53} ---- b(4) a(4) {0b} ]", mnemonic="LT_BU")\n@ispec("32<[ c(4) {72} ---- b(4) a(4) {0b} ]", mnemonic="LT_H")\n@ispec("32<[ c(4) {73} ---- b(4) a(4) {0b} ]", mnemonic="LT_HU")\n@ispec("32<[ c(4) {92} ---- b(4) a(4) {0b} ]", mnemonic="LT_W")\n@ispec("32<[ c(4) {93} ---- b(4) a(4) {0b} ]", mnemonic="LT_WU")\n@ispec("32<[ c(4) {1a} ---- b(4) a(4) {0b} ]", mnemonic="MAX")\n@ispec("32<[ c(4) {1b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_U")\n@ispec("32<[ c(4) {5a} ---- b(4) a(4) {0b} ]", mnemonic="MAX_B")\n@ispec("32<[ c(4) {5b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_BU")\n@ispec("32<[ c(4) {7a} ---- b(4) a(4) {0b} ]", mnemonic="MAX_H")\n@ispec("32<[ c(4) {7b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_HU")\n@ispec("32<[ c(4) {18} ---- b(4) a(4) {0b} ]", mnemonic="MIN")\n@ispec("32<[ c(4) {19} ---- b(4) a(4) {0b} ]", mnemonic="MIN_U")\n@ispec("32<[ c(4) {58} ---- b(4) a(4) {0b} ]", mnemonic="MIN_B")\n@ispec("32<[ c(4) {59} ---- b(4) a(4) {0b} ]", mnemonic="MIN_BU")\n@ispec("32<[ c(4) {78} ---- b(4) a(4) {0b} ]", mnemonic="MIN_H")\n@ispec("32<[ c(4) {79} ---- b(4) a(4) {0b} ]", mnemonic="MIN_HU")\n@ispec("32<[ c(4) {09} ---- b(4) a(4) {0f} ]", mnemonic="NAND")\n@ispec("32<[ c(4) {11} ---- b(4) a(4) {0b} ]", mnemonic="NE")\n@ispec("32<[ c(4) {0b} ---- b(4) a(4) {0f} ]", mnemonic="NOR")\n@ispec("32<[ c(4) {0a} ---- b(4) a(4) {0f} ]", mnemonic="OR")\n@ispec("32<[ c(4) {27} ---- b(4) a(4) {0b} ]", mnemonic="OR_EQ")\n@ispec("32<[ c(4) {2b} ---- b(4) a(4) {0b} ]", mnemonic="OR_GE")\n@ispec("32<[ c(4) {2c} ---- b(4) a(4) {0b} ]", mnemonic="OR_GE_U")\n@ispec("32<[ c(4) {29} ---- b(4) a(4) {0b} ]", mnemonic="OR_LT")\n@ispec("32<[ c(4) {2a} ---- b(4) a(4) {0b} ]", mnemonic="OR_LT_U")\n@ispec("32<[ c(4) {28} ---- b(4) 
a(4) {0b} ]", mnemonic="OR_NE")\n@ispec("32<[ c(4) {0f} ---- b(4) a(4) {0f} ]", mnemonic="ORN")\n@ispec("32<[ c(4) {00} ---- b(4) a(4) {0f} ]", mnemonic="SH")\n@ispec("32<[ c(4) {37} ---- b(4) a(4) {0b} ]", mnemonic="SH_EQ")\n@ispec("32<[ c(4) {3b} ---- b(4) a(4) {0b} ]", mnemonic="SH_GE")\n@ispec("32<[ c(4) {3c} ---- b(4) a(4) {0b} ]", mnemonic="SH_GE_U")\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {0f} ]", mnemonic="SH_H")\n@ispec("32<[ c(4) {39} ---- b(4) a(4) {0b} ]", mnemonic="SH_LT")\n@ispec("32<[ c(4) {3a} ---- b(4) a(4) {0b} ]", mnemonic="SH_LT_U")\n@ispec("32<[ c(4) {38} ---- b(4) a(4) {0b} ]", mnemonic="SH_NE")\n@ispec("32<[ c(4) {01} ---- b(4) a(4) {0f} ]", mnemonic="SHA")\n@ispec("32<[ c(4) {41} ---- b(4) a(4) {0f} ]", mnemonic="SHA_H")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {0f} ]", mnemonic="SHAS")\n@ispec("32<[ c(4) {08} ---- b(4) a(4) {0b} ]", mnemonic="SUB")\n@ispec("32<[ c(4) {48} ---- b(4) a(4) {0b} ]", mnemonic="SUB_B")\n@ispec("32<[ c(4) {68} ---- b(4) a(4) {0b} ]", mnemonic="SUB_H")\n@ispec("32<[ c(4) {0d} ---- b(4) a(4) {0b} ]", mnemonic="SUBC")\n@ispec("32<[ c(4) {0a} ---- b(4) a(4) {0b} ]", mnemonic="SUBS")\n@ispec("32<[ c(4) {0b} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_U")\n@ispec("32<[ c(4) {6a} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_H")\n@ispec("32<[ c(4) {6b} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_HU")\n@ispec("32<[ c(4) {0c} ---- b(4) a(4) {0b} ]", mnemonic="SUBX")\n@ispec("32<[ c(4) {0d} ---- b(4) a(4) {0f} ]", mnemonic="XNOR")\n@ispec("32<[ c(4) {0c} ---- b(4) a(4) {0f} ]", mnemonic="XOR")\n@ispec("32<[ c(4) {2f} ---- b(4) a(4) {0b} ]", mnemonic="XOR_EQ")\n@ispec("32<[ c(4) {30} ---- b(4) a(4) {0b} ]", mnemonic="XOR_NE")\ndef tricore_ddd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {01} ]", mnemonic="EQ_A")\n@ispec("32<[ c(4) {43} ---- b(4) a(4) {01} ]", mnemonic="GE_A")\n@ispec("32<[ c(4) {42} ---- b(4) a(4) {01} ]", mnemonic="LT_A")\n@ispec("32<[ c(4) {41} ---- b(4) a(4) {01} ]", mnemonic="NE_A")\ndef tricore_daa_arithmetic(obj, c, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {63} ---- b(4) ---- {01} ]", mnemonic="MOV_A", _dst=env.A, _src=env.D)\n@ispec("32<[ c(4) {00} ---- b(4) ---- {01} ]", mnemonic="MOV_AA", _dst=env.A, _src=env.A)\n@ispec("32<[ c(4) {4c} ---- b(4) ---- {01} ]", mnemonic="MOV_D", _dst=env.D, _src=env.A)\ndef tricore_daa_arithmetic(obj, c, b, _dst, _src):\n dst = _dst[c]\n src = _src[b]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {48} ---- ---- a(4) {01} ]", mnemonic="EQZ_A")\n@ispec("32<[ c(4) {49} ---- ---- a(4) {01} ]", mnemonic="NEZ_A")\ndef tricore_da_arithmetic(obj, c, a):\n src1 = env.A[a]\n dst = env.D[c]\n obj.operands = [dst, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {01} --00 b(4) a(4) {4b} ]", mnemonic="BMERGE")\ndef tricore_ddd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {06} --00 b(4) a(4) {4b} ]", mnemonic="CRC32_B")\n@ispec("32<[ c(4) {03} --00 b(4) a(4) {4b} ]", mnemonic="CRC32B_W")\n@ispec("32<[ c(4) {03} --00 b(4) a(4) {4b} ]", mnemonic="CRC32L_W")\ndef tricore_crc32(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src2, src1]\n obj.type = 
type_data_processing\n@ispec("32<[ c(4) {20} --01 b(4) a(4) {4b} ]", mnemonic="DIV")\n@ispec("32<[ c(4) {21} --01 b(4) a(4) {4b} ]", mnemonic="DIV_U")\n@ispec("32<[ c(4) {5a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_B")\n@ispec("32<[ c(4) {4a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_BU")\n@ispec("32<[ c(4) {3a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_H")\n@ispec("32<[ c(4) {2a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_HU")\n@ispec("32<[ c(4) {1a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT")\n@ispec("32<[ c(4) {0a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_U")\ndef tricore_edd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 100 ----- b(4) a(4) {17} ]", mnemonic="DEXTR")\ndef tricore_dddc(obj, c, d, b, a):\n shift = env.D[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, shift]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 010 ----- ---- a(4) {17} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) d(4) 011 ----- ---- a(4) {17} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, d, a):\n if d%2:\n raise InstructionError(obj)\n width = env.E[d][32:37]\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, width]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 0--00 ---- a(4) {6b} ]", mnemonic="PACK")\ndef tricore_extr(obj, c, d, a):\n if d%2:\n raise InstructionError(obj)\n src1 = env.E[d]\n src2 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {08} -- 00 ---- a(4) {4b} ]", mnemonic="UNPACK")\ndef tricore_extr(obj, c, d, a):\n src = env.D[a]\n dst = env.E[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {02} -- 00 ---- a(4) {4b} ]", mnemonic="PARITY")\n@ispec("32<[ c(4) {22} -- 00 ---- a(4) {4b} ]", mnemonic="POPCNT_W")\ndef tricore_extr(obj, c, d, a):\n src = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 ----- b(4) a(4) {77} ]", mnemonic="DEXTR")\ndef tricore_dextr(obj, c, pos, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, env.cst(pos,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 10 width(5) ---- a(4) {37} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) pos(5) 11 width(5) ---- a(4) {37} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, pos, width, a):\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 01 width(5) const(4) ---- {b7} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, pos, width, const):\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, env.cst(const,4), env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 001 width(5) const(4) ---- {d7} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, d, width, const):\n src2 = env.D[d]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, env.cst(const,4), src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 01 width(5) b(4) ---- {37} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, pos, width, b):\n src1 = env.D[b]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, env.cst(pos,5), env.cst(width,5)]\n obj.type = 
type_data_processing\n@ispec("32<[ c(4) d(4) 001 width(5) b(4) ---- {57} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, d, width, b):\n src1 = env.D[b]\n src2 = env.D[d]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 width(5) const(4) a(4) {b7} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, pos, width, const, a):\n dst = env.D[c]\n src1 = env.D[a]\n obj.operands = [dst, src1, env.cst(const,4), env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ----- const(4) a(4) {97} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, const, a):\n src1 = env.D[a]\n if d%2:\n raise InstructionError(obj)\n src3 = env.E[d]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(const,4), src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 width(5) const(4) a(4) {d7} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, width, const, a):\n src1 = env.D[a]\n src3 = env.D[d]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(const,4), src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 width(5) b(4) a(4) {37} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, pos, width, b, a):\n dst = env.D[c]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ----- b(4) a(4) {17} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n if d%2:\n raise InstructionError(obj)\n src3 = env.E[d]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 width(5) b(4) a(4) {57} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, width, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n src3 = env.D[d]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, src3, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 010 width(5) ---- a(4) {57} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) d(4) 011 width(5) ---- a(4) {57} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, d, width, a):\n src2 = env.D[d]\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {09} --00 ---- a(4) {4b} ]", mnemonic="BSPLIT")\ndef tricore_edd_arithmetic(obj, c, a):\n src1 = env.D[a]\n dst = env.E[c]\n obj.operands = [dst, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) 0001110 ~const9(9) a(4) {8b} ]", mnemonic="ABSDIF")\n@ispec("32<[ c(4) 0001111 ~const9(9) a(4) {8b} ]", mnemonic="ABSDIFS")\n@ispec("32<[ c(4) 0000000 ~const9(9) a(4) {8b} ]", mnemonic="ADD")\n@ispec("32<[ c(4) 0000101 ~const9(9) a(4) {8b} ]", mnemonic="ADDC")\n@ispec("32<[ c(4) 0000010 ~const9(9) a(4) {8b} ]", mnemonic="ADDS")\n@ispec("32<[ c(4) 0000011 ~const9(9) a(4) {8b} ]", mnemonic="ADDS_U") #const9 is signed\n@ispec("32<[ c(4) 0000100 ~const9(9) a(4) {8b} ]", mnemonic="ADDX")\n@ispec("32<[ c(4) 0100000 ~const9(9) a(4) {8b} ]", mnemonic="AND_EQ")\n@ispec("32<[ c(4) 0100100 ~const9(9) a(4) {8b} ]", mnemonic="AND_GE")\n@ispec("32<[ c(4) 0100010 ~const9(9) a(4) {8b} ]", mnemonic="AND_LT")\n@ispec("32<[ c(4) 0100001 ~const9(9) a(4) {8b} ]", mnemonic="AND_NE")\n@ispec("32<[ c(4) 0010000 ~const9(9) a(4) {8b} ]", mnemonic="EQ")\n@ispec("32<[ c(4) 1010110 ~const9(9) a(4) {8b} ]", mnemonic="EQANY_B")\n@ispec("32<[ c(4) 1110110 ~const9(9) a(4) {8b} ]", 
mnemonic="EQANY_H")\n@ispec("32<[ c(4) 0010100 ~const9(9) a(4) {8b} ]", mnemonic="GE")\n@ispec("32<[ c(4) 0010010 ~const9(9) a(4) {8b} ]", mnemonic="LT")\n@ispec("32<[ c(4) 0011010 ~const9(9) a(4) {8b} ]", mnemonic="MAX")\n@ispec("32<[ c(4) 0010001 ~const9(9) a(4) {8b} ]", mnemonic="NE")\n@ispec("32<[ c(4) 0100111 ~const9(9) a(4) {8b} ]", mnemonic="OR_EQ")\n@ispec("32<[ c(4) 0101011 ~const9(9) a(4) {8b} ]", mnemonic="OR_GE")\n@ispec("32<[ c(4) 0101001 ~const9(9) a(4) {8b} ]", mnemonic="OR_LT")\n@ispec("32<[ c(4) 0001000 ~const9(9) a(4) {8b} ]", mnemonic="RSUB")\n@ispec("32<[ c(4) 0001001 ~const9(9) a(4) {8b} ]", mnemonic="RSUBS")\n@ispec("32<[ c(4) 0001011 ~const9(9) a(4) {8b} ]", mnemonic="RSUBS_U") #const9 is signed\n@ispec("32<[ c(4) 0000000 ~const9(9) a(4) {8f} ]", mnemonic="SH")\n@ispec("32<[ c(4) 1000000 ~const9(9) a(4) {8f} ]", mnemonic="SH_H")\n@ispec("32<[ c(4) 0110111 ~const9(9) a(4) {8b} ]", mnemonic="SH_EQ")\n@ispec("32<[ c(4) 0111011 ~const9(9) a(4) {8b} ]", mnemonic="SH_GE")\n@ispec("32<[ c(4) 0111001 ~const9(9) a(4) {8b} ]", mnemonic="SH_LT")\n@ispec("32<[ c(4) 0111000 ~const9(9) a(4) {8b} ]", mnemonic="SH_NE")\n@ispec("32<[ c(4) 0000001 ~const9(9) a(4) {8f} ]", mnemonic="SHA")\n@ispec("32<[ c(4) 1000001 ~const9(9) a(4) {8f} ]", mnemonic="SHA_H")\n@ispec("32<[ c(4) 0000010 ~const9(9) a(4) {8f} ]", mnemonic="SHAS")\n@ispec("32<[ c(4) 0101111 ~const9(9) a(4) {8b} ]", mnemonic="XOR_EQ")\n@ispec("32<[ c(4) 0110011 ~const9(9) a(4) {8b} ]", mnemonic="XOR_GE")\n@ispec("32<[ c(4) 0110001 ~const9(9) a(4) {8b} ]", mnemonic="XOR_LT")\n@ispec("32<[ c(4) 0110000 ~const9(9) a(4) {8b} ]", mnemonic="XOR_NE")\ndef tricore_ddc_arithmetic(obj, c, const9, a):\n src1 = env.D[a]\n if obj.mnemonic in ("SH","SHA","SHAS"):\n const9 = const9[0:6]\n elif obj.mnemonic in ("SH_H","SHA_H"):\n const9 = const9[0:5]\n src2 = env.cst(const9.int(-1),32)\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_OR_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {87} ]", mnemonic="AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {87} ]", mnemonic="ANDN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {67} ]", mnemonic="INS_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {67} ]", mnemonic="INSN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {07} ]", mnemonic="NAND_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {87} ]", mnemonic="NOR_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {87} ]", mnemonic="OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {07} ]", mnemonic="ORN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_NAND_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {27} ]", 
mnemonic="SH_OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_ORN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_XNOR_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_XOR_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {07} ]", mnemonic="XNOR_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {07} ]", mnemonic="XOR_T")\ndef tricore_ddd_arithmetic(obj, c, pos2, pos1, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1[pos1:pos1+1], src2[pos2:pos2+1]]\n obj.type = type_data_processing\n@ispec("32<[ c(4) 0001000 const9(9) a(4) {8f} ]", mnemonic="AND")\n@ispec("32<[ c(4) 0100101 const9(9) a(4) {8b} ]", mnemonic="AND_GE_U")\n@ispec("32<[ c(4) 0100011 const9(9) a(4) {8b} ]", mnemonic="AND_LT_U")\n@ispec("32<[ c(4) 0001110 const9(9) a(4) {8f} ]", mnemonic="ANDN")\n@ispec("32<[ c(4) 0001001 const9(9) a(4) {8f} ]", mnemonic="NAND")\n@ispec("32<[ c(4) 0001011 const9(9) a(4) {8f} ]", mnemonic="NOR")\n@ispec("32<[ c(4) 0010101 const9(9) a(4) {8b} ]", mnemonic="GE_U")\n@ispec("32<[ c(4) 0001010 const9(9) a(4) {8f} ]", mnemonic="OR")\n@ispec("32<[ c(4) 0101100 const9(9) a(4) {8b} ]", mnemonic="OR_GE_U")\n@ispec("32<[ c(4) 0101010 const9(9) a(4) {8b} ]", mnemonic="OR_LT_U")\n@ispec("32<[ c(4) 0101000 const9(9) a(4) {8b} ]", mnemonic="OR_NE")\n@ispec("32<[ c(4) 0001111 const9(9) a(4) {8f} ]", mnemonic="ORN")\n@ispec("32<[ c(4) 0000111 const9(9) a(4) {8f} ]", mnemonic="SHUFFLE")\n@ispec("32<[ c(4) 0001101 const9(9) a(4) {8f} ]", mnemonic="XNOR")\n@ispec("32<[ c(4) 0001100 const9(9) a(4) {8f} ]", mnemonic="XOR")\n@ispec("32<[ c(4) 0111100 const9(9) a(4) {8b} ]", mnemonic="SH_GE_U")\n@ispec("32<[ c(4) 0111010 const9(9) a(4) {8b} ]", mnemonic="SH_LT_U")\n@ispec("32<[ c(4) 0110100 const9(9) a(4) {8b} ]", mnemonic="XOR_GE_U")\n@ispec("32<[ c(4) 0110011 const9(9) a(4) {8b} ]", mnemonic="XOR_LT_U")\n@ispec("32<[ c(4) 0011011 const9(9) a(4) {8b} ]", mnemonic="MAX_U")\n@ispec("32<[ c(4) 0010011 const9(9) a(4) {8b} ]", mnemonic="LT_U")\ndef tricore_ddc_arithmetic(obj, c, const9, a):\n src1 = env.D[a]\n src2 = env.cst(const9,32)\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {c2} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {06} ]", mnemonic="SH")\n@ispec("16<[ ~const4(4) a(4) {86} ]", mnemonic="SHA")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[a]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {92} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {8a} ]", mnemonic="CADD")\n@ispec("16<[ ~const4(4) a(4) {ca} ]", mnemonic="CADDN")\n@ispec("16<[ ~const4(4) a(4) {aa} ]", mnemonic="CMOV")\n@ispec("16<[ ~const4(4) a(4) {ea} ]", mnemonic="CMOVN")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[15]\n obj.operands = [dst, src1, src2]\n if "CADD" in obj.mnemonic:\n obj.operands = [dst, src1, dst, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {9a} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {ba} ]", mnemonic="EQ")\n@ispec("16<[ ~const4(4) a(4) {fa} ]", mnemonic="LT")\n@ispec("16<[ ~const4(4) a(4) {82} ]", mnemonic="MOV")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[15]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[a]\n obj.operands = [dst, src1, src2]\n if obj.mnemonic=="MOV":\n obj.operands = 
[src1,src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {d2} ]", mnemonic="MOV")\ndef tricore_ec_arithmetic(obj, const4, a):\n dst = env.E[a]\n src = env.cst(const4.int(-1),64)\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ const4(4) a(4) {a0} ]", mnemonic="MOV_A")\ndef tricore_ec_arithmetic(obj, const4, a):\n dst = env.A[a]\n src = env.cst(const4,32)\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ const8(8) {16} ]", mnemonic="AND")\n@ispec("16<[ const8(8) {da} ]", mnemonic="MOV")\n@ispec("16<[ const8(8) {96} ]", mnemonic="OR")\ndef tricore_ddc_arithmetic(obj, const8):\n dst = env.D[15]\n src2 = env.cst(const8,32)\n src1 = env.D[15]\n obj.operands = [dst, src1, src2]\n if obj.mnemonic=="MOV":\n obj.operands = [src1,src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {42} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {26} ]", mnemonic="AND")\n@ispec("16<[ b(4) a(4) {a6} ]", mnemonic="OR")\n@ispec("16<[ b(4) a(4) {a2} ]", mnemonic="SUB")\n@ispec("16<[ b(4) a(4) {62} ]", mnemonic="SUBS")\n@ispec("16<[ b(4) a(4) {c6} ]", mnemonic="XOR")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[a]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {02} ]", mnemonic="MOV" , _dst=env.D, _src=env.D)\n@ispec("16<[ b(4) a(4) {60} ]", mnemonic="MOV_A" , _dst=env.A, _src=env.D)\n@ispec("16<[ b(4) a(4) {40} ]", mnemonic="MOV_AA" , _dst=env.A, _src=env.A)\n@ispec("16<[ b(4) a(4) {80} ]", mnemonic="MOV_D" , _dst=env.D, _src=env.A)\ndef tricore_mov(obj, b, a, _dst, _src):\n dst = _dst[a]\n src = _src[b]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {12} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {2a} ]", mnemonic="CMOV")\n@ispec("16<[ b(4) a(4) {6a} ]", mnemonic="CMOVN")\n@ispec("16<[ b(4) a(4) {52} ]", mnemonic="SUB")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[a]\n src1 = env.D[15]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {1a} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {22} ]", mnemonic="ADDS")\n@ispec("16<[ b(4) a(4) {3a} ]", mnemonic="EQ")\n@ispec("16<[ b(4) a(4) {7a} ]", mnemonic="LT")\n@ispec("16<[ b(4) a(4) {5a} ]", mnemonic="SUB")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[15]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {01} ---- b(4) a(4) {01} ]", mnemonic="ADD_A")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {01} ]", mnemonic="SUB_A")\ndef tricore_aaa_arithmetic(obj, c, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {b0} ]", mnemonic="ADD_A")\ndef tricore_aac_arithmetic(obj, const4, a):\n dst = env.A[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.A[a]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ const8(8) {20} ]", mnemonic="SUB_A")\ndef tricore_aac_arithmetic(obj, const8, a):\n dst = env.A[10]\n src2 = env.cst(const8,32)\n src1 = env.A[10]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {30} ]", mnemonic="ADD_A")\ndef tricore_aa_arithmetic(obj, b, a):\n dst = env.A[a]\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) ~const16(16) a(4) {1b} ]", 
mnemonic="ADDI")\n@ispec("32<[ c(4) ~const16(16) a(4) {9b} ]", mnemonic="ADDIH")\ndef tricore_di_arithmetic(obj, c, const16, a):\n src1 = env.D[a]\n src2 = env.cst(const16.int(-1),32)\n if self.mnemonic=="ADDIH": src2=src2<<16\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) ~const16(16) a(4) {11} ]", mnemonic="ADDIH_A")\ndef tricore_ai_arithmetic(obj, c, const16, a):\n src1 = env.A[a]\n src2 = env.cst(const16.int(-1),32)<<16\n dst = env.A[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {60} -- n(2) b(4) a(4) {01} ]", mnemonic="ADDSC_A")\ndef tricore_aaa_arithmetic(obj, c, n, b, a):\n src1 = env.D[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src2, src1, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {62} ---- b(4) a(4) {01} ]", mnemonic="ADDSC_AT")\ndef tricore_aaa_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src2, src1]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) n(2) 010000 ]", mnemonic="ADDSC_A")\ndef tricore_aa_arithmetic(obj, b, a, n):\n dst = env.A[a]\n src1 = env.D[15]\n src2 = env.A[b]\n obj.operands = [dst, src2, src1, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ off2(4) 10 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1110 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_I", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1110 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_I", mode="Circular")\n@ispec("32<[ off2(4) 00 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1100 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1100 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_W", mode="Circular")\n@ispec("32<[ off2(4) 00 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1101 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_WI", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1101 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_WI", mode="Circular")\n@ispec("32<[ off2(4) 00 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1111 off1(6) b(4) ---- {89} ]", 
mnemonic="CACHEI_WI", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Pre-increment")\ndef tricore_cache(obj, off2, off1, b):\n src2 = env.A[b]\n src1 = env.cst((off2<<6)+off1,10)\n obj.operands = [src2, src1]\n obj.type = type_system\n@ispec("32<[ off2(4) 10 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 0011 off1(6) b(4) a(4) {69} ]", mnemonic="CMPSWAP_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 0011 off1(6) b(4) a(4) {69} ]", mnemonic="CMPSWAP_W", mode="Circular")\n@ispec("32<[ off2(4) 00 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 0010 off1(6) b(4) a(4) {69} ]", mnemonic="SWAPMSK_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 0010 off1(6) b(4) a(4) {69} ]", mnemonic="SWAPMSK_W", mode="Circular")\n@ispec("32<[ off2(4) 00 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Pre-increment")\ndef tricore_swap(obj, off2, off1, b, a):\n if a%2:\n raise InstructionError(obj)\n dst = env.D[a]\n src1 = env.A[b]\n src2 = env.cst((off2<<6)+off1,10)\n src3 = env.E[a]\n obj.operands = [dst, src1, src2, src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ~const9(9) a(4) {ab} ]", mnemonic="CADD")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {ab} ]", mnemonic="CADDN")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {13} ]", mnemonic="MADD", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {13} ]", mnemonic="MADDS", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {13} ]", mnemonic="MADDS_U", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {33} ]", mnemonic="MSUB", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {33} ]", mnemonic="MSUBS", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {33} ]", mnemonic="MSUBS_U", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {ab} ]", mnemonic="SEL")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {ab} ]", mnemonic="SELN")\ndef tricore_cond_ddc(obj, c, d, const9, a):\n cond = env.D[d]\n src1 = env.D[a]\n src2 = env.cst(const9.int(-1),32)\n dst = env.D[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 011 ~const9(9) a(4) {13} ]", mnemonic="MADD", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {13} ]", mnemonic="MADDS", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 010 ~const9(9) a(4) {13} ]", mnemonic="MADD_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {13} ]", mnemonic="MADDS_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 011 ~const9(9) a(4) {33} ]", mnemonic="MSUB", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {33} ]", mnemonic="MSUBS", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 010 ~const9(9) a(4) {33} ]", mnemonic="MSUB_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {33} ]", mnemonic="MSUBS_U", opt4="64+(32+K9)->64")\ndef tricore_cond_eec(obj, c, d, const9, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.cst(const9.int(-1),32)\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = 
type_data_processing\n@ispec("32<[ c(4) d(4) 011010 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="LL")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="LU")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="UL")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="UU")\n@ispec("32<[ c(4) d(4) 111010 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="LL")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="LU")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="UL")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="UU")\n@ispec("32<[ c(4) d(4) 000010 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 000001 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 000000 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 000101 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 011101 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 000100 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 011100 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 100010 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 100001 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 100000 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 100101 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 111101 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 100100 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 111100 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 011010 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="LL")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="LU")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="UL")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="UU")\n@ispec("32<[ c(4) d(4) 111010 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="LL")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="LU")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="UL")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="UU")\n@ispec("32<[ c(4) d(4) 000010 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) 
d(4) 000001 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 000000 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 000101 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 011101 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 000100 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 011100 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 100010 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 100001 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 100000 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 100101 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 111101 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 100100 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 111100 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16U*16U)->64")\ndef tricore_cond_eec(obj, c, d, n, b, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 0000 ---- b(4) a(4) {2b} ]", mnemonic="CADD")\n@ispec("32<[ c(4) d(4) 0001 ---- b(4) a(4) {2b} ]", mnemonic="CADDN")\n@ispec("32<[ c(4) d(4) 0010 ---- b(4) a(4) {2b} ]", mnemonic="CSUB")\n@ispec("32<[ c(4) d(4) 0011 ---- b(4) a(4) {2b} ]", mnemonic="CSUBN")\n@ispec("32<[ c(4) d(4) {0a} b(4) a(4) {03} ]", mnemonic="MADD", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) {8a} b(4) a(4) {03} ]", mnemonic="MADDS", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) {88} b(4) a(4) {03} ]", mnemonic="MADDS_U", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) 0100 ---- b(4) a(4) {2b} ]", mnemonic="SEL")\n@ispec("32<[ c(4) d(4) 0101 ---- b(4) a(4) {2b} ]", mnemonic="SELN")\ndef tricore_cond_ddd(obj, c, d, b, a):\n cond = env.D[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) {6a} b(4) a(4) {03} ]", mnemonic="MADD", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {ea} b(4) a(4) {03} ]", mnemonic="MADDS", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {68} b(4) a(4) {03} ]", mnemonic="MADD_U", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {e8} b(4) a(4) {03} ]", mnemonic="MADDS_U", opt4="64+(32*32)->64")\ndef tricore_cond_ddd(obj, c, d, b, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {1c} ---- ---- a(4) {0f} ]", mnemonic="CLO")\n@ispec("32<[ c(4) {7d} ---- ---- a(4) {0f} ]", mnemonic="CLO_H")\n@ispec("32<[ c(4) {1d} ---- ---- a(4) {0f} ]", 
mnemonic="CLS")\n@ispec("32<[ c(4) {7e} ---- ---- a(4) {0f} ]", mnemonic="CLS_H")\n@ispec("32<[ c(4) {1b} ---- ---- a(4) {0f} ]", mnemonic="CLZ")\n@ispec("32<[ c(4) {7c} ---- ---- a(4) {0f} ]", mnemonic="CLZ_H")\n@ispec("32<[ c(4) {5e} ---- ---- a(4) {0b} ]", mnemonic="SAT_B")\n@ispec("32<[ c(4) {5f} ---- ---- a(4) {0b} ]", mnemonic="SAT_BU")\n@ispec("32<[ c(4) {7e} ---- ---- a(4) {0b} ]", mnemonic="SAT_H")\n@ispec("32<[ c(4) {7f} ---- ---- a(4) {0b} ]", mnemonic="SAT_HU")\ndef tricore_dd_arithmetic(obj, c, a):\n src = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ 1010 ---- {00} ]", mnemonic="DEBUG")\n@ispec("16<[ 0000 ---- {00} ]", mnemonic="NOP")\ndef tricore_system(obj):\n obj.operands = []\n obj.type = type_system\n@ispec("16<[ 0111 ---- {00} ]", mnemonic="FRET")\n@ispec("16<[ 1001 ---- {00} ]", mnemonic="RET")\n@ispec("16<[ 1000 ---- {00} ]", mnemonic="RFE")\ndef tricore_ret(obj):\n obj.operands = []\n obj.type = type_control_flow\n@ispec("32<[ ---- 000100 ---------- ---- {0d} ]", mnemonic="DEBUG")\n@ispec("32<[ ---- 001101 ---------- ---- {0d} ]", mnemonic="DISABLE")\n@ispec("32<[ ---- 010010 ---------- ---- {0d} ]", mnemonic="DSYNC")\n@ispec("32<[ ---- 001100 ---------- ---- {0d} ]", mnemonic="ENABLE")\n@ispec("32<[ ---- 010011 ---------- ---- {0d} ]", mnemonic="ISYNC")\n@ispec("32<[ ---- 010101 ---------- ---- {0d} ]", mnemonic="TRAPSV")\n@ispec("32<[ ---- 010100 ---------- ---- {0d} ]", mnemonic="TRAPV")\n@ispec("32<[ ---- 000000 ---------- ---- {0d} ]", mnemonic="NOP")\n@ispec("32<[ ---- 001001 ---------- ---- {0d} ]", mnemonic="RSLCX")\n@ispec("32<[ ---- 000000 ---------- ---- {2f} ]", mnemonic="RSTV")\n@ispec("32<[ ---- 001000 ---------- ---- {0d} ]", mnemonic="SVLCX")\n@ispec("32<[ ---- 010110 ---------- ---- {0d} ]", mnemonic="WAIT")\ndef tricore_system(obj):\n obj.operands = []\n obj.type = type_system\n@ispec("32<[ ---- 000011 ---------- ---- {0d} ]", mnemonic="FRET")\n@ispec("32<[ ---- 000110 ---------- ---- {0d} ]", mnemonic="RET")\n@ispec("32<[ ---- 000111 ---------- ---- {0d} ]", mnemonic="RFE")\n@ispec("32<[ ---- 000101 ---------- ---- {0d} ]", mnemonic="RFM")\ndef tricore_ret(obj):\n obj.operands = []\n obj.type = type_control_flow\n@ispec("32<[ ---- 001111 ---------- a(4) {0d} ]", mnemonic="DISABLE")\n@ispec("32<[ ---- 001110 ---------- a(4) {0d} ]", mnemonic="RESTORE")\ndef tricore_system(obj, a):\n obj.operands = [env.D[a]]\n obj.type = type_system\n@ispec("32<[ c(4) d(4) 1101 -- 00 b(4) ---- {6b} ]", mnemonic="DVADJ")\n@ispec("32<[ c(4) d(4) 1111 -- 00 b(4) ---- {6b} ]", mnemonic="DVSTEP")\n@ispec("32<[ c(4) d(4) 1110 -- 00 b(4) ---- {6b} ]", mnemonic="DVSTEP_U")\n@ispec("32<[ c(4) d(4) 1010 -- 00 b(4) ---- {6b} ]", mnemonic="IXMAX")\n@ispec("32<[ c(4) d(4) 1011 -- 00 b(4) ---- {6b} ]", mnemonic="IXMAX_U")\n@ispec("32<[ c(4) d(4) 1000 -- 00 b(4) ---- {6b} ]", mnemonic="IXMIN")\n@ispec("32<[ c(4) d(4) 1001 -- 00 b(4) ---- {6b} ]", mnemonic="IXMIN_U")\ndef tricore_eee(obj, c, d, b):\n if d%2 or b%2 or c%2:\n raise InstructionError(obj)\n src1 = env.E[d]\n src2 = env.E[b]\n dst = env.E[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) disp(4) {1e} ]", mnemonic="JEQ", _off=0)\n@ispec("16<[ ~const4(4) disp(4) {9e} ]", mnemonic="JEQ", _off=16)\n@ispec("16<[ ~const4(4) disp(4) {5e} ]", mnemonic="JNE", _off=0)\n@ispec("16<[ ~const4(4) disp(4) {de} ]", mnemonic="JNE", _off=16)\ndef tricore_jcc(obj, const4, disp, _off):\n dst = env.D[15]\n src1 = 
env.cst(const4.int(-1),32)\n src2 = env.cst(disp,32)+_off\n obj.operands = [dst, src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {3e} ]", mnemonic="JEQ", _off=0)\n@ispec("16<[ b(4) disp(4) {be} ]", mnemonic="JEQ", _off=16)\n@ispec("16<[ b(4) disp(4) {7e} ]", mnemonic="JNE", _off=0)\n@ispec("16<[ b(4) disp(4) {fe} ]", mnemonic="JNE", _off=16)\ndef tricore_jcc(obj, b, disp, _off):\n dst = env.D[15]\n src1 = env.D[b]\n src2 = env.cst(disp,32)+_off\n obj.operands = [dst, src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {ce} ]", mnemonic="JGEZ")\n@ispec("16<[ b(4) disp(4) {4e} ]", mnemonic="JGTZ")\n@ispec("16<[ b(4) disp(4) {8e} ]", mnemonic="JLEZ")\n@ispec("16<[ b(4) disp(4) {0e} ]", mnemonic="JLTZ")\n@ispec("16<[ b(4) disp(4) {f6} ]", mnemonic="JNZ")\n@ispec("16<[ b(4) disp(4) {76} ]", mnemonic="JZ")\ndef tricore_jcc(obj, b, disp):\n src1 = env.D[b]\n src2 = env.cst(disp,32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {df} ]", mnemonic="JEQ")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {df} ]", mnemonic="JNE")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {ff} ]", mnemonic="JGE")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {ff} ]", mnemonic="JGE_U")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {bf} ]", mnemonic="JLT")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {bf} ]", mnemonic="JLT_U")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {9f} ]", mnemonic="JNED")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {9f} ]", mnemonic="JNEI")\ndef tricore_jcc(obj, disp, const, a):\n src1 = env.D[a]\n src2 = env.cst(const,4)\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {5f} ]", mnemonic="JEQ")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {5f} ]", mnemonic="JNE")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {7f} ]", mnemonic="JGE")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {7f} ]", mnemonic="JGE_U")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {3f} ]", mnemonic="JLT")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {3f} ]", mnemonic="JLT_U")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {1f} ]", mnemonic="JNED")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {1f} ]", mnemonic="JNEI")\ndef tricore_jcc(obj, disp, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {7d} ]", mnemonic="JEQ_A")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {7d} ]", mnemonic="JNE_A")\ndef tricore_jcc(obj, disp, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 1 ~disp(15) ---- a(4) {bd} ]", mnemonic="JNZ_A")\n@ispec("32<[ 0 ~disp(15) ---- a(4) {bd} ]", mnemonic="JZ_A")\ndef tricore_jcc(obj, disp, a):\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) ---- {fd} ]", mnemonic="LOOP")\n@ispec("32<[ 1 ~disp(15) b(4) ---- {fd} ]", mnemonic="LOOPU")\ndef tricore_jcc(obj, disp, b):\n src1 = env.A[b]\n src2 = env.cst(disp.int(-1)*2,32)\n obj.operands = [src1, src2]\n if obj.mnemonic=="LOOPU":\n obj.operands = [src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {7c} ]", mnemonic="JNZ_A")\n@ispec("16<[ b(4) disp(4) {bc} ]", mnemonic="JZ_A")\ndef tricore_jcc(obj, b, disp):\n src1 = env.A[b]\n src2 = env.cst(disp,32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) #disp(4) {fc} ]", mnemonic="LOOP")\ndef tricore_jcc(obj, b, 
disp):\n src1 = env.A[b]\n src2 = env.cst(int(("1"*27)+disp+"0",2),32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ 0000 a(4) {dc} ]", mnemonic="JI")\ndef tricore_ji(obj, a):\n src = env.A[a]\n obj.operands = [src]\n obj.type = type_control_flow\n@ispec("16<[ 0000 a(4) {46} ]", mnemonic="NOT")\n@ispec("16<[ 0101 a(4) {32} ]", mnemonic="RSUB")\n@ispec("16<[ 0000 a(4) {32} ]", mnemonic="SAT_B")\n@ispec("16<[ 0001 a(4) {32} ]", mnemonic="SAT_BU")\n@ispec("16<[ 0010 a(4) {32} ]", mnemonic="SAT_H")\n@ispec("16<[ 0011 a(4) {32} ]", mnemonic="SAT_HU")\ndef tricore_a(obj, a):\n src = env.D[a]\n obj.operands = [src]\n obj.type = type_data_processing\n@ispec("16<[ n(4) disp(4) {ae} ]", mnemonic="JNZ_T")\n@ispec("16<[ n(4) disp(4) {2e} ]", mnemonic="JZ_T")\ndef tricore_ji(obj, n, disp):\n obj.operands = [env.D[15][n:n+1], env.cst(disp,32)]\n obj.type = type_control_flow\n@ispec("32<[ 1 ~disp(15) n(4) a(4) h 1101111 ]", mnemonic="JNZ_T")\n@ispec("32<[ 0 ~disp(15) n(4) a(4) h 1101111 ]", mnemonic="JZ_T")\ndef tricore_jcc(obj, disp, n, a, h):\n i = n+(h<<4)\n src = env.D[a][i:i+1]\n obj.operands = [src, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_A", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_B", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_BU", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_D", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_DA", mode="Absolute")\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_H", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_HU", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {45} ]", mnemonic="LD_Q", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {c5} ]", mnemonic="LEA", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n dst = env.D[a]\n if obj.mnemonic in ("LD_A", "LEA") : dst = env.A[a]\n if obj.mnemonic in ("LD_D","LDMST") : dst = env.E[a]\n if obj.mnemonic=="LD_DA": dst = env.P[a]\n src = off1//off2//off3\n obj.operands = [dst, composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {c5} ]", mnemonic="LHA", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n dst = env.A[a]\n src = off1//off2//off3//off4\n obj.operands = [dst, composer([env.cst(0,14),env.cst(src.int(),18)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_A", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {25} ]", mnemonic="ST_B", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_D", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_DA", mode="Absolute")\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {25} ]", mnemonic="ST_H", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {65} ]", mnemonic="ST_Q", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_W", 
mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {e5} ]", mnemonic="SWAP_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {e5} ]", mnemonic="LDMST", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4, a):\n src = env.D[a]\n if obj.mnemonic in ("ST_A",) : src = env.A[a]\n if obj.mnemonic in ("ST_D","LDMST") : src = env.E[a]\n if obj.mnemonic=="ST_DA": src = env.P[a]\n addr = off1//off2//off3\n obj.operands = [composer([env.cst(addr.int(),28),env.cst(off4,4)]), src]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) b bpos(3) {d5} ]", mnemonic="ST_T", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4, b, bpos):\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)]), env.cst(bpos,3), env.cst(b,1)]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) ---- {15} ]", mnemonic="STLCX", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4):\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {15} ]", mnemonic="LDLCX", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {15} ]", mnemonic="LDUCX", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n src = off1//off2//off3\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_A", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_A", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_B", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_B", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_BU", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_BU", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_D", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_D", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Short-offset")\n@ispec("32<[ ~off2(4) 
00 0111 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_DA", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_DA", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_H", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_H", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0011 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_HU", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0011 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_HU", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_Q", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_Q", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="LEA", mode="Short-offset")\ndef tricore_ld(obj, off2, off1, b, a):\n dst = env.D[a]\n if obj.mnemonic=="LD_A" : dst = env.A[a]\n elif obj.mnemonic=="LEA" : dst = env.A[a]\n elif obj.mnemonic=="LD_D" : dst = env.E[a]\n elif obj.mnemonic=="LDMST" : dst = env.E[a]\n elif obj.mnemonic=="LD_DA" : dst = env.P[a]\n obj.b = b\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [dst, src1, src2]\n if obj.mode == "Bit-Reverse":\n obj.operands.pop()\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_A", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_A", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_B", 
mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_B", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_D", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_D", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_DA", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_DA", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_H", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_H", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_Q", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_Q", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {69} ]", mnemonic="LDMST", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {69} ]", mnemonic="LDMST", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Pre-increment")\ndef tricore_st(obj, off2, off1, b, a):\n dst = env.D[a]\n if obj.mnemonic=="ST_A" : dst = env.A[a]\n elif obj.mnemonic=="ST_D" : dst = env.E[a]\n elif obj.mnemonic=="ST_DA" : dst = env.P[a]\n elif obj.mnemonic=="LDMST" : dst = env.E[a]\n obj.b = b\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n 
obj.operands = [src1, src2, dst]\n if obj.mode == "Bit-Reverse":\n obj.operands.pop()\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {69} ]", mnemonic="SWAP_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {69} ]", mnemonic="SWAP_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Pre-increment")\ndef tricore_ld(obj, off2, off1, b, a):\n dst = env.D[a]\n src1 = env.P[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [src1, src2, dst]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) ---- {49} ]", mnemonic="LDLCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) ---- {49} ]", mnemonic="LDUCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) ---- {49} ]", mnemonic="STLCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) ---- {49} ]", mnemonic="STUCX", mode="Short-offset")\ndef tricore_ld(obj, off2, off1, b):\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {99} ]", mnemonic="LD_A", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {79} ]", mnemonic="LD_B", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {39} ]", mnemonic="LD_BU", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {b9} ]", mnemonic="LD_HU", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {19} ]", mnemonic="LD_W", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {d9} ]", mnemonic="LEA", mode="Long-offset")\ndef tricore_ld(obj, off2, off3, off1, b, a):\n dst = env.D[a]\n\n</context>\n\n假设一个实际的场景,我们需要采用这段代码来实现工作流程,可以给我解释一下这段代码的作用吗? \n\n\n\n
|
105 |
+
```
|
@@ -0,0 +1,103 @@
import argparse
from time import perf_counter

import sys
sys.path.append('../')

from lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B


def get_args():
    parser = argparse.ArgumentParser(description="Faster Baichuan Demo")

    parser.add_argument('--model-path', type=str, required=True,
                        help='Model Path, include config.ini and tokenizer files')
    # parser.add_argument('--tokenizer-path', type=str, default='/group/30063/users/vanewu/LocalModels/ChatGLM6B-Torch/chatglm-6b')
    parser.add_argument('--tokenizer-path', type=str, default=None)

    parser.add_argument(
        '--data-type', type=str, metavar='TYPE', default='fp16',
        choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
        help='The data type to inference. If None, the data type follows the '
             'checkpoint data type.')

    parser.add_argument(
        '--memopt_mode', type=int, default=0, choices=[0, 1],
        help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
             ' 0: FP16 mode'
             ' 1: Use MEMOPT mode')

    parser.add_argument(
        '--quant-type', type=str, metavar='TYPE', default='int8',
        choices=['int4', 'int8'],
        help='The data type of quantization. Only used in MEMOPT.')

    parser.add_argument("--prompt", type=str, required=False)
    parser.add_argument("--max-output-length", type=int, default=512)
    parser.add_argument("--warmups", type=int, default=10)
    parser.add_argument("--avgnums", type=int, default=10)
    args = parser.parse_args()

    print('\n=================== Arguments ===================')
    for k, v in vars(args).items():
        print(f' - {k.ljust(25, ".")}: {v}')
    print('=================================================')

    return args


def main():
    args = get_args()

    # model = lyraBaichuan7B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode, args.quant_type)
    model = lyraBaichuan13B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode, args.quant_type)

    # prompt_template = "<reserved_106>{}\n<reserved_107>" # baichuan chat
    prompt_template = "{}" # baichuan

    prompt = prompt_template.format(args.prompt)

    test_batch_size = [1, 2, 4] # 8, 16, 32, 64
    print("test_batch_size: ", test_batch_size)

    for i, bs in enumerate(test_batch_size):
        prompts = [prompt, ]*bs

        # warmup gpu
        for _ in range(args.warmups):
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.1, do_sample=False)

        start = perf_counter()
        for _ in range(args.avgnums):
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
        end = perf_counter()
        cost = (end - start) / args.avgnums

        input_output_texts = [prompt+' ' + gtext for prompt,
                              gtext in zip(prompts, output_texts)]
        tokens = 0
        input_tokens = len(model.tokenizer.encode(prompt))
        words = 0
        for text in input_output_texts:
            tokens += len(model.tokenizer.encode(text))
            words += len(text)
        print(
            f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Input tokens: {input_tokens}. Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
        )
        print(
            f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
        )

        if i == 0:
            for k in range(bs):
                print(
                    f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
                if k>2:
                    break

if __name__ == "__main__":
    main()
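Stripped of the benchmarking loop, the runtime interaction that this batch demo exercises is small. The following sketch is not part of the commit: the model directory is a placeholder, the constructor is called positionally exactly as above (model path, tokenizer path, data type, memopt mode, quant type), and it assumes the matching lyraOp shared library is installed and that the script is run from the examples/ directory so that `sys.path.append('../')` finds the package.

```python
import sys
sys.path.append('../')  # same layout assumption as the demos: run from examples/

from lyra_baichuan import lyraBaichuan13B

# Positional arguments mirror the demo above:
#   model path (placeholder), tokenizer path (None -> use model path),
#   data type, memopt_mode (0 = FP16, 1 = MEMOPT), quant type used by MEMOPT.
model = lyraBaichuan13B('path/to/converted/Baichuan2_13B', None, 'fp16', 0, 'int8')

outputs = model.generate(
    ["Write one sentence about efficient LLM inference."],
    output_length=64,
    top_k=30, top_p=0.85, temperature=1.0,
    repetition_penalty=1.0, do_sample=False)
print(outputs[0])
```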
@@ -0,0 +1,101 @@
import argparse
from time import perf_counter

import sys
sys.path.append('../')

from lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B


def print_list(lines):
    # clear the terminal output
    print("\033c", end="")

    # print the list of strings line by line
    print('\n'.join(lines))

def get_args():
    parser = argparse.ArgumentParser(description="Faster Baichuan Demo")

    parser.add_argument('--model-path', type=str, required=True,
                        help='Model Path, include config.ini and tokenizer files')
    parser.add_argument('--tokenizer-path', type=str, default=None)

    parser.add_argument(
        '--data-type', type=str, metavar='TYPE', default='fp16',
        choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
        help='The data type to inference. If None, the data type follows the '
             'checkpoint data type.')

    parser.add_argument(
        '--memopt_mode', type=int, default=0, choices=[0, 1],
        help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
             ' 0: FP16 mode'
             ' 1: Use MEMOPT mode')

    parser.add_argument("--prompt", type=str, required=False)
    parser.add_argument("--max-output-length", type=int, default=512)
    parser.add_argument("--warmups", type=int, default=10)
    parser.add_argument("--avgnums", type=int, default=10)
    args = parser.parse_args()

    print('\n=================== Arguments ===================')
    for k, v in vars(args).items():
        print(f' - {k.ljust(25, ".")}: {v}')
    print('=================================================')

    return args


def main():
    args = get_args()

    # model = lyraBaichuan7B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode)
    model = lyraBaichuan13B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode)

    # prompt_template = "<reserved_106>{}\n<reserved_107>" # baichuan chat
    prompt_template = "{}" # baichuan

    prompt = prompt_template.format(args.prompt)

    test_batch_size = [1, 2, 4] # 8, 16, 32, 64
    print("test_batch_size: ", test_batch_size)

    for i, bs in enumerate(test_batch_size):
        prompts = [prompt, ]*bs

        # warmup gpu
        for _ in range(args.warmups):
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.1, do_sample=False)

        start = perf_counter()
        for _ in range(args.avgnums):
            for finish, output_texts in model.stream_generate(prompts,
                                                              output_length=args.max_output_length,
                                                              top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False):
                print_list(output_texts)

                if finish:
                    break
        end = perf_counter()
        cost = (end - start) / args.avgnums

        input_output_texts = [prompt+' ' + gtext for prompt,
                              gtext in zip(prompts, output_texts)]
        tokens = 0
        input_tokens = len(model.tokenizer.encode(prompt))
        words = 0
        for text in input_output_texts:
            tokens += len(model.tokenizer.encode(text))
            words += len(text)
        print(
            f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Input tokens: {input_tokens}. Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
        )
        print(
            f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
        )

if __name__ == "__main__":
    main()
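The streaming demo above redraws the whole terminal on every step because `stream_generate` appears to yield a `(finish, output_texts)` pair in which each entry of `output_texts` is the cumulative decode for one prompt. Under that assumption, a caller that wants incremental chunks instead of full redraws can remember the previously seen text and emit only the suffix; the helper below is a sketch built on nothing but the tuple shape used in the demo, and the sampling keyword arguments simply mirror the ones above.

```python
def stream_deltas(model, prompt, max_new_tokens=256):
    """Yield only the newly generated characters for a single prompt.

    Assumes model.stream_generate yields (finish, texts) where texts[i] is the
    cumulative output for prompt i, as the redraw-based demo above suggests.
    """
    seen = ""
    for finish, texts in model.stream_generate(
            [prompt], output_length=max_new_tokens,
            top_k=30, top_p=0.85, temperature=1.0,
            repetition_penalty=1.0, do_sample=False):
        current = texts[0]
        delta = current[len(seen):]
        if delta:
            seen = current
            yield delta
        if finish:
            break

# Usage (model constructed as in the demos):
#   for chunk in stream_deltas(model, "Hello"):
#       print(chunk, end="", flush=True)
```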
@@ -0,0 +1,116 @@
import argparse
import json
import random
import numpy as np

from time import perf_counter

import sys
sys.path.append('../')
from lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B


def get_args():
    parser = argparse.ArgumentParser(description="Faster Baichuan Demo")

    parser.add_argument('--model-path', type=str, required=True,
                        help='Model Path, include config.ini and tokenizer files')
    # parser.add_argument('--tokenizer-path', type=str, default='/group/30063/users/vanewu/LocalModels/ChatGLM6B-Torch/chatglm-6b')
    parser.add_argument('--tokenizer-path', type=str, default=None)

    parser.add_argument(
        '--data-type', type=str, metavar='TYPE', default='fp16',
        choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
        help='The data type to inference. If None, the data type follows the '
             'checkpoint data type.')

    parser.add_argument(
        '--memopt_mode', type=int, default=0, choices=[0, 1],
        help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
             ' 0: FP16 mode'
             ' 1: Use MEMOPT mode')

    parser.add_argument("--prompt_filepath", type=str, required=True)
    parser.add_argument("--max-output-length", type=int, default=512)
    parser.add_argument("--warmups", type=int, default=10)
    parser.add_argument("--avgnums", type=int, default=10)
    args = parser.parse_args()

    print('\n=================== Arguments ===================')
    for k, v in vars(args).items():
        print(f' - {k.ljust(25, ".")}: {v}')
    print('=================================================')

    return args


def main():
    args = get_args()

    # model = lyraBaichuan7B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode)
    model = lyraBaichuan13B(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode)

    with open(args.prompt_filepath, "rb") as f:
        input_datas = json.loads(f.read())

    used_input_data = input_datas[0]

    # prompt_template = "<reserved_106>{}\n<reserved_107>" # baichuan chat
    prompt_template = "{}" # baichuan

    test_batch_size = [1, 2, 4,] # 8, 16, 32, 64
    print("test_batch_size: ", test_batch_size)

    for i, bs in enumerate(test_batch_size):
        all_use_prompts = []
        all_output_texts = []

        # warmup gpu
        for _ in range(args.warmups):
            # sample bs argument tuples at random and format them into the prompt template
            prompts = [prompt_template.format( used_input_data['prompts'].format(*x) ) for x in random.choices(used_input_data['contents'], k=bs)]
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)

        all_cost_s = 0.0

        for _ in range(args.avgnums):
            prompts = [prompt_template.format( used_input_data['prompts'].format(*x) ) for x in random.choices(used_input_data['contents'], k=bs)]
            all_use_prompts.extend(prompts)

            start = perf_counter()
            output_texts = model.generate(
                prompts, output_length=args.max_output_length,
                top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
            all_cost_s += perf_counter() - start

            all_output_texts.extend(output_texts)

        cost = all_cost_s / args.avgnums

        input_output_texts = [prompt + ' ' + gtext for prompt,gtext in zip(all_use_prompts, all_output_texts)]

        tokens = 0
        avg_input_tokens = np.mean([len(model.tokenizer.encode(prompt)) for prompt in all_use_prompts])

        words = 0
        for text in input_output_texts:
            tokens += len(model.tokenizer.encode(text))
            words += len(text)
        print(
            f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Avg Input tokens: {avg_input_tokens}. Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
        )
        print(
            f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
        )

        if i == 0:
            for k in range(bs):
                print(
                    f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
                if k>2:
                    break

if __name__ == "__main__":
    main()
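This random-batch demo expects a different input layout than a flat list of prompt strings: the file passed via `--prompt_filepath` is loaded as JSON, its first element must carry a `prompts` format string and a `contents` list of argument tuples, and each batch is built by formatting randomly sampled tuples into that template. The snippet below writes a minimal file in that shape; the output file name and the English placeholder texts are made up for illustration, only the key names come from the script.

```python
import json

# Minimal example of the structure the script reads:
# input_datas[0]['prompts'] is a format template, and each entry of
# input_datas[0]['contents'] supplies the positional arguments for it.
example = [
    {
        "prompts": "Context: {}\nQuestion: {}\nAnswer:",
        "contents": [
            ["First reference passage ...", "First question ..."],
            ["Second reference passage ...", "Second question ..."],
        ],
    }
]

with open("random_prompts.json", "w", encoding="utf-8") as f:
    json.dump(example, f, ensure_ascii=False, indent=2)
```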
@@ -0,0 +1,6 @@
[
    "歌曲名:《幸福万年长》;歌手名:汤灿;歌曲描述:汤灿的幸福万年长创作背景:2001年,汤灿决定推出一首能够贴近听众和潮流的民歌。为此,她邀请了创作过歌曲《为你》《快乐老家》的音乐人浮克合作,邀其担任该曲的制作工作。虽然浮克此前一直从事流行歌曲的工作,但他其实也是一位衷情民歌风格的音乐人,于是两人一拍即合,合作了该曲。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
    "歌曲名:《小丑面具》;歌手名:韩庚;歌曲描述:韩庚的小丑面具的歌曲鉴赏:韩庚在这首歌化身为“小丑”,带上面具调侃这社会上的表面功夫,用幽默又神经质的方式批判愈形冷酷的人心。在这首独特的电子舞曲当中,韩庚尝试了各种不同的发声方式,冷笑、哭喊、啜泣……甚至用声乐融合鬼魅的方法演唱,让人不禁陷入他建构的虚幻氛围而随之起舞。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
    "《Bela Lugosi's Dead 》是英国后朋克乐队Bauhaus的首张单曲,于 1979 年 8 月 6 日在Small Wonder厂牌上发行。[4]它通常被认为是第一张哥特式摇滚唱片。\n1979 年 1 月 26 日,“Bela Lugosi's Dead”在威灵伯勒的贝克录音室进行了六个小时的“录音室现场”录制。这是他们在乐队成立六周后一起录制的第一首歌曲。[6]所有四位乐队成员都被认为是这首歌的作者:主唱彼得·墨菲、吉他手丹尼尔·阿什、鼓手凯文·哈斯金斯和贝斯手大卫·J (大卫·哈斯金斯)。David J 声称这首歌的歌词是他写的。[5] “Bela Lugosi's Dead”的替代版本还包括他们下一首单曲“ Dark Entries ”的早期演示录音的一部分。\n\n在同一场会议中还录制了另外四首歌曲:“Boys”;“咬我的臀部”;“Some Faces”和斯卡雷鬼曲调“Harry”,这是关于Blondie主唱Deborah Harry的。[7] [8]关于这次会议,凯文·哈斯金斯 (Kevin Haskins) 说,“那里有力量流行音乐,还有斯卡。我们试图找到我们的声音。” [9]\n\n在那次录制期间录制的歌曲中(除了“Bela Lugosi's Dead”),只有“Harry”获得了官方发行;1982年作为单曲“ Kick in the Eye ”的B面。1979 年晚些时候在 Beck Studios 录制的《Boys》版本被用作原版单曲《Bela Lugosi's Dead》的 B 面。[10]其余曲目,包括“Boys”的原始录音,一直未发行,直到 2018 年The Bela Session以黑胶唱片和CD 形式发行,并可供乐队数字下载。[11]在额外的曲目中,《经典摇滚》杂志写道:“其余的材料发现乐队正在摸索方向,甚至触及了斯卡。”\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
    "歌曲名:《仓颉》;歌手名:五月天;歌曲描述:五月天的仓颉的歌曲鉴赏:五月天 仓颉(2张)《仓颉》是一首写在文明即将消失前的情诗,陈信宏的词写得颇有味道。《仓颉》这样淡淡的歌曲,或许不够大气,但是陈信宏真诚的演唱足以令人感动,而且《仓颉》的歌词也写得很有哲理。这首歌曲朗朗上口的旋律和诗意的文字使得它很适合在KTV演唱。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:"
]
@@ -0,0 +1,83 @@
from typing import List
from queue import Queue

import torch


def build_chat_input(model, tokenizer, messages: List[dict], max_new_tokens: int=0):
    def _parse_messages(messages, split_role="user"):
        system, rounds = "", []
        round = []
        for i, message in enumerate(messages):
            if message["role"] == "system":
                assert i == 0
                system = message["content"]
                continue
            if message["role"] == split_role and round:
                rounds.append(round)
                round = []
            round.append(message)
        if round:
            rounds.append(round)
        return system, rounds

    max_new_tokens = max_new_tokens or model.generation_config.max_new_tokens
    max_input_tokens = model.config.model_max_length - max_new_tokens
    system, rounds = _parse_messages(messages, split_role="user")
    system_tokens = tokenizer.encode(system)
    max_history_tokens = max_input_tokens - len(system_tokens)

    history_tokens = []
    for round in rounds[::-1]:
        round_tokens = []
        for message in round:
            if message["role"] == "user":
                round_tokens.append(model.generation_config.user_token_id)
            else:
                round_tokens.append(model.generation_config.assistant_token_id)
            round_tokens.extend(tokenizer.encode(message["content"]))
        if len(history_tokens) == 0 or len(history_tokens) + len(round_tokens) <= max_history_tokens:
            history_tokens = round_tokens + history_tokens # concat left
            if len(history_tokens) < max_history_tokens:
                continue
        break

    input_tokens = system_tokens + history_tokens
    if messages[-1]["role"] != "assistant":
        input_tokens.append(model.generation_config.assistant_token_id)
    input_tokens = input_tokens[-max_input_tokens:] # truncate left
    return torch.LongTensor([input_tokens]).to(model.device)


class TextIterStreamer:
    def __init__(self, tokenizer, skip_prompt=False, skip_special_tokens=False):
        self.tokenizer = tokenizer
        self.skip_prompt = skip_prompt
        self.skip_special_tokens = skip_special_tokens
        self.tokens = []
        self.text_queue = Queue()
        self.next_tokens_are_prompt = True

    def put(self, value):
        if self.skip_prompt and self.next_tokens_are_prompt:
            self.next_tokens_are_prompt = False
        else:
            if len(value.shape) > 1:
                value = value[0]
            self.tokens.extend(value.tolist())
            self.text_queue.put(
                self.tokenizer.decode(self.tokens, skip_special_tokens=self.skip_special_tokens))

    def end(self):
        self.text_queue.put(None)

    def __iter__(self):
        return self

    def __next__(self):
        value = self.text_queue.get()
        if value is None:
            raise StopIteration()
        else:
            return value
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from .lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import dataclasses
|
2 |
+
from typing import Optional
|
3 |
+
|
4 |
+
|
5 |
+
@dataclasses.dataclass
|
6 |
+
class LyraBaichuanParam:
|
7 |
+
num_heads: int = 40
|
8 |
+
size_per_head: int = 128
|
9 |
+
inter_size: int = 13824
|
10 |
+
num_layers: int = 40
|
11 |
+
vocab_size: int = 39424
|
12 |
+
start_id: Optional[int] = 1
|
13 |
+
end_id: Optional[int] = 2
|
14 |
+
tensor_para_size: int = 1
|
15 |
+
pipeline_para_size: int = 1
|
16 |
+
remove_padding: bool = True
|
17 |
+
shared_contexts_ratio: float = 1.0
|
18 |
+
layernorm_eps: float = 1e-6
|
19 |
+
weights_data_type: str = "fp16"
|
20 |
+
rotary_embedding: int = 128
|
21 |
+
use_gptj_residual: bool = False
|
22 |
+
|
23 |
+
def __post_init__(self):
|
24 |
+
if not 0.0 <= self.shared_contexts_ratio <= 1.0:
|
25 |
+
raise ValueError(
|
26 |
+
f'Got an invalid value of shared_context_ratio '
|
27 |
+
f'{self.shared_contexts_ratio} - range: [0.0, 1.0]')
|
28 |
+
|
29 |
+
def asdict(self):
|
30 |
+
return dataclasses.asdict(self)
|
31 |
+
|
32 |
+
|
33 |
+
LYRA_BAICHUAN_PARAM = LyraBaichuanParam()
|
34 |
+
LIB_SO_PATH = '/usr/lib/ftlib/libth_lyrallms.so'
|
@@ -0,0 +1,391 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import configparser
|
4 |
+
import pathlib
|
5 |
+
import typing
|
6 |
+
import os
|
7 |
+
|
8 |
+
import torch
|
9 |
+
import transformers
|
10 |
+
from torch.nn.utils.rnn import pad_sequence
|
11 |
+
|
12 |
+
from .config import LYRA_BAICHUAN_PARAM, LIB_SO_PATH
|
13 |
+
from .model import BaichuanModel
|
14 |
+
from .tokenization_baichuan import BaichuanTokenizer
|
15 |
+
|
16 |
+
class lyraBaichuan7B:
|
17 |
+
def __init__(self, model_path, tokenizer_path=None, dtype='fp16', memopt_mode=0, quant_dtype="int4") -> None:
|
18 |
+
self.model_path = model_path
|
19 |
+
self.tokenizer_path = tokenizer_path
|
20 |
+
self.dtype = dtype
|
21 |
+
|
22 |
+
self.memopt_mode = memopt_mode
|
23 |
+
self.quant_data_type = quant_dtype
|
24 |
+
|
25 |
+
self.model, self.tokenizer = self.load_model_and_tokenizer()
|
26 |
+
print("Got model and tokenizer")
|
27 |
+
|
28 |
+
def load_model_and_tokenizer(self):
|
29 |
+
if self.tokenizer_path is None:
|
30 |
+
tokenizer_path = self.model_path
|
31 |
+
else:
|
32 |
+
tokenizer_path = self.tokenizer_path
|
33 |
+
|
34 |
+
print(f'Loading tokenizer from {tokenizer_path}')
|
35 |
+
tokenizer = BaichuanTokenizer.from_pretrained(tokenizer_path)
|
36 |
+
|
37 |
+
checkpoint_path = pathlib.Path(self.model_path)
|
38 |
+
config_path = checkpoint_path / 'config.ini'
|
39 |
+
|
40 |
+
if config_path.exists():
|
41 |
+
# Read model params from config.
|
42 |
+
cfg = configparser.ConfigParser()
|
43 |
+
cfg.read(config_path)
|
44 |
+
model_name = 'baichuan'
|
45 |
+
inference_data_type = self.dtype
|
46 |
+
if inference_data_type == None:
|
47 |
+
inference_data_type = cfg.get(model_name, "weight_data_type")
|
48 |
+
model_args = dict(
|
49 |
+
head_num=cfg.getint(model_name, 'head_num'),
|
50 |
+
size_per_head=cfg.getint(model_name, "size_per_head"),
|
51 |
+
inter_size=cfg.getint(model_name, 'inter_size'),
|
52 |
+
layer_num=cfg.getint(model_name, "num_layer"),
|
53 |
+
rotary_embedding_dim=cfg.getint(model_name, 'rotary_embedding'),
|
54 |
+
layernorm_eps=cfg.getfloat(model_name, 'layernorm_eps'),
|
55 |
+
vocab_size=cfg.getint(model_name, "vocab_size"),
|
56 |
+
start_id=cfg.getint(model_name, "start_id"),
|
57 |
+
end_id=cfg.getint(model_name, "end_id"),
|
58 |
+
weights_data_type=cfg.get(model_name, "weight_data_type"),
|
59 |
+
tensor_para_size=cfg.getint(model_name, "tensor_para_size"),
|
60 |
+
inference_data_type=inference_data_type)
|
61 |
+
else:
|
62 |
+
inference_data_type = self.dtype
|
63 |
+
if inference_data_type == None:
|
64 |
+
inference_data_type = LYRA_BAICHUAN_PARAM.weights_data_type
|
65 |
+
model_args = dict(head_num=LYRA_BAICHUAN_PARAM.num_heads,
|
66 |
+
size_per_head=LYRA_BAICHUAN_PARAM.size_per_head,
|
67 |
+
inter_size=LYRA_BAICHUAN_PARAM.inter_size,
|
68 |
+
layer_num=LYRA_BAICHUAN_PARAM.num_layers,
|
69 |
+
rotary_embedding_dim=LYRA_BAICHUAN_PARAM.rotary_embedding,
|
70 |
+
layernorm_eps=LYRA_BAICHUAN_PARAM.layernorm_eps,
|
71 |
+
vocab_size=LYRA_BAICHUAN_PARAM.vocab_size,
|
72 |
+
start_id=LYRA_BAICHUAN_PARAM.start_id or tokenizer.bos_token_id,
|
73 |
+
end_id=LYRA_BAICHUAN_PARAM.end_id or tokenizer.eos_token_id,
|
74 |
+
weights_data_type=LYRA_BAICHUAN_PARAM.weights_data_type,
|
75 |
+
tensor_para_size=LYRA_BAICHUAN_PARAM.tensor_para_size,
|
76 |
+
inference_data_type=inference_data_type)
|
77 |
+
|
78 |
+
# update common parameters
|
79 |
+
model_args.update(dict(
|
80 |
+
lib_path=LIB_SO_PATH,
|
81 |
+
model_path=os.path.join(self.model_path, "1-gpu-fp16.bin"),
|
82 |
+
max_seq_len=0, # for position seq embedding
|
83 |
+
pipeline_para_size=LYRA_BAICHUAN_PARAM.pipeline_para_size,
|
84 |
+
use_gptj_residual=LYRA_BAICHUAN_PARAM.use_gptj_residual,
|
85 |
+
memopt_mode=self.memopt_mode,
|
86 |
+
quant_data_type=self.quant_data_type
|
87 |
+
))
|
88 |
+
|
89 |
+
print('[FT][INFO] Load Our FT Highly Optimized Baichuan-7B model')
|
90 |
+
for k, v in model_args.items():
|
91 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
92 |
+
|
93 |
+
# Check sanity and consistency between the model and tokenizer.
|
94 |
+
checklist = ['head_num', 'size_per_head', 'vocab_size', 'layer_num',
|
95 |
+
'tensor_para_size', 'tensor_para_size', 'weights_data_type']
|
96 |
+
if None in [model_args[k] for k in checklist]:
|
97 |
+
none_params = [p for p in checklist if model_args[p] is None]
|
98 |
+
print(f'[FT][WARNING] Found None parameters {none_params}. They must '
|
99 |
+
f'be provided either by config file or CLI arguments.')
|
100 |
+
if model_args['start_id'] != tokenizer.bos_token_id:
|
101 |
+
print('[FT][WARNING] Given start_id is not matched with the bos token '
|
102 |
+
'id of the pretrained tokenizer.')
|
103 |
+
if model_args['end_id'] not in (tokenizer.pad_token_id, tokenizer.eos_token_id):
|
104 |
+
print('[FT][WARNING] Given end_id is not matched with neither pad '
|
105 |
+
'token id nor eos token id of the pretrained tokenizer.')
|
106 |
+
|
107 |
+
print(f'Loading model from {self.model_path}')
|
108 |
+
model = BaichuanModel(**model_args)
|
109 |
+
return model, tokenizer
|
110 |
+
|
111 |
+
def generate(self, prompts: typing.List[str] | str,
|
112 |
+
output_length: int = 512,
|
113 |
+
beam_width: int = 1,
|
114 |
+
top_k: typing.Optional[torch.IntTensor] = 1,
|
115 |
+
top_p: typing.Optional[torch.FloatTensor] = 1.0,
|
116 |
+
beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
|
117 |
+
temperature: typing.Optional[torch.FloatTensor] = 1.0,
|
118 |
+
len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
|
119 |
+
repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
|
120 |
+
presence_penalty: typing.Optional[torch.FloatTensor] = None,
|
121 |
+
min_length: typing.Optional[torch.IntTensor] = None,
|
122 |
+
bad_words_list: typing.Optional[torch.IntTensor] = None,
|
123 |
+
do_sample: bool = False,
|
124 |
+
return_output_length: bool = False,
|
125 |
+
return_cum_log_probs: int = 0):
|
126 |
+
#
|
127 |
+
if isinstance(prompts, str):
|
128 |
+
prompts = [prompts, ]
|
129 |
+
|
130 |
+
inputs = prompts
|
131 |
+
|
132 |
+
batch_size = len(inputs)
|
133 |
+
ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
|
134 |
+
ones_float = torch.ones(size=[batch_size], dtype=torch.float32)
|
135 |
+
|
136 |
+
# we must encode the raw prompt text one by one in order to compute the length of the original text.
|
137 |
+
input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
|
138 |
+
input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
|
139 |
+
# after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
|
140 |
+
input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)
|
141 |
+
|
142 |
+
random_seed = None
|
143 |
+
if do_sample:
|
144 |
+
random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)
|
145 |
+
|
146 |
+
outputs = self.model(start_ids=input_token_ids,
|
147 |
+
start_lengths=input_lengths,
|
148 |
+
output_len=output_length,
|
149 |
+
beam_width=beam_width,
|
150 |
+
top_k=top_k * ones_int,
|
151 |
+
top_p=top_p * ones_float,
|
152 |
+
beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
|
153 |
+
temperature=temperature * ones_float,
|
154 |
+
len_penalty=len_penalty * ones_float,
|
155 |
+
repetition_penalty=repetition_penalty * ones_float,
|
156 |
+
random_seed=random_seed,
|
157 |
+
return_output_length=return_output_length,
|
158 |
+
return_cum_log_probs=return_cum_log_probs)
|
159 |
+
|
160 |
+
if return_cum_log_probs > 0:
|
161 |
+
outputs = outputs[0] # output_token_ids.
|
162 |
+
|
163 |
+
# Slice the generated token ids of the 1st beam result.
|
164 |
+
# output = input tokens + generated tokens.
|
165 |
+
output_token_ids = [out[0, length:].cpu()
|
166 |
+
for out, length in zip(outputs, input_lengths)]
|
167 |
+
|
168 |
+
output_texts = self.tokenizer.batch_decode(
|
169 |
+
output_token_ids, skip_special_tokens=True)
|
170 |
+
|
171 |
+
return output_texts
|
172 |
+
|
173 |
+
class lyraBaichuan13B:
|
174 |
+
def __init__(self, model_path, tokenizer_path=None, dtype='fp16', memopt_mode=0, quant_dtype="int4") -> None:
|
175 |
+
self.model_path = model_path
|
176 |
+
self.tokenizer_path = tokenizer_path
|
177 |
+
self.dtype = dtype
|
178 |
+
|
179 |
+
self.memopt_mode = memopt_mode
|
180 |
+
self.quant_data_type = quant_dtype
|
181 |
+
|
182 |
+
self.model, self.tokenizer = self.load_model_and_tokenizer()
|
183 |
+
print("Got model and tokenizer")
|
184 |
+
|
185 |
+
def load_model_and_tokenizer(self):
|
186 |
+
if self.tokenizer_path is None:
|
187 |
+
tokenizer_path = self.model_path
|
188 |
+
else:
|
189 |
+
tokenizer_path = self.tokenizer_path
|
190 |
+
|
191 |
+
print(f'Loading tokenizer from {tokenizer_path}')
|
192 |
+
tokenizer = BaichuanTokenizer.from_pretrained(tokenizer_path)
|
193 |
+
|
194 |
+
checkpoint_path = pathlib.Path(self.model_path)
|
195 |
+
config_path = checkpoint_path / 'config.ini'
|
196 |
+
|
197 |
+
if config_path.exists():
|
198 |
+
# Read model params from config.
|
199 |
+
cfg = configparser.ConfigParser()
|
200 |
+
cfg.read(config_path)
|
201 |
+
model_name = 'baichuan'
|
202 |
+
inference_data_type = self.dtype
|
203 |
+
if inference_data_type == None:
|
204 |
+
inference_data_type = cfg.get(model_name, "weight_data_type")
|
205 |
+
model_args = dict(
|
206 |
+
head_num=cfg.getint(model_name, 'head_num'),
|
207 |
+
size_per_head=cfg.getint(model_name, "size_per_head"),
|
208 |
+
inter_size=cfg.getint(model_name, 'inter_size'),
|
209 |
+
layer_num=cfg.getint(model_name, "num_layer"),
|
210 |
+
rotary_embedding_dim=0,
|
211 |
+
layernorm_eps=cfg.getfloat(model_name, 'layernorm_eps'),
|
212 |
+
vocab_size=cfg.getint(model_name, "vocab_size"),
|
213 |
+
start_id=cfg.getint(model_name, "start_id"),
|
214 |
+
end_id=cfg.getint(model_name, "end_id"),
|
215 |
+
weights_data_type=cfg.get(model_name, "weight_data_type"),
|
216 |
+
tensor_para_size=cfg.getint(model_name, "tensor_para_size"),
|
217 |
+
inference_data_type=inference_data_type)
|
218 |
+
else:
|
219 |
+
inference_data_type = self.dtype
|
220 |
+
if inference_data_type == None:
|
221 |
+
inference_data_type = LYRA_BAICHUAN_PARAM.weights_data_type
|
222 |
+
model_args = dict(head_num=LYRA_BAICHUAN_PARAM.num_heads,
|
223 |
+
size_per_head=LYRA_BAICHUAN_PARAM.size_per_head,
|
224 |
+
inter_size=LYRA_BAICHUAN_PARAM.inter_size,
|
225 |
+
layer_num=LYRA_BAICHUAN_PARAM.num_layers,
|
226 |
+
rotary_embedding_dim=0,
|
227 |
+
layernorm_eps=LYRA_BAICHUAN_PARAM.layernorm_eps,
|
228 |
+
vocab_size=LYRA_BAICHUAN_PARAM.vocab_size,
|
229 |
+
start_id=LYRA_BAICHUAN_PARAM.start_id or tokenizer.bos_token_id,
|
230 |
+
end_id=LYRA_BAICHUAN_PARAM.end_id or tokenizer.eos_token_id,
|
231 |
+
weights_data_type=LYRA_BAICHUAN_PARAM.weights_data_type,
|
232 |
+
tensor_para_size=LYRA_BAICHUAN_PARAM.tensor_para_size,
|
233 |
+
inference_data_type=inference_data_type)
|
234 |
+
|
235 |
+
# update common parameters
|
236 |
+
model_args.update(dict(
|
237 |
+
lib_path=LIB_SO_PATH,
|
238 |
+
model_path=os.path.join(self.model_path, "1-gpu-fp16.bin"),
|
239 |
+
max_seq_len=0, # for position seq embedding
|
240 |
+
pipeline_para_size=LYRA_BAICHUAN_PARAM.pipeline_para_size,
|
241 |
+
use_gptj_residual=LYRA_BAICHUAN_PARAM.use_gptj_residual,
|
242 |
+
memopt_mode=self.memopt_mode,
|
243 |
+
quant_data_type=self.quant_data_type
|
244 |
+
))
|
245 |
+
|
246 |
+
print('[FT][INFO] Load Our FT Highly Optimized Baichuan-13B model')
|
247 |
+
for k, v in model_args.items():
|
248 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
249 |
+
|
250 |
+
# Check sanity and consistency between the model and tokenizer.
|
251 |
+
checklist = ['head_num', 'size_per_head', 'vocab_size', 'layer_num',
|
252 |
+
'tensor_para_size', 'tensor_para_size', 'weights_data_type']
|
253 |
+
if None in [model_args[k] for k in checklist]:
|
254 |
+
none_params = [p for p in checklist if model_args[p] is None]
|
255 |
+
print(f'[FT][WARNING] Found None parameters {none_params}. They must '
|
256 |
+
f'be provided either by config file or CLI arguments.')
|
257 |
+
if model_args['start_id'] != tokenizer.bos_token_id:
|
258 |
+
print('[FT][WARNING] Given start_id is not matched with the bos token '
|
259 |
+
'id of the pretrained tokenizer.')
|
260 |
+
if model_args['end_id'] not in (tokenizer.pad_token_id, tokenizer.eos_token_id):
|
261 |
+
print('[FT][WARNING] Given end_id is not matched with neither pad '
|
262 |
+
'token id nor eos token id of the pretrained tokenizer.')
|
263 |
+
|
264 |
+
print(f'Loading model from {self.model_path}')
|
265 |
+
model = BaichuanModel(**model_args)
|
266 |
+
return model, tokenizer
|
267 |
+
|
268 |
+
def generate(self, prompts: typing.List[str] | str,
|
269 |
+
output_length: int = 512,
|
270 |
+
beam_width: int = 1,
|
271 |
+
top_k: typing.Optional[torch.IntTensor] = 1,
|
272 |
+
top_p: typing.Optional[torch.FloatTensor] = 1.0,
|
273 |
+
beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
|
274 |
+
temperature: typing.Optional[torch.FloatTensor] = 1.0,
|
275 |
+
len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
|
276 |
+
repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
|
277 |
+
presence_penalty: typing.Optional[torch.FloatTensor] = None,
|
278 |
+
min_length: typing.Optional[torch.IntTensor] = None,
|
279 |
+
bad_words_list: typing.Optional[torch.IntTensor] = None,
|
280 |
+
do_sample: bool = False,
|
281 |
+
return_output_length: bool = False,
|
282 |
+
return_cum_log_probs: int = 0):
|
283 |
+
#
|
284 |
+
if isinstance(prompts, str):
|
285 |
+
prompts = [prompts, ]
|
286 |
+
|
287 |
+
inputs = prompts
|
288 |
+
|
289 |
+
batch_size = len(inputs)
|
290 |
+
ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
|
291 |
+
ones_float = torch.ones(size=[batch_size], dtype=torch.float32)
|
292 |
+
|
293 |
+
# we must encode the raw prompt text one by one in order to compute the length of the original text.
|
294 |
+
input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
|
295 |
+
input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
|
296 |
+
# after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
|
297 |
+
input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)
|
298 |
+
|
299 |
+
random_seed = None
|
300 |
+
if do_sample:
|
301 |
+
random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)
|
302 |
+
|
303 |
+
outputs = self.model(start_ids=input_token_ids,
|
304 |
+
start_lengths=input_lengths,
|
305 |
+
output_len=output_length,
|
306 |
+
beam_width=beam_width,
|
307 |
+
top_k=top_k * ones_int,
|
308 |
+
top_p=top_p * ones_float,
|
309 |
+
beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
|
310 |
+
temperature=temperature * ones_float,
|
311 |
+
len_penalty=len_penalty * ones_float,
|
312 |
+
repetition_penalty=repetition_penalty * ones_float,
|
313 |
+
random_seed=random_seed,
|
314 |
+
return_output_length=return_output_length,
|
315 |
+
return_cum_log_probs=return_cum_log_probs)
|
316 |
+
|
317 |
+
if return_cum_log_probs > 0:
|
318 |
+
outputs = outputs[0] # output_token_ids.
|
319 |
+
|
320 |
+
# Slice the generated token ids of the 1st beam result.
|
321 |
+
# output = input tokens + generated tokens.
|
322 |
+
output_token_ids = [out[0, length:].cpu()
|
323 |
+
for out, length in zip(outputs, input_lengths)]
|
324 |
+
|
325 |
+
output_texts = self.tokenizer.batch_decode(
|
326 |
+
output_token_ids, skip_special_tokens=True)
|
327 |
+
|
328 |
+
return output_texts
|
329 |
+
|
330 |
+
def stream_generate(self, prompts: typing.List[str] | str,
|
331 |
+
output_length: int = 512,
|
332 |
+
beam_width: int = 1,
|
333 |
+
top_k: typing.Optional[torch.IntTensor] = 1,
|
334 |
+
top_p: typing.Optional[torch.FloatTensor] = 1.0,
|
335 |
+
beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
|
336 |
+
temperature: typing.Optional[torch.FloatTensor] = 1.0,
|
337 |
+
len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
|
338 |
+
repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
|
339 |
+
presence_penalty: typing.Optional[torch.FloatTensor] = None,
|
340 |
+
min_length: typing.Optional[torch.IntTensor] = None,
|
341 |
+
bad_words_list: typing.Optional[torch.IntTensor] = None,
|
342 |
+
do_sample: bool = False,
|
343 |
+
return_output_length: bool = False,
|
344 |
+
return_cum_log_probs: int = 0):
|
345 |
+
if isinstance(prompts, str):
|
346 |
+
prompts = [prompts, ]
|
347 |
+
|
348 |
+
inputs = prompts
|
349 |
+
|
350 |
+
batch_size = len(inputs)
|
351 |
+
ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
|
352 |
+
ones_float = torch.ones(size=[batch_size], dtype=torch.float32)
|
353 |
+
|
354 |
+
# we must encode the raw prompt text one by one in order to compute the length of the original text.
|
355 |
+
input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
|
356 |
+
input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
|
357 |
+
# after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
|
358 |
+
input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)
|
359 |
+
|
360 |
+
random_seed = None
|
361 |
+
if do_sample:
|
362 |
+
random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)
|
363 |
+
|
364 |
+
for finish, output_ids, sequence_length, output_cum_log_probs in self.model.stream_forward(start_ids=input_token_ids,
|
365 |
+
start_lengths=input_lengths,
|
366 |
+
output_len=output_length,
|
367 |
+
beam_width=beam_width,
|
368 |
+
top_k=top_k * ones_int,
|
369 |
+
top_p=top_p * ones_float,
|
370 |
+
beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
|
371 |
+
temperature=temperature * ones_float,
|
372 |
+
len_penalty=len_penalty * ones_float,
|
373 |
+
repetition_penalty=repetition_penalty * ones_float,
|
374 |
+
random_seed=random_seed,
|
375 |
+
return_output_length=return_output_length,
|
376 |
+
return_cum_log_probs=return_cum_log_probs):
|
377 |
+
|
378 |
+
# Slice the generated token ids of the 1st beam result.
|
379 |
+
# output = input tokens + generated tokens.
|
380 |
+
output_token_ids = [out[0, length:].cpu()
|
381 |
+
for out, length in zip(output_ids, input_lengths)]
|
382 |
+
|
383 |
+
output_texts = self.tokenizer.batch_decode(
|
384 |
+
output_token_ids, skip_special_tokens=True)
|
385 |
+
|
386 |
+
if finish:
|
387 |
+
break
|
388 |
+
|
389 |
+
yield finish, output_texts
|
390 |
+
|
391 |
+
return finish, output_texts
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import print_function
|
2 |
+
|
3 |
+
import copy
|
4 |
+
import os
|
5 |
+
import pathlib
|
6 |
+
import typing
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
import torch
|
10 |
+
import torch.distributed as dist
|
11 |
+
import torch.nn as nn
|
12 |
+
|
13 |
+
from queue import Queue
|
14 |
+
from threading import Thread
|
15 |
+
|
16 |
+
import sys
|
17 |
+
sys.path.append('/usr/lib/lyralib')
|
18 |
+
import lyraOp
|
19 |
+
|
20 |
+
str_type_map = {"fp32": torch.float32, "fp16": torch.float16, "bf16": torch.bfloat16}
|
21 |
+
|
22 |
+
class BaichuanModel(nn.Module):
|
23 |
+
def __init__(self,
|
24 |
+
head_num,
|
25 |
+
size_per_head,
|
26 |
+
inter_size,
|
27 |
+
vocab_size,
|
28 |
+
rotary_embedding_dim,
|
29 |
+
start_id, end_id, layer_num,
|
30 |
+
max_seq_len: int,
|
31 |
+
layernorm_eps,
|
32 |
+
tensor_para_size: int,
|
33 |
+
pipeline_para_size: int,
|
34 |
+
use_gptj_residual,
|
35 |
+
lib_path: typing.Union[str, pathlib.Path],
|
36 |
+
model_path,
|
37 |
+
memopt_mode: int = 0,
|
38 |
+
quant_data_type: str = "int8",
|
39 |
+
inference_data_type: str = "fp16",
|
40 |
+
weights_data_type: typing.Union[str, np.dtype] = np.float32):
|
41 |
+
super().__init__()
|
42 |
+
self.head_num = head_num
|
43 |
+
self.size_per_head = size_per_head
|
44 |
+
self.inter_size = inter_size
|
45 |
+
self.vocab_size = vocab_size
|
46 |
+
self.rotary_embedding_dim = rotary_embedding_dim
|
47 |
+
self.start_id = start_id
|
48 |
+
self.end_id = end_id
|
49 |
+
self.max_seq_len = max_seq_len
|
50 |
+
self.layer_num = layer_num
|
51 |
+
self.use_gptj_residual = use_gptj_residual
|
52 |
+
self.layernorm_eps = layernorm_eps
|
53 |
+
self.memopt_mode = memopt_mode
|
54 |
+
self.quant_data_type = quant_data_type
|
55 |
+
|
56 |
+
# multi-gpu params
|
57 |
+
self.tensor_para_size = tensor_para_size
|
58 |
+
self.pipeline_para_size = pipeline_para_size
|
59 |
+
self.build_model = False
|
60 |
+
self.weights_data_type = weights_data_type
|
61 |
+
self.inference_data_type = inference_data_type
|
62 |
+
|
63 |
+
assert torch.cuda.is_available(), "CUDA is required for this model."
|
64 |
+
|
65 |
+
assert head_num % tensor_para_size == 0, "head_num must be a multiple of tensor_para_size."
|
66 |
+
assert layer_num % pipeline_para_size == 0, "layer_num must be a multiple of pipeline_para_size."
|
67 |
+
|
68 |
+
# queue for streaming
|
69 |
+
self.que = Queue()
|
70 |
+
self.threads = [None] * self.tensor_para_size
|
71 |
+
|
72 |
+
# Load the C++ model into Pytorch model.
|
73 |
+
# torch.classes.load_library(os.path.abspath(lib_path))
|
74 |
+
|
75 |
+
# Prepare for tensor/pipeline parallel
|
76 |
+
try:
|
77 |
+
dist.init_process_group(backend='mpi')
|
78 |
+
except:
|
79 |
+
print("[INFO] WARNING: Have initialized the process group")
|
80 |
+
self.rank = dist.get_rank()
|
81 |
+
self.device_count = torch.cuda.device_count()
|
82 |
+
self.device = self.rank % self.device_count
|
83 |
+
torch.cuda.set_device(self.device)
|
84 |
+
|
85 |
+
world_size = dist.get_world_size()
|
86 |
+
# print(tensor_para_size * pipeline_para_size)
|
87 |
+
assert world_size == tensor_para_size * pipeline_para_size, "tensor_para_size * pipeline_para_size must be equal to world_size."
|
88 |
+
|
89 |
+
self.tensor_para_rank = self.rank % self.tensor_para_size
|
90 |
+
self.pipeline_para_rank = self.rank // self.tensor_para_size
|
91 |
+
|
92 |
+
self.model = lyraOp.LyraBaichuan(
|
93 |
+
self.head_num, self.size_per_head, self.inter_size,
|
94 |
+
self.layer_num,
|
95 |
+
self.vocab_size,
|
96 |
+
self.rotary_embedding_dim,
|
97 |
+
self.layernorm_eps,
|
98 |
+
self.start_id, self.end_id,
|
99 |
+
self.tensor_para_size, self.pipeline_para_size,
|
100 |
+
self.max_seq_len,
|
101 |
+
self.use_gptj_residual,
|
102 |
+
self.memopt_mode,
|
103 |
+
self.quant_data_type,
|
104 |
+
model_path,
|
105 |
+
self.weights_data_type,
|
106 |
+
self.inference_data_type)
|
107 |
+
|
108 |
+
self.build_model = True
|
109 |
+
torch.cuda.empty_cache()
|
110 |
+
|
111 |
+
def forward(self,
|
112 |
+
start_ids: torch.Tensor,
|
113 |
+
start_lengths: torch.Tensor,
|
114 |
+
output_len,
|
115 |
+
beam_width=1,
|
116 |
+
top_k: torch.Tensor = None,
|
117 |
+
top_p: torch.Tensor = None,
|
118 |
+
beam_search_diversity_rate: torch.Tensor = None,
|
119 |
+
temperature: torch.Tensor = None,
|
120 |
+
len_penalty: torch.Tensor = None,
|
121 |
+
repetition_penalty: torch.Tensor = None,
|
122 |
+
random_seed: torch.Tensor = None,
|
123 |
+
return_output_length=False,
|
124 |
+
return_cum_log_probs=0):
|
125 |
+
|
126 |
+
input_len = start_ids.size(1)
|
127 |
+
assert input_len > 0, "input len must be larger than zero. For an unconditional case, use start_id as the first token."
|
128 |
+
|
129 |
+
# Inputs to device
|
130 |
+
input_ids = start_ids.cuda(self.device)
|
131 |
+
input_lengths = start_lengths.cuda(self.device)
|
132 |
+
# outputs: output_ids, output_lengths, output_cum_log_probs (optional)
|
133 |
+
outputs = self.model.forward(input_ids,
|
134 |
+
input_lengths,
|
135 |
+
output_len,
|
136 |
+
beam_width, # optional, can be None
|
137 |
+
top_k, # optional, can be None
|
138 |
+
top_p, # optional, can be None
|
139 |
+
beam_search_diversity_rate, # optional, can be None
|
140 |
+
temperature, # optional, can be None
|
141 |
+
len_penalty, # optional, can be None
|
142 |
+
repetition_penalty, # optional, can be None
|
143 |
+
random_seed, # optional, can be None
|
144 |
+
return_cum_log_probs) # optional, can be None
|
145 |
+
|
146 |
+
if return_cum_log_probs == 0:
|
147 |
+
output_ids, output_lengths = outputs
|
148 |
+
else:
|
149 |
+
output_ids, output_lengths, output_cum_log_probs = outputs
|
150 |
+
if return_output_length:
|
151 |
+
if return_cum_log_probs > 0:
|
152 |
+
return output_ids, output_lengths, output_cum_log_probs
|
153 |
+
else:
|
154 |
+
return output_ids, output_lengths
|
155 |
+
else:
|
156 |
+
return output_ids
|
157 |
+
|
158 |
+
def set_input_tensor(self, input_tensor):
|
159 |
+
"""Set input tensor to be used instead of forward()'s input.
|
160 |
+
|
161 |
+
When doing pipeline parallelism the input from the previous
|
162 |
+
stage comes from communication, not from the input, so the
|
163 |
+
model's forward_step_func won't have it. This function is thus
|
164 |
+
used by internal code to bypass the input provided by the
|
165 |
+
forward_step_func"""
|
166 |
+
self.input_tensor = input_tensor
|
167 |
+
|
168 |
+
def _forward_callback(self, output_ids, seq_lengths, ctx):
|
169 |
+
self.que.put((False, (list(output_ids), list(seq_lengths))))
|
170 |
+
|
171 |
+
|
172 |
+
def _tensormap_dict_to_py_dict(self, tensormap_dict: lyraOp.TensorMap):
|
173 |
+
"""map torch tensormap to py dict."""
|
174 |
+
ret = dict()
|
175 |
+
for k, v in tensormap_dict.items():
|
176 |
+
ret[k] = v
|
177 |
+
|
178 |
+
return ret
|
179 |
+
|
180 |
+
def stream_forward(self,
|
181 |
+
start_ids: torch.Tensor,
|
182 |
+
start_lengths: torch.Tensor,
|
183 |
+
output_len,
|
184 |
+
beam_width=1,
|
185 |
+
top_k: torch.Tensor = None,
|
186 |
+
top_p: torch.Tensor = None,
|
187 |
+
beam_search_diversity_rate: torch.Tensor = None,
|
188 |
+
temperature: torch.Tensor = None,
|
189 |
+
len_penalty: torch.Tensor = None,
|
190 |
+
repetition_penalty: torch.Tensor = None,
|
191 |
+
random_seed: torch.Tensor = None,
|
192 |
+
return_output_length=False,
|
193 |
+
return_cum_log_probs=0):
|
194 |
+
|
195 |
+
# Register callback func to model
|
196 |
+
self.model.registerCallback(self._forward_callback)
|
197 |
+
|
198 |
+
batch_size = start_ids.size(0)
|
199 |
+
input_len = start_ids.size(1)
|
200 |
+
assert input_len > 0, "input len must be larger than zero. For an unconditional case, use start_id as the first token."
|
201 |
+
|
202 |
+
# Inputs to device
|
203 |
+
input_ids = start_ids.cuda(self.device)
|
204 |
+
input_lengths = start_lengths.cuda(self.device)
|
205 |
+
# outputs: output_ids, output_lengths, output_cum_log_probs (optional)
|
206 |
+
|
207 |
+
# Init thread of model inference
|
208 |
+
def _func(enque_output):
|
209 |
+
outputs = self.model.forward(input_ids,
|
210 |
+
input_lengths,
|
211 |
+
output_len,
|
212 |
+
beam_width, # optional, can be None
|
213 |
+
top_k, # optional, can be None
|
214 |
+
top_p, # optional, can be None
|
215 |
+
beam_search_diversity_rate, # optional, can be None
|
216 |
+
temperature, # optional, can be None
|
217 |
+
len_penalty, # optional, can be None
|
218 |
+
repetition_penalty, # optional, can be None
|
219 |
+
random_seed, # optional, can be None
|
220 |
+
return_cum_log_probs) # optional, can be None
|
221 |
+
if enque_output:
|
222 |
+
self.que.put((True, (outputs[0].tolist(), outputs[1].tolist())))
|
223 |
+
|
224 |
+
# Start thread of model inference
|
225 |
+
t = Thread(target=_func,
|
226 |
+
args=(True,),
|
227 |
+
daemon=True)
|
228 |
+
t.start()
|
229 |
+
self.threads[0] = t
|
230 |
+
|
231 |
+
# Generate streaming output
|
232 |
+
while True:
|
233 |
+
# while self.que.qsize() > 1:
|
234 |
+
# self.que.get()
|
235 |
+
|
236 |
+
finish, outputs = self.que.get()
|
237 |
+
|
238 |
+
output_ids, sequence_length = outputs
|
239 |
+
output_ids = torch.tensor(output_ids).view(batch_size, beam_width, -1)
|
240 |
+
sequence_length = torch.tensor(sequence_length).view(batch_size, beam_width)
|
241 |
+
|
242 |
+
if return_output_length:
|
243 |
+
if return_cum_log_probs > 0:
|
244 |
+
yield finish, output_ids, sequence_length, None
|
245 |
+
else:
|
246 |
+
yield finish, output_ids, sequence_length, None
|
247 |
+
else:
|
248 |
+
yield finish, output_ids, None, None
|
249 |
+
|
250 |
+
if finish:
|
251 |
+
for t in self.threads:
|
252 |
+
t.join()
|
253 |
+
while self.que.qsize() > 0:
|
254 |
+
self.que.get()
|
255 |
+
break
|
256 |
+
|
257 |
+
self.model.unRegisterCallback()
|
258 |
+
return finish, output_ids, None, None
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.
|
2 |
+
|
3 |
+
import os
|
4 |
+
from shutil import copyfile
|
5 |
+
from typing import Any, Dict, List, Optional, Tuple
|
6 |
+
|
7 |
+
import sentencepiece as spm
|
8 |
+
from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
|
9 |
+
from transformers.utils import logging
|
10 |
+
|
11 |
+
|
12 |
+
logger = logging.get_logger(__name__)
|
13 |
+
|
14 |
+
VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}
|
15 |
+
|
16 |
+
PRETRAINED_VOCAB_FILES_MAP = {
|
17 |
+
"vocab_file": {},
|
18 |
+
"tokenizer_file": {},
|
19 |
+
}
|
20 |
+
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {}
|
21 |
+
|
22 |
+
|
23 |
+
class BaichuanTokenizer(PreTrainedTokenizer):
|
24 |
+
"""
|
25 |
+
Construct a Baichuan tokenizer. Based on byte-level Byte-Pair-Encoding.
|
26 |
+
|
27 |
+
Args:
|
28 |
+
vocab_file (`str`):
|
29 |
+
Path to the vocabulary file.
|
30 |
+
"""
|
31 |
+
|
32 |
+
vocab_files_names = VOCAB_FILES_NAMES
|
33 |
+
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
34 |
+
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
35 |
+
model_input_names = ["input_ids", "attention_mask"]
|
36 |
+
|
37 |
+
def __init__(
|
38 |
+
self,
|
39 |
+
vocab_file,
|
40 |
+
unk_token="<unk>",
|
41 |
+
bos_token="<s>",
|
42 |
+
eos_token="</s>",
|
43 |
+
pad_token=None,
|
44 |
+
sp_model_kwargs: Optional[Dict[str, Any]] = None,
|
45 |
+
add_bos_token=True,
|
46 |
+
add_eos_token=False,
|
47 |
+
clean_up_tokenization_spaces=False,
|
48 |
+
**kwargs,
|
49 |
+
):
|
50 |
+
self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
|
51 |
+
bos_token = AddedToken(bos_token, lstrip=False, rstrip=False) if isinstance(bos_token, str) else bos_token
|
52 |
+
eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
|
53 |
+
unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
|
54 |
+
pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
|
55 |
+
super().__init__(
|
56 |
+
bos_token=bos_token,
|
57 |
+
eos_token=eos_token,
|
58 |
+
unk_token=unk_token,
|
59 |
+
pad_token=pad_token,
|
60 |
+
add_bos_token=add_bos_token,
|
61 |
+
add_eos_token=add_eos_token,
|
62 |
+
sp_model_kwargs=self.sp_model_kwargs,
|
63 |
+
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
|
64 |
+
**kwargs,
|
65 |
+
)
|
66 |
+
self.vocab_file = vocab_file
|
67 |
+
self.add_bos_token = add_bos_token
|
68 |
+
self.add_eos_token = add_eos_token
|
69 |
+
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
|
70 |
+
self.sp_model.Load(vocab_file)
|
71 |
+
|
72 |
+
def __getstate__(self):
|
73 |
+
state = self.__dict__.copy()
|
74 |
+
state["sp_model"] = None
|
75 |
+
return state
|
76 |
+
|
77 |
+
def __setstate__(self, d):
|
78 |
+
self.__dict__ = d
|
79 |
+
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
|
80 |
+
self.sp_model.Load(self.vocab_file)
|
81 |
+
|
82 |
+
@property
|
83 |
+
def vocab_size(self):
|
84 |
+
"""Returns vocab size"""
|
85 |
+
return self.sp_model.get_piece_size()
|
86 |
+
|
87 |
+
def get_vocab(self):
|
88 |
+
"""Returns vocab as a dict"""
|
89 |
+
vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
|
90 |
+
vocab.update(self.added_tokens_encoder)
|
91 |
+
return vocab
|
92 |
+
|
93 |
+
def _tokenize(self, text):
|
94 |
+
"""Returns a tokenized string."""
|
95 |
+
return self.sp_model.encode(text, out_type=str)
|
96 |
+
|
97 |
+
def _convert_token_to_id(self, token):
|
98 |
+
"""Converts a token (str) in an id using the vocab."""
|
99 |
+
return self.sp_model.piece_to_id(token)
|
100 |
+
|
101 |
+
def _convert_id_to_token(self, index):
|
102 |
+
"""Converts an index (integer) in a token (str) using the vocab."""
|
103 |
+
token = self.sp_model.IdToPiece(index)
|
104 |
+
return token
|
105 |
+
|
106 |
+
def convert_tokens_to_string(self, tokens):
|
107 |
+
"""Converts a sequence of tokens (string) in a single string."""
|
108 |
+
current_sub_tokens = []
|
109 |
+
out_string = ""
|
110 |
+
prev_is_special = False
|
111 |
+
for i, token in enumerate(tokens):
|
112 |
+
# make sure that special tokens are not decoded using sentencepiece model
|
113 |
+
if token in self.all_special_tokens:
|
114 |
+
if not prev_is_special and i != 0:
|
115 |
+
out_string += " "
|
116 |
+
out_string += self.sp_model.decode(current_sub_tokens) + token
|
117 |
+
prev_is_special = True
|
118 |
+
current_sub_tokens = []
|
119 |
+
else:
|
120 |
+
current_sub_tokens.append(token)
|
121 |
+
prev_is_special = False
|
122 |
+
out_string += self.sp_model.decode(current_sub_tokens)
|
123 |
+
return out_string
|
124 |
+
|
125 |
+
def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
|
126 |
+
"""
|
127 |
+
Save the vocabulary and special tokens file to a directory.
|
128 |
+
|
129 |
+
Args:
|
130 |
+
save_directory (`str`):
|
131 |
+
The directory in which to save the vocabulary.
|
132 |
+
|
133 |
+
Returns:
|
134 |
+
`Tuple(str)`: Paths to the files saved.
|
135 |
+
"""
|
136 |
+
if not os.path.isdir(save_directory):
|
137 |
+
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
|
138 |
+
return
|
139 |
+
out_vocab_file = os.path.join(
|
140 |
+
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
|
141 |
+
)
|
142 |
+
|
143 |
+
if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
|
144 |
+
copyfile(self.vocab_file, out_vocab_file)
|
145 |
+
elif not os.path.isfile(self.vocab_file):
|
146 |
+
with open(out_vocab_file, "wb") as fi:
|
147 |
+
content_spiece_model = self.sp_model.serialized_model_proto()
|
148 |
+
fi.write(content_spiece_model)
|
149 |
+
|
150 |
+
return (out_vocab_file,)
|
151 |
+
|
152 |
+
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
|
153 |
+
bos_token_id = [self.bos_token_id] if self.add_bos_token else []
|
154 |
+
eos_token_id = [self.eos_token_id] if self.add_eos_token else []
|
155 |
+
|
156 |
+
output = bos_token_id + token_ids_0 + eos_token_id
|
157 |
+
|
158 |
+
if token_ids_1 is not None:
|
159 |
+
output = output + bos_token_id + token_ids_1 + eos_token_id
|
160 |
+
|
161 |
+
return output
|
162 |
+
|
163 |
+
def get_special_tokens_mask(
|
164 |
+
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
|
165 |
+
) -> List[int]:
|
166 |
+
"""
|
167 |
+
Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
|
168 |
+
special tokens using the tokenizer `prepare_for_model` method.
|
169 |
+
|
170 |
+
Args:
|
171 |
+
token_ids_0 (`List[int]`):
|
172 |
+
List of IDs.
|
173 |
+
token_ids_1 (`List[int]`, *optional*):
|
174 |
+
Optional second list of IDs for sequence pairs.
|
175 |
+
already_has_special_tokens (`bool`, *optional*, defaults to `False`):
|
176 |
+
Whether or not the token list is already formatted with special tokens for the model.
|
177 |
+
|
178 |
+
Returns:
|
179 |
+
`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
|
180 |
+
"""
|
181 |
+
if already_has_special_tokens:
|
182 |
+
return super().get_special_tokens_mask(
|
183 |
+
token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
|
184 |
+
)
|
185 |
+
|
186 |
+
bos_token_id = [1] if self.add_bos_token else []
|
187 |
+
eos_token_id = [1] if self.add_eos_token else []
|
188 |
+
|
189 |
+
if token_ids_1 is None:
|
190 |
+
return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
|
191 |
+
return (
|
192 |
+
bos_token_id
|
193 |
+
+ ([0] * len(token_ids_0))
|
194 |
+
+ eos_token_id
|
195 |
+
+ bos_token_id
|
196 |
+
+ ([0] * len(token_ids_1))
|
197 |
+
+ eos_token_id
|
198 |
+
)
|
199 |
+
|
200 |
+
def create_token_type_ids_from_sequences(
|
201 |
+
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
202 |
+
) -> List[int]:
|
203 |
+
"""
|
204 |
+
Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
|
205 |
+
sequence pair mask has the following format:
|
206 |
+
|
207 |
+
```
|
208 |
+
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
|
209 |
+
| first sequence | second sequence |
|
210 |
+
```
|
211 |
+
|
212 |
+
if token_ids_1 is None, only returns the first portion of the mask (0s).
|
213 |
+
|
214 |
+
Args:
|
215 |
+
token_ids_0 (`List[int]`):
|
216 |
+
List of ids.
|
217 |
+
token_ids_1 (`List[int]`, *optional*):
|
218 |
+
Optional second list of IDs for sequence pairs.
|
219 |
+
|
220 |
+
Returns:
|
221 |
+
`List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
|
222 |
+
"""
|
223 |
+
bos_token_id = [self.bos_token_id] if self.add_bos_token else []
|
224 |
+
eos_token_id = [self.eos_token_id] if self.add_eos_token else []
|
225 |
+
|
226 |
+
output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)
|
227 |
+
|
228 |
+
if token_ids_1 is not None:
|
229 |
+
output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)
|
230 |
+
|
231 |
+
return output
|
232 |
+
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## 模型和环境
|
2 |
+
|
3 |
+
### 构建环境
|
4 |
+
```shell
|
5 |
+
# 本地获取ngc pytorch cuda12原生镜像
|
6 |
+
docker pull nvcr.io/nvidia/pytorch:23.02-py3
|
7 |
+
|
8 |
+
# 启动容器
|
9 |
+
docker run --gpus all -itd --rm --name lyrallms_cu12 nvcr.io/nvidia/pytorch:23.02-py3
|
10 |
+
docker exec -it lyrallms_cu12 bash
|
11 |
+
```
|
12 |
+
|
13 |
+
获取代码后安装依赖
|
14 |
+
```shell
|
15 |
+
pip install -r requirements.txt
|
16 |
+
```
|
17 |
+
|
18 |
+
将`lyralib`下对应cuda版本的[so文件](../../lyralib/sm80) 复制到`/usr/lib/lyralib`下。
|
19 |
+
|
20 |
+
## 推理使用
|
21 |
+
|
22 |
+
### 使用核心片段
|
23 |
+
|
24 |
+
```python
|
25 |
+
from lyra_llama import lyraLlama
|
26 |
+
|
27 |
+
model_path = 'XXX' # 包含转换后的模型参数,配置,tokenizer文件目录
|
28 |
+
data_type = 'fp16'
|
29 |
+
memopt_mode = 0 # 如需使用MEMOPT模式推理, memopt_mode=1
|
30 |
+
|
31 |
+
# 加载加速后的模型,C++ 底层已经掩盖,依赖加速编译的 /usr/lib/ftlib 下的 so 库,已经打在镜像中
|
32 |
+
# 模型加载需要花一些时间,因为现在 IO 参数是多个小文件,建议把下载的模型参数解压到本地磁盘
|
33 |
+
model = lyraLlama(model_path, data_type, memopt_mode)
|
34 |
+
|
35 |
+
# 输入, 若有多个输入,可batch 推理,prompts 支持列表,这里为模拟多个输入,直接复制 32 分,batchsize 达到32
|
36 |
+
prompts = '列出3个不同的机器学习算法,并说明它们的适用范围.'
|
37 |
+
prompts = [prompts,]*64
|
38 |
+
|
39 |
+
# 生成, 最大长度可自行调整,这里设置 150,模型遇到 end token 或者达到最大计算长度时会停止当前批次计算.
|
40 |
+
# 因为 LLaMA-ZIYA 词表是按字切分,导致存储和计算量非常大,若是长序列生成情况,请自行缩小 batch_size
|
41 |
+
output_texts = model.generate(prompts, output_length=150, do_sample=False, top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0)
|
42 |
+
|
43 |
+
# 输出查看, 虽然 输入字符串也是参与计算,用于评估模型吞吐量和计算速度。
|
44 |
+
# 这里为了显示应用方便, output_texts 中每个样本的输出已经去掉了输入字符串
|
45 |
+
print(output_texts)
|
46 |
+
|
47 |
+
# 输出示例
|
48 |
+
>>> Inputs: 列出3个不同的机器学习算法,并说明它们的适用范围.
|
49 |
+
>>> Outputs:
|
50 |
+
1. 线性回归(Linear Regression):适用于解决两个变量之间的关系问题,例如预测房价或销售额。它可以用于回归分析和回归聚类分析。
|
51 |
+
2. 决策树(Decision Tree):适用于解决分类和回归问题。它可以用于分类、回归、异常检测和聚类分析。
|
52 |
+
3. 神经网络(Neural Network):适用于解决分类、回归和聚类问题。它可以用于图像识别、语音识别
|
53 |
+
```
|
54 |
+
|
55 |
+
### demo 脚本
|
56 |
+
|
57 |
+
`examples/batch_demo.py` 中有类似上面的使用示例,做了简单的跑速测试,考虑大家对 token 的理解各有不同,我们这里直接按字符数来评估,不同 token 的理解可以自行根据生成结果字符数去观测。注意:在 `LLaMA-ZIYA` 中,tokenizer 对中文的切分,约等于一个字是一个 token.
|
58 |
+
|
59 |
+
更多测试脚本及用法详见参考 `examples` 下的 [README.md](./examples/README.md) ,如:
|
60 |
+
- Batch推理
|
61 |
+
- 不等长Batch推理
|
62 |
+
- Batch流式推理
|
63 |
+
|
64 |
+
## 自定义模型参数
|
65 |
+
|
66 |
+
已提供转换脚本 `parse_model_params.py` 可以将 LLaMa 模型的 HuggingFace 格式参数,转换为加速版本下各层模型需要的模型参数。因为 LLaMa 有很多变体,所以这里我们提供一个模型名字 `-model_name` 的转换参数,可以自行填入,以便生成可区分的 config.in 文件。
|
67 |
+
|
68 |
+
```shell
|
69 |
+
python parse_model_params.py -i your_model_dir -o output_dir -t_g 1 -i_g 1 -weight_data_type "fp16" -model_name "llama"
|
70 |
+
```
|
71 |
+
|
72 |
+
转换后的模型参数将以每个参数一个文件的形式存放在 `output_dir/{i_g}-gpu-{weight_data_type}` 下,分割的形式有助于并发 IO,但缺陷是不便捷。
|
73 |
+
|
74 |
+
同时该转换脚本还会将同目录下 tokenizer_source 里的 `tokenizer.model` `tokenizer.json` `special_tokens_map.json` `tokenizer_config.json` 四个文件拷贝到 output_dir 下,以便后续使用加速模型时直接能初始化对应的 加速后的 LLaMa 的 tokenizer.
|
75 |
+
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## 测试脚本
|
2 |
+
|
3 |
+
### batch推理
|
4 |
+
|
5 |
+
```sh
|
6 |
+
export FMHA_VERSION=V2 # 如使用旧版本Attn,设置 FMHA_VERSION=OFF
|
7 |
+
export KV_CACHE_DTYPE=DEFAULT # 如使用KVCache Int8,设置 KV_CACHE_DTYPE=INT8
|
8 |
+
|
9 |
+
model_path=ModelPath # 转换后模型所处文件夹路径 (1-gpu-fp16.bin等文件所在目录)
|
10 |
+
kv_qparams_fpath=KVScalesPath # (可选) 校准后的KVCache量化Scales文件路径 (past_kv_scale.bin)
|
11 |
+
|
12 |
+
data_type=fp16 # 权重保存精度
|
13 |
+
memopt_mode=0 # MEMOPT模式: 0/1
|
14 |
+
quant_type="int8" # 量化精度: int4/int8
|
15 |
+
max_output_length=256
|
16 |
+
warmups=1
|
17 |
+
avgnums=1
|
18 |
+
|
19 |
+
python batch_demo.py --model-path $model_path\
|
20 |
+
--tokenizer-path $model_path\
|
21 |
+
--data-type $data_type\
|
22 |
+
--memopt_mode $memopt_mode\
|
23 |
+
--quant-type ${quant_type}\
|
24 |
+
--max-output-length $max_output_length\
|
25 |
+
--warmups $warmups\
|
26 |
+
--avgnums $avgnums\
|
27 |
+
--kvqparams-fpath $kv_qparams_fpath
|
28 |
+
```
|
29 |
+
|
30 |
+
### batch流式推理
|
31 |
+
|
32 |
+
```sh
|
33 |
+
export FMHA_VERSION=V2 # 如使用旧版本Attn,设置为OFF
|
34 |
+
export KV_CACHE_DTYPE=DEFAULT # 如使用KVCache Int8,设置 KV_CACHE_DTYPE=INT8
|
35 |
+
export LYRA_STREAM_CB_STEP=30 # 回调函数间隔步数
|
36 |
+
|
37 |
+
model_path=ModelPath # 转换后模型所处文件夹路径 (1-gpu-fp16.bin等文件所在目录)
|
38 |
+
kv_qparams_fpath=KVScalesPath # (可选) 校准后的KVCache量化Scales文件路径 (past_kv_scale.bin)
|
39 |
+
|
40 |
+
data_type=fp16 # 权重保存精度
|
41 |
+
memopt_mode=0 # MEMOPT模式: 0/1
|
42 |
+
quant_type="int8" # 量化精度: int4/int8
|
43 |
+
max_output_length=256
|
44 |
+
warmups=1
|
45 |
+
avgnums=1
|
46 |
+
|
47 |
+
python batch_stream_demo.py --model-path $model_path\
|
48 |
+
--tokenizer-path $model_path\
|
49 |
+
--data-type $data_type\
|
50 |
+
--memopt_mode $memopt_mode\
|
51 |
+
--quant-type ${quant_type}\
|
52 |
+
--max-output-length $max_output_length\
|
53 |
+
--warmups $warmups\
|
54 |
+
--avgnums $avgnums\
|
55 |
+
--kvqparams-fpath $kv_qparams_fpath
|
56 |
+
```
|
57 |
+
### 不等长batch推理
|
58 |
+
|
59 |
+
```sh
|
60 |
+
export FMHA_VERSION=V2 # 如使用旧版本Attn,设置为OFF
|
61 |
+
export KV_CACHE_DTYPE=DEFAULT # 如使用KVCache Int8,设置 KV_CACHE_DTYPE=INT8
|
62 |
+
|
63 |
+
model_path=ModelPath # 转换后模型所处文件夹路径 (1-gpu-fp16.bin等文件所在目录)
|
64 |
+
kv_qparams_fpath=KVScalesPath # (可选) 校准后的KVCache量化Scales文件路径 (past_kv_scale.bin)
|
65 |
+
|
66 |
+
prompt_filepath=valen_prompts.json # 用于测试的不等长prompts文件,从中采样
|
67 |
+
data_type=fp16 # 权重保存精度
|
68 |
+
memopt_mode=0 # MEMOPT模式: 0/1
|
69 |
+
quant_type="int8" # 量化精度: int4/int8
|
70 |
+
max_output_length=256
|
71 |
+
warmups=1
|
72 |
+
avgnums=1
|
73 |
+
|
74 |
+
python random_batch_demo.py --model-path $model_path\
|
75 |
+
--tokenizer-path $model_path\
|
76 |
+
--data-type $data_type\
|
77 |
+
--memopt_mode $memopt_mode\
|
78 |
+
--quant-type ${quant_type}\
|
79 |
+
--prompt_filepath $prompt_filepath\
|
80 |
+
--max-output-length $max_output_length\
|
81 |
+
--warmups $warmups\
|
82 |
+
--avgnums $avgnums\
|
83 |
+
--kvqparams-fpath $kv_qparams_fpath
|
84 |
+
```
|
85 |
+
|
86 |
+
## Prompt例子
|
87 |
+
|
88 |
+
### 短序列
|
89 |
+
```
|
90 |
+
北京的景点:故宫、天坛、万里长城等。\n深圳的景点:
|
91 |
+
```
|
92 |
+
```
|
93 |
+
今天天气大概 25度,有点小雨,吹着风,我想去户外散步,应该穿什么样的衣服 裤子鞋子搭配
|
94 |
+
```
|
95 |
+
|
96 |
+
### 1K序列
|
97 |
+
```
|
98 |
+
《Bela Lugosi's Dead 》是英国后朋克乐队Bauhaus的首张单曲,于 1979 年 8 月 6 日在Small Wonder厂牌上发行。[4]它通常被认为是第一张哥特式摇滚唱片。\n1979 年 1 月 26 日,“Bela Lugosi's Dead”在威灵伯勒的贝克录音室进行了六个小时的“录音室现场”录制。这是他们在乐队成立六周后一起录制的第一首歌曲。[6]所有四位乐队成员都被认为是这首歌的作者:主唱彼得·墨菲、吉他手丹尼尔·阿什、鼓手凯文·哈斯金斯和贝斯手大卫·J (大卫·哈斯金斯)。David J 声称这首歌的歌词是他写的。[5] “Bela Lugosi's Dead”的替代版本还包括他们下一首单曲“ Dark Entries ”的早期演示录音的一部分。\n\n在同一场会议中还录制了另外四首歌曲:“Boys”;“咬我的臀部”;“Some Faces”和斯卡雷鬼曲调“Harry”,这是关于Blondie主唱Deborah Harry的。[7] [8]关于这次会议,凯文·哈斯金斯 (Kevin Haskins) 说,“那里有力量流行音乐,还有斯卡。我们试图找到我们的声音。” [9]\n\n在那次录制期间录制的歌曲中(除了“Bela Lugosi's Dead”),只有“Harry”获得了官方发行;1982年作为单曲“ Kick in the Eye ”的B面。1979 年晚些时候在 Beck Studios 录制的《Boys》版本被用作原版单曲《Bela Lugosi's Dead》的 B 面。[10]其余曲目,��括“Boys”的原始录音,一直未发行,直到 2018 年The Bela Session以黑胶唱片和CD 形式发行,并可供乐队数字下载。[11]在额外的曲目中,《经典摇滚》杂志写道:“其余的材料发现乐队正在摸索方向,甚至触及了斯卡。”\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:
|
99 |
+
```
|
100 |
+
|
101 |
+
### 2K序列
|
102 |
+
```
|
103 |
+
根据所给刑事法律文书中的案情描述,预测被告人被判的罪名。你需要从这些罪名中选择最恰当的一项:妨害公务,寻衅滋事,盗窃、侮辱尸体,危险物品肇事,非法采矿,组织、强迫、引诱、容留、介绍卖淫,开设赌场,聚众斗殴,绑架,非法持有毒品,销售假冒注册商标的商品,容留他人吸毒,假冒注册商标,交通肇事,破坏电力设备,组织卖淫,合同诈骗,走私武器、弹药,抢劫,非法处置查封、扣押、冻结的财产,以危险方法危害公共安全,过失投放危险物质,非法制造、买卖、运输、邮寄、储存枪支、弹药、爆炸物,伪造、变造、买卖武装部队公文、证件、印章,持有、使用假币,重婚,聚众冲击国家机关,生产、销售伪劣农药、兽药、化肥、种子,收买被拐卖的妇女、儿童,聚众哄抢,重大劳动安全事故,侵占,包庇毒品犯罪分子,虚报注册资本,违法发放贷款,制造、贩卖、传播淫秽物品,窝藏、包庇,帮助毁灭、伪造证据,放火,强奸,非法携带枪支、弹药、管制刀具、危险物品危及公共安全,伪造、变造金融票证,爆炸,玩忽职守,对非国家工作人员行贿,伪造、倒卖伪造的有价票证,私分国有资产,非法收购、运输、加工、出售国家重点保护植物、国家重点保护植物制品,生产、销售假药,挪用特定款物,过失致人死亡,走私国家禁止进出口的货物、物品,非法制造、买卖、运输、储存危险物质,洗钱,骗取贷款、票据承兑、金融票证,非法买卖制毒物品,非法买卖、运输、携带、持有毒品原植物种子、幼苗,生产、销售有毒、有害食品,滥用职权,招收公务员、学生徇私舞弊,诬告陷害,非法获取国家秘密,非法行医,非法收购、运输、出售珍贵、濒危野生动物、珍贵、濒危野生动物制品,非法出售发票,行贿,高利转贷,非法吸收公众存款,传播淫秽物品,非法进行节育手术,盗伐林木,聚众扰乱社会秩序,走私、贩卖、运输、制造毒品,滥伐林木,赌博,非法经营,生产、销售不符合安全标准的食品,提供侵入、非法控制计算机信息系统程序、工具,倒卖文物,窃取、收买、非法提供信用卡信息,盗掘古文化遗址、古墓葬,协助组织卖淫,破坏广播电视设施、公用电信设施,走私普通货物、物品,逃税,破坏监管秩序,失火,受贿,组织、领导、参加黑社会性质组织,票据诈骗,非法制造、销售非法制造的注册商标标识,侵犯著作权,伪造、变造、买卖国家机关公文、证件、印章,徇私舞弊不征、少征税款,强迫劳动,贷款诈骗,劫持船只、汽车,诈骗,非法种植毒品原植物,非法狩猎,挪用资金,非法收购、运输盗伐、滥伐的林木,出售、购买、运输假币,抢夺,虐待被监管人,窝藏、转移、收购、销售赃物,破坏计算机信息系统,制作、复制、出版、贩卖、传播淫秽物品牟利,拒不支付劳动报酬,盗窃、抢夺枪支、弹药、爆炸物,强迫他人吸毒,走私珍贵动物、珍贵动物制品,虐待,非法获取公民个人信息,破坏交通设施,非法转让、倒卖土地使用权,非法捕捞水产品,非法占用农用地,非法制造、出售非法制造的发票,非法持有、私藏枪支、弹药,集资诈骗,强迫卖淫,伪造公司、企业、事业单位、人民团体印章,利用影响力受贿,编造、故意传播虚假恐怖信息,介绍贿赂,传播性病,拐卖妇女、儿童,倒卖车票、船票,窝藏、转移、隐瞒毒品、毒赃,徇私舞弊不移交刑事案件,过失损坏广播电视设施、公用电信设施,动植物检疫徇私舞弊,破坏交通工具,猥亵儿童,挪用公款,伪造货币,冒充军人招摇撞骗,非法采伐、毁坏国家重点保护植物,故意毁坏财物,非法拘禁,招摇撞骗,伪造、变造居民身份证,徇私枉法,非法生产、买卖警用装备,掩饰、隐瞒犯罪所得、犯罪所得收益,生产、销售伪劣产品,破坏生产经营,帮助犯罪分子逃避处罚,贪污,投放危险物质,持有伪造的发票,危险驾驶,妨害作证,非法猎捕、杀害珍贵、濒危野生动物,重大责任事故,诽谤,虚开发票,引诱���教唆、欺骗他人吸毒,脱逃,扰乱无线电通讯管理秩序,保险诈骗,非法生产、销售间谍专用器材,非法组织卖血,强迫交易,串通投标,破坏易燃易爆设备,传授犯罪方法,妨害信用卡管理,拐骗儿童,单位行贿,打击报复证人,拒不执行判决、裁定,经济犯,金融凭证诈骗,虚开增值税专用发票、用于骗取出口退税、抵扣税款发票,走私废物,组织、领导传销活动,单位受贿,盗窃、抢夺枪支、弹药、爆炸物、危险物质,过失以危险方法危害公共安全,过失致人重伤,引诱、容留、介绍卖淫,遗弃,走私,信用卡诈骗,对单位行贿,故意杀人,聚众扰乱公共场所秩序、交通秩序,盗窃,故意伤害,非法侵入住宅,强制猥亵、侮辱妇女,伪证,污染环境,巨额财产来源不明,非国家工作人员受贿,侮辱,隐匿、故意销毁会计凭证、会计帐簿、财务会计报告,过失损坏武器装备、军事设施、军事通信,敲诈勒索,职务侵占。\n经审理查明:2013年9月底的一天晚上,被告人陆某德酒后经过沭阳县某镇某村张某荣家时,发现张某荣家没有人,即用石头砸破张某荣家房门玻璃,打开房门进入张某荣家中。因进入张某荣时被房门遗留的玻璃划伤,被告人陆某德在张某荣家北屋门和北屋东首间墙面遗留两处血迹。2014年1月7日,被告人陆某德被公安民警从其家中传唤到案,并如实供述自己的罪行。上述事实,有公诉机关提交的,经过庭审质证的,且均具有证据证明效力的以下证据予以证明:被告人陆某德供述其非法侵入他人住宅的时间、地点、经过等事实。该供述得到了被害人张某荣的陈述、证人周某花、李某华等人的证言、法庭科学DNA检验鉴定书、现场勘验检查笔录、现场图、现场照片等证据予以证实,足以认定。刑事判决书证明证明了被告人陆某德有前科;公安机关出具的“发破案经过”及“抓获经过”证明了本案案发及被告人陆某德的归案情况。\n
|
104 |
+
```
|
105 |
+
|
106 |
+
### 4K序列
|
107 |
+
```
|
108 |
+
<context>/*\n * Implement the \"Falling Rocks\" game in the text console. \n * A small dwarf stays at the bottom of the screen and can \n * move left and right (by the arrows keys). A number of rocks \n * of different sizes and forms constantly fall down and you \n * need to avoid a crash.\n * Rocks are the symbols ^, @, *, &, +, %, $, #, !, ., ;, - distributed \n * with appropriate density. The dwarf is (O). \n * Ensure a constant game speed by Thread.Sleep(150).\n * Implement collision detection and scoring system.\n*/\n\nusing System;\nusing System.Threading;\nusing System.Collections.Generic;\nusing System.Threading.Tasks;\n\nclass FallingRocks\n{\n struct Position\n {\n public int X, Y;\n public string symbol;\n public ConsoleColor color;\n\n public Position(int x, int y, string symbol, ConsoleColor color)\n {\n this.X = x;\n this.Y = y;\n this.symbol = symbol;\n this.color = color;\n }\n }\n\n static void Main()\n {\n Thread oThread = new Thread(new ThreadStart(Mainn));\n Thread aThread = new Thread(new ThreadStart(Clr));\n \n aThread.Start();\n oThread.Start();\n oThread.Join();\n aThread.Join();\n }\n\n static void Clr()\n {\n while (true)\n {\n Thread.Sleep(10);\n Console.Clear();\n }\n }\n static void Mainn()\n {\n //Random generator for rocks color, position and symbol\n Random randomGenerator = new Random();\n \n //Sleep time for the game loop\n double sleepTime = 150;\n //Console settings\n Console.CursorVisible = false;\n Console.BufferHeight = Console.WindowHeight;\n \n //number of rocks in the Array rocks\n int rocksCount = 0;\n\n //array with the symbols of the rocks\n string[] symbols = new string[] { \"^\", \"@\", \"*\", \"&\", \"+\", \"%\", \"$\", \"#\", \"!\", \".\", \";\" };\n \n //array with colors for the rocks\n ConsoleColor[] colors = new ConsoleColor[] {ConsoleColor.Yellow, ConsoleColor.White, ConsoleColor.Gray};\n \n //array with rocks\n Position[] rocks = new Position[200];\n \n //position for the dwarf\n Position dwarf = new Position(10, Console.WindowHeight - 1,\"(0)\",ConsoleColor.Red);\n \n //bool variable to say when the game loop to be over\n bool gameLoop = true;\n\n //variable keeping the score\n ulong score = 0;\n\n //the game loop\n while (gameLoop)\n {\n //score is growing as the cycle runs\n score++;\n\n //setting the Y component for all the rocks in the array to grow with 2\n for (int i = 0; i <= rocks.Length - 1; i++)\n {\n rocks[i].Y = rocks[i].Y + 2;\n }\n\n //generating rocks\n for (int x = 0; x <= randomGenerator.Next(2, 4); x++)\n {\n rocks[rocksCount] = new Position(randomGenerator.Next(x * 15, x * 15 + 20), 0\n , symbols[randomGenerator.Next(0, symbols.Length - 1)]\n , colors[randomGenerator.Next(0, colors.Length - 1)]);\n if (rocksCount >= 199) rocksCount = 0;\n rocksCount++;\n }\n\n //printing the rocks and other stuff\n foreach (var item in rocks)\n {\n foreach (var rock in rocks)\n {\n //checking for colision\n if ((rock.X >= dwarf.X) && (rock.X <= (dwarf.X + 2)) && (rock.Y == dwarf.Y))\n {\n gameLoop = false;\n break;\n }\n } \n\n //printing the rocks\n if (item.Y < Console.WindowHeight)\n { \n Console.SetCursorPosition(item.X, item.Y);\n Console.ForegroundColor = item.color;\n Console.Write(item.symbol);\n }\n\n //checking for key pressed\n if (Console.KeyAvailable)\n {\n ConsoleKeyInfo pressedKey = Console.ReadKey();\n if (pressedKey.Key == ConsoleKey.RightArrow)\n {\n if(dwarf.X < Console.WindowWidth - 20)\n {\n //removing the old positions of the dwarf and increasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n 
Console.Write(\" \");\n dwarf.X++;\n }\n }\n if (pressedKey.Key == ConsoleKey.LeftArrow) \n {\n if(dwarf.X >= 1)\n {\n //removing the old positions of the dwarf and decreasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.Write(\" \");\n dwarf.X--;\n }\n }\n }\n }\n \n //printing the dwarf\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.ForegroundColor = dwarf.color;\n Console.Write(dwarf.symbol); \n \n //sleeping the loop for sometime\n //Thread.Sleep((int)sleepTime);\n\n //reducing the sleep time of the loop\n sleepTime -= 0.5;\n\n \n //removing the rocks \n //foreach (var item in rocks)\n //{\n // if (item.Y < Console.WindowHeight)\n // {\n // Console.SetCursorPosition(item.X, item.Y);\n // Console.Write(\" \");\n // }\n //} \n }\n //Printing the score after the game is over\n Console.Clear();\n Console.WriteLine(\"Game over! Your score is: \" + score);\n\n }\n}\n</context>\n\n这个\"Falling Rocks\"游戏是如何工作的呢?可以详细解释一下代码的运作机制吗? \n\n\n\n
```

### 8K Sequence
```
<context># -*- coding: utf-8 -*-\n# This code is part of Amoco\n# Copyright (C) 2021 Axel Tillequin ([email protected])\n# published under GPLv2 license\nfrom amoco.arch.tricore import env\nfrom amoco.arch.core import *\n# -------------------------------------------------------\n# from TriCore TC1.6.2 core architecture manual V1.2.2\n# (32-bit Unified Processor Core), 2020-01-15\n# define all except FPU instructions\n# -------------------------------------------------------\nISPECS = []\n@ispec("32<[ disp1(16) disp2(8) {6d} ]", mnemonic="CALL")\n@ispec("32<[ disp1(16) disp2(8) {61} ]", mnemonic="FCALL")\n@ispec("32<[ disp1(16) disp2(8) {1d} ]", mnemonic="J")\n@ispec("32<[ disp1(16) disp2(8) {5d} ]", mnemonic="JL")\ndef tricore_branch(obj, disp1, disp2):\n v = env.cst(((disp2<<16)+disp1)<<1,24)\n obj.operands = [disp.signextend(32)]\n obj.type = type_control_flow\n@ispec("32<[ disp1(16) disp2(8) {ed} ]", mnemonic="CALLA")\n@ispec("32<[ disp1(16) disp2(8) {e1} ]", mnemonic="FCALLA")\n@ispec("32<[ disp1(16) disp2(8) {9d} ]", mnemonic="JA")\n@ispec("32<[ disp1(16) disp2(8) {dd} ]", mnemonic="JLA")\ndef tricore_branch(obj, disp1, disp2):\n v = env.cst((disp2<<16)+disp1,24)\n addr = composer([env.bit0,v[0:20],env.cst(0,7),v[20:24]])\n obj.operands = [addr]\n obj.type = type_control_flow\n@ispec("32<[ ---- {00} ---- ---- a(4) {2d} ]", mnemonic="CALLI")\n@ispec("32<[ ---- {01} ---- ---- a(4) {2d} ]", mnemonic="FCALLI")\n@ispec("32<[ ---- {03} ---- ---- a(4) {2d} ]", mnemonic="JI")\n@ispec("32<[ ---- {02} ---- ---- a(4) {2d} ]", mnemonic="JLI")\ndef tricore_branchI(obj, a):\n src = env.A[a]\n obj.operands = [src]\n obj.type = type_control_flow\n@ispec("16<[ disp(8) {5c} ]", mnemonic="CALL")\n@ispec("16<[ disp(8) {3c} ]", mnemonic="J")\n@ispec("16<[ disp(8) {ee} ]", mnemonic="JNZ")\n@ispec("16<[ disp(8) {6e} ]", mnemonic="JZ")\ndef tricore_branch(obj, disp):\n disp = env.cst(disp<<1,8)\n obj.operands = [disp.signextend(32)]\n obj.type = type_control_flow\n@ispec("32<[ ---- 0000000 const9(9) ---- {ad} ]", mnemonic="BISR")\n@ispec("32<[ ---- 0000100 const9(9) ---- {ad} ]", mnemonic="SYSCALL")\ndef tricore_system(obj, const9):\n obj.operands = [env.cst(const9,9)]\n obj.type = type_system\n@ispec("32<[ c(4) {1c} ---- b(4) ---- {0b} ]", mnemonic="ABS")\n@ispec("32<[ c(4) {5c} ---- b(4) ---- {0b} ]", mnemonic="ABS_B")\n@ispec("32<[ c(4) {7c} ---- b(4) ---- {0b} ]", mnemonic="ABS_H")\n@ispec("32<[ c(4) {1d} ---- b(4) ---- {0b} ]", mnemonic="ABSS")\n@ispec("32<[ c(4) {7d} ---- b(4) ---- {0b} ]", mnemonic="ABSS_H")\n@ispec("32<[ c(4) {1f} ---- b(4) ---- {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b):\n src = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {80} ---- b(4) ---- {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b):\n src = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, src.signextend(64)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {81} ---- b(4) a(4) {0b} ]", mnemonic="MOV")\ndef tricore_dd_arithmetic(obj, c, b, a):\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, composer([src2,src1])]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {0e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF")\n@ispec("32<[ c(4) {4e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF_B")\n@ispec("32<[ c(4) {6e} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIF_H")\n@ispec("32<[ c(4) {0f} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIFS")\n@ispec("32<[ c(4) {6f} ---- b(4) a(4) {0b} ]", mnemonic="ABSDIFS_H")\n@ispec("32<[ c(4) {00} ---- b(4) 
a(4) {0b} ]", mnemonic="ADD")\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {0b} ]", mnemonic="ADD_B")\n@ispec("32<[ c(4) {60} ---- b(4) a(4) {0b} ]", mnemonic="ADD_H")\n@ispec("32<[ c(4) {05} ---- b(4) a(4) {0b} ]", mnemonic="ADDC")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {0b} ]", mnemonic="ADDS")\n@ispec("32<[ c(4) {62} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_H")\n@ispec("32<[ c(4) {63} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_HU")\n@ispec("32<[ c(4) {03} ---- b(4) a(4) {0b} ]", mnemonic="ADDS_U")\n@ispec("32<[ c(4) {04} ---- b(4) a(4) {0b} ]", mnemonic="ADDX")\n@ispec("32<[ c(4) {08} ---- b(4) a(4) {0f} ]", mnemonic="AND")\n@ispec("32<[ c(4) {20} ---- b(4) a(4) {0b} ]", mnemonic="AND_EQ")\n@ispec("32<[ c(4) {24} ---- b(4) a(4) {0b} ]", mnemonic="AND_GE")\n@ispec("32<[ c(4) {25} ---- b(4) a(4) {0b} ]", mnemonic="AND_GE_U")\n@ispec("32<[ c(4) {22} ---- b(4) a(4) {0b} ]", mnemonic="AND_LT")\n@ispec("32<[ c(4) {23} ---- b(4) a(4) {0b} ]", mnemonic="AND_LT_U")\n@ispec("32<[ c(4) {21} ---- b(4) a(4) {0b} ]", mnemonic="AND_NE")\n@ispec("32<[ c(4) {0e} ---- b(4) a(4) {0f} ]", mnemonic="ANDN")\n@ispec("32<[ c(4) {10} ---- b(4) a(4) {0b} ]", mnemonic="EQ")\n@ispec("32<[ c(4) {50} ---- b(4) a(4) {0b} ]", mnemonic="EQ_B")\n@ispec("32<[ c(4) {70} ---- b(4) a(4) {0b} ]", mnemonic="EQ_H")\n@ispec("32<[ c(4) {90} ---- b(4) a(4) {0b} ]", mnemonic="EQ_W")\n@ispec("32<[ c(4) {56} ---- b(4) a(4) {0b} ]", mnemonic="EQANY_B")\n@ispec("32<[ c(4) {76} ---- b(4) a(4) {0b} ]", mnemonic="EQANY_H")\n@ispec("32<[ c(4) {14} ---- b(4) a(4) {0b} ]", mnemonic="GE")\n@ispec("32<[ c(4) {15} ---- b(4) a(4) {0b} ]", mnemonic="GE_U")\n@ispec("32<[ c(4) {12} ---- b(4) a(4) {0b} ]", mnemonic="LT")\n@ispec("32<[ c(4) {13} ---- b(4) a(4) {0b} ]", mnemonic="LT_U")\n@ispec("32<[ c(4) {52} ---- b(4) a(4) {0b} ]", mnemonic="LT_B")\n@ispec("32<[ c(4) {53} ---- b(4) a(4) {0b} ]", mnemonic="LT_BU")\n@ispec("32<[ c(4) {72} ---- b(4) a(4) {0b} ]", mnemonic="LT_H")\n@ispec("32<[ c(4) {73} ---- b(4) a(4) {0b} ]", mnemonic="LT_HU")\n@ispec("32<[ c(4) {92} ---- b(4) a(4) {0b} ]", mnemonic="LT_W")\n@ispec("32<[ c(4) {93} ---- b(4) a(4) {0b} ]", mnemonic="LT_WU")\n@ispec("32<[ c(4) {1a} ---- b(4) a(4) {0b} ]", mnemonic="MAX")\n@ispec("32<[ c(4) {1b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_U")\n@ispec("32<[ c(4) {5a} ---- b(4) a(4) {0b} ]", mnemonic="MAX_B")\n@ispec("32<[ c(4) {5b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_BU")\n@ispec("32<[ c(4) {7a} ---- b(4) a(4) {0b} ]", mnemonic="MAX_H")\n@ispec("32<[ c(4) {7b} ---- b(4) a(4) {0b} ]", mnemonic="MAX_HU")\n@ispec("32<[ c(4) {18} ---- b(4) a(4) {0b} ]", mnemonic="MIN")\n@ispec("32<[ c(4) {19} ---- b(4) a(4) {0b} ]", mnemonic="MIN_U")\n@ispec("32<[ c(4) {58} ---- b(4) a(4) {0b} ]", mnemonic="MIN_B")\n@ispec("32<[ c(4) {59} ---- b(4) a(4) {0b} ]", mnemonic="MIN_BU")\n@ispec("32<[ c(4) {78} ---- b(4) a(4) {0b} ]", mnemonic="MIN_H")\n@ispec("32<[ c(4) {79} ---- b(4) a(4) {0b} ]", mnemonic="MIN_HU")\n@ispec("32<[ c(4) {09} ---- b(4) a(4) {0f} ]", mnemonic="NAND")\n@ispec("32<[ c(4) {11} ---- b(4) a(4) {0b} ]", mnemonic="NE")\n@ispec("32<[ c(4) {0b} ---- b(4) a(4) {0f} ]", mnemonic="NOR")\n@ispec("32<[ c(4) {0a} ---- b(4) a(4) {0f} ]", mnemonic="OR")\n@ispec("32<[ c(4) {27} ---- b(4) a(4) {0b} ]", mnemonic="OR_EQ")\n@ispec("32<[ c(4) {2b} ---- b(4) a(4) {0b} ]", mnemonic="OR_GE")\n@ispec("32<[ c(4) {2c} ---- b(4) a(4) {0b} ]", mnemonic="OR_GE_U")\n@ispec("32<[ c(4) {29} ---- b(4) a(4) {0b} ]", mnemonic="OR_LT")\n@ispec("32<[ c(4) {2a} ---- b(4) a(4) {0b} ]", mnemonic="OR_LT_U")\n@ispec("32<[ c(4) {28} ---- b(4) 
a(4) {0b} ]", mnemonic="OR_NE")\n@ispec("32<[ c(4) {0f} ---- b(4) a(4) {0f} ]", mnemonic="ORN")\n@ispec("32<[ c(4) {00} ---- b(4) a(4) {0f} ]", mnemonic="SH")\n@ispec("32<[ c(4) {37} ---- b(4) a(4) {0b} ]", mnemonic="SH_EQ")\n@ispec("32<[ c(4) {3b} ---- b(4) a(4) {0b} ]", mnemonic="SH_GE")\n@ispec("32<[ c(4) {3c} ---- b(4) a(4) {0b} ]", mnemonic="SH_GE_U")\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {0f} ]", mnemonic="SH_H")\n@ispec("32<[ c(4) {39} ---- b(4) a(4) {0b} ]", mnemonic="SH_LT")\n@ispec("32<[ c(4) {3a} ---- b(4) a(4) {0b} ]", mnemonic="SH_LT_U")\n@ispec("32<[ c(4) {38} ---- b(4) a(4) {0b} ]", mnemonic="SH_NE")\n@ispec("32<[ c(4) {01} ---- b(4) a(4) {0f} ]", mnemonic="SHA")\n@ispec("32<[ c(4) {41} ---- b(4) a(4) {0f} ]", mnemonic="SHA_H")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {0f} ]", mnemonic="SHAS")\n@ispec("32<[ c(4) {08} ---- b(4) a(4) {0b} ]", mnemonic="SUB")\n@ispec("32<[ c(4) {48} ---- b(4) a(4) {0b} ]", mnemonic="SUB_B")\n@ispec("32<[ c(4) {68} ---- b(4) a(4) {0b} ]", mnemonic="SUB_H")\n@ispec("32<[ c(4) {0d} ---- b(4) a(4) {0b} ]", mnemonic="SUBC")\n@ispec("32<[ c(4) {0a} ---- b(4) a(4) {0b} ]", mnemonic="SUBS")\n@ispec("32<[ c(4) {0b} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_U")\n@ispec("32<[ c(4) {6a} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_H")\n@ispec("32<[ c(4) {6b} ---- b(4) a(4) {0b} ]", mnemonic="SUBS_HU")\n@ispec("32<[ c(4) {0c} ---- b(4) a(4) {0b} ]", mnemonic="SUBX")\n@ispec("32<[ c(4) {0d} ---- b(4) a(4) {0f} ]", mnemonic="XNOR")\n@ispec("32<[ c(4) {0c} ---- b(4) a(4) {0f} ]", mnemonic="XOR")\n@ispec("32<[ c(4) {2f} ---- b(4) a(4) {0b} ]", mnemonic="XOR_EQ")\n@ispec("32<[ c(4) {30} ---- b(4) a(4) {0b} ]", mnemonic="XOR_NE")\ndef tricore_ddd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {40} ---- b(4) a(4) {01} ]", mnemonic="EQ_A")\n@ispec("32<[ c(4) {43} ---- b(4) a(4) {01} ]", mnemonic="GE_A")\n@ispec("32<[ c(4) {42} ---- b(4) a(4) {01} ]", mnemonic="LT_A")\n@ispec("32<[ c(4) {41} ---- b(4) a(4) {01} ]", mnemonic="NE_A")\ndef tricore_daa_arithmetic(obj, c, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {63} ---- b(4) ---- {01} ]", mnemonic="MOV_A", _dst=env.A, _src=env.D)\n@ispec("32<[ c(4) {00} ---- b(4) ---- {01} ]", mnemonic="MOV_AA", _dst=env.A, _src=env.A)\n@ispec("32<[ c(4) {4c} ---- b(4) ---- {01} ]", mnemonic="MOV_D", _dst=env.D, _src=env.A)\ndef tricore_daa_arithmetic(obj, c, b, _dst, _src):\n dst = _dst[c]\n src = _src[b]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {48} ---- ---- a(4) {01} ]", mnemonic="EQZ_A")\n@ispec("32<[ c(4) {49} ---- ---- a(4) {01} ]", mnemonic="NEZ_A")\ndef tricore_da_arithmetic(obj, c, a):\n src1 = env.A[a]\n dst = env.D[c]\n obj.operands = [dst, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {01} --00 b(4) a(4) {4b} ]", mnemonic="BMERGE")\ndef tricore_ddd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {06} --00 b(4) a(4) {4b} ]", mnemonic="CRC32_B")\n@ispec("32<[ c(4) {03} --00 b(4) a(4) {4b} ]", mnemonic="CRC32B_W")\n@ispec("32<[ c(4) {03} --00 b(4) a(4) {4b} ]", mnemonic="CRC32L_W")\ndef tricore_crc32(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src2, src1]\n obj.type = 
type_data_processing\n@ispec("32<[ c(4) {20} --01 b(4) a(4) {4b} ]", mnemonic="DIV")\n@ispec("32<[ c(4) {21} --01 b(4) a(4) {4b} ]", mnemonic="DIV_U")\n@ispec("32<[ c(4) {5a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_B")\n@ispec("32<[ c(4) {4a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_BU")\n@ispec("32<[ c(4) {3a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_H")\n@ispec("32<[ c(4) {2a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_HU")\n@ispec("32<[ c(4) {1a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT")\n@ispec("32<[ c(4) {0a} --00 b(4) a(4) {4b} ]", mnemonic="DVINIT_U")\ndef tricore_edd_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 100 ----- b(4) a(4) {17} ]", mnemonic="DEXTR")\ndef tricore_dddc(obj, c, d, b, a):\n shift = env.D[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, shift]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 010 ----- ---- a(4) {17} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) d(4) 011 ----- ---- a(4) {17} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, d, a):\n if d%2:\n raise InstructionError(obj)\n width = env.E[d][32:37]\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, width]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 0--00 ---- a(4) {6b} ]", mnemonic="PACK")\ndef tricore_extr(obj, c, d, a):\n if d%2:\n raise InstructionError(obj)\n src1 = env.E[d]\n src2 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {08} -- 00 ---- a(4) {4b} ]", mnemonic="UNPACK")\ndef tricore_extr(obj, c, d, a):\n src = env.D[a]\n dst = env.E[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {02} -- 00 ---- a(4) {4b} ]", mnemonic="PARITY")\n@ispec("32<[ c(4) {22} -- 00 ---- a(4) {4b} ]", mnemonic="POPCNT_W")\ndef tricore_extr(obj, c, d, a):\n src = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 ----- b(4) a(4) {77} ]", mnemonic="DEXTR")\ndef tricore_dextr(obj, c, pos, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, env.cst(pos,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 10 width(5) ---- a(4) {37} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) pos(5) 11 width(5) ---- a(4) {37} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, pos, width, a):\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 01 width(5) const(4) ---- {b7} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, pos, width, const):\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, env.cst(const,4), env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 001 width(5) const(4) ---- {d7} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, d, width, const):\n src2 = env.D[d]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, env.cst(const,4), src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 01 width(5) b(4) ---- {37} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, pos, width, b):\n src1 = env.D[b]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, env.cst(pos,5), env.cst(width,5)]\n obj.type = 
type_data_processing\n@ispec("32<[ c(4) d(4) 001 width(5) b(4) ---- {57} ]", mnemonic="IMASK")\ndef tricore_imask(obj, c, d, width, b):\n src1 = env.D[b]\n src2 = env.D[d]\n if c%2:\n raise InstructionError(obj)\n dst = env.E[c]\n obj.operands = [dst, src1, src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 width(5) const(4) a(4) {b7} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, pos, width, const, a):\n dst = env.D[c]\n src1 = env.D[a]\n obj.operands = [dst, src1, env.cst(const,4), env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ----- const(4) a(4) {97} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, const, a):\n src1 = env.D[a]\n if d%2:\n raise InstructionError(obj)\n src3 = env.E[d]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(const,4), src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 width(5) const(4) a(4) {d7} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, width, const, a):\n src1 = env.D[a]\n src3 = env.D[d]\n dst = env.D[c]\n obj.operands = [dst, src1, env.cst(const,4), src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos(5) 00 width(5) b(4) a(4) {37} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, pos, width, b, a):\n dst = env.D[c]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2, env.cst(pos,5), env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ----- b(4) a(4) {17} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n if d%2:\n raise InstructionError(obj)\n src3 = env.E[d]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 width(5) b(4) a(4) {57} ]", mnemonic="INSERT")\ndef tricore_imask(obj, c, d, width, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n src3 = env.D[d]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, src3, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 010 width(5) ---- a(4) {57} ]", mnemonic="EXTR")\n@ispec("32<[ c(4) d(4) 011 width(5) ---- a(4) {57} ]", mnemonic="EXTR_U")\ndef tricore_extr(obj, c, d, width, a):\n src2 = env.D[d]\n src1 = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src1, src2, env.cst(width,5)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {09} --00 ---- a(4) {4b} ]", mnemonic="BSPLIT")\ndef tricore_edd_arithmetic(obj, c, a):\n src1 = env.D[a]\n dst = env.E[c]\n obj.operands = [dst, src1]\n obj.type = type_data_processing\n@ispec("32<[ c(4) 0001110 ~const9(9) a(4) {8b} ]", mnemonic="ABSDIF")\n@ispec("32<[ c(4) 0001111 ~const9(9) a(4) {8b} ]", mnemonic="ABSDIFS")\n@ispec("32<[ c(4) 0000000 ~const9(9) a(4) {8b} ]", mnemonic="ADD")\n@ispec("32<[ c(4) 0000101 ~const9(9) a(4) {8b} ]", mnemonic="ADDC")\n@ispec("32<[ c(4) 0000010 ~const9(9) a(4) {8b} ]", mnemonic="ADDS")\n@ispec("32<[ c(4) 0000011 ~const9(9) a(4) {8b} ]", mnemonic="ADDS_U") #const9 is signed\n@ispec("32<[ c(4) 0000100 ~const9(9) a(4) {8b} ]", mnemonic="ADDX")\n@ispec("32<[ c(4) 0100000 ~const9(9) a(4) {8b} ]", mnemonic="AND_EQ")\n@ispec("32<[ c(4) 0100100 ~const9(9) a(4) {8b} ]", mnemonic="AND_GE")\n@ispec("32<[ c(4) 0100010 ~const9(9) a(4) {8b} ]", mnemonic="AND_LT")\n@ispec("32<[ c(4) 0100001 ~const9(9) a(4) {8b} ]", mnemonic="AND_NE")\n@ispec("32<[ c(4) 0010000 ~const9(9) a(4) {8b} ]", mnemonic="EQ")\n@ispec("32<[ c(4) 1010110 ~const9(9) a(4) {8b} ]", mnemonic="EQANY_B")\n@ispec("32<[ c(4) 1110110 ~const9(9) a(4) {8b} ]", 
mnemonic="EQANY_H")\n@ispec("32<[ c(4) 0010100 ~const9(9) a(4) {8b} ]", mnemonic="GE")\n@ispec("32<[ c(4) 0010010 ~const9(9) a(4) {8b} ]", mnemonic="LT")\n@ispec("32<[ c(4) 0011010 ~const9(9) a(4) {8b} ]", mnemonic="MAX")\n@ispec("32<[ c(4) 0010001 ~const9(9) a(4) {8b} ]", mnemonic="NE")\n@ispec("32<[ c(4) 0100111 ~const9(9) a(4) {8b} ]", mnemonic="OR_EQ")\n@ispec("32<[ c(4) 0101011 ~const9(9) a(4) {8b} ]", mnemonic="OR_GE")\n@ispec("32<[ c(4) 0101001 ~const9(9) a(4) {8b} ]", mnemonic="OR_LT")\n@ispec("32<[ c(4) 0001000 ~const9(9) a(4) {8b} ]", mnemonic="RSUB")\n@ispec("32<[ c(4) 0001001 ~const9(9) a(4) {8b} ]", mnemonic="RSUBS")\n@ispec("32<[ c(4) 0001011 ~const9(9) a(4) {8b} ]", mnemonic="RSUBS_U") #const9 is signed\n@ispec("32<[ c(4) 0000000 ~const9(9) a(4) {8f} ]", mnemonic="SH")\n@ispec("32<[ c(4) 1000000 ~const9(9) a(4) {8f} ]", mnemonic="SH_H")\n@ispec("32<[ c(4) 0110111 ~const9(9) a(4) {8b} ]", mnemonic="SH_EQ")\n@ispec("32<[ c(4) 0111011 ~const9(9) a(4) {8b} ]", mnemonic="SH_GE")\n@ispec("32<[ c(4) 0111001 ~const9(9) a(4) {8b} ]", mnemonic="SH_LT")\n@ispec("32<[ c(4) 0111000 ~const9(9) a(4) {8b} ]", mnemonic="SH_NE")\n@ispec("32<[ c(4) 0000001 ~const9(9) a(4) {8f} ]", mnemonic="SHA")\n@ispec("32<[ c(4) 1000001 ~const9(9) a(4) {8f} ]", mnemonic="SHA_H")\n@ispec("32<[ c(4) 0000010 ~const9(9) a(4) {8f} ]", mnemonic="SHAS")\n@ispec("32<[ c(4) 0101111 ~const9(9) a(4) {8b} ]", mnemonic="XOR_EQ")\n@ispec("32<[ c(4) 0110011 ~const9(9) a(4) {8b} ]", mnemonic="XOR_GE")\n@ispec("32<[ c(4) 0110001 ~const9(9) a(4) {8b} ]", mnemonic="XOR_LT")\n@ispec("32<[ c(4) 0110000 ~const9(9) a(4) {8b} ]", mnemonic="XOR_NE")\ndef tricore_ddc_arithmetic(obj, c, const9, a):\n src1 = env.D[a]\n if obj.mnemonic in ("SH","SHA","SHAS"):\n const9 = const9[0:6]\n elif obj.mnemonic in ("SH_H","SHA_H"):\n const9 = const9[0:5]\n src2 = env.cst(const9.int(-1),32)\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {47} ]", mnemonic="AND_OR_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {87} ]", mnemonic="AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {87} ]", mnemonic="ANDN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {67} ]", mnemonic="INS_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {67} ]", mnemonic="INSN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {07} ]", mnemonic="NAND_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {87} ]", mnemonic="NOR_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {c7} ]", mnemonic="OR_OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {87} ]", mnemonic="OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {07} ]", mnemonic="ORN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_AND_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_ANDN_T")\n@ispec("32<[ c(4) pos2(5) 00 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_NAND_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {27} ]", mnemonic="SH_NOR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {27} ]", 
mnemonic="SH_OR_T")\n@ispec("32<[ c(4) pos2(5) 01 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_ORN_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_XNOR_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {a7} ]", mnemonic="SH_XOR_T")\n@ispec("32<[ c(4) pos2(5) 10 pos1(5) b(4) a(4) {07} ]", mnemonic="XNOR_T")\n@ispec("32<[ c(4) pos2(5) 11 pos1(5) b(4) a(4) {07} ]", mnemonic="XOR_T")\ndef tricore_ddd_arithmetic(obj, c, pos2, pos1, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, src1[pos1:pos1+1], src2[pos2:pos2+1]]\n obj.type = type_data_processing\n@ispec("32<[ c(4) 0001000 const9(9) a(4) {8f} ]", mnemonic="AND")\n@ispec("32<[ c(4) 0100101 const9(9) a(4) {8b} ]", mnemonic="AND_GE_U")\n@ispec("32<[ c(4) 0100011 const9(9) a(4) {8b} ]", mnemonic="AND_LT_U")\n@ispec("32<[ c(4) 0001110 const9(9) a(4) {8f} ]", mnemonic="ANDN")\n@ispec("32<[ c(4) 0001001 const9(9) a(4) {8f} ]", mnemonic="NAND")\n@ispec("32<[ c(4) 0001011 const9(9) a(4) {8f} ]", mnemonic="NOR")\n@ispec("32<[ c(4) 0010101 const9(9) a(4) {8b} ]", mnemonic="GE_U")\n@ispec("32<[ c(4) 0001010 const9(9) a(4) {8f} ]", mnemonic="OR")\n@ispec("32<[ c(4) 0101100 const9(9) a(4) {8b} ]", mnemonic="OR_GE_U")\n@ispec("32<[ c(4) 0101010 const9(9) a(4) {8b} ]", mnemonic="OR_LT_U")\n@ispec("32<[ c(4) 0101000 const9(9) a(4) {8b} ]", mnemonic="OR_NE")\n@ispec("32<[ c(4) 0001111 const9(9) a(4) {8f} ]", mnemonic="ORN")\n@ispec("32<[ c(4) 0000111 const9(9) a(4) {8f} ]", mnemonic="SHUFFLE")\n@ispec("32<[ c(4) 0001101 const9(9) a(4) {8f} ]", mnemonic="XNOR")\n@ispec("32<[ c(4) 0001100 const9(9) a(4) {8f} ]", mnemonic="XOR")\n@ispec("32<[ c(4) 0111100 const9(9) a(4) {8b} ]", mnemonic="SH_GE_U")\n@ispec("32<[ c(4) 0111010 const9(9) a(4) {8b} ]", mnemonic="SH_LT_U")\n@ispec("32<[ c(4) 0110100 const9(9) a(4) {8b} ]", mnemonic="XOR_GE_U")\n@ispec("32<[ c(4) 0110011 const9(9) a(4) {8b} ]", mnemonic="XOR_LT_U")\n@ispec("32<[ c(4) 0011011 const9(9) a(4) {8b} ]", mnemonic="MAX_U")\n@ispec("32<[ c(4) 0010011 const9(9) a(4) {8b} ]", mnemonic="LT_U")\ndef tricore_ddc_arithmetic(obj, c, const9, a):\n src1 = env.D[a]\n src2 = env.cst(const9,32)\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {c2} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {06} ]", mnemonic="SH")\n@ispec("16<[ ~const4(4) a(4) {86} ]", mnemonic="SHA")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[a]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {92} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {8a} ]", mnemonic="CADD")\n@ispec("16<[ ~const4(4) a(4) {ca} ]", mnemonic="CADDN")\n@ispec("16<[ ~const4(4) a(4) {aa} ]", mnemonic="CMOV")\n@ispec("16<[ ~const4(4) a(4) {ea} ]", mnemonic="CMOVN")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[15]\n obj.operands = [dst, src1, src2]\n if "CADD" in obj.mnemonic:\n obj.operands = [dst, src1, dst, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {9a} ]", mnemonic="ADD")\n@ispec("16<[ ~const4(4) a(4) {ba} ]", mnemonic="EQ")\n@ispec("16<[ ~const4(4) a(4) {fa} ]", mnemonic="LT")\n@ispec("16<[ ~const4(4) a(4) {82} ]", mnemonic="MOV")\ndef tricore_ddc_arithmetic(obj, const4, a):\n dst = env.D[15]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.D[a]\n obj.operands = [dst, src1, src2]\n if obj.mnemonic=="MOV":\n obj.operands = 
[src1,src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {d2} ]", mnemonic="MOV")\ndef tricore_ec_arithmetic(obj, const4, a):\n dst = env.E[a]\n src = env.cst(const4.int(-1),64)\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ const4(4) a(4) {a0} ]", mnemonic="MOV_A")\ndef tricore_ec_arithmetic(obj, const4, a):\n dst = env.A[a]\n src = env.cst(const4,32)\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ const8(8) {16} ]", mnemonic="AND")\n@ispec("16<[ const8(8) {da} ]", mnemonic="MOV")\n@ispec("16<[ const8(8) {96} ]", mnemonic="OR")\ndef tricore_ddc_arithmetic(obj, const8):\n dst = env.D[15]\n src2 = env.cst(const8,32)\n src1 = env.D[15]\n obj.operands = [dst, src1, src2]\n if obj.mnemonic=="MOV":\n obj.operands = [src1,src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {42} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {26} ]", mnemonic="AND")\n@ispec("16<[ b(4) a(4) {a6} ]", mnemonic="OR")\n@ispec("16<[ b(4) a(4) {a2} ]", mnemonic="SUB")\n@ispec("16<[ b(4) a(4) {62} ]", mnemonic="SUBS")\n@ispec("16<[ b(4) a(4) {c6} ]", mnemonic="XOR")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[a]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {02} ]", mnemonic="MOV" , _dst=env.D, _src=env.D)\n@ispec("16<[ b(4) a(4) {60} ]", mnemonic="MOV_A" , _dst=env.A, _src=env.D)\n@ispec("16<[ b(4) a(4) {40} ]", mnemonic="MOV_AA" , _dst=env.A, _src=env.A)\n@ispec("16<[ b(4) a(4) {80} ]", mnemonic="MOV_D" , _dst=env.D, _src=env.A)\ndef tricore_mov(obj, b, a, _dst, _src):\n dst = _dst[a]\n src = _src[b]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {12} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {2a} ]", mnemonic="CMOV")\n@ispec("16<[ b(4) a(4) {6a} ]", mnemonic="CMOVN")\n@ispec("16<[ b(4) a(4) {52} ]", mnemonic="SUB")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[a]\n src1 = env.D[15]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {1a} ]", mnemonic="ADD")\n@ispec("16<[ b(4) a(4) {22} ]", mnemonic="ADDS")\n@ispec("16<[ b(4) a(4) {3a} ]", mnemonic="EQ")\n@ispec("16<[ b(4) a(4) {7a} ]", mnemonic="LT")\n@ispec("16<[ b(4) a(4) {5a} ]", mnemonic="SUB")\ndef tricore_dd_arithmetic(obj, b, a):\n dst = env.D[15]\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {01} ---- b(4) a(4) {01} ]", mnemonic="ADD_A")\n@ispec("32<[ c(4) {02} ---- b(4) a(4) {01} ]", mnemonic="SUB_A")\ndef tricore_aaa_arithmetic(obj, c, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) a(4) {b0} ]", mnemonic="ADD_A")\ndef tricore_aac_arithmetic(obj, const4, a):\n dst = env.A[a]\n src2 = env.cst(const4.int(-1),32)\n src1 = env.A[a]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ const8(8) {20} ]", mnemonic="SUB_A")\ndef tricore_aac_arithmetic(obj, const8, a):\n dst = env.A[10]\n src2 = env.cst(const8,32)\n src1 = env.A[10]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) {30} ]", mnemonic="ADD_A")\ndef tricore_aa_arithmetic(obj, b, a):\n dst = env.A[a]\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) ~const16(16) a(4) {1b} ]", 
mnemonic="ADDI")\n@ispec("32<[ c(4) ~const16(16) a(4) {9b} ]", mnemonic="ADDIH")\ndef tricore_di_arithmetic(obj, c, const16, a):\n src1 = env.D[a]\n src2 = env.cst(const16.int(-1),32)\n if self.mnemonic=="ADDIH": src2=src2<<16\n dst = env.D[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) ~const16(16) a(4) {11} ]", mnemonic="ADDIH_A")\ndef tricore_ai_arithmetic(obj, c, const16, a):\n src1 = env.A[a]\n src2 = env.cst(const16.int(-1),32)<<16\n dst = env.A[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {60} -- n(2) b(4) a(4) {01} ]", mnemonic="ADDSC_A")\ndef tricore_aaa_arithmetic(obj, c, n, b, a):\n src1 = env.D[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src2, src1, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {62} ---- b(4) a(4) {01} ]", mnemonic="ADDSC_AT")\ndef tricore_aaa_arithmetic(obj, c, b, a):\n src1 = env.D[a]\n src2 = env.A[b]\n dst = env.A[c]\n obj.operands = [dst, src2, src1]\n obj.type = type_data_processing\n@ispec("16<[ b(4) a(4) n(2) 010000 ]", mnemonic="ADDSC_A")\ndef tricore_aa_arithmetic(obj, b, a, n):\n dst = env.A[a]\n src1 = env.D[15]\n src2 = env.A[b]\n obj.operands = [dst, src2, src1, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ off2(4) 10 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1110 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_I", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1110 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_I", mode="Circular")\n@ispec("32<[ off2(4) 00 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1110 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_I", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1100 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1100 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_W", mode="Circular")\n@ispec("32<[ off2(4) 00 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1100 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1101 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_WI", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 1101 off1(6) b(4) ---- {a9} ]", mnemonic="CACHEA_WI", mode="Circular")\n@ispec("32<[ off2(4) 00 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1101 off1(6) b(4) ---- {89} ]", mnemonic="CACHEA_WI", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1011 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1010 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_I", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Short-offset")\n@ispec("32<[ off2(4) 00 1111 off1(6) b(4) ---- {89} ]", 
mnemonic="CACHEI_WI", mode="Post-increment")\n@ispec("32<[ off2(4) 01 1111 off1(6) b(4) ---- {89} ]", mnemonic="CACHEI_WI", mode="Pre-increment")\ndef tricore_cache(obj, off2, off1, b):\n src2 = env.A[b]\n src1 = env.cst((off2<<6)+off1,10)\n obj.operands = [src2, src1]\n obj.type = type_system\n@ispec("32<[ off2(4) 10 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 0011 off1(6) b(4) a(4) {69} ]", mnemonic="CMPSWAP_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 0011 off1(6) b(4) a(4) {69} ]", mnemonic="CMPSWAP_W", mode="Circular")\n@ispec("32<[ off2(4) 00 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 0011 off1(6) b(4) a(4) {49} ]", mnemonic="CMPSWAP_W", mode="Pre-increment")\n@ispec("32<[ off2(4) 10 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Short-offset")\n@ispec("32<[ off2(4) 00 0010 off1(6) b(4) a(4) {69} ]", mnemonic="SWAPMSK_W", mode="Bit-reverse")\n@ispec("32<[ off2(4) 01 0010 off1(6) b(4) a(4) {69} ]", mnemonic="SWAPMSK_W", mode="Circular")\n@ispec("32<[ off2(4) 00 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Post-increment")\n@ispec("32<[ off2(4) 01 0010 off1(6) b(4) a(4) {49} ]", mnemonic="SWAPMSK_W", mode="Pre-increment")\ndef tricore_swap(obj, off2, off1, b, a):\n if a%2:\n raise InstructionError(obj)\n dst = env.D[a]\n src1 = env.A[b]\n src2 = env.cst((off2<<6)+off1,10)\n src3 = env.E[a]\n obj.operands = [dst, src1, src2, src3]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 000 ~const9(9) a(4) {ab} ]", mnemonic="CADD")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {ab} ]", mnemonic="CADDN")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {13} ]", mnemonic="MADD", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {13} ]", mnemonic="MADDS", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {13} ]", mnemonic="MADDS_U", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 001 ~const9(9) a(4) {33} ]", mnemonic="MSUB", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {33} ]", mnemonic="MSUBS", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {33} ]", mnemonic="MSUBS_U", opt4="32+(32+K9)->32")\n@ispec("32<[ c(4) d(4) 100 ~const9(9) a(4) {ab} ]", mnemonic="SEL")\n@ispec("32<[ c(4) d(4) 101 ~const9(9) a(4) {ab} ]", mnemonic="SELN")\ndef tricore_cond_ddc(obj, c, d, const9, a):\n cond = env.D[d]\n src1 = env.D[a]\n src2 = env.cst(const9.int(-1),32)\n dst = env.D[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 011 ~const9(9) a(4) {13} ]", mnemonic="MADD", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {13} ]", mnemonic="MADDS", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 010 ~const9(9) a(4) {13} ]", mnemonic="MADD_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {13} ]", mnemonic="MADDS_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 011 ~const9(9) a(4) {33} ]", mnemonic="MSUB", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {33} ]", mnemonic="MSUBS", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 010 ~const9(9) a(4) {33} ]", mnemonic="MSUB_U", opt4="64+(32+K9)->64")\n@ispec("32<[ c(4) d(4) 111 ~const9(9) a(4) {33} ]", mnemonic="MSUBS_U", opt4="64+(32+K9)->64")\ndef tricore_cond_eec(obj, c, d, const9, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.cst(const9.int(-1),32)\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = 
type_data_processing\n@ispec("32<[ c(4) d(4) 011010 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="LL")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="LU")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="UL")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {83} ]", mnemonic="MADD_H", op4="UU")\n@ispec("32<[ c(4) d(4) 111010 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="LL")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="LU")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="UL")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {83} ]", mnemonic="MADDS_H", op4="UU")\n@ispec("32<[ c(4) d(4) 000010 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 000001 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 000000 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 000101 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 011101 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 000100 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 011100 n(2) b(4) a(4) {43} ]", mnemonic="MADD_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 100010 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 100001 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 100000 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 100101 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 111101 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 100100 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 111100 n(2) b(4) a(4) {43} ]", mnemonic="MADDS_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 011010 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="LL")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="LU")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="UL")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {a3} ]", mnemonic="MSUB_H", op4="UU")\n@ispec("32<[ c(4) d(4) 111010 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="LL")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="LU")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="UL")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {a3} ]", mnemonic="MSUBS_H", op4="UU")\n@ispec("32<[ c(4) d(4) 000010 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 011011 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) 
d(4) 000001 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 011001 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 000000 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 011000 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 000101 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 011101 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 000100 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 011100 n(2) b(4) a(4) {63} ]", mnemonic="MSUB_Q", op4="64+(16U*16U)->64")\n@ispec("32<[ c(4) d(4) 100010 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(32*32)Up->32")\n@ispec("32<[ c(4) d(4) 111011 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) 100001 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16L*32)Up->32")\n@ispec("32<[ c(4) d(4) 111001 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16L*32)->64")\n@ispec("32<[ c(4) d(4) 100000 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16U*32)Up->32")\n@ispec("32<[ c(4) d(4) 111000 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16U*32)->64")\n@ispec("32<[ c(4) d(4) 100101 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16L*16L)->32")\n@ispec("32<[ c(4) d(4) 111101 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16L*16L)->64")\n@ispec("32<[ c(4) d(4) 100100 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="32+(16U*16U)->32")\n@ispec("32<[ c(4) d(4) 111100 n(2) b(4) a(4) {63} ]", mnemonic="MSUBS_Q", op4="64+(16U*16U)->64")\ndef tricore_cond_eec(obj, c, d, n, b, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2, env.cst(n,2)]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) 0000 ---- b(4) a(4) {2b} ]", mnemonic="CADD")\n@ispec("32<[ c(4) d(4) 0001 ---- b(4) a(4) {2b} ]", mnemonic="CADDN")\n@ispec("32<[ c(4) d(4) 0010 ---- b(4) a(4) {2b} ]", mnemonic="CSUB")\n@ispec("32<[ c(4) d(4) 0011 ---- b(4) a(4) {2b} ]", mnemonic="CSUBN")\n@ispec("32<[ c(4) d(4) {0a} b(4) a(4) {03} ]", mnemonic="MADD", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) {8a} b(4) a(4) {03} ]", mnemonic="MADDS", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) {88} b(4) a(4) {03} ]", mnemonic="MADDS_U", opt4="32+(32*32)->32")\n@ispec("32<[ c(4) d(4) 0100 ---- b(4) a(4) {2b} ]", mnemonic="SEL")\n@ispec("32<[ c(4) d(4) 0101 ---- b(4) a(4) {2b} ]", mnemonic="SELN")\ndef tricore_cond_ddd(obj, c, d, b, a):\n cond = env.D[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.D[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) d(4) {6a} b(4) a(4) {03} ]", mnemonic="MADD", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {ea} b(4) a(4) {03} ]", mnemonic="MADDS", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {68} b(4) a(4) {03} ]", mnemonic="MADD_U", opt4="64+(32*32)->64")\n@ispec("32<[ c(4) d(4) {e8} b(4) a(4) {03} ]", mnemonic="MADDS_U", opt4="64+(32*32)->64")\ndef tricore_cond_ddd(obj, c, d, b, a):\n cond = env.E[d]\n src1 = env.D[a]\n src2 = env.D[b]\n dst = env.E[c]\n obj.operands = [dst, cond, src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ c(4) {1c} ---- ---- a(4) {0f} ]", mnemonic="CLO")\n@ispec("32<[ c(4) {7d} ---- ---- a(4) {0f} ]", mnemonic="CLO_H")\n@ispec("32<[ c(4) {1d} ---- ---- a(4) {0f} ]", 
mnemonic="CLS")\n@ispec("32<[ c(4) {7e} ---- ---- a(4) {0f} ]", mnemonic="CLS_H")\n@ispec("32<[ c(4) {1b} ---- ---- a(4) {0f} ]", mnemonic="CLZ")\n@ispec("32<[ c(4) {7c} ---- ---- a(4) {0f} ]", mnemonic="CLZ_H")\n@ispec("32<[ c(4) {5e} ---- ---- a(4) {0b} ]", mnemonic="SAT_B")\n@ispec("32<[ c(4) {5f} ---- ---- a(4) {0b} ]", mnemonic="SAT_BU")\n@ispec("32<[ c(4) {7e} ---- ---- a(4) {0b} ]", mnemonic="SAT_H")\n@ispec("32<[ c(4) {7f} ---- ---- a(4) {0b} ]", mnemonic="SAT_HU")\ndef tricore_dd_arithmetic(obj, c, a):\n src = env.D[a]\n dst = env.D[c]\n obj.operands = [dst, src]\n obj.type = type_data_processing\n@ispec("16<[ 1010 ---- {00} ]", mnemonic="DEBUG")\n@ispec("16<[ 0000 ---- {00} ]", mnemonic="NOP")\ndef tricore_system(obj):\n obj.operands = []\n obj.type = type_system\n@ispec("16<[ 0111 ---- {00} ]", mnemonic="FRET")\n@ispec("16<[ 1001 ---- {00} ]", mnemonic="RET")\n@ispec("16<[ 1000 ---- {00} ]", mnemonic="RFE")\ndef tricore_ret(obj):\n obj.operands = []\n obj.type = type_control_flow\n@ispec("32<[ ---- 000100 ---------- ---- {0d} ]", mnemonic="DEBUG")\n@ispec("32<[ ---- 001101 ---------- ---- {0d} ]", mnemonic="DISABLE")\n@ispec("32<[ ---- 010010 ---------- ---- {0d} ]", mnemonic="DSYNC")\n@ispec("32<[ ---- 001100 ---------- ---- {0d} ]", mnemonic="ENABLE")\n@ispec("32<[ ---- 010011 ---------- ---- {0d} ]", mnemonic="ISYNC")\n@ispec("32<[ ---- 010101 ---------- ---- {0d} ]", mnemonic="TRAPSV")\n@ispec("32<[ ---- 010100 ---------- ---- {0d} ]", mnemonic="TRAPV")\n@ispec("32<[ ---- 000000 ---------- ---- {0d} ]", mnemonic="NOP")\n@ispec("32<[ ---- 001001 ---------- ---- {0d} ]", mnemonic="RSLCX")\n@ispec("32<[ ---- 000000 ---------- ---- {2f} ]", mnemonic="RSTV")\n@ispec("32<[ ---- 001000 ---------- ---- {0d} ]", mnemonic="SVLCX")\n@ispec("32<[ ---- 010110 ---------- ---- {0d} ]", mnemonic="WAIT")\ndef tricore_system(obj):\n obj.operands = []\n obj.type = type_system\n@ispec("32<[ ---- 000011 ---------- ---- {0d} ]", mnemonic="FRET")\n@ispec("32<[ ---- 000110 ---------- ---- {0d} ]", mnemonic="RET")\n@ispec("32<[ ---- 000111 ---------- ---- {0d} ]", mnemonic="RFE")\n@ispec("32<[ ---- 000101 ---------- ---- {0d} ]", mnemonic="RFM")\ndef tricore_ret(obj):\n obj.operands = []\n obj.type = type_control_flow\n@ispec("32<[ ---- 001111 ---------- a(4) {0d} ]", mnemonic="DISABLE")\n@ispec("32<[ ---- 001110 ---------- a(4) {0d} ]", mnemonic="RESTORE")\ndef tricore_system(obj, a):\n obj.operands = [env.D[a]]\n obj.type = type_system\n@ispec("32<[ c(4) d(4) 1101 -- 00 b(4) ---- {6b} ]", mnemonic="DVADJ")\n@ispec("32<[ c(4) d(4) 1111 -- 00 b(4) ---- {6b} ]", mnemonic="DVSTEP")\n@ispec("32<[ c(4) d(4) 1110 -- 00 b(4) ---- {6b} ]", mnemonic="DVSTEP_U")\n@ispec("32<[ c(4) d(4) 1010 -- 00 b(4) ---- {6b} ]", mnemonic="IXMAX")\n@ispec("32<[ c(4) d(4) 1011 -- 00 b(4) ---- {6b} ]", mnemonic="IXMAX_U")\n@ispec("32<[ c(4) d(4) 1000 -- 00 b(4) ---- {6b} ]", mnemonic="IXMIN")\n@ispec("32<[ c(4) d(4) 1001 -- 00 b(4) ---- {6b} ]", mnemonic="IXMIN_U")\ndef tricore_eee(obj, c, d, b):\n if d%2 or b%2 or c%2:\n raise InstructionError(obj)\n src1 = env.E[d]\n src2 = env.E[b]\n dst = env.E[c]\n obj.operands = [dst, src1, src2]\n obj.type = type_data_processing\n@ispec("16<[ ~const4(4) disp(4) {1e} ]", mnemonic="JEQ", _off=0)\n@ispec("16<[ ~const4(4) disp(4) {9e} ]", mnemonic="JEQ", _off=16)\n@ispec("16<[ ~const4(4) disp(4) {5e} ]", mnemonic="JNE", _off=0)\n@ispec("16<[ ~const4(4) disp(4) {de} ]", mnemonic="JNE", _off=16)\ndef tricore_jcc(obj, const4, disp, _off):\n dst = env.D[15]\n src1 = 
env.cst(const4.int(-1),32)\n src2 = env.cst(disp,32)+_off\n obj.operands = [dst, src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {3e} ]", mnemonic="JEQ", _off=0)\n@ispec("16<[ b(4) disp(4) {be} ]", mnemonic="JEQ", _off=16)\n@ispec("16<[ b(4) disp(4) {7e} ]", mnemonic="JNE", _off=0)\n@ispec("16<[ b(4) disp(4) {fe} ]", mnemonic="JNE", _off=16)\ndef tricore_jcc(obj, b, disp, _off):\n dst = env.D[15]\n src1 = env.D[b]\n src2 = env.cst(disp,32)+_off\n obj.operands = [dst, src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {ce} ]", mnemonic="JGEZ")\n@ispec("16<[ b(4) disp(4) {4e} ]", mnemonic="JGTZ")\n@ispec("16<[ b(4) disp(4) {8e} ]", mnemonic="JLEZ")\n@ispec("16<[ b(4) disp(4) {0e} ]", mnemonic="JLTZ")\n@ispec("16<[ b(4) disp(4) {f6} ]", mnemonic="JNZ")\n@ispec("16<[ b(4) disp(4) {76} ]", mnemonic="JZ")\ndef tricore_jcc(obj, b, disp):\n src1 = env.D[b]\n src2 = env.cst(disp,32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {df} ]", mnemonic="JEQ")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {df} ]", mnemonic="JNE")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {ff} ]", mnemonic="JGE")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {ff} ]", mnemonic="JGE_U")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {bf} ]", mnemonic="JLT")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {bf} ]", mnemonic="JLT_U")\n@ispec("32<[ 1 ~disp(15) const(4) a(4) {9f} ]", mnemonic="JNED")\n@ispec("32<[ 0 ~disp(15) const(4) a(4) {9f} ]", mnemonic="JNEI")\ndef tricore_jcc(obj, disp, const, a):\n src1 = env.D[a]\n src2 = env.cst(const,4)\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {5f} ]", mnemonic="JEQ")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {5f} ]", mnemonic="JNE")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {7f} ]", mnemonic="JGE")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {7f} ]", mnemonic="JGE_U")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {3f} ]", mnemonic="JLT")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {3f} ]", mnemonic="JLT_U")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {1f} ]", mnemonic="JNED")\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {1f} ]", mnemonic="JNEI")\ndef tricore_jcc(obj, disp, b, a):\n src1 = env.D[a]\n src2 = env.D[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) a(4) {7d} ]", mnemonic="JEQ_A")\n@ispec("32<[ 1 ~disp(15) b(4) a(4) {7d} ]", mnemonic="JNE_A")\ndef tricore_jcc(obj, disp, b, a):\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 1 ~disp(15) ---- a(4) {bd} ]", mnemonic="JNZ_A")\n@ispec("32<[ 0 ~disp(15) ---- a(4) {bd} ]", mnemonic="JZ_A")\ndef tricore_jcc(obj, disp, a):\n src1 = env.A[a]\n src2 = env.A[b]\n obj.operands = [src1, src2, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ 0 ~disp(15) b(4) ---- {fd} ]", mnemonic="LOOP")\n@ispec("32<[ 1 ~disp(15) b(4) ---- {fd} ]", mnemonic="LOOPU")\ndef tricore_jcc(obj, disp, b):\n src1 = env.A[b]\n src2 = env.cst(disp.int(-1)*2,32)\n obj.operands = [src1, src2]\n if obj.mnemonic=="LOOPU":\n obj.operands = [src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) disp(4) {7c} ]", mnemonic="JNZ_A")\n@ispec("16<[ b(4) disp(4) {bc} ]", mnemonic="JZ_A")\ndef tricore_jcc(obj, b, disp):\n src1 = env.A[b]\n src2 = env.cst(disp,32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ b(4) #disp(4) {fc} ]", mnemonic="LOOP")\ndef tricore_jcc(obj, b, 
disp):\n src1 = env.A[b]\n src2 = env.cst(int(("1"*27)+disp+"0",2),32)\n obj.operands = [src1, src2]\n obj.type = type_control_flow\n@ispec("16<[ 0000 a(4) {dc} ]", mnemonic="JI")\ndef tricore_ji(obj, a):\n src = env.A[a]\n obj.operands = [src]\n obj.type = type_control_flow\n@ispec("16<[ 0000 a(4) {46} ]", mnemonic="NOT")\n@ispec("16<[ 0101 a(4) {32} ]", mnemonic="RSUB")\n@ispec("16<[ 0000 a(4) {32} ]", mnemonic="SAT_B")\n@ispec("16<[ 0001 a(4) {32} ]", mnemonic="SAT_BU")\n@ispec("16<[ 0010 a(4) {32} ]", mnemonic="SAT_H")\n@ispec("16<[ 0011 a(4) {32} ]", mnemonic="SAT_HU")\ndef tricore_a(obj, a):\n src = env.D[a]\n obj.operands = [src]\n obj.type = type_data_processing\n@ispec("16<[ n(4) disp(4) {ae} ]", mnemonic="JNZ_T")\n@ispec("16<[ n(4) disp(4) {2e} ]", mnemonic="JZ_T")\ndef tricore_ji(obj, n, disp):\n obj.operands = [env.D[15][n:n+1], env.cst(disp,32)]\n obj.type = type_control_flow\n@ispec("32<[ 1 ~disp(15) n(4) a(4) h 1101111 ]", mnemonic="JNZ_T")\n@ispec("32<[ 0 ~disp(15) n(4) a(4) h 1101111 ]", mnemonic="JZ_T")\ndef tricore_jcc(obj, disp, n, a, h):\n i = n+(h<<4)\n src = env.D[a][i:i+1]\n obj.operands = [src, env.cst(disp.int(-1),32)]\n obj.type = type_control_flow\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_A", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_B", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_BU", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_D", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_DA", mode="Absolute")\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_H", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {05} ]", mnemonic="LD_HU", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {45} ]", mnemonic="LD_Q", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {85} ]", mnemonic="LD_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {c5} ]", mnemonic="LEA", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n dst = env.D[a]\n if obj.mnemonic in ("LD_A", "LEA") : dst = env.A[a]\n if obj.mnemonic in ("LD_D","LDMST") : dst = env.E[a]\n if obj.mnemonic=="LD_DA": dst = env.P[a]\n src = off1//off2//off3\n obj.operands = [dst, composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {c5} ]", mnemonic="LHA", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n dst = env.A[a]\n src = off1//off2//off3//off4\n obj.operands = [dst, composer([env.cst(0,14),env.cst(src.int(),18)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_A", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {25} ]", mnemonic="ST_B", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_D", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_DA", mode="Absolute")\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {25} ]", mnemonic="ST_H", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {65} ]", mnemonic="ST_Q", mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {a5} ]", mnemonic="ST_W", 
mode="Absolute")\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) a(4) {e5} ]", mnemonic="SWAP_W", mode="Absolute")\n@ispec("32<[ ~off2(4) 01 ~off3(4) ~off1(6) ~off4(4) a(4) {e5} ]", mnemonic="LDMST", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4, a):\n src = env.D[a]\n if obj.mnemonic in ("ST_A",) : src = env.A[a]\n if obj.mnemonic in ("ST_D","LDMST") : src = env.E[a]\n if obj.mnemonic=="ST_DA": src = env.P[a]\n addr = off1//off2//off3\n obj.operands = [composer([env.cst(addr.int(),28),env.cst(off4,4)]), src]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) b bpos(3) {d5} ]", mnemonic="ST_T", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4, b, bpos):\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)]), env.cst(bpos,3), env.cst(b,1)]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 00 ~off3(4) ~off1(6) ~off4(4) ---- {15} ]", mnemonic="STLCX", mode="Absolute")\ndef tricore_st(obj, off2, off3, off1, off4):\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 ~off3(4) ~off1(6) ~off4(4) a(4) {15} ]", mnemonic="LDLCX", mode="Absolute")\n@ispec("32<[ ~off2(4) 11 ~off3(4) ~off1(6) ~off4(4) a(4) {15} ]", mnemonic="LDUCX", mode="Absolute")\ndef tricore_ld(obj, off2, off3, off1, off4, a):\n src = off1//off2//off3\n obj.operands = [composer([env.cst(src.int(),28),env.cst(off4,4)])]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_A", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_A", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_A", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_B", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_B", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_B", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_BU", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_BU", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_BU", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_D", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_D", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_D", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Short-offset")\n@ispec("32<[ ~off2(4) 
00 0111 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_DA", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_DA", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_DA", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_H", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_H", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0011 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_HU", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0011 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_HU", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0011 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_HU", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_Q", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_Q", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_Q", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {29} ]", mnemonic="LD_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_W", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="LEA", mode="Short-offset")\ndef tricore_ld(obj, off2, off1, b, a):\n dst = env.D[a]\n if obj.mnemonic=="LD_A" : dst = env.A[a]\n elif obj.mnemonic=="LEA" : dst = env.A[a]\n elif obj.mnemonic=="LD_D" : dst = env.E[a]\n elif obj.mnemonic=="LDMST" : dst = env.E[a]\n elif obj.mnemonic=="LD_DA" : dst = env.P[a]\n obj.b = b\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [dst, src1, src2]\n if obj.mode == "Bit-Reverse":\n obj.operands.pop()\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_A", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_A", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0110 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_A", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_B", 
mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_B", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_B", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_D", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_D", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0101 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_D", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_DA", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_DA", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0111 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_DA", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_H", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_H", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0010 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_H", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_Q", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_Q", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_Q", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {a9} ]", mnemonic="ST_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0100 ~off1(6) b(4) a(4) {89} ]", mnemonic="ST_W", mode="Pre-increment")\n@ispec("32<[ ~off2(4) 10 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {69} ]", mnemonic="LDMST", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {69} ]", mnemonic="LDMST", mode="Circular")\n@ispec("32<[ ~off2(4) 00 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 0001 ~off1(6) b(4) a(4) {49} ]", mnemonic="LDMST", mode="Pre-increment")\ndef tricore_st(obj, off2, off1, b, a):\n dst = env.D[a]\n if obj.mnemonic=="ST_A" : dst = env.A[a]\n elif obj.mnemonic=="ST_D" : dst = env.E[a]\n elif obj.mnemonic=="ST_DA" : dst = env.P[a]\n elif obj.mnemonic=="LDMST" : dst = env.E[a]\n obj.b = b\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n 
obj.operands = [src1, src2, dst]\n if obj.mode == "Bit-Reverse":\n obj.operands.pop()\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Short-offset")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {69} ]", mnemonic="SWAP_W", mode="Bit-reverse")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {69} ]", mnemonic="SWAP_W", mode="Circular")\n@ispec("32<[ ~off2(4) 00 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Post-increment")\n@ispec("32<[ ~off2(4) 01 1000 ~off1(6) b(4) a(4) {49} ]", mnemonic="SWAP_W", mode="Pre-increment")\ndef tricore_ld(obj, off2, off1, b, a):\n dst = env.D[a]\n src1 = env.P[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [src1, src2, dst]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) 10 0100 ~off1(6) b(4) ---- {49} ]", mnemonic="LDLCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0101 ~off1(6) b(4) ---- {49} ]", mnemonic="LDUCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0110 ~off1(6) b(4) ---- {49} ]", mnemonic="STLCX", mode="Short-offset")\n@ispec("32<[ ~off2(4) 10 0111 ~off1(6) b(4) ---- {49} ]", mnemonic="STUCX", mode="Short-offset")\ndef tricore_ld(obj, off2, off1, b):\n src1 = env.A[b]\n off10 = off1//off2\n src2 = env.cst(off10.int(-1),10)\n obj.operands = [src1, src2]\n obj.type = type_data_processing\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {99} ]", mnemonic="LD_A", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {79} ]", mnemonic="LD_B", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {39} ]", mnemonic="LD_BU", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {09} ]", mnemonic="LD_H", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {b9} ]", mnemonic="LD_HU", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {19} ]", mnemonic="LD_W", mode="Long-offset")\n@ispec("32<[ ~off2(4) ~off3(6) ~off1(6) b(4) a(4) {d9} ]", mnemonic="LEA", mode="Long-offset")\ndef tricore_ld(obj, off2, off3, off1, b, a):\n dst = env.D[a]\n\n</context>\n\n假设一个实际的场景,我们需要采用这段代码来实现工作流程,可以给我解释一下这段代码的作用吗? \n\n\n\n
|
114 |
+
```
|
@@ -0,0 +1,109 @@
1 |
+
import argparse
|
2 |
+
import sys
|
3 |
+
sys.path.append('../')
|
4 |
+
from time import perf_counter
|
5 |
+
|
6 |
+
from lyra_llama import lyraLlama
|
7 |
+
|
8 |
+
|
9 |
+
def get_args():
|
10 |
+
parser = argparse.ArgumentParser(description="Faster LyraLlama Demo")
|
11 |
+
|
12 |
+
parser.add_argument('--model-path', type=str, required=True,
|
13 |
+
help='Model Path, include config.ini and tokenizer files')
|
14 |
+
parser.add_argument('--tokenizer-path', type=str, default=None)
|
15 |
+
|
16 |
+
parser.add_argument(
|
17 |
+
'--data-type', type=str, metavar='TYPE', default='fp16',
|
18 |
+
choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
|
19 |
+
help='The data type to inference. If None, the data type follows the '
|
20 |
+
'checkpoint data type.')
|
21 |
+
|
22 |
+
parser.add_argument(
|
23 |
+
'--memopt-mode', type=int, default=0, choices=[0, 1],
|
24 |
+
help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
|
25 |
+
' 0: FP16 mode'
|
26 |
+
' 1: Use MEMOPT mode')
|
27 |
+
|
28 |
+
parser.add_argument(
|
29 |
+
'--quant-type', type=str, metavar='TYPE', default='int8',
|
30 |
+
choices=['int4', 'int8'],
|
31 |
+
help='The data type of quantization. Only used in MEMOPT.')
|
32 |
+
|
33 |
+
parser.add_argument(
|
34 |
+
'--kvqparams-fpath', type=str, required=False, default="",
|
35 |
+
help='File path of kv quantized params.')
|
36 |
+
|
37 |
+
parser.add_argument("--prompt", type=str, required=False)
|
38 |
+
parser.add_argument("--max-output-length", type=int, default=512)
|
39 |
+
parser.add_argument("--warmups", type=int, default=10)
|
40 |
+
parser.add_argument("--avgnums", type=int, default=10)
|
41 |
+
args = parser.parse_args()
|
42 |
+
|
43 |
+
print('\n=================== Arguments ===================')
|
44 |
+
for k, v in vars(args).items():
|
45 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
46 |
+
print('=================================================')
|
47 |
+
|
48 |
+
return args
|
49 |
+
|
50 |
+
|
51 |
+
def main():
|
52 |
+
args = get_args()
|
53 |
+
|
54 |
+
model = lyraLlama(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode, args.quant_type, args.kvqparams_fpath)
|
55 |
+
|
56 |
+
# args.prompt = '''<context>/*\n * Implement the \"Falling Rocks\" game in the text console. \n * A small dwarf stays at the bottom of the screen and can \n * move left and right (by the arrows keys). A number of rocks \n * of different sizes and forms constantly fall down and you \n * need to avoid a crash.\n * Rocks are the symbols ^, @, *, &, +, %, $, #, !, ., ;, - distributed \n * with appropriate density. The dwarf is (O). \n * Ensure a constant game speed by Thread.Sleep(150).\n * Implement collision detection and scoring system.\n*/\n\nusing System;\nusing System.Threading;\nusing System.Collections.Generic;\nusing System.Threading.Tasks;\n\nclass FallingRocks\n{\n struct Position\n {\n public int X, Y;\n public string symbol;\n public ConsoleColor color;\n\n public Position(int x, int y, string symbol, ConsoleColor color)\n {\n this.X = x;\n this.Y = y;\n this.symbol = symbol;\n this.color = color;\n }\n }\n\n static void Main()\n {\n Thread oThread = new Thread(new ThreadStart(Mainn));\n Thread aThread = new Thread(new ThreadStart(Clr));\n \n aThread.Start();\n oThread.Start();\n oThread.Join();\n aThread.Join();\n }\n\n static void Clr()\n {\n while (true)\n {\n Thread.Sleep(10);\n Console.Clear();\n }\n }\n static void Mainn()\n {\n //Random generator for rocks color, position and symbol\n Random randomGenerator = new Random();\n \n //Sleep time for the game loop\n double sleepTime = 150;\n //Console settings\n Console.CursorVisible = false;\n Console.BufferHeight = Console.WindowHeight;\n \n //number of rocks in the Array rocks\n int rocksCount = 0;\n\n //array with the symbols of the rocks\n string[] symbols = new string[] { \"^\", \"@\", \"*\", \"&\", \"+\", \"%\", \"$\", \"#\", \"!\", \".\", \";\" };\n \n //array with colors for the rocks\n ConsoleColor[] colors = new ConsoleColor[] {ConsoleColor.Yellow, ConsoleColor.White, ConsoleColor.Gray};\n \n //array with rocks\n Position[] rocks = new Position[200];\n \n //position for the dwarf\n Position dwarf = new Position(10, Console.WindowHeight - 1,\"(0)\",ConsoleColor.Red);\n \n //bool variable to say when the game loop to be over\n bool gameLoop = true;\n\n //variable keeping the score\n ulong score = 0;\n\n //the game loop\n while (gameLoop)\n {\n //score is growing as the cycle runs\n score++;\n\n //setting the Y component for all the rocks in the array to grow with 2\n for (int i = 0; i <= rocks.Length - 1; i++)\n {\n rocks[i].Y = rocks[i].Y + 2;\n }\n\n //generating rocks\n for (int x = 0; x <= randomGenerator.Next(2, 4); x++)\n {\n rocks[rocksCount] = new Position(randomGenerator.Next(x * 15, x * 15 + 20), 0\n , symbols[randomGenerator.Next(0, symbols.Length - 1)]\n , colors[randomGenerator.Next(0, colors.Length - 1)]);\n if (rocksCount >= 199) rocksCount = 0;\n rocksCount++;\n }\n\n //printing the rocks and other stuff\n foreach (var item in rocks)\n {\n foreach (var rock in rocks)\n {\n //checking for colision\n if ((rock.X >= dwarf.X) && (rock.X <= (dwarf.X + 2)) && (rock.Y == dwarf.Y))\n {\n gameLoop = false;\n break;\n }\n } \n\n //printing the rocks\n if (item.Y < Console.WindowHeight)\n { \n Console.SetCursorPosition(item.X, item.Y);\n Console.ForegroundColor = item.color;\n Console.Write(item.symbol);\n }\n\n //checking for key pressed\n if (Console.KeyAvailable)\n {\n ConsoleKeyInfo pressedKey = Console.ReadKey();\n if (pressedKey.Key == ConsoleKey.RightArrow)\n {\n if(dwarf.X < Console.WindowWidth - 20)\n {\n //removing the old positions of the dwarf and increasing his X value\n 
Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.Write(\" \");\n dwarf.X++;\n }\n }\n if (pressedKey.Key == ConsoleKey.LeftArrow) \n {\n if(dwarf.X >= 1)\n {\n //removing the old positions of the dwarf and decreasing his X value\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.Write(\" \");\n dwarf.X--;\n }\n }\n }\n }\n \n //printing the dwarf\n Console.SetCursorPosition(dwarf.X, dwarf.Y);\n Console.ForegroundColor = dwarf.color;\n Console.Write(dwarf.symbol); \n \n //sleeping the loop for sometime\n //Thread.Sleep((int)sleepTime);\n\n //reducing the sleep time of the loop\n sleepTime -= 0.5;\n\n \n //removing the rocks \n //foreach (var item in rocks)\n //{\n // if (item.Y < Console.WindowHeight)\n // {\n // Console.SetCursorPosition(item.X, item.Y);\n // Console.Write(\" \");\n // }\n //} \n }\n //Printing the score after the game is over\n Console.Clear();\n Console.WriteLine(\"Game over! Your score is: \" + score);\n\n }\n}\n</context>\n\n这个\"Falling Rocks\"游戏是如何工作的呢?可以详细解释一下代码的运作机制吗? \n\n\n\n'''
|
57 |
+
|
58 |
+
prompt_template = "Human: {}\n\nAssistant:" # xverse
|
59 |
+
# prompt_template = "<human>:{}\n<bot>:" # llama-ziya 13b
|
60 |
+
|
61 |
+
prompt = prompt_template.format(args.prompt)
|
62 |
+
|
63 |
+
test_batch_size = [1, 8, 16, 32, 64] # 8, 16, 32, 64
|
64 |
+
print("test_batch_size: ", test_batch_size)
|
65 |
+
|
66 |
+
for i, bs in enumerate(test_batch_size):
|
67 |
+
prompts = [prompt, ] * bs
|
68 |
+
|
69 |
+
# warmup gpu
|
70 |
+
for _ in range(args.warmups):
|
71 |
+
output_texts = model.generate(
|
72 |
+
prompts, output_length=args.max_output_length,
|
73 |
+
top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
|
74 |
+
|
75 |
+
start = perf_counter()
|
76 |
+
for _ in range(args.avgnums):
|
77 |
+
output_texts = model.generate(
|
78 |
+
prompts, output_length=args.max_output_length,
|
79 |
+
top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
|
80 |
+
end = perf_counter()
|
81 |
+
cost = (end - start) / args.avgnums
|
82 |
+
|
83 |
+
input_output_texts = [prompt + ' ' + gtext for prompt,
|
84 |
+
gtext in zip(prompts, output_texts)]
|
85 |
+
tokens = 0
|
86 |
+
input_tokens = len(model.tokenizer.encode(prompt))
|
87 |
+
words = 0
|
88 |
+
for text in input_output_texts:
|
89 |
+
tokens += len(model.tokenizer.encode(text))
|
90 |
+
words += len(text)
|
91 |
+
|
92 |
+
avg_output_tokens = tokens / len(input_output_texts) - input_tokens
|
93 |
+
print(
|
94 |
+
f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Input tokens: {input_tokens}. Output tokens: {avg_output_tokens} Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
|
95 |
+
)
|
96 |
+
print(
|
97 |
+
f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
|
98 |
+
)
|
99 |
+
|
100 |
+
if i == 0:
|
101 |
+
for k in range(bs):
|
102 |
+
print(
|
103 |
+
f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
|
104 |
+
if k > 2:
|
105 |
+
break
|
106 |
+
|
107 |
+
|
108 |
+
if __name__ == "__main__":
|
109 |
+
main()
|
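For reference, a minimal (non-benchmark) sketch of the call pattern that batch_demo.py wraps. This assumes the lyra_llama package above is importable from the parent directory and that the model directory (the path below is hypothetical) holds the converted 1-gpu-fp16.bin plus config.ini and tokenizer files:

```python
import sys
sys.path.append('../')

from lyra_llama import lyraLlama

# Hypothetical location of a converted model directory.
model_path = "/path/to/converted/llama"

# memopt_mode=0 keeps plain FP16 weights; quant_dtype only matters when memopt_mode=1.
model = lyraLlama(model_path, model_path, 'fp16', memopt_mode=0, quant_dtype="int8")

texts = model.generate(["Human: 你好,请介绍一下你自己。\n\nAssistant:"],
                       output_length=128,
                       top_k=30, top_p=0.85, temperature=1.0,
                       repetition_penalty=1.0, do_sample=False)
print(texts[0].lstrip())
```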
@@ -0,0 +1,135 @@
1 |
+
import argparse
|
2 |
+
import sys
|
3 |
+
from time import perf_counter
|
4 |
+
|
5 |
+
import sys
|
6 |
+
# import ipdb
|
7 |
+
sys.path.append('../')
|
8 |
+
import threading
|
9 |
+
import time
|
10 |
+
|
11 |
+
from lyra_llama import lyraLlama
|
12 |
+
|
13 |
+
|
14 |
+
def print_string(string, prev_seq_length=None, finish=False):
|
15 |
+
if finish:
|
16 |
+
print_list([string])
|
17 |
+
return
|
18 |
+
|
19 |
+
print("\033c", end="")
|
20 |
+
|
21 |
+
if prev_seq_length:
|
22 |
+
print(string[:prev_seq_length], end='', flush=True)
|
23 |
+
string = string[prev_seq_length:]
|
24 |
+
|
25 |
+
for c_char in string:
|
26 |
+
print(c_char, end='', flush=True)
|
27 |
+
time.sleep(0.025) # pause between printed characters; adjust the interval as needed
|
28 |
+
|
29 |
+
|
30 |
+
def print_list(lines):
|
31 |
+
# clear the terminal output
|
32 |
+
print("\033c", end="")
|
33 |
+
|
34 |
+
# print the list of strings line by line
|
35 |
+
print('\n'.join(lines))
|
36 |
+
|
37 |
+
|
38 |
+
def get_args():
|
39 |
+
parser = argparse.ArgumentParser(description="Faster LyraLlama Demo")
|
40 |
+
|
41 |
+
parser.add_argument('--model-path', type=str, required=True,
|
42 |
+
help='Model Path, include config.ini and tokenizer files')
|
43 |
+
parser.add_argument('--tokenizer-path', type=str, default=None)
|
44 |
+
|
45 |
+
parser.add_argument(
|
46 |
+
'--data-type', type=str, metavar='TYPE', default='fp16',
|
47 |
+
choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
|
48 |
+
help='The data type to inference. If None, the data type follows the '
|
49 |
+
'checkpoint data type.')
|
50 |
+
|
51 |
+
parser.add_argument(
|
52 |
+
'--memopt_mode', type=int, default=0, choices=[0, 1],
|
53 |
+
help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
|
54 |
+
' 0: FP16 mode'
|
55 |
+
' 1: Use MEMOPT mode')
|
56 |
+
|
57 |
+
parser.add_argument(
|
58 |
+
'--quant-type', type=str, metavar='TYPE', default='int8',
|
59 |
+
choices=['int4', 'int8'],
|
60 |
+
help='The data type of quantization. Only used in MEMOPT.')
|
61 |
+
|
62 |
+
parser.add_argument(
|
63 |
+
'--kvqparams-fpath', type=str, required=False, default="",
|
64 |
+
help='File path of kv quantized params.')
|
65 |
+
|
66 |
+
parser.add_argument("--prompt", type=str, required=False)
|
67 |
+
parser.add_argument("--max-output-length", type=int, default=512)
|
68 |
+
parser.add_argument("--warmups", type=int, default=10)
|
69 |
+
parser.add_argument("--avgnums", type=int, default=10)
|
70 |
+
args = parser.parse_args()
|
71 |
+
|
72 |
+
print('\n=================== Arguments ===================')
|
73 |
+
for k, v in vars(args).items():
|
74 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
75 |
+
print('=================================================')
|
76 |
+
|
77 |
+
return args
|
78 |
+
|
79 |
+
|
80 |
+
def main():
|
81 |
+
args = get_args()
|
82 |
+
|
83 |
+
model = lyraLlama(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode, args.quant_type, args.kvqparams_fpath)
|
84 |
+
|
85 |
+
prompt_template = "Human: {}\n\nAssistant:" # xverse
|
86 |
+
# prompt_template = "<human>:{}\n<bot>:" # llama-ziya 13b
|
87 |
+
|
88 |
+
prompt = prompt_template.format(args.prompt)
|
89 |
+
|
90 |
+
test_batch_size = [1] # 8, 16, 32, 64
|
91 |
+
print("test_batch_size: ", test_batch_size)
|
92 |
+
|
93 |
+
for i, bs in enumerate(test_batch_size):
|
94 |
+
prompts = [prompt, ] * bs
|
95 |
+
|
96 |
+
# warmup gpu
|
97 |
+
for _ in range(args.warmups):
|
98 |
+
for finish, output_texts in model.stream_generate(prompts,
|
99 |
+
output_length=args.max_output_length,
|
100 |
+
top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False):
|
101 |
+
pass
|
102 |
+
|
103 |
+
start = perf_counter()
|
104 |
+
for _ in range(args.avgnums):
|
105 |
+
prev_sequence_lengths = None
|
106 |
+
stream_counter = 0
|
107 |
+
for finish, output_texts in model.stream_generate(prompts,
|
108 |
+
output_length=args.max_output_length,
|
109 |
+
top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False):
|
110 |
+
|
111 |
+
if len(output_texts) == 1:
|
112 |
+
print_string(output_texts[0], prev_sequence_lengths, finish)
|
113 |
+
prev_sequence_lengths = len(output_texts[0])
|
114 |
+
else:
|
115 |
+
print_list(output_texts)
|
116 |
+
|
117 |
+
stream_counter += 1
|
118 |
+
|
119 |
+
end = perf_counter()
|
120 |
+
cost = (end - start) / args.avgnums
|
121 |
+
|
122 |
+
input_output_texts = [prompt + ' ' + gtext for prompt,
|
123 |
+
gtext in zip(prompts, output_texts)]
|
124 |
+
tokens = 0
|
125 |
+
input_tokens = len(model.tokenizer.encode(prompt))
|
126 |
+
words = 0
|
127 |
+
for text in input_output_texts:
|
128 |
+
tokens += len(model.tokenizer.encode(text))
|
129 |
+
words += len(text)
|
130 |
+
|
131 |
+
avg_output_tokens = tokens / len(input_output_texts) - input_tokens
|
132 |
+
|
133 |
+
|
134 |
+
if __name__ == "__main__":
|
135 |
+
main()
|
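A smaller hedged sketch of consuming stream_generate without the terminal-clearing printer above: each yield carries (finish, output_texts), so printing only the newly generated suffix gives an incremental stream (model and prompts are assumed to exist as in the demo):

```python
prev_len = 0
for finish, output_texts in model.stream_generate(prompts,
                                                   output_length=256,
                                                   top_k=30, top_p=0.85,
                                                   temperature=1.0,
                                                   repetition_penalty=1.0,
                                                   do_sample=False):
    text = output_texts[0]
    # Print only the characters generated since the previous callback.
    print(text[prev_len:], end='', flush=True)
    prev_len = len(text)
    if finish:
        print()
```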
@@ -0,0 +1,123 @@
1 |
+
import argparse
|
2 |
+
import json
|
3 |
+
import random
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
from time import perf_counter
|
7 |
+
|
8 |
+
import sys
|
9 |
+
sys.path.append('../')
|
10 |
+
from lyra_llama import lyraLlama
|
11 |
+
|
12 |
+
|
13 |
+
def get_args():
|
14 |
+
parser = argparse.ArgumentParser(description="Faster LyraLlama Demo")
|
15 |
+
|
16 |
+
parser.add_argument('--model-path', type=str, required=True,
|
17 |
+
help='Model Path, include config.ini and tokenizer files')
|
18 |
+
parser.add_argument('--tokenizer-path', type=str, default=None)
|
19 |
+
|
20 |
+
parser.add_argument(
|
21 |
+
'--data-type', type=str, metavar='TYPE', default='fp16',
|
22 |
+
choices=[None, 'fp32', 'fp16', 'bf16', 'int8'],
|
23 |
+
help='The data type to inference. If None, the data type follows the '
|
24 |
+
'checkpoint data type.')
|
25 |
+
|
26 |
+
parser.add_argument(
|
27 |
+
'--memopt_mode', type=int, default=0, choices=[0, 1],
|
28 |
+
help='Use MEMOPT mode to increase speed and reduce VRAM usage.'
|
29 |
+
' 0: FP16 mode'
|
30 |
+
' 1: Use MEMOPT mode')
|
31 |
+
|
32 |
+
parser.add_argument(
|
33 |
+
'--quant-type', type=str, metavar='TYPE', default='int8',
|
34 |
+
choices=['int4', 'int8'],
|
35 |
+
help='The data type of quantization. Only used in MEMOPT.')
|
36 |
+
|
37 |
+
parser.add_argument(
|
38 |
+
'--kvqparams-fpath', type=str, required=False, default="",
|
39 |
+
help='File path of kv quantized params.')
|
40 |
+
|
41 |
+
parser.add_argument("--prompt_filepath", type=str, required=True)
|
42 |
+
parser.add_argument("--max-output-length", type=int, default=512)
|
43 |
+
parser.add_argument("--warmups", type=int, default=10)
|
44 |
+
parser.add_argument("--avgnums", type=int, default=10)
|
45 |
+
args = parser.parse_args()
|
46 |
+
|
47 |
+
print('\n=================== Arguments ===================')
|
48 |
+
for k, v in vars(args).items():
|
49 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
50 |
+
print('=================================================')
|
51 |
+
|
52 |
+
return args
|
53 |
+
|
54 |
+
|
55 |
+
def main():
|
56 |
+
args = get_args()
|
57 |
+
|
58 |
+
model = lyraLlama(args.model_path, args.tokenizer_path, args.data_type, args.memopt_mode, args.quant_type, args.kvqparams_fpath)
|
59 |
+
|
60 |
+
with open(args.prompt_filepath, "rb") as f:
|
61 |
+
input_datas = json.loads(f.read())
|
62 |
+
|
63 |
+
used_input_data = input_datas[0]
|
64 |
+
|
65 |
+
prompt_template = "Human: {}\n\nAssistant:" # xverse
|
66 |
+
# prompt_template = "<human>:{}\n<bot>:" # llama-ziya 13b
|
67 |
+
|
68 |
+
test_batch_size = [1, 2, 4,] # 8, 16, 32, 64
|
69 |
+
print("test_batch_size: ", test_batch_size)
|
70 |
+
|
71 |
+
for i, bs in enumerate(test_batch_size):
|
72 |
+
all_use_prompts = []
|
73 |
+
all_output_texts = []
|
74 |
+
|
75 |
+
# warmup gpu
|
76 |
+
for _ in range(args.warmups):
|
77 |
+
prompts = [prompt_template.format( used_input_data['prompts'].format(*x) ) for x in random.choices(used_input_data['contents'], k=bs)]
|
78 |
+
output_texts = model.generate(
|
79 |
+
prompts, output_length=args.max_output_length,
|
80 |
+
top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
|
81 |
+
|
82 |
+
all_cost_s = 0.0
|
83 |
+
|
84 |
+
for _ in range(args.avgnums):
|
85 |
+
prompts = [prompt_template.format( used_input_data['prompts'].format(*x) ) for x in random.choices(used_input_data['contents'], k=bs)]
|
86 |
+
all_use_prompts.extend(prompts)
|
87 |
+
|
88 |
+
start = perf_counter()
|
89 |
+
output_texts = model.generate(
|
90 |
+
prompts, output_length=args.max_output_length,
|
91 |
+
top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
|
92 |
+
all_cost_s += perf_counter() - start
|
93 |
+
|
94 |
+
all_output_texts.extend(output_texts)
|
95 |
+
|
96 |
+
cost = all_cost_s / args.avgnums
|
97 |
+
|
98 |
+
input_output_texts = [prompt + ' ' + gtext for prompt,gtext in zip(all_use_prompts, all_output_texts)]
|
99 |
+
|
100 |
+
tokens = 0
|
101 |
+
avg_input_tokens = np.mean([len(model.tokenizer.encode(prompt)) for prompt in all_use_prompts])
|
102 |
+
|
103 |
+
words = 0
|
104 |
+
for text in input_output_texts:
|
105 |
+
tokens += len(model.tokenizer.encode(text))
|
106 |
+
words += len(text)
|
107 |
+
print(
|
108 |
+
f"\nFaster-Dtype: {args.data_type}, Batch Size: {bs}, All tokens: {tokens}. Avg Input tokens: {avg_input_tokens}. Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
|
109 |
+
)
|
110 |
+
print(
|
111 |
+
f"Faster-Dtype: {args.data_type}, Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
|
112 |
+
)
|
113 |
+
|
114 |
+
if i == 0:
|
115 |
+
for k in range(bs):
|
116 |
+
print(
|
117 |
+
f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
|
118 |
+
if k>2:
|
119 |
+
break
|
120 |
+
|
121 |
+
if __name__ == "__main__":
|
122 |
+
main()
|
123 |
+
|
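random_batch_demo.py indexes the first element of the JSON file passed via --prompt_filepath and expects it to carry a format template ('prompts') plus argument tuples ('contents') that are sampled per batch. A hypothetical file with that shape (field values made up purely for illustration) could be produced like this:

```python
import json

# Illustrative only: the structure random_batch_demo.py reads as input_datas[0].
example = [{
    "prompts": "请从创作背景和音乐风格两个角度介绍歌曲《{}》,歌手是{}。回答:",
    "contents": [
        ["幸福万年长", "汤灿"],
        ["仓颉", "五月天"],
    ],
}]

with open("random_prompts_demo.json", "w", encoding="utf-8") as f:
    json.dump(example, f, ensure_ascii=False, indent=2)
```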
@@ -0,0 +1,20 @@
export FMHA_VERSION=V2        # to use the older attention kernels, set FMHA_VERSION=OFF
export KV_CACHE_DTYPE=DEFAULT # to enable KV-cache int8, set KV_CACHE_DTYPE=INT8

model_path=ModelPath          # directory of the converted model (where 1-gpu-fp16.bin etc. live)

data_type=fp16                # precision the weights were saved in
memopt_mode=0                 # MEMOPT mode: 0/1
quant_type="int8"             # quantization precision: int4/int8
max_output_length=256
warmups=1
avgnums=1

python batch_demo.py --model-path $model_path\
                     --tokenizer-path $model_path\
                     --data-type $data_type\
                     --memopt-mode $memopt_mode\
                     --quant-type ${quant_type}\
                     --max-output-length $max_output_length\
                     --warmups $warmups\
                     --avgnums $avgnums
|
@@ -0,0 +1,21 @@
export FMHA_VERSION=V2        # to use the older attention kernels, set FMHA_VERSION=OFF
export KV_CACHE_DTYPE=DEFAULT # to enable KV-cache int8, set KV_CACHE_DTYPE=INT8
export LYRA_STREAM_CB_STEP=30 # number of decoding steps between streaming callbacks

model_path=ModelPath          # directory of the converted model (where 1-gpu-fp16.bin etc. live)

data_type=fp16                # precision the weights were saved in
memopt_mode=0                 # MEMOPT mode: 0/1
quant_type="int8"             # quantization precision: int4/int8
max_output_length=256
warmups=1
avgnums=1

python batch_stream_demo.py --model-path $model_path\
                            --tokenizer-path $model_path\
                            --data-type $data_type\
                            --memopt_mode $memopt_mode\
                            --quant-type ${quant_type}\
                            --max-output-length $max_output_length\
                            --warmups $warmups\
                            --avgnums $avgnums
|
@@ -0,0 +1,111 @@
1 |
+
from transformers import AutoTokenizer
|
2 |
+
from transformers import LlamaForCausalLM, AutoModelForCausalLM
|
3 |
+
from time import perf_counter
|
4 |
+
import torch
|
5 |
+
import argparse
|
6 |
+
|
7 |
+
def get_args():
|
8 |
+
parser = argparse.ArgumentParser(description="Torch model Demo")
|
9 |
+
|
10 |
+
parser.add_argument('--model-path', type=str, required=True,
|
11 |
+
help='Model Path, include config.ini and tokenizer files')
|
12 |
+
parser.add_argument('--tokenizer-path', type=str, default=None)
|
13 |
+
|
14 |
+
parser.add_argument("--prompt", type=str, required=False)
|
15 |
+
parser.add_argument("--max-output-length", type=int, default=512)
|
16 |
+
parser.add_argument("--warmups", type=int, default=10)
|
17 |
+
parser.add_argument("--avgnums", type=int, default=10)
|
18 |
+
args = parser.parse_args()
|
19 |
+
|
20 |
+
print('\n=================== Arguments ===================')
|
21 |
+
for k, v in vars(args).items():
|
22 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
23 |
+
print('=================================================')
|
24 |
+
|
25 |
+
return args
|
26 |
+
|
27 |
+
def main():
|
28 |
+
args = get_args()
|
29 |
+
device = torch.device("cuda")
|
30 |
+
|
31 |
+
prompt_template = "Human: {}\n\nAssistant:" # xverse
|
32 |
+
# prompt_template = "<human>:{}\n<bot>:" # llama-ziya 13b
|
33 |
+
|
34 |
+
prompt = prompt_template.format(args.prompt)
|
35 |
+
|
36 |
+
model = AutoModelForCausalLM.from_pretrained(args.model_path, torch_dtype=torch.float16, trust_remote_code=True).eval().to(device)
|
37 |
+
tokenizer = AutoTokenizer.from_pretrained(args.model_path, use_fast=False, trust_remote_code=True)
|
38 |
+
|
39 |
+
test_batch_size = [1, 8, 16, 32, 64]
|
40 |
+
print("test_batch_size: ", test_batch_size)
|
41 |
+
|
42 |
+
for i, bs in enumerate(test_batch_size):
|
43 |
+
|
44 |
+
prompts = [prompt] * bs
|
45 |
+
|
46 |
+
# warmup gpu
|
47 |
+
for _ in range(args.warmups):
|
48 |
+
input_ids = tokenizer(prompts, return_tensors="pt").input_ids.to(device)
|
49 |
+
generate_ids = model.generate(
|
50 |
+
input_ids,
|
51 |
+
max_new_tokens=args.max_output_length,
|
52 |
+
do_sample = False,
|
53 |
+
top_k = 30,
|
54 |
+
top_p = 0.85,
|
55 |
+
temperature = 1.0,
|
56 |
+
repetition_penalty=1.,
|
57 |
+
eos_token_id=2,
|
58 |
+
bos_token_id=1,
|
59 |
+
pad_token_id=0)
|
60 |
+
|
61 |
+
generate_ids = [output_ids[len(single_input_id):] for single_input_id, output_ids in zip(input_ids, generate_ids)]
|
62 |
+
outputs = tokenizer.batch_decode(generate_ids)
|
63 |
+
|
64 |
+
# test
|
65 |
+
start = perf_counter()
|
66 |
+
for _ in range(args.avgnums):
|
67 |
+
input_ids = tokenizer(prompts, return_tensors="pt").input_ids.to(device)
|
68 |
+
generate_ids = model.generate(
|
69 |
+
input_ids,
|
70 |
+
max_new_tokens=args.max_output_length,
|
71 |
+
do_sample = False,
|
72 |
+
top_k = 30,
|
73 |
+
top_p = 0.85,
|
74 |
+
temperature = 1.0,
|
75 |
+
repetition_penalty=1.,
|
76 |
+
eos_token_id=2,
|
77 |
+
bos_token_id=1,
|
78 |
+
pad_token_id=0)
|
79 |
+
|
80 |
+
generate_ids = [output_ids[len(single_input_id):] for single_input_id, output_ids in zip(input_ids, generate_ids)]
|
81 |
+
output_texts = tokenizer.batch_decode(generate_ids)
|
82 |
+
|
83 |
+
end = perf_counter()
|
84 |
+
cost = (end - start) / args.avgnums
|
85 |
+
|
86 |
+
# compute throughput
|
87 |
+
input_output_texts = [prompt + ' ' + gtext for prompt, gtext in zip(prompts, output_texts)]
|
88 |
+
tokens = 0
|
89 |
+
input_tokens = len(tokenizer.encode(prompt))
|
90 |
+
words = 0
|
91 |
+
for text in input_output_texts:
|
92 |
+
tokens += len(tokenizer.encode(text))
|
93 |
+
words += len(text)
|
94 |
+
|
95 |
+
avg_output_tokens = tokens / len(input_output_texts) - input_tokens
|
96 |
+
print(
|
97 |
+
f"\nBatch Size: {bs}, All tokens: {tokens}. Input tokens: {input_tokens}. Output tokens: {avg_output_tokens} Cost: {cost} seconds. Speed: {tokens/cost} tokens/s."
|
98 |
+
)
|
99 |
+
print(
|
100 |
+
f"Batch Size: {bs}, All generated words: {words}. Cost: {cost} seconds. Speed: {words/cost} words/s."
|
101 |
+
)
|
102 |
+
|
103 |
+
if i == 0:
|
104 |
+
for k in range(bs):
|
105 |
+
print(
|
106 |
+
f"The {k} Sample, \n\t\tInputs: {prompts[k]}. \n\t\tOutputs: {output_texts[k].lstrip()}")
|
107 |
+
if k > 2:
|
108 |
+
break
|
109 |
+
|
110 |
+
if __name__ == "__main__":
|
111 |
+
main()
|
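One design note on the timing loop above: tokenizer.batch_decode runs inside the timed region and forces the generated tokens to be materialized, so the wall-clock numbers already cover the GPU work. If the decode step were ever moved out of the loop, an explicit synchronize would keep the measurement honest; a small sketch, not part of the original script:

```python
import torch
from time import perf_counter

torch.cuda.synchronize()   # make sure pending kernels are done before starting the clock
start = perf_counter()
# ... model.generate(...) calls being measured go here ...
torch.cuda.synchronize()   # wait for all queued GPU work before stopping the clock
cost = perf_counter() - start
```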
@@ -0,0 +1,6 @@
1 |
+
[
|
2 |
+
"歌曲名:《幸福万年长》;歌手名:汤灿;歌曲描述:汤灿的幸福万年长创作背景:2001年,汤灿决定推出一首能够贴近听众和潮流的民歌。为此,她邀请了创作过歌曲《为你》《快乐老家》的音乐人浮克合作,邀其担任该曲的制作工作。虽然浮克此前一直从事流行歌曲的工作,但他其实也是一位衷情民歌风格的音乐人,于是两人一拍即合,合作了该曲。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
|
3 |
+
"歌曲名:《小丑面具》;歌手名:韩庚;歌曲描述:韩庚的小丑面具的歌曲鉴赏:韩庚在这首歌化身为“小丑”,带上面具调侃这社会上的表面功夫,用幽默又神经质的方式批判愈形冷酷的人心。在这首独特的电子舞曲当中,韩庚尝试了各种不同的发声方式,冷笑、哭喊、啜泣……甚至用声乐融合鬼魅的方法演唱,让人不禁陷入他建构的虚幻氛围而随之起舞。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
|
4 |
+
"《Bela Lugosi's Dead 》是英国后朋克乐队Bauhaus的首张单曲,于 1979 年 8 月 6 日在Small Wonder厂牌上发行。[4]它通常被认为是第一张哥特式摇滚唱片。\n1979 年 1 月 26 日,“Bela Lugosi's Dead”在威灵伯勒的贝克录音室进行了六个小时的“录音室现场”录制。这是他们在乐队成立六周后一起录制的第一首歌曲。[6]所有四位乐队成员都被认为是这首歌的作者:主唱彼得·墨菲、吉他手丹尼尔·阿什、鼓手凯文·哈斯金斯和贝斯手大卫·J (大卫·哈斯金斯)。David J 声称这首歌的歌词是他写的。[5] “Bela Lugosi's Dead”的替代版本还包括他们下一首单曲“ Dark Entries ”的早期演示录音的一部分。\n\n在同一场会议中还录制了另外四首歌曲:“Boys”;“咬我的臀部”;“Some Faces”和斯卡雷鬼曲调“Harry”,这是关于Blondie主唱Deborah Harry的。[7] [8]关于这次会议,凯文·哈斯金斯 (Kevin Haskins) 说,“那里有力量流行音乐,还有斯卡。我们试图找到我们的声音。” [9]\n\n在那次录制期间录制的歌曲中(除了“Bela Lugosi's Dead”),只有“Harry”获得了官方发行;1982年作为单曲“ Kick in the Eye ”的B面。1979 年晚些时候在 Beck Studios 录制的《Boys》版本被用作原版单曲《Bela Lugosi's Dead》的 B 面。[10]其余曲目,包括“Boys”的原始录音,一直未发行,直到 2018 年The Bela Session以黑胶唱片和CD 形式发行,并可供乐队数字下载。[11]在额外的曲目中,《经典摇滚》杂志写道:“其余的材料发现乐队正在摸索方向,甚至触及了斯卡。”\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:",
|
5 |
+
"歌曲名:《仓颉》;歌手名:五月天;歌曲描述:五月天的仓颉的歌曲鉴赏:五月天 仓颉(2张)《仓颉》是一首写在文明即将消失前的情诗,陈信宏的词写得颇有味道。《仓颉》这样淡淡的歌曲,或许不够大气,但是陈信宏真诚的演唱足以令人感动,而且《仓颉》的歌词也写得很有哲理。这首歌曲朗朗上口的旋律和诗意的文字使得它很适合在KTV演唱。\n根据上述信息,请回答用户问题:请从歌曲背景、风格与音乐性、影响与文化参考、歌手或者乐队这几个角度介绍一下Bela Lugosi's Dead这首歌。回答:"
|
6 |
+
]
|
@@ -0,0 +1 @@
from .lyra_llama import lyraLlama
|
@@ -0,0 +1,34 @@
import dataclasses
from typing import Optional


@dataclasses.dataclass
class LyraLlamaParam:
    num_heads: int = 40
    size_per_head: int = 128
    inter_size: int = 13824
    num_layers: int = 40
    vocab_size: int = 39424
    start_id: Optional[int] = 1
    end_id: Optional[int] = 2
    tensor_para_size: int = 1
    pipeline_para_size: int = 1
    remove_padding: bool = True
    shared_contexts_ratio: float = 1.0
    layernorm_eps: float = 1e-6
    weights_data_type: str = "fp16"
    rotary_embedding: int = 128
    use_gptj_residual: bool = False

    def __post_init__(self):
        if not 0.0 <= self.shared_contexts_ratio <= 1.0:
            raise ValueError(
                f'Got an invalid value of shared_context_ratio '
                f'{self.shared_contexts_ratio} - range: [0.0, 1.0]')

    def asdict(self):
        return dataclasses.asdict(self)


LYRA_LLAMA_PARAM = LyraLlamaParam()
LIB_SO_PATH = '/usr/lib/ftlib/lyraOp.cpython-38-x86_64-linux-gnu.so'
|
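Since LyraLlamaParam is a plain dataclass used only as a fallback when no config.ini is found next to the weights, its defaults can be overridden per model; a tiny illustrative sketch (the numbers below are hypothetical and not tied to any shipped checkpoint):

```python
# Illustrative only: override the fallback hyper-parameters for a smaller model.
custom_param = LyraLlamaParam(num_heads=32, inter_size=11008,
                              num_layers=32, vocab_size=32000)
print(custom_param.asdict()["num_layers"])  # 32
```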
@@ -0,0 +1,232 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import configparser
|
4 |
+
import pathlib
|
5 |
+
import typing
|
6 |
+
import os
|
7 |
+
|
8 |
+
import torch
|
9 |
+
import transformers
|
10 |
+
from torch.nn.utils.rnn import pad_sequence
|
11 |
+
|
12 |
+
from .config import LYRA_LLAMA_PARAM, LIB_SO_PATH
|
13 |
+
from .model import LlamaModel
|
14 |
+
|
15 |
+
|
16 |
+
class lyraLlama:
|
17 |
+
def __init__(self, model_path, tokenizer_path=None, dtype='fp16', memopt_mode=0, quant_dtype="int4", kvqparams_fpath="") -> None:
|
18 |
+
self.model_path = model_path
|
19 |
+
self.tokenizer_path = tokenizer_path
|
20 |
+
self.kvqparams_fpath = kvqparams_fpath
|
21 |
+
|
22 |
+
self.dtype = dtype
|
23 |
+
|
24 |
+
self.memopt_mode = memopt_mode
|
25 |
+
self.quant_data_type = quant_dtype
|
26 |
+
|
27 |
+
self.model, self.tokenizer = self.load_model_and_tokenizer()
|
28 |
+
print("Got model and tokenizer")
|
29 |
+
|
30 |
+
def load_model_and_tokenizer(self):
|
31 |
+
if self.tokenizer_path is None:
|
32 |
+
tokenizer_path = self.model_path
|
33 |
+
else:
|
34 |
+
tokenizer_path = self.tokenizer_path
|
35 |
+
|
36 |
+
print(f'Loading tokenizer from {tokenizer_path}')
|
37 |
+
tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_path)
|
38 |
+
|
39 |
+
checkpoint_path = pathlib.Path(self.model_path)
|
40 |
+
config_path = checkpoint_path / 'config.ini'
|
41 |
+
|
42 |
+
if config_path.exists():
|
43 |
+
# Read model params from config.
|
44 |
+
cfg = configparser.ConfigParser()
|
45 |
+
cfg.read(config_path)
|
46 |
+
model_name = 'llama'
|
47 |
+
inference_data_type = self.dtype
|
48 |
+
if inference_data_type == None:
|
49 |
+
inference_data_type = cfg.get(model_name, "weight_data_type")
|
50 |
+
model_args = dict(
|
51 |
+
head_num=cfg.getint(model_name, 'head_num'),
|
52 |
+
kv_head_num=cfg.getint(model_name, 'kv_head_num', fallback=0),
|
53 |
+
size_per_head=cfg.getint(model_name, "size_per_head"),
|
54 |
+
inter_size=cfg.getint(model_name, 'inter_size'),
|
55 |
+
layer_num=cfg.getint(model_name, "num_layer"),
|
56 |
+
rotary_embedding_dim=cfg.getint(model_name, 'rotary_embedding'),
|
57 |
+
layernorm_eps=cfg.getfloat(model_name, 'layernorm_eps'),
|
58 |
+
vocab_size=cfg.getint(model_name, "vocab_size"),
|
59 |
+
start_id=cfg.getint(model_name, "start_id"),
|
60 |
+
end_id=cfg.getint(model_name, "end_id"),
|
61 |
+
weights_data_type=cfg.get(model_name, "weight_data_type"),
|
62 |
+
tensor_para_size=cfg.getint(model_name, "tensor_para_size"),
|
63 |
+
inference_data_type=inference_data_type,
|
64 |
+
rope_theta=cfg.getfloat(model_name, "rope_theta", fallback=float(10000.0)))
|
65 |
+
else:
|
66 |
+
inference_data_type = self.dtype
|
67 |
+
if inference_data_type == None:
|
68 |
+
inference_data_type = LYRA_LLAMA_PARAM.weights_data_type
|
69 |
+
model_args = dict(head_num=LYRA_LLAMA_PARAM.num_heads,
|
70 |
+
size_per_head=LYRA_LLAMA_PARAM.size_per_head,
|
71 |
+
inter_size=LYRA_LLAMA_PARAM.inter_size,
|
72 |
+
layer_num=LYRA_LLAMA_PARAM.num_layers,
|
73 |
+
rotary_embedding_dim=LYRA_LLAMA_PARAM.rotary_embedding,
|
74 |
+
layernorm_eps=LYRA_LLAMA_PARAM.layernorm_eps,
|
75 |
+
vocab_size=LYRA_LLAMA_PARAM.vocab_size,
|
76 |
+
start_id=LYRA_LLAMA_PARAM.start_id or tokenizer.bos_token_id,
|
77 |
+
end_id=LYRA_LLAMA_PARAM.end_id or tokenizer.eos_token_id,
|
78 |
+
weights_data_type=LYRA_LLAMA_PARAM.weights_data_type,
|
79 |
+
tensor_para_size=LYRA_LLAMA_PARAM.tensor_para_size,
|
80 |
+
inference_data_type=inference_data_type)
|
81 |
+
|
82 |
+
# update common parameters
|
83 |
+
model_args.update(dict(
|
84 |
+
lib_path=LIB_SO_PATH,
|
85 |
+
model_path=os.path.join(self.model_path, "1-gpu-fp16.bin"),
|
86 |
+
kvqparams_fpath=self.kvqparams_fpath, # kv quantized scales (calibrated)
|
87 |
+
max_seq_len=0, # for position seq embedding
|
88 |
+
pipeline_para_size=LYRA_LLAMA_PARAM.pipeline_para_size,
|
89 |
+
use_gptj_residual=LYRA_LLAMA_PARAM.use_gptj_residual,
|
90 |
+
memopt_mode=self.memopt_mode,
|
91 |
+
quant_data_type=self.quant_data_type
|
92 |
+
# shared_contexts_ratio=LYRA_LLAMA_PARAM.shared_contexts_ratio,
|
93 |
+
))
|
94 |
+
|
95 |
+
print('[LYRA][INFO] Load Our LYRA Highly Optimized LLaMA model')
|
96 |
+
for k, v in model_args.items():
|
97 |
+
print(f' - {k.ljust(25, ".")}: {v}')
|
98 |
+
|
99 |
+
# Check sanity and consistency between the model and tokenizer.
|
100 |
+
checklist = ['head_num', 'size_per_head', 'vocab_size', 'layer_num',
|
101 |
+
'tensor_para_size', 'tensor_para_size', 'weights_data_type']
|
102 |
+
if None in [model_args[k] for k in checklist]:
|
103 |
+
none_params = [p for p in checklist if model_args[p] is None]
|
104 |
+
print(f'[LYRA][WARNING] Found None parameters {none_params}. They must '
|
105 |
+
f'be provided either by config file or CLI arguments.')
|
106 |
+
if model_args['start_id'] != tokenizer.bos_token_id:
|
107 |
+
print('[LYRA][WARNING] Given start_id is not matched with the bos token '
|
108 |
+
'id of the pretrained tokenizer.')
|
109 |
+
if model_args['end_id'] not in (tokenizer.pad_token_id, tokenizer.eos_token_id):
|
110 |
+
print('[LYRA][WARNING] Given end_id is not matched with neither pad '
|
111 |
+
'token id nor eos token id of the pretrained tokenizer.')
|
112 |
+
|
113 |
+
print(f'Loading model from {self.model_path}')
|
114 |
+
model = LlamaModel(**model_args)
|
115 |
+
return model, tokenizer
|
116 |
+
|
117 |
+
def generate(self, prompts: typing.List[str] | str,
|
118 |
+
output_length: int = 512,
|
119 |
+
beam_width: int = 1,
|
120 |
+
top_k: typing.Optional[torch.IntTensor] = 1,
|
121 |
+
top_p: typing.Optional[torch.FloatTensor] = 1.0,
|
122 |
+
beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
|
123 |
+
temperature: typing.Optional[torch.FloatTensor] = 1.0,
|
124 |
+
len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
|
125 |
+
repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
|
126 |
+
presence_penalty: typing.Optional[torch.FloatTensor] = None,
|
127 |
+
min_length: typing.Optional[torch.IntTensor] = None,
|
128 |
+
bad_words_list: typing.Optional[torch.IntTensor] = None,
|
129 |
+
do_sample: bool = False,
|
130 |
+
return_output_length: bool = False,
|
131 |
+
return_cum_log_probs: int = 0):
|
132 |
+
if isinstance(prompts, str):
|
133 |
+
prompts = [prompts, ]
|
134 |
+
inputs = prompts
|
135 |
+
|
136 |
+
batch_size = len(inputs)
|
137 |
+
ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
|
138 |
+
ones_float = torch.ones(size=[batch_size], dtype=torch.float32)
|
139 |
+
|
140 |
+
# we must encode the raw prompt text one by one in order to compute the length of the original text.
|
141 |
+
input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
|
142 |
+
input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
|
143 |
+
# after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
|
144 |
+
input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)
|
145 |
+
|
146 |
+
random_seed = None
|
147 |
+
if do_sample:
|
148 |
+
random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)
|
149 |
+
|
150 |
+
outputs = self.model(start_ids=input_token_ids,
|
151 |
+
start_lengths=input_lengths,
|
152 |
+
output_len=output_length,
|
153 |
+
beam_width=beam_width,
|
154 |
+
top_k=top_k * ones_int,
|
155 |
+
top_p=top_p * ones_float,
|
156 |
+
beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
|
157 |
+
temperature=temperature * ones_float,
|
158 |
+
len_penalty=len_penalty * ones_float,
|
159 |
+
repetition_penalty=repetition_penalty * ones_float,
|
160 |
+
random_seed=random_seed,
|
161 |
+
return_output_length=return_output_length,
|
162 |
+
return_cum_log_probs=return_cum_log_probs)
|
163 |
+
|
164 |
+
if return_cum_log_probs > 0:
|
165 |
+
outputs = outputs[0] # output_token_ids.
|
166 |
+
|
167 |
+
# Slice the generated token ids of the 1st beam result.
|
168 |
+
# output = input tokens + generated tokens.
|
169 |
+
output_token_ids = [out[0, length:].cpu()
|
170 |
+
for out, length in zip(outputs, input_lengths)]
|
171 |
+
|
172 |
+
output_texts = self.tokenizer.batch_decode(
|
173 |
+
output_token_ids, skip_special_tokens=True)
|
174 |
+
|
175 |
+
return output_texts
|
176 |
+
|
177 |
+
def stream_generate(self, prompts: typing.List[str] | str,
|
178 |
+
output_length: int = 512,
|
179 |
+
beam_width: int = 1,
|
180 |
+
top_k: typing.Optional[torch.IntTensor] = 1,
|
181 |
+
top_p: typing.Optional[torch.FloatTensor] = 1.0,
|
182 |
+
beam_search_diversity_rate: typing.Optional[torch.FloatTensor] = 0.0,
|
183 |
+
temperature: typing.Optional[torch.FloatTensor] = 1.0,
|
184 |
+
len_penalty: typing.Optional[torch.FloatTensor] = 0.0,
|
185 |
+
repetition_penalty: typing.Optional[torch.FloatTensor] = 1.0,
|
186 |
+
presence_penalty: typing.Optional[torch.FloatTensor] = None,
|
187 |
+
min_length: typing.Optional[torch.IntTensor] = None,
|
188 |
+
bad_words_list: typing.Optional[torch.IntTensor] = None,
|
189 |
+
do_sample: bool = False,
|
190 |
+
return_output_length: bool = False,
|
191 |
+
return_cum_log_probs: int = 0):
|
192 |
+
if isinstance(prompts, str):
|
193 |
+
prompts = [prompts, ]
|
194 |
+
|
195 |
+
inputs = prompts
|
196 |
+
|
197 |
+
batch_size = len(inputs)
|
198 |
+
ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
|
199 |
+
ones_float = torch.ones(size=[batch_size], dtype=torch.float32)
|
200 |
+
|
201 |
+
# we must encode the raw prompt text one by one in order to compute the length of the original text.
|
202 |
+
input_token_ids = [self.tokenizer(text, return_tensors="pt").input_ids.int().squeeze() for text in inputs]
|
203 |
+
input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
|
204 |
+
# after got the length of each input text tokens. we can batchfy the input list to a tensor. padding the right.
|
205 |
+
input_token_ids = pad_sequence(input_token_ids, batch_first=True, padding_value=self.tokenizer.eos_token_id)
|
206 |
+
|
207 |
+
random_seed = None
|
208 |
+
if do_sample:
|
209 |
+
random_seed = torch.randint(0, 262144, (batch_size,), dtype=torch.long)
|
210 |
+
|
211 |
+
for finish, output_ids, sequence_length, output_cum_log_probs in self.model.stream_forward(start_ids=input_token_ids,
|
212 |
+
start_lengths=input_lengths,
|
213 |
+
output_len=output_length,
|
214 |
+
beam_width=beam_width,
|
215 |
+
top_k=top_k * ones_int,
|
216 |
+
top_p=top_p * ones_float,
|
217 |
+
beam_search_diversity_rate=beam_search_diversity_rate * ones_float,
|
218 |
+
temperature=temperature * ones_float,
|
219 |
+
len_penalty=len_penalty * ones_float,
|
220 |
+
repetition_penalty=repetition_penalty * ones_float,
|
221 |
+
random_seed=random_seed,
|
222 |
+
return_output_length=return_output_length,
|
223 |
+
return_cum_log_probs=return_cum_log_probs):
|
224 |
+
|
225 |
+
# Slice the generated token ids of the 1st beam result.
|
226 |
+
# output = input tokens + generated tokens.
|
227 |
+
output_token_ids = [out[0, length:].cpu()
|
228 |
+
for out, length in zip(output_ids, input_lengths)]
|
229 |
+
output_texts = self.tokenizer.batch_decode(
|
230 |
+
output_token_ids, skip_special_tokens=True)
|
231 |
+
|
232 |
+
yield finish, output_texts
|
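Both generate() and stream_generate() accept either a single string or a list of prompts and always return the decoded texts as a list (a single string is wrapped into a one-element batch); a minimal sketch reusing a loaded model object:

```python
# A single prompt is treated as a batch of one.
texts = model.generate("Human: 你好\n\nAssistant:", output_length=32)
assert isinstance(texts, list) and len(texts) == 1
print(texts[0])
```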
@@ -0,0 +1,270 @@
1 |
+
from __future__ import print_function
|
2 |
+
|
3 |
+
import copy
|
4 |
+
import os
|
5 |
+
import pathlib
|
6 |
+
import typing
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
import torch
|
10 |
+
import torch.distributed as dist
|
11 |
+
import torch.nn as nn
|
12 |
+
|
13 |
+
import time
|
14 |
+
from queue import Queue
|
15 |
+
from threading import Thread
|
16 |
+
|
17 |
+
import sys
|
18 |
+
sys.path.append('/usr/lib/lyralib')
|
19 |
+
import lyraOp
|
20 |
+
|
21 |
+
str_type_map = {"fp32": torch.float32, "fp16": torch.float16, "bf16": torch.bfloat16}
|
22 |
+
|
23 |
+
class LlamaModel(nn.Module):
|
24 |
+
def __init__(self,
|
25 |
+
head_num,
|
26 |
+
size_per_head,
|
27 |
+
inter_size,
|
28 |
+
vocab_size,
|
29 |
+
rotary_embedding_dim,
|
30 |
+
start_id, end_id, layer_num,
|
31 |
+
max_seq_len: int,
|
32 |
+
layernorm_eps,
|
33 |
+
tensor_para_size: int,
|
34 |
+
pipeline_para_size: int,
|
35 |
+
use_gptj_residual,
|
36 |
+
                 lib_path: typing.Union[str, pathlib.Path],
                 model_path,
                 kvqparams_fpath: str = "",
                 memopt_mode: int = 0,
                 quant_data_type: str = "int8",
                 inference_data_type: str = "fp16",
                 weights_data_type: typing.Union[str, np.dtype] = np.float32,
                 kv_head_num=0,
                 rope_theta=10000.0):
        super().__init__()
        self.head_num = head_num
        self.kv_head_num = kv_head_num
        self.size_per_head = size_per_head
        self.inter_size = inter_size
        self.vocab_size = vocab_size
        self.rotary_embedding_dim = rotary_embedding_dim
        self.start_id = start_id
        self.end_id = end_id
        self.max_seq_len = max_seq_len
        self.layer_num = layer_num
        self.use_gptj_residual = use_gptj_residual
        self.layernorm_eps = layernorm_eps
        self.memopt_mode = memopt_mode
        self.quant_data_type = quant_data_type
        self.rope_theta = rope_theta

        # multi-gpu params
        self.tensor_para_size = tensor_para_size
        self.pipeline_para_size = pipeline_para_size
        self.build_model = False
        self.weights_data_type = weights_data_type
        self.inference_data_type = inference_data_type

        # queue for streaming
        self.que = Queue()
        self.threads = [None] * self.tensor_para_size

        assert torch.cuda.is_available(), "CUDA is required for this model."

        assert head_num % tensor_para_size == 0, "head_num must be a multiple of tensor_para_size."
        assert layer_num % pipeline_para_size == 0, "layer_num must be a multiple of pipeline_para_size."

        # Load the C++ model into Pytorch model.
        # torch.classes.load_library(os.path.abspath(lib_path))

        # Prepare for tensor/pipeline parallel
        try:
            dist.init_process_group(backend='mpi')
        except:
            print("[INFO] WARNING: Have initialized the process group")
        self.rank = dist.get_rank()
        self.device_count = torch.cuda.device_count()
        self.device = self.rank % self.device_count
        torch.cuda.set_device(self.device)

        world_size = dist.get_world_size()
        # print(tensor_para_size * pipeline_para_size)
        assert world_size == tensor_para_size * pipeline_para_size, "tensor_para_size * pipeline_para_size must be equal to world_size."

        self.tensor_para_rank = self.rank % self.tensor_para_size
        self.pipeline_para_rank = self.rank // self.tensor_para_size

        if self.kv_head_num == 0:
            self.kv_head_num = self.head_num

        self.model = lyraOp.LyraLlama(
            self.head_num, self.size_per_head, self.inter_size,
            self.layer_num,
            self.vocab_size,
            self.rotary_embedding_dim,
            self.layernorm_eps,
            self.start_id, self.end_id,
            self.tensor_para_size, self.pipeline_para_size,
            self.max_seq_len,
            self.use_gptj_residual,
            self.memopt_mode,
            self.quant_data_type,
            model_path,
            kvqparams_fpath,
            self.weights_data_type,
            self.inference_data_type,
            self.kv_head_num,
            self.rope_theta)

        self.build_model = True
        torch.cuda.empty_cache()

    def forward(self,
                start_ids: torch.Tensor,
                start_lengths: torch.Tensor,
                output_len,
                beam_width=1,
                top_k: torch.Tensor = None,
                top_p: torch.Tensor = None,
                beam_search_diversity_rate: torch.Tensor = None,
                temperature: torch.Tensor = None,
                len_penalty: torch.Tensor = None,
                repetition_penalty: torch.Tensor = None,
                random_seed: torch.Tensor = None,
                return_output_length=False,
                return_cum_log_probs=0):

        input_len = start_ids.size(1)
        assert input_len > 0, "input len must be larger than zero. For an unconditional case, use start_id as the first token."

        # Inputs to device
        input_ids = start_ids.cuda(self.device)
        input_lengths = start_lengths.cuda(self.device)
        # outputs: output_ids, output_lengths, output_cum_log_probs (optional)
        outputs = self.model.forward(input_ids,
                                     input_lengths,
                                     output_len,
                                     beam_width,  # optional, can be None
                                     top_k,  # optional, can be None
                                     top_p,  # optional, can be None
                                     beam_search_diversity_rate,  # optional, can be None
                                     temperature,  # optional, can be None
                                     len_penalty,  # optional, can be None
                                     repetition_penalty,  # optional, can be None
                                     random_seed,  # optional, can be None
                                     return_cum_log_probs)  # optional, can be None

        if return_cum_log_probs == 0:
            output_ids, output_lengths = outputs
        else:
            output_ids, output_lengths, output_cum_log_probs = outputs
        if return_output_length:
            if return_cum_log_probs > 0:
                return output_ids, output_lengths, output_cum_log_probs
            else:
                return output_ids, output_lengths
        else:
            return output_ids

    def set_input_tensor(self, input_tensor):
        """Set input tensor to be used instead of forward()'s input.

        When doing pipeline parallelism the input from the previous
        stage comes from communication, not from the input, so the
        model's forward_step_func won't have it. This function is thus
        used by internal code to bypass the input provided by the
        forward_step_func"""
        self.input_tensor = input_tensor

    def _forward_callback(self, output_ids, seq_lengths, ctx):
        self.que.put((False, (list(output_ids), list(seq_lengths))))

    def _tensormap_dict_to_py_dict(self, tensormap_dict: lyraOp.TensorMap):
        """map torch tensormap to py dict."""
        ret = dict()
        for k, v in tensormap_dict.items():
            ret[k] = v

        return ret

    def stream_forward(self,
                       start_ids: torch.Tensor,
                       start_lengths: torch.Tensor,
                       output_len,
                       beam_width=1,
                       top_k: torch.Tensor = None,
                       top_p: torch.Tensor = None,
                       beam_search_diversity_rate: torch.Tensor = None,
                       temperature: torch.Tensor = None,
                       len_penalty: torch.Tensor = None,
                       repetition_penalty: torch.Tensor = None,
                       random_seed: torch.Tensor = None,
                       return_output_length=False,
                       return_cum_log_probs=0):

        # Register callback func to model
        self.model.registerCallback(self._forward_callback)

        batch_size = start_ids.size(0)
        input_len = start_ids.size(1)
        assert input_len > 0, "input len must be larger than zero. For an unconditional case, use start_id as the first token."

        # Inputs to device
        input_ids = start_ids.cuda(self.device)
        input_lengths = start_lengths.cuda(self.device)
        # outputs: output_ids, output_lengths, output_cum_log_probs (optional)

        # Init thread of model inference
        def _func(enque_output):
            outputs = self.model.forward(input_ids,
                                         input_lengths,
                                         output_len,
                                         beam_width,  # optional, can be None
                                         top_k,  # optional, can be None
                                         top_p,  # optional, can be None
                                         beam_search_diversity_rate,  # optional, can be None
                                         temperature,  # optional, can be None
                                         len_penalty,  # optional, can be None
                                         repetition_penalty,  # optional, can be None
                                         random_seed,  # optional, can be None
                                         return_cum_log_probs)  # optional, can be None
            if enque_output:
                self.que.put((True, (outputs[0].tolist(), outputs[1].tolist())))

        # Start thread of model inference
        t = Thread(target=_func,
                   args=(True,),
                   daemon=True)
        t.start()
        self.threads[0] = t

        # Generate streaming output
        while True:
            while self.que.qsize() > 1:
                self.que.get()

            finish, outputs = self.que.get()

            output_ids, sequence_length = outputs
            output_ids_tensor = torch.tensor(output_ids).view(batch_size, beam_width, -1)
            sequence_length_tensor = torch.tensor(sequence_length).view(batch_size, beam_width)

            if return_output_length:
                if return_cum_log_probs > 0:
                    yield finish, output_ids_tensor, sequence_length_tensor, None
                else:
                    yield finish, output_ids_tensor, sequence_length_tensor, None
            else:
                yield finish, output_ids_tensor, None, None

            if finish:
                for t in self.threads:
                    t.join()
                while self.que.qsize() > 0:
                    self.que.get()
                break

        self.model.unRegisterCallback()
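For orientation, the sketch below shows how a caller might consume the streaming generator defined above. It is not part of the committed code: the `model` object is an assumption (any instance exposing the `stream_forward` signature shown in this file), and the helper name `consume_stream` is hypothetical.

```python
import torch

def consume_stream(model, start_ids: torch.Tensor, start_lengths: torch.Tensor, output_len: int = 64):
    # Assumes `model` exposes stream_forward() as defined above; with
    # return_output_length=True each step yields
    # (finish, output_ids, sequence_lengths, cum_log_probs).
    for finish, output_ids, seq_lens, _ in model.stream_forward(
            start_ids, start_lengths, output_len,
            beam_width=1, return_output_length=True):
        # output_ids has shape (batch, beam, current_seq_len); decode or
        # display the partial sequences here.
        print("finish" if finish else "partial", tuple(output_ids.shape))
        if finish:
            break
```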
@@ -0,0 +1,27 @@
## `lyrallms` capability matrix

| Model | Attention: Unfused | Attention: FlashAttn2 | MEMOPT: W4A16 | MEMOPT: W8A16 | KV cache: FP16 | KV cache: INT8 |
|:----|:----|:----|:----|:----|:----|:----|
| LLaMA | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| XVERSE | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| Baichuan 1/2 (7B and 13B) | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ |
| ChatGLM | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ |
| BELLE | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ |

## Using `lyrallms`

### Calibration

See the [README.md](./calibration/README.md) in the `calibration` folder.

### Converting and running accelerated models from Python

#### LLaMA

See the [README.md](./LyraLlamaPy/README.md) in the `LyraLlamaPy` folder.

#### Baichuan

See the [README.md](./LyraBaichuanPy/README.md) in the `LyraBaichuanPy` folder.

File without changes
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cee68cc4fc1b5d25cd39f3bd64ae7ee25f15035892cbbedb10e0b980d9afd87f
size 27793336320
@@ -0,0 +1,14 @@
[baichuan]
model_name = Baichuan2_13B_base
head_num = 40
size_per_head = 128
inter_size = 13696
num_layer = 40
rotary_embedding = 128
layernorm_eps = 1e-06
vocab_size = 125696
start_id = 1
end_id = 2
tensor_para_size = 1
weight_data_type = fp16
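The `config.ini` above is a standard INI file, so it can be read with Python's built-in `configparser`. A minimal sketch follows; the path is an assumption based on this commit's directory layout.

```python
import configparser

# Path assumed from the repository layout shown in this commit.
cfg = configparser.ConfigParser()
cfg.read("models/Baichuan/Baichuan2_13B_Base/config.ini")

section = cfg["baichuan"]
head_num = section.getint("head_num")              # 40
inter_size = section.getint("inter_size")          # 13696
layernorm_eps = section.getfloat("layernorm_eps")  # 1e-06
weight_dtype = section.get("weight_data_type")     # "fp16"
print(head_num, inter_size, layernorm_eps, weight_dtype)
```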
@@ -0,0 +1,28 @@
{
  "_from_model_config": true,
  "architectures": [
    "BaichuanForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_baichuan.BaichuanConfig",
    "AutoModelForCausalLM": "modeling_baichuan.BaichuanForCausalLM"
  },
  "bos_token_id": 1,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 13696,
  "model_max_length": 4096,
  "model_type": "baichuan",
  "num_attention_heads": 40,
  "num_hidden_layers": 40,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.29.2",
  "use_cache": true,
  "vocab_size": 125696
}
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "pad_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  }
}
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:79452955be6b419a65984273a9f08af86042e1c2a75ee3ba989cbf620a133cc2
size 2001107
@@ -0,0 +1,46 @@
{
  "add_bos_token": false,
  "add_eos_token": false,
  "auto_map": {
    "AutoTokenizer": [
      "tokenization_baichuan.BaichuanTokenizer",
      null
    ]
  },
  "bos_token": {
    "__type": "AddedToken",
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "clean_up_tokenization_spaces": false,
  "eos_token": {
    "__type": "AddedToken",
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "model_max_length": 4096,
  "pad_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "sp_model_kwargs": {},
  "tokenizer_class": "BaichuanTokenizer",
  "unk_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  }
}
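The `tokenizer_config.json` above wires the custom tokenizer in through `auto_map`, so the standard `transformers` loader can resolve it when remote code is trusted. A minimal sketch, assuming `transformers` is installed and `tokenization_baichuan.py` (shipped in `lyrallms/LyraBaichuanPy/`) has been placed alongside these files; the path is taken from this commit's layout.

```python
from transformers import AutoTokenizer

# trust_remote_code lets AutoTokenizer follow the auto_map entry and load
# tokenization_baichuan.BaichuanTokenizer from the model directory.
tokenizer = AutoTokenizer.from_pretrained(
    "models/Baichuan/Baichuan2_13B_Base",
    trust_remote_code=True,
)
print(tokenizer.encode("hello"))
```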
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:182aeae174da2d23af945c93ab92a6ba48ccf9bbc02474096ba950dd7e17bdd2
size 27793336320
@@ -0,0 +1,14 @@
[baichuan]
model_name = Baichuan2_13B_chat
head_num = 40
size_per_head = 128
inter_size = 13696
num_layer = 40
rotary_embedding = 128
layernorm_eps = 1e-06
vocab_size = 125696
start_id = 1
end_id = 2
tensor_para_size = 1
weight_data_type = fp16
@@ -0,0 +1,29 @@
{
  "_from_model_config": true,
  "architectures": [
    "BaichuanForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_baichuan.BaichuanConfig",
    "AutoModelForCausalLM": "modeling_baichuan.BaichuanForCausalLM"
  },
  "tokenizer_class": "BaichuanTokenizer",
  "bos_token_id": 1,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 13696,
  "model_max_length": 4096,
  "model_type": "baichuan",
  "num_attention_heads": 40,
  "num_hidden_layers": 40,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.29.2",
  "use_cache": true,
  "vocab_size": 125696
}
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "pad_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  }
}
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:79452955be6b419a65984273a9f08af86042e1c2a75ee3ba989cbf620a133cc2
size 2001107
@@ -0,0 +1,46 @@
{
  "add_bos_token": false,
  "add_eos_token": false,
  "auto_map": {
    "AutoTokenizer": [
      "tokenization_baichuan.BaichuanTokenizer",
      null
    ]
  },
  "bos_token": {
    "__type": "AddedToken",
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "clean_up_tokenization_spaces": false,
  "eos_token": {
    "__type": "AddedToken",
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "model_max_length": 4096,
  "pad_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "sp_model_kwargs": {},
  "tokenizer_class": "BaichuanTokenizer",
  "unk_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  }
}
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1f83ab15fe6cd2d93be29248e87051db3b62921d3093922d7e73c9817bc0409b
size 15011946496
@@ -0,0 +1,14 @@
[baichuan]
model_name = Baichuan2_7B_base
head_num = 32
size_per_head = 128
inter_size = 11008
num_layer = 32
rotary_embedding = 128
layernorm_eps = 1e-06
vocab_size = 125696
start_id = 1
end_id = 2
tensor_para_size = 1
weight_data_type = fp16
@@ -0,0 +1,28 @@
{
  "architectures": [
    "BaichuanForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_baichuan.BaichuanConfig",
    "AutoModelForCausalLM": "modeling_baichuan.BaichuanForCausalLM"
  },
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 4096,
  "model_max_length": 4096,
  "model_type": "baichuan",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "_from_model_config": true,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.29.2",
  "use_cache": true,
  "vocab_size": 125696
}