w32zhong committed
Commit 824a19a
1 Parent(s): 067c3f6

update model

.gitattributes ADDED
@@ -0,0 +1,3 @@
+ adapter_model.bin filter=lfs diff=lfs merge=lfs -text
+ training_args.bin filter=lfs diff=lfs merge=lfs -text
+ events.out.* filter=lfs diff=lfs merge=lfs -text
.keep ADDED
File without changes
README.md CHANGED
@@ -1,45 +1,20 @@
  ---
- language: en
- tags:
- - azbert
- - pretraining
- - fill-mask
- widget:
- - text: "$f$ $($ $x$ [MASK] $y$ $)$"
-   example_title: "mathy"
- - text: "$x$ [MASK] $x$ $equal$ $2$ $x$"
-   example_title: "mathy"
- - text: "Proof by [MASK] that $n$ $fact$ $gt$ $3$ $n$ for $n$ $gt$ $6$"
-   example_title: "mathy"
- - text: "Proof by induction that $n$ [MASK] $gt$ $3$ $n$ for $n$ $gt$ $6$"
-   example_title: "mathy"
- - text: "The goal of life is [MASK]."
-   example_title: "philosophical"
- license: mit
+ library_name: peft
  ---
+ ## Training procedure
 
- ## About
- This repository is a boilerplate to push a mask-filling model to the HuggingFace Model Hub.
-
- ### Checklist
- * `git-lfs` is installed
- * tokenizer contains all the files needed: `added_tokens.json`, `special_tokens_map.json`, `tokenizer_config.json`, `vocab.txt` and `tokenizer.json`
- * no `tokenizer_file` field in `tokenizer_config.json` (sometimes it is located locally at `~/.cache`)
-
- ### Upload
- 1. Put the model checkpoints and optionally log files (`*.bin` and log files `events.out.*`) to the `./ckpt` directory.
- 2. Add a branch `hgf` to point to your huggingface repo. For example `git remote add hgf [email protected]:approach0/mathy-vicuna-13B-FFT`
- 3. Run the `upload2hgf.sh` script.
-
- ### Test the MLM task (an example)
- ```sh
- pip install pya0 # for math token preprocessing
- # testing local checkpoints:
- python test.py ./ckpt/math-tokenizer ./ckpt/2-2-0/encoder.ckpt
- # testing Model Hub checkpoints:
- python test.py approach0/coco-mae-220 approach0/coco-mae-220
- ```
- > **Note**
- > Modify the test examples in `test.txt` to play with it.
- > The test file is tab-separated, the first column is additional positions you want to mask for the right-side sentence (useful for masking tokens in math markups).
- > A zero means no additional mask positions.
+ The following `bitsandbytes` quantization config was used during training:
+ - load_in_8bit: True
+ - load_in_4bit: False
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: fp4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float32
+ ### Framework versions
+
+ - PEFT 0.4.0
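For reference, the quantization settings listed in the updated README map directly onto a `transformers` `BitsAndBytesConfig`. The sketch below is illustrative only and is not code shipped in this commit; the base model id is the one recorded in `adapter_config.json` further down.

```python
# Illustrative sketch only: the 8-bit bitsandbytes config listed in the README,
# expressed as a BitsAndBytesConfig for loading the base model.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    load_in_4bit=False,
    llm_int8_threshold=6.0,
    llm_int8_skip_modules=None,
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_quant_type="fp4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float32,
)

# Base model name as recorded in adapter_config.json below.
base_model = AutoModelForCausalLM.from_pretrained(
    "TIGER-Lab/MAmmoTH-13B",
    quantization_config=bnb_config,
    device_map="auto",
)
```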
adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "TIGER-Lab/MAmmoTH-13B",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 16,
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "k_proj",
+     "v_proj",
+     "o_proj",
+     "gate_proj",
+     "down_proj",
+     "up_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
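The config above describes a rank-16 LoRA adapter over the attention and MLP projections of TIGER-Lab/MAmmoTH-13B. A minimal loading sketch with `peft` 0.4.0, assuming the adapter files (`adapter_config.json` and `adapter_model.bin`) sit in a local clone of this repo; the path is a placeholder, not part of the commit:

```python
# Minimal sketch, not part of this commit: attach the LoRA adapter to the
# quantized base model loaded in the earlier sketch. "./" is a placeholder
# for wherever adapter_config.json and adapter_model.bin live.
from peft import PeftModel

model = PeftModel.from_pretrained(base_model, "./")
model.eval()  # inference_mode is true in the adapter config
```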
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4e25ffaf150d89c4eb6de30ecf9f7a978967394075983cf4fe8b1cfb7ad19c4e
+ size 125368013
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "[PAD]": 32000
+ }
config.json ADDED
@@ -0,0 +1 @@
+ {"mode": "finetune", "wandb": "True", "batch_size": "12", "context_length": "2048", "flash_atten": "True", "tokenizer": "TIGER-Lab/MAmmoTH-13B", "model": "TIGER-Lab/MAmmoTH-13B", "dataset": "approach0/retrieval-augment-finetune", "dataset_shuffle": "True", "dataset_map_fn": "datamap_double_train_for_query_and_answer", "collate_fn": "collate_final_dataset_for_generalist", "collate_add_eos": "True", "collate_debug": "True", "eval_during_train": "no", "peft": "{\n\"peft_attach_new\": true,\n\"peft_lora_rank\": 16,\n\"peft_lora_alpha\": 16,\n\"peft_lora_targets\": [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\", \"gate_proj\", \"down_proj\", \"up_proj\"]\n}", "trainer": "[\n\"--output_dir\", \"<exp_outdir>\",\n\"--save_strategy\", \"steps\",\n\"--save_steps\", 100,\n\"--save_total_limit\", 2,\n\"--evaluation_strategy\", \"no\",\n\"--eval_steps\", 10,\n\"--report_to\", \"wandb\",\n\"--logging_steps\", 1,\n\"--per_device_train_batch_size\", 1,\n\"--remove_unused_columns\", false,\n\"--gradient_accumulation_steps\", 12,\n\"--learning_rate\", 2e-5,\n\"--warmup_steps\", 10,\n\"--optim\", \"adamw_torch\",\n\"--num_train_epochs\", 2,\n\"--fp16\", false,\n\"--bf16\", true,\n\"--seed\", 70\n]", "load_in_8bit": "True", "device_map": null, "deepspeed": "{\n\"en_param_offload\": true\n}", "local_rank": "0", "run": "GCR-13B-mammoth-generalist", "7b_vicuna_v1_5": "lmsys/vicuna-7b-v1.5", "7b_vicuna_v1_5_32k": "lmsys/longchat-7b-v1.5-32k", "test_lora_repo": "Lajonbot/vicuna-7b-v1.5-PL-lora_adapter_model", "13b_mathy_fft": "approach0/mathy-vicuna-13B-FFT", "7b_wizardmath": "WizardLM/WizardMath-7B-V1.0", "13b_wizardmath": "WizardLM/WizardMath-13B-V1.0", "70b_wizardmath": "WizardLM/WizardMath-70B-V1.0", "7b_mammoth": "TIGER-Lab/MAmmoTH-7B", "13b_mammoth": "TIGER-Lab/MAmmoTH-13B", "34b_mammoth_code": "TIGER-Lab/MAmmoTH-Coder-13B", "70b_mammoth": "TIGER-Lab/MAmmoTH-70B", "7b_tora": "llm-agents/tora-7b-v1.0", "13b_tora": "llm-agents/tora-13b-v1.0", "seed": "70", "output_dir": "./output", "add_sys_paths": "[\"../Progressive-Hint\", \"../math/modeling\"]"}
inject.json ADDED
@@ -0,0 +1 @@
+ {"model": "TIGER-Lab/MAmmoTH-13B", "tokenizer": "TIGER-Lab/MAmmoTH-13B", "run": "GCR-13B-mammoth-generalist"}
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "[PAD]",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render.
tokenizer.model ADDED
Binary file (500 kB).
tokenizer_config.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": false,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "legacy": false,
+   "model_max_length": 512,
+   "pad_token": null,
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
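Taken together, `added_tokens.json`, `special_tokens_map.json`, and `tokenizer_config.json` define a LlamaTokenizer with an extra `[PAD]` token at id 32000 and right-side padding. A quick sanity-check sketch (the path is a placeholder, and the loading code is not part of this commit):

```python
# Sketch only: load the tokenizer files added in this commit and inspect the
# extra [PAD] token. "./" is a placeholder for a local clone of this repo.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./")
print(tokenizer.pad_token)                        # expected: [PAD]
print(tokenizer.convert_tokens_to_ids("[PAD]"))   # expected: 32000
print(tokenizer.padding_side)                     # right, per tokenizer_config.json

# If the base model was saved without the extra token, its embedding matrix
# must be resized before training or generation with padding:
# base_model.resize_token_embeddings(len(tokenizer))
```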
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a62838c2f3f319f53cada2373abe415ab105927fd68d4e8c8530c1aa6e55761
+ size 6267