pruning committed on
Commit
3776317
1 Parent(s): 871f97b

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +15 -0
  2. adapter_0.pt +3 -0
  3. meta_model_0.pt +3 -0
  4. training_config.yml +75 -0
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ tags:
4
+ - any-to-any
5
+ - omega
6
+ - omegalabs
7
+ - bittensor
8
+ - agi
9
+ ---
10
+
11
+ This is an Any-to-Any model checkpoint for the OMEGA Labs x Bittensor Any-to-Any subnet.
12
+
13
+ Check out the [git repo](https://github.com/omegalabsinc/omegalabs-anytoany-bittensor) and find OMEGA on X: [@omegalabsai](https://x.com/omegalabsai).
14
+
15
+ Trained by pruning
adapter_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49804b152a1353ebec41a685f4eac17b38e09e006f2d51d4cedd110668a54563
3
+ size 13673786
meta_model_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7acf319d3b563a1e73e8ccfc6e8dbfcee3244224752cf5af7137c9889244674f
3
+ size 16219158403
training_config.yml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ _component_: models.lora_mmllama3_8b
3
+ lora_attn_modules:
4
+ - q_proj
5
+ - v_proj
6
+ apply_lora_to_mlp: false
7
+ apply_lora_to_output: false
8
+ lora_rank: 16
9
+ lora_alpha: 32
10
+ perception_tokens: 2
11
+ use_clip: false
12
+ tokenizer:
13
+ _component_: models.a2a_tokenizer
14
+ path: models/tokenizer.model
15
+ checkpointer:
16
+ _component_: torchtune.utils.FullModelMetaCheckpointer
17
+ checkpoint_dir: checkpoints/Meta-Llama-3-8B-Instruct/original
18
+ checkpoint_files:
19
+ - consolidated.00.pth
20
+ adapter_checkpoint: null
21
+ recipe_checkpoint: null
22
+ output_dir: output_checkpoints/omega_a2a
23
+ model_type: LLAMA3
24
+ resume_from_checkpoint: false
25
+ interim_checkpoint_steps: 5000
26
+ interim_gen_steps: null
27
+ max_new_tokens: 100
28
+ temperature: 0.6
29
+ top_k: 225
30
+ dataset:
31
+ _component_: ds.EvenBatcher
32
+ buffer_size: 64
33
+ dataset:
34
+ _component_: ds.RoundRobinDataset
35
+ datasets:
36
+ - _component_: ds.CaptionInstructDataset
37
+ dataset_path: ds/sam_llava/output.parquet
38
+ train_on_input: false
39
+ seed: null
40
+ shuffle: true
41
+ batch_size: 1
42
+ optimizer:
43
+ _component_: torch.optim.AdamW
44
+ weight_decay: 0.001
45
+ lr: 1.0e-05
46
+ lr_scheduler:
47
+ _component_: torchtune.modules.get_cosine_schedule_with_warmup
48
+ num_warmup_steps: 100
49
+ loss:
50
+ _component_: torch.nn.CrossEntropyLoss
51
+ epochs: 4
52
+ max_steps_per_epoch: null
53
+ gradient_accumulation_steps: 32
54
+ compile: false
55
+ output_dir: /tmp/lora_finetune_output
56
+ metric_logger:
57
+ _component_: torchtune.utils.metric_logging.StdoutLogger
58
+ log_dir: /dev/stdout
59
+ log_every_n_steps: 8
60
+ device: cuda
61
+ dtype: bf16
62
+ enable_activation_checkpointing: false
63
+ profiler:
64
+ _component_: torchtune.utils.profiler
65
+ enabled: false
66
+ inference:
67
+ prompt_template: 'Video:
68
+
69
+ {video}
70
+
71
+ Caption the previous video.'
72
+ max_new_tokens: 300
73
+ temperature: 0.6
74
+ top_k: 300
75
+ quantizer: null