Training in progress, epoch 1

Browse files

Files changed (12) hide show

.hydra/config.yaml +62 -0
.hydra/hydra.yaml +165 -0
.hydra/overrides.yaml +1 -0
adapter_config.json +25 -0
adapter_model.safetensors +3 -0
hparams.yaml +62 -0
special_tokens_map.json +24 -0
tb_logs/events.out.tfevents.1705450080.dev-gpu-pl487.610521.0 +3 -0
tokenizer.json +0 -0
tokenizer_config.json +212 -0
training_args.bin +3 -0
trl_main.log +65 -0

.hydra/config.yaml ADDED Viewed

	@@ -0,0 +1,62 @@

+model:
+  name: EleutherAI/pythia-14m
+  alias: pythia-14m
+  revision: null
+  subfolder: null
+  precision: bf16
+  set_eos_to_pad: true
+dataset:
+  name: gsm8k
+  alias: ${dataset.name}
+  text_field: question
+  max_length: 1024
+trainer:
+  group_by_length: false
+  remove_unused_columns: true
+  neftune_noise_alpha: null
+  eval_accumulation_steps: 1
+  per_device_train_batch_size: 32
+  per_device_eval_batch_size: 20
+  gradient_accumulation_steps: 1
+  dataloader_num_workers: 8
+  dataloader_drop_last: false
+  optim: adamw_torch_fused
+  adafactor: false
+  learning_rate: 0.0001
+  weight_decay: 0
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+  adam_epsilon: 1.0e-08
+  max_grad_norm: 1.0
+  lr_scheduler_type: linear
+  warmup_ratio: 0.0
+  warmup_steps: 0
+  num_train_epochs: 1
+  max_steps: -1
+  eval_steps: 100
+  output_dir: ./
+  logging_strategy: steps
+  logging_first_step: true
+  logging_steps: 1
+  log_level: info
+  report_to: tensorboard
+  logging_dir: tb_logs
+  disable_tqdm: false
+  push_to_hub: true
+  save_strategy: epoch
+  save_steps: ${trainer.eval_steps}
+  save_only_model: true
+  seed: ${global_seed}
+  data_seed: ${global_seed}
+  full_determinism: true
+  tf32: true
+lora:
+  r: 64
+  lora_alpha: 16
+  bias: none
+  task_type: CAUSAL_LM
+  target_modules: null
+use_peft: true
+global_seed: 42
+experiment_group: training
+run_name: ${model.alias}_${now:%Y-%m-%d}T${now:%H-%M-%S}

.hydra/hydra.yaml ADDED Viewed

	@@ -0,0 +1,165 @@

+hydra:
+  run:
+    dir: ./outputs/${experiment_group}/${dataset.alias}/${run_name}
+  sweep:
+    dir: ./outputs/multirun/${experiment_group}
+    subdir: ${dataset.alias}/${run_name}_${hydra.job.id}
+  launcher:
+    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
+  sweeper:
+    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+    max_batch_size: null
+    params: null
+  help:
+    app_name: ${hydra.job.name}
+    header: '${hydra.help.app_name} is powered by Hydra.
+      '
+    footer: 'Powered by Hydra (https://hydra.cc)
+      Use --hydra-help to view Hydra specific help
+      '
+    template: '${hydra.help.header}
+      == Configuration groups ==
+      Compose your configuration from those groups (group=option)
+      $APP_CONFIG_GROUPS
+      == Config ==
+      Override anything in the config (foo.bar=value)
+      $CONFIG
+      ${hydra.help.footer}
+      '
+  hydra_help:
+    template: 'Hydra (${hydra.runtime.version})
+      See https://hydra.cc for more info.
+      == Flags ==
+      $FLAGS_HELP
+      == Configuration groups ==
+      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+      to command line)
+      $HYDRA_CONFIG_GROUPS
+      Use ''--cfg hydra'' to Show the Hydra config.
+      '
+    hydra_help: ???
+  hydra_logging:
+    version: 1
+    formatters:
+      colorlog:
+        (): colorlog.ColoredFormatter
+        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: colorlog
+        stream: ext://sys.stdout
+    root:
+      level: INFO
+      handlers:
+      - console
+    disable_existing_loggers: false
+  job_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+      colorlog:
+        (): colorlog.ColoredFormatter
+        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
+          - %(message)s'
+        log_colors:
+          DEBUG: purple
+          INFO: green
+          WARNING: yellow
+          ERROR: red
+          CRITICAL: bold_red
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: colorlog
+        stream: ext://sys.stdout
+      file:
+        class: logging.FileHandler
+        formatter: simple
+        filename: ${hydra.job.name}.log
+    root:
+      level: INFO
+      handlers:
+      - console
+      - file
+    disable_existing_loggers: false
+  env: {}
+  mode: RUN
+  searchpath: []
+  callbacks: {}
+  output_subdir: .hydra
+  overrides:
+    hydra:
+    - hydra.mode=RUN
+    task: []
+  job:
+    name: trl_main
+    chdir: true
+    override_dirname: ''
+    id: ???
+    num: ???
+    config_name: trl_conf
+    env_set: {}
+    env_copy: []
+    config:
+      override_dirname:
+        kv_sep: '='
+        item_sep: ','
+        exclude_keys: []
+  runtime:
+    version: 1.3.2
+    version_base: '1.3'
+    cwd: /home/pl487/weak-to-strong-generalisation
+    config_sources:
+    - path: hydra.conf
+      schema: pkg
+      provider: hydra
+    - path: /home/pl487/weak-to-strong-generalisation/conf
+      schema: file
+      provider: main
+    - path: hydra_plugins.hydra_colorlog.conf
+      schema: pkg
+      provider: hydra-colorlog
+    - path: ''
+      schema: structured
+      provider: schema
+    output_dir: /home/pl487/weak-to-strong-generalisation/outputs/training/gsm8k/pythia-14m_2024-01-17T00-07-52
+    choices:
+      hydra/env: default
+      hydra/callbacks: null
+      hydra/job_logging: colorlog
+      hydra/hydra_logging: colorlog
+      hydra/hydra_help: default
+      hydra/help: default
+      hydra/sweeper: basic
+      hydra/launcher: basic
+      hydra/output: default
+  verbose: false

.hydra/overrides.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ []

adapter_config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "EleutherAI/pythia-14m",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query_key_value"
+  ],
+  "task_type": "CAUSAL_LM"
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc82b0b9d4cb85d528900aa127887e88071e0937803991945ce1121027f87761
+size 394952

hparams.yaml ADDED Viewed

	@@ -0,0 +1,62 @@

+model:
+  name: EleutherAI/pythia-14m
+  alias: pythia-14m
+  revision: null
+  subfolder: null
+  precision: bf16
+  set_eos_to_pad: true
+dataset:
+  name: gsm8k
+  alias: gsm8k
+  text_field: question
+  max_length: 1024
+trainer:
+  group_by_length: false
+  remove_unused_columns: true
+  neftune_noise_alpha: null
+  eval_accumulation_steps: 1
+  per_device_train_batch_size: 32
+  per_device_eval_batch_size: 20
+  gradient_accumulation_steps: 1
+  dataloader_num_workers: 8
+  dataloader_drop_last: false
+  optim: adamw_torch_fused
+  adafactor: false
+  learning_rate: 0.0001
+  weight_decay: 0
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+  adam_epsilon: 1.0e-08
+  max_grad_norm: 1.0
+  lr_scheduler_type: linear
+  warmup_ratio: 0.0
+  warmup_steps: 0
+  num_train_epochs: 1
+  max_steps: -1
+  eval_steps: 100
+  output_dir: ./
+  logging_strategy: steps
+  logging_first_step: true
+  logging_steps: 1
+  log_level: info
+  report_to: tensorboard
+  logging_dir: tb_logs
+  disable_tqdm: false
+  push_to_hub: true
+  save_strategy: epoch
+  save_steps: 100
+  save_only_model: true
+  seed: 42
+  data_seed: 42
+  full_determinism: true
+  tf32: true
+lora:
+  r: 64
+  lora_alpha: 16
+  bias: none
+  task_type: CAUSAL_LM
+  target_modules: null
+use_peft: true
+global_seed: 42
+experiment_group: training
+run_name: pythia-14m_2024-01-17T00-07-52

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tb_logs/events.out.tfevents.1705450080.dev-gpu-pl487.610521.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:19878075f35cd2790962f062c7e641249fb5b7006557191959a835c7f04c5c77
+size 41054

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,212 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<|padding|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50254": {
+      "content": "                        ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50255": {
+      "content": "                       ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50256": {
+      "content": "                      ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50257": {
+      "content": "                     ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50258": {
+      "content": "                    ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50259": {
+      "content": "                   ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50260": {
+      "content": "                  ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50261": {
+      "content": "                 ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50262": {
+      "content": "                ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50263": {
+      "content": "               ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50264": {
+      "content": "              ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50265": {
+      "content": "             ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50266": {
+      "content": "            ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50267": {
+      "content": "           ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50268": {
+      "content": "          ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50269": {
+      "content": "         ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50270": {
+      "content": "        ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50271": {
+      "content": "       ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50272": {
+      "content": "      ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50273": {
+      "content": "     ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50274": {
+      "content": "    ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50275": {
+      "content": "   ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50276": {
+      "content": "  ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPTNeoXTokenizer",
+  "unk_token": "<|endoftext|>"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a2c7d457c4198d805c663dd4afba6ca909e4ef791bef0dff9f9d3f8616bf0b31
+size 4664

trl_main.log ADDED Viewed

	@@ -0,0 +1,65 @@

+[2024-01-17 00:07:52,551][hydra][INFO] -
+model:
+  name: EleutherAI/pythia-14m
+  alias: pythia-14m
+  revision: null
+  subfolder: null
+  precision: bf16
+  set_eos_to_pad: true
+dataset:
+  name: gsm8k
+  alias: gsm8k
+  text_field: question
+  max_length: 1024
+trainer:
+  group_by_length: false
+  remove_unused_columns: true
+  neftune_noise_alpha: null
+  eval_accumulation_steps: 1
+  per_device_train_batch_size: 32
+  per_device_eval_batch_size: 20
+  gradient_accumulation_steps: 1
+  dataloader_num_workers: 8
+  dataloader_drop_last: false
+  optim: adamw_torch_fused
+  adafactor: false
+  learning_rate: 0.0001
+  weight_decay: 0
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+  adam_epsilon: 1.0e-08
+  max_grad_norm: 1.0
+  lr_scheduler_type: linear
+  warmup_ratio: 0.0
+  warmup_steps: 0
+  num_train_epochs: 1
+  max_steps: -1
+  eval_steps: 100
+  output_dir: ./
+  logging_strategy: steps
+  logging_first_step: true
+  logging_steps: 1
+  log_level: info
+  report_to: tensorboard
+  logging_dir: tb_logs
+  disable_tqdm: false
+  push_to_hub: true
+  save_strategy: epoch
+  save_steps: 100
+  save_only_model: true
+  seed: 42
+  data_seed: 42
+  full_determinism: true
+  tf32: true
+lora:
+  r: 64
+  lora_alpha: 16
+  bias: none
+  task_type: CAUSAL_LM
+  target_modules: null
+use_peft: true
+global_seed: 42
+experiment_group: training
+run_name: pythia-14m_2024-01-17T00-07-52
+======================================================================