2024-mcm-everitt-ryan
/

gemma-2-9b-job-bias-qlora-seq-cls

@@ -11,7 +11,7 @@ developers: Tristan Everitt and Paul Ryan
 model_card_authors: See developers
 model_card_contact: See developers
 repo: https://gitlab.computing.dcu.ie/everitt2/2024-mcm-everitt-ryan
-training_regime: 'PEFT: None, accelerator_config="{''split_batches'': False, ''dispatch_batches'':
   None, ''even_batches'': True, ''use_seedable_sampler'': True, ''non_blocking'':
   False, ''gradient_accumulation_kwargs'': None}", adafactor=false, adam_beta1=0.9,
   adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=false, batch_eval_metrics=false,
@@ -40,20 +40,20 @@ training_regime: 'PEFT: None, accelerator_config="{''split_batches'': False, ''d
   train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false,
   use_mps_device=false, warmup_ratio=0.0, warmup_steps=0, weight_decay=0.001'
 results: "                  precision    recall  f1-score   support\n    \n      \
-  \       age       0.91      0.49      0.63        80\n      disability       0.97\
-  \      0.47      0.64        80\n        feminine       0.99      0.86      0.92\
-  \        80\n         general       0.82      0.56      0.67        80\n       masculine\
-  \       0.62      0.65      0.63        80\n         neutral       0.35      0.90\
-  \      0.50        80\n          racial       0.89      0.80      0.84        80\n\
-  \       sexuality       0.95      0.75      0.84        80\n    \n       micro avg\
-  \       0.69      0.69      0.69       640\n       macro avg       0.81      0.69\
-  \      0.71       640\n    weighted avg       0.81      0.69      0.71       640\n\
-  \     samples avg       0.70      0.74      0.71       640\n    "
-compute_infrastructure: '- Linux 6.5.0-35-generic x86_64
-  - MemTotal:       1056613768 kB
-  - 256 X AMD EPYC 7702 64-Core Processor
   - GPU_0: NVIDIA L40S'
 software: python 3.10.12, accelerate 0.32.1, aiohttp 3.9.5, aiosignal 1.3.1, anyio
@@ -104,7 +104,7 @@ software: python 3.10.12, accelerate 0.32.1, aiohttp 3.9.5, aiosignal 1.3.1, any
   webcolors 1.13, webencodings 0.5.1, websocket-client 1.7.0, wheel 0.42.0, widgetsnbextension
   4.0.9, xxhash 3.4.1, yarl 1.9.4, zipp 1.0.0
 hardware_type: 1 X NVIDIA L40S
-hours_used: '6.22'
 cloud_provider: N/A
 cloud_region: N/A
 co2_emitted: N/A
@@ -134,47 +134,47 @@ model-index:
       type: mix_human-eval_synthetic
     metrics:
     - type: loss
-      value: 0.31148761510849
     - type: accuracy
-      value: 0.6523972602739726
     - type: f1_micro
-      value: 0.6891679748822606
     - type: precision_micro
-      value: 0.692429022082019
     - type: recall_micro
-      value: 0.6859375
     - type: roc_auc_micro
-      value: 0.8187872023809524
     - type: f1_macro
-      value: 0.709360114262138
     - type: precision_macro
-      value: 0.8114628911140539
     - type: recall_macro
-      value: 0.6859375000000001
     - type: roc_auc_macro
-      value: 0.8187872023809524
     - type: f1_samples
-      value: 0.7119863013698631
     - type: precision_samples
-      value: 0.7029109589041096
     - type: recall_samples
-      value: 0.7360159817351598
     - type: roc_auc_samples
-      value: 0.8432444553163732
     - type: f1_weighted
-      value: 0.709360114262138
     - type: precision_weighted
-      value: 0.8114628911140539
     - type: recall_weighted
-      value: 0.6859375
     - type: roc_auc_weighted
-      value: 0.8187872023809526
     - type: runtime
-      value: 373.8217
     - type: samples_per_second
-      value: 1.562
     - type: steps_per_second
-      value: 0.195
     - type: epoch
       value: 3.0
 ---
@@ -286,7 +286,7 @@ Use the code below to get started with the model.
 #### Training Hyperparameters
-- **Training regime:** PEFT: None, accelerator_config="{'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}", adafactor=false, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=false, batch_eval_metrics=false, bf16=false, bf16_full_eval=false, data_seed="None", dataloader_drop_last=false, dataloader_num_workers=0, dataloader_persistent_workers=false, dataloader_pin_memory=true, dataloader_prefetch_factor="None", ddp_backend="None", ddp_broadcast_buffers="None", ddp_bucket_cap_mb="None", ddp_find_unused_parameters="None", ddp_timeout=1800, deepspeed="None", disable_tqdm=false, dispatch_batches="None", do_eval=true, do_predict=false, do_train=false, eval_accumulation_steps="None", eval_batch_size=8, eval_delay=0, eval_do_concat_batches=true, eval_on_start=false, eval_steps="None", eval_strategy="epoch", evaluation_strategy="None", fp16=false, fp16_backend="auto", fp16_full_eval=false, fp16_opt_level="O1", fsdp="[]", fsdp_config="{'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}", fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap="None", full_determinism=false, gradient_accumulation_steps=1, gradient_checkpointing="(False,)", gradient_checkpointing_kwargs="None", greater_is_better=false, group_by_length=true, half_precision_backend="auto", ignore_data_skip=false, include_inputs_for_metrics=false, jit_mode_eval=false, label_names="None", label_smoothing_factor=0.0, learning_rate=0.0001, length_column_name="length", load_best_model_at_end=true, local_rank=0, lr_scheduler_kwargs="{}", lr_scheduler_type="linear", max_grad_norm=1.0, max_steps=-1, metric_for_best_model="loss", mp_parameters="", neftune_noise_alpha="None", no_cuda=false, num_train_epochs=3, optim="adamw_torch", optim_args="None", optim_target_modules="None", past_index=-1, per_device_eval_batch_size=8, per_device_train_batch_size=8, per_gpu_eval_batch_size="None", per_gpu_train_batch_size="None", prediction_loss_only=false, ray_scope="last", remove_unused_columns=true, report_to="[]", restore_callback_states_from_checkpoint=false, resume_from_checkpoint="None", seed=42, skip_memory_metrics=true, split_batches="None", tf32="None", torch_compile=false, torch_compile_backend="None", torch_compile_mode="None", torchdynamo="None", tpu_num_cores="None", train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false, use_mps_device=false, warmup_ratio=0.0, warmup_steps=0, weight_decay=0.001 <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
 #### Speeds, Sizes, Times [optional]
@@ -322,19 +322,19 @@ Use the code below to get started with the model.
                   precision    recall  f1-score   support
-             age       0.91      0.49      0.63        80
-      disability       0.97      0.47      0.64        80
-        feminine       0.99      0.86      0.92        80
-         general       0.82      0.56      0.67        80
-       masculine       0.62      0.65      0.63        80
-         neutral       0.35      0.90      0.50        80
-          racial       0.89      0.80      0.84        80
-       sexuality       0.95      0.75      0.84        80
-       micro avg       0.69      0.69      0.69       640
-       macro avg       0.81      0.69      0.71       640
-    weighted avg       0.81      0.69      0.71       640
-     samples avg       0.70      0.74      0.71       640
 #### Summary
@@ -354,7 +354,7 @@ Use the code below to get started with the model.
 Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
 - **Hardware Type:** 1 X NVIDIA L40S
-- **Hours used:** 6.22
 - **Cloud Provider:** N/A
 - **Compute Region:** N/A
 - **Carbon Emitted:** N/A
@@ -367,9 +367,9 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 ### Compute Infrastructure
-- Linux 6.5.0-35-generic x86_64
-- MemTotal:       1056613768 kB
-- 256 X AMD EPYC 7702 64-Core Processor
 - GPU_0: NVIDIA L40S
 #### Hardware

 model_card_authors: See developers
 model_card_contact: See developers
 repo: https://gitlab.computing.dcu.ie/everitt2/2024-mcm-everitt-ryan
+training_regime: 'accelerator_config="{''split_batches'': False, ''dispatch_batches'':
   None, ''even_batches'': True, ''use_seedable_sampler'': True, ''non_blocking'':
   False, ''gradient_accumulation_kwargs'': None}", adafactor=false, adam_beta1=0.9,
   adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=false, batch_eval_metrics=false,
   train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false,
   use_mps_device=false, warmup_ratio=0.0, warmup_steps=0, weight_decay=0.001'
 results: "                  precision    recall  f1-score   support\n    \n      \
+  \       age       0.72      0.60      0.65        80\n      disability       0.95\
+  \      0.50      0.66        80\n        feminine       0.99      0.91      0.95\
+  \        80\n         general       0.84      0.46      0.60        80\n       masculine\
+  \       0.65      0.66      0.66        80\n         neutral       0.34      0.86\
+  \      0.49        80\n          racial       0.90      0.82      0.86        80\n\
+  \       sexuality       0.97      0.75      0.85        80\n    \n       micro avg\
+  \       0.69      0.70      0.69       640\n       macro avg       0.80      0.70\
+  \      0.71       640\n    weighted avg       0.80      0.70      0.71       640\n\
+  \     samples avg       0.71      0.74      0.72       640\n    "
+compute_infrastructure: '- Linux 6.5.0-28-generic x86_64
+  - MemTotal:       527988292 kB
+  - 64 X Intel(R) Xeon(R) Silver 4314 CPU @ 2.40GHz
   - GPU_0: NVIDIA L40S'
 software: python 3.10.12, accelerate 0.32.1, aiohttp 3.9.5, aiosignal 1.3.1, anyio
   webcolors 1.13, webencodings 0.5.1, websocket-client 1.7.0, wheel 0.42.0, widgetsnbextension
   4.0.9, xxhash 3.4.1, yarl 1.9.4, zipp 1.0.0
 hardware_type: 1 X NVIDIA L40S
+hours_used: '4.18'
 cloud_provider: N/A
 cloud_region: N/A
 co2_emitted: N/A
       type: mix_human-eval_synthetic
     metrics:
     - type: loss
+      value: 0.2838529944419861
     - type: accuracy
+      value: 0.6438356164383562
     - type: f1_micro
+      value: 0.6947040498442367
     - type: precision_micro
+      value: 0.6925465838509317
     - type: recall_micro
+      value: 0.696875
     - type: roc_auc_micro
+      value: 0.8238839285714286
     - type: f1_macro
+      value: 0.7138661109496585
     - type: precision_macro
+      value: 0.7957063168116414
     - type: recall_macro
+      value: 0.6968749999999999
     - type: roc_auc_macro
+      value: 0.8238839285714286
     - type: f1_samples
+      value: 0.7163078930202218
     - type: precision_samples
+      value: 0.7057648401826483
     - type: recall_samples
+      value: 0.7441495433789955
     - type: roc_auc_samples
+      value: 0.8469646934116113
     - type: f1_weighted
+      value: 0.7138661109496585
     - type: precision_weighted
+      value: 0.7957063168116415
     - type: recall_weighted
+      value: 0.696875
     - type: roc_auc_weighted
+      value: 0.8238839285714284
     - type: runtime
+      value: 251.9003
     - type: samples_per_second
+      value: 2.318
     - type: steps_per_second
+      value: 0.29
     - type: epoch
       value: 3.0
 ---
 #### Training Hyperparameters
+- **Training regime:** accelerator_config="{'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}", adafactor=false, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=false, batch_eval_metrics=false, bf16=false, bf16_full_eval=false, data_seed="None", dataloader_drop_last=false, dataloader_num_workers=0, dataloader_persistent_workers=false, dataloader_pin_memory=true, dataloader_prefetch_factor="None", ddp_backend="None", ddp_broadcast_buffers="None", ddp_bucket_cap_mb="None", ddp_find_unused_parameters="None", ddp_timeout=1800, deepspeed="None", disable_tqdm=false, dispatch_batches="None", do_eval=true, do_predict=false, do_train=false, eval_accumulation_steps="None", eval_batch_size=8, eval_delay=0, eval_do_concat_batches=true, eval_on_start=false, eval_steps="None", eval_strategy="epoch", evaluation_strategy="None", fp16=false, fp16_backend="auto", fp16_full_eval=false, fp16_opt_level="O1", fsdp="[]", fsdp_config="{'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}", fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap="None", full_determinism=false, gradient_accumulation_steps=1, gradient_checkpointing="(False,)", gradient_checkpointing_kwargs="None", greater_is_better=false, group_by_length=true, half_precision_backend="auto", ignore_data_skip=false, include_inputs_for_metrics=false, jit_mode_eval=false, label_names="None", label_smoothing_factor=0.0, learning_rate=0.0001, length_column_name="length", load_best_model_at_end=true, local_rank=0, lr_scheduler_kwargs="{}", lr_scheduler_type="linear", max_grad_norm=1.0, max_steps=-1, metric_for_best_model="loss", mp_parameters="", neftune_noise_alpha="None", no_cuda=false, num_train_epochs=3, optim="adamw_torch", optim_args="None", optim_target_modules="None", past_index=-1, per_device_eval_batch_size=8, per_device_train_batch_size=8, per_gpu_eval_batch_size="None", per_gpu_train_batch_size="None", prediction_loss_only=false, ray_scope="last", remove_unused_columns=true, report_to="[]", restore_callback_states_from_checkpoint=false, resume_from_checkpoint="None", seed=42, skip_memory_metrics=true, split_batches="None", tf32="None", torch_compile=false, torch_compile_backend="None", torch_compile_mode="None", torchdynamo="None", tpu_num_cores="None", train_batch_size=8, use_cpu=false, use_ipex=false, use_legacy_prediction_loop=false, use_mps_device=false, warmup_ratio=0.0, warmup_steps=0, weight_decay=0.001 <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
 #### Speeds, Sizes, Times [optional]
                   precision    recall  f1-score   support
+             age       0.72      0.60      0.65        80
+      disability       0.95      0.50      0.66        80
+        feminine       0.99      0.91      0.95        80
+         general       0.84      0.46      0.60        80
+       masculine       0.65      0.66      0.66        80
+         neutral       0.34      0.86      0.49        80
+          racial       0.90      0.82      0.86        80
+       sexuality       0.97      0.75      0.85        80
+       micro avg       0.69      0.70      0.69       640
+       macro avg       0.80      0.70      0.71       640
+    weighted avg       0.80      0.70      0.71       640
+     samples avg       0.71      0.74      0.72       640
 #### Summary
 Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
 - **Hardware Type:** 1 X NVIDIA L40S
+- **Hours used:** 4.18
 - **Cloud Provider:** N/A
 - **Compute Region:** N/A
 - **Carbon Emitted:** N/A
 ### Compute Infrastructure
+- Linux 6.5.0-28-generic x86_64
+- MemTotal:       527988292 kB
+- 64 X Intel(R) Xeon(R) Silver 4314 CPU @ 2.40GHz
 - GPU_0: NVIDIA L40S
 #### Hardware