callista6699 committed on Oct 7

Commit

8b7f717

•

1 Parent(s): baf0f94

Training completed!

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

README.md +60 -0
config.json +49 -0
model.safetensors +3 -0
run-0/checkpoint-19/config.json +49 -0
run-0/checkpoint-19/model.safetensors +3 -0
run-0/checkpoint-19/optimizer.pt +3 -0
run-0/checkpoint-19/rng_state.pth +3 -0
run-0/checkpoint-19/scheduler.pt +3 -0
run-0/checkpoint-19/special_tokens_map.json +7 -0
run-0/checkpoint-19/tokenizer.json +0 -0
run-0/checkpoint-19/tokenizer_config.json +58 -0
run-0/checkpoint-19/trainer_state.json +45 -0
run-0/checkpoint-19/training_args.bin +3 -0
run-0/checkpoint-19/vocab.txt +0 -0
run-1/checkpoint-38/config.json +49 -0
run-1/checkpoint-38/model.safetensors +3 -0
run-1/checkpoint-38/optimizer.pt +3 -0
run-1/checkpoint-38/rng_state.pth +3 -0
run-1/checkpoint-38/scheduler.pt +3 -0
run-1/checkpoint-38/special_tokens_map.json +7 -0
run-1/checkpoint-38/tokenizer.json +0 -0
run-1/checkpoint-38/tokenizer_config.json +58 -0
run-1/checkpoint-38/trainer_state.json +67 -0
run-1/checkpoint-38/training_args.bin +3 -0
run-1/checkpoint-38/vocab.txt +0 -0
run-2/checkpoint-296/config.json +49 -0
run-2/checkpoint-296/model.safetensors +3 -0
run-2/checkpoint-296/optimizer.pt +3 -0
run-2/checkpoint-296/rng_state.pth +3 -0
run-2/checkpoint-296/scheduler.pt +3 -0
run-2/checkpoint-296/special_tokens_map.json +7 -0
run-2/checkpoint-296/tokenizer.json +0 -0
run-2/checkpoint-296/tokenizer_config.json +58 -0
run-2/checkpoint-296/trainer_state.json +249 -0
run-2/checkpoint-296/training_args.bin +3 -0
run-2/checkpoint-296/vocab.txt +0 -0
run-3/checkpoint-295/config.json +49 -0
run-3/checkpoint-295/model.safetensors +3 -0
run-3/checkpoint-295/optimizer.pt +3 -0
run-3/checkpoint-295/rng_state.pth +3 -0
run-3/checkpoint-295/scheduler.pt +3 -0
run-3/checkpoint-295/special_tokens_map.json +7 -0
run-3/checkpoint-295/tokenizer.json +0 -0
run-3/checkpoint-295/tokenizer_config.json +58 -0
run-3/checkpoint-295/trainer_state.json +241 -0
run-3/checkpoint-295/training_args.bin +3 -0
run-3/checkpoint-295/vocab.txt +0 -0
run-4/checkpoint-19/config.json +49 -0
run-4/checkpoint-19/model.safetensors +3 -0
run-4/checkpoint-19/optimizer.pt +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,60 @@

+---
+library_name: transformers
+license: mit
+base_model: nlptown/bert-base-multilingual-uncased-sentiment
+tags:
+- generated_from_trainer
+model-index:
+- name: results
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# results
+This model is a fine-tuned version of [nlptown/bert-base-multilingual-uncased-sentiment](https://huggingface.co/nlptown/bert-base-multilingual-uncased-sentiment) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 2.6110
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2.934292727323431e-05
+- train_batch_size: 4
+- eval_batch_size: 16
+- seed: 16
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 500
+- num_epochs: 3
+### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 0.0002        | 1.0   | 295  | 2.6070          |
+| 0.5428        | 2.0   | 590  | 3.1094          |
+| 0.0002        | 3.0   | 885  | 2.6110          |
+### Framework versions
+- Transformers 4.44.2
+- Pytorch 2.4.1+cu121
+- Tokenizers 0.19.1

config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
+  "_num_labels": 5,
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "finetuning_task": "sentiment-analysis",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "1 star",
+    "1": "2 stars",
+    "2": "3 stars",
+    "3": "4 stars",
+    "4": "5 stars"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "1 star": 0,
+    "2 stars": 1,
+    "3 stars": 2,
+    "4 stars": 3,
+    "5 stars": 4
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 105879
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:adb2f037c4fcb0095b0de830bf80c7a07b9c9db02b3ebca71a7b65e869d77343
+size 669464588

run-0/checkpoint-19/config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
+  "_num_labels": 5,
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "finetuning_task": "sentiment-analysis",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "1 star",
+    "1": "2 stars",
+    "2": "3 stars",
+    "3": "4 stars",
+    "4": "5 stars"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "1 star": 0,
+    "2 stars": 1,
+    "3 stars": 2,
+    "4 stars": 3,
+    "5 stars": 4
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 105879
+}

run-0/checkpoint-19/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d5f5abba49994e4f10e1f832ab7fb583704c86a0e59a2640cadff82a480f1f8
+size 669464588

run-0/checkpoint-19/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:857b82e9b886adf38cadf4c854d81d03f5855d5b85d01dc295baef72968df64a
+size 1339050234

run-0/checkpoint-19/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d3d12f967010971f6c719fc8b0c67a887b6c05899c8df2ac0230989587877407
+size 14244

run-0/checkpoint-19/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f0333332e23fd259cb255054bff6d10b7e0571a091482a54a50087b65d8f5f2
+size 1064

run-0/checkpoint-19/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-19/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-19/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_len": 512,
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-19/trainer_state.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 19,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.5263157894736842,
+      "grad_norm": 6.248871803283691,
+      "learning_rate": 1.1698489431263144e-07,
+      "loss": 0.4253,
+      "step": 10
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 19,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 42098902794240.0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.849244715631572e-06,
+    "num_train_epochs": 1,
+    "per_device_train_batch_size": 64,
+    "seed": 1
+  }
+}

run-0/checkpoint-19/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a9f07479eb33644366a5ddfafbb9fb5c21b3c754996ddd2be7b070a1a725a265
+size 5112

run-0/checkpoint-19/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-38/config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
+  "_num_labels": 5,
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "finetuning_task": "sentiment-analysis",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "1 star",
+    "1": "2 stars",
+    "2": "3 stars",
+    "3": "4 stars",
+    "4": "5 stars"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "1 star": 0,
+    "2 stars": 1,
+    "3 stars": 2,
+    "4 stars": 3,
+    "5 stars": 4
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 105879
+}

run-1/checkpoint-38/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dfe9ed0a1b41491d2ecb3014db2f0bfcf7e37e311c6f7bd51e3c54ffa5bf8342
+size 669464588

run-1/checkpoint-38/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37a6a27d2262563ee9006cc2a777a34723be0df25ef09c707457b977100474ec
+size 1339050234

run-1/checkpoint-38/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1775d8c7698f101cd06aaf4074e223c5851d61233c1992c0032b90cbd1cdfb3b
+size 14244

run-1/checkpoint-38/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:68e7b049716a379e783a23bccb72371c89bc92dd060a15ab0f21514622d89991
+size 1064

run-1/checkpoint-38/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-1/checkpoint-38/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-38/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_len": 512,
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-1/checkpoint-38/trainer_state.json ADDED Viewed

	@@ -0,0 +1,67 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 38,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.5263157894736842,
+      "grad_norm": 6.260778427124023,
+      "learning_rate": 1.0992924659429563e-06,
+      "loss": 0.3935,
+      "step": 10
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.8991609215736389,
+      "eval_runtime": 3.0477,
+      "eval_samples_per_second": 128.949,
+      "eval_steps_per_second": 8.203,
+      "step": 19
+    },
+    {
+      "epoch": 1.0526315789473684,
+      "grad_norm": 6.275106906890869,
+      "learning_rate": 2.1985849318859127e-06,
+      "loss": 0.3804,
+      "step": 20
+    },
+    {
+      "epoch": 1.5789473684210527,
+      "grad_norm": 5.347285747528076,
+      "learning_rate": 3.2978773978288686e-06,
+      "loss": 0.346,
+      "step": 30
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 38,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 123862865564928.0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.496462329714781e-05,
+    "num_train_epochs": 2,
+    "per_device_train_batch_size": 64,
+    "seed": 7
+  }
+}

run-1/checkpoint-38/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ba44fe98f416db72ca51c922415e1f51e4e1404a042747283edd18980a15494
+size 5112

run-1/checkpoint-38/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-296/config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
+  "_num_labels": 5,
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "finetuning_task": "sentiment-analysis",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "1 star",
+    "1": "2 stars",
+    "2": "3 stars",
+    "3": "4 stars",
+    "4": "5 stars"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "1 star": 0,
+    "2 stars": 1,
+    "3 stars": 2,
+    "4 stars": 3,
+    "5 stars": 4
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 105879
+}

run-2/checkpoint-296/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a9a1af22c5f87ae22ee9a1cf5ca0d62eb19a23f9a2f9522692f20f2a1ee3054f
+size 669464588

run-2/checkpoint-296/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:30cd1bb00e95dba14a8cdd52c2b0e316a3fab537a820d9641d8d058cc0a2c8e3
+size 1339050234

run-2/checkpoint-296/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:29fcd1f8c83d899699357277f7c8e68c2ed22b005d7b0077fd7d6708841cd58e
+size 14244

run-2/checkpoint-296/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f8904e10190ba4dd3d9443d80ca3c01c2c678f77b5c6457f9e844f15aef70c1
+size 1064

run-2/checkpoint-296/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-296/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-296/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_len": 512,
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-296/trainer_state.json ADDED Viewed

	@@ -0,0 +1,249 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 296,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06756756756756757,
+      "grad_norm": 23.774078369140625,
+      "learning_rate": 2.067012136643694e-08,
+      "loss": 0.4146,
+      "step": 10
+    },
+    {
+      "epoch": 0.13513513513513514,
+      "grad_norm": 27.321271896362305,
+      "learning_rate": 4.134024273287388e-08,
+      "loss": 0.2673,
+      "step": 20
+    },
+    {
+      "epoch": 0.20270270270270271,
+      "grad_norm": 19.634639739990234,
+      "learning_rate": 6.201036409931082e-08,
+      "loss": 0.3514,
+      "step": 30
+    },
+    {
+      "epoch": 0.2702702702702703,
+      "grad_norm": 18.950864791870117,
+      "learning_rate": 8.268048546574776e-08,
+      "loss": 0.3675,
+      "step": 40
+    },
+    {
+      "epoch": 0.33783783783783783,
+      "grad_norm": 3.3055667877197266,
+      "learning_rate": 1.0335060683218471e-07,
+      "loss": 0.2814,
+      "step": 50
+    },
+    {
+      "epoch": 0.40540540540540543,
+      "grad_norm": 17.000532150268555,
+      "learning_rate": 1.2402072819862164e-07,
+      "loss": 0.3085,
+      "step": 60
+    },
+    {
+      "epoch": 0.47297297297297297,
+      "grad_norm": 15.456575393676758,
+      "learning_rate": 1.446908495650586e-07,
+      "loss": 0.321,
+      "step": 70
+    },
+    {
+      "epoch": 0.5405405405405406,
+      "grad_norm": 10.24705982208252,
+      "learning_rate": 1.6536097093149552e-07,
+      "loss": 0.242,
+      "step": 80
+    },
+    {
+      "epoch": 0.6081081081081081,
+      "grad_norm": 7.760463714599609,
+      "learning_rate": 1.8603109229793245e-07,
+      "loss": 0.3407,
+      "step": 90
+    },
+    {
+      "epoch": 0.6756756756756757,
+      "grad_norm": 25.994970321655273,
+      "learning_rate": 2.0670121366436942e-07,
+      "loss": 0.3126,
+      "step": 100
+    },
+    {
+      "epoch": 0.7432432432432432,
+      "grad_norm": 8.515066146850586,
+      "learning_rate": 2.2737133503080635e-07,
+      "loss": 0.2059,
+      "step": 110
+    },
+    {
+      "epoch": 0.8108108108108109,
+      "grad_norm": 35.16509246826172,
+      "learning_rate": 2.480414563972433e-07,
+      "loss": 0.3152,
+      "step": 120
+    },
+    {
+      "epoch": 0.8783783783783784,
+      "grad_norm": 12.845327377319336,
+      "learning_rate": 2.687115777636802e-07,
+      "loss": 0.2524,
+      "step": 130
+    },
+    {
+      "epoch": 0.9459459459459459,
+      "grad_norm": 10.476096153259277,
+      "learning_rate": 2.893816991301172e-07,
+      "loss": 0.2762,
+      "step": 140
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.9408120512962341,
+      "eval_runtime": 3.0875,
+      "eval_samples_per_second": 127.286,
+      "eval_steps_per_second": 8.097,
+      "step": 148
+    },
+    {
+      "epoch": 1.0135135135135136,
+      "grad_norm": 9.485784530639648,
+      "learning_rate": 3.100518204965541e-07,
+      "loss": 0.2786,
+      "step": 150
+    },
+    {
+      "epoch": 1.0810810810810811,
+      "grad_norm": 13.279548645019531,
+      "learning_rate": 3.3072194186299103e-07,
+      "loss": 0.2737,
+      "step": 160
+    },
+    {
+      "epoch": 1.1486486486486487,
+      "grad_norm": 20.476022720336914,
+      "learning_rate": 3.51392063229428e-07,
+      "loss": 0.2716,
+      "step": 170
+    },
+    {
+      "epoch": 1.2162162162162162,
+      "grad_norm": 7.177048206329346,
+      "learning_rate": 3.720621845958649e-07,
+      "loss": 0.303,
+      "step": 180
+    },
+    {
+      "epoch": 1.2837837837837838,
+      "grad_norm": 25.57468032836914,
+      "learning_rate": 3.927323059623019e-07,
+      "loss": 0.3598,
+      "step": 190
+    },
+    {
+      "epoch": 1.3513513513513513,
+      "grad_norm": 17.67203712463379,
+      "learning_rate": 4.1340242732873883e-07,
+      "loss": 0.3148,
+      "step": 200
+    },
+    {
+      "epoch": 1.4189189189189189,
+      "grad_norm": 18.490848541259766,
+      "learning_rate": 4.340725486951757e-07,
+      "loss": 0.2996,
+      "step": 210
+    },
+    {
+      "epoch": 1.4864864864864864,
+      "grad_norm": 7.604789733886719,
+      "learning_rate": 4.547426700616127e-07,
+      "loss": 0.2956,
+      "step": 220
+    },
+    {
+      "epoch": 1.554054054054054,
+      "grad_norm": 6.397325038909912,
+      "learning_rate": 4.7541279142804964e-07,
+      "loss": 0.2275,
+      "step": 230
+    },
+    {
+      "epoch": 1.6216216216216215,
+      "grad_norm": 15.845990180969238,
+      "learning_rate": 4.960829127944866e-07,
+      "loss": 0.2727,
+      "step": 240
+    },
+    {
+      "epoch": 1.689189189189189,
+      "grad_norm": 4.708223342895508,
+      "learning_rate": 5.167530341609235e-07,
+      "loss": 0.288,
+      "step": 250
+    },
+    {
+      "epoch": 1.7567567567567568,
+      "grad_norm": 3.2593748569488525,
+      "learning_rate": 5.374231555273605e-07,
+      "loss": 0.2449,
+      "step": 260
+    },
+    {
+      "epoch": 1.8243243243243243,
+      "grad_norm": 19.987289428710938,
+      "learning_rate": 5.580932768937974e-07,
+      "loss": 0.3159,
+      "step": 270
+    },
+    {
+      "epoch": 1.8918918918918919,
+      "grad_norm": 23.08250617980957,
+      "learning_rate": 5.787633982602344e-07,
+      "loss": 0.2457,
+      "step": 280
+    },
+    {
+      "epoch": 1.9594594594594594,
+      "grad_norm": 16.686283111572266,
+      "learning_rate": 5.994335196266713e-07,
+      "loss": 0.1901,
+      "step": 290
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 296,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 152279624951040.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 1.033506068321847e-06,
+    "num_train_epochs": 2,
+    "per_device_train_batch_size": 8,
+    "seed": 39
+  }
+}

run-2/checkpoint-296/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2d659a6781d7f46824a106c027f33d02fb5e12dec13a6fbc25c770267dd49254
+size 5112

run-2/checkpoint-296/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-295/config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
+  "_num_labels": 5,
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "finetuning_task": "sentiment-analysis",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "1 star",
+    "1": "2 stars",
+    "2": "3 stars",
+    "3": "4 stars",
+    "4": "5 stars"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "1 star": 0,
+    "2 stars": 1,
+    "3 stars": 2,
+    "4 stars": 3,
+    "5 stars": 4
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 105879
+}

run-3/checkpoint-295/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b66834313fb5809171823aadd15969c48fc40423a5fad81f105f9953e0f9d04
+size 669464588

run-3/checkpoint-295/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5713bd07ba4ff85132bd89c2d23741007e7369a50c36b3fec6843b6116fe5631
+size 1339050234

run-3/checkpoint-295/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e225f1e98308ebd6830a8e8a002c8234e9cdc278fa8f3763323ab15cde900ee
+size 14244

run-3/checkpoint-295/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a9322356d21f5762798699d8ea516179054fd3041294f2e3ad969a0f4b93f6b2
+size 1064

run-3/checkpoint-295/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-295/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-295/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_len": 512,
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-295/trainer_state.json ADDED Viewed

	@@ -0,0 +1,241 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 295,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.03389830508474576,
+      "grad_norm": 32.504554748535156,
+      "learning_rate": 1.2477695266700604e-06,
+      "loss": 0.3078,
+      "step": 10
+    },
+    {
+      "epoch": 0.06779661016949153,
+      "grad_norm": 21.678878784179688,
+      "learning_rate": 2.4955390533401208e-06,
+      "loss": 0.38,
+      "step": 20
+    },
+    {
+      "epoch": 0.1016949152542373,
+      "grad_norm": 13.523513793945312,
+      "learning_rate": 3.7433085800101813e-06,
+      "loss": 0.1996,
+      "step": 30
+    },
+    {
+      "epoch": 0.13559322033898305,
+      "grad_norm": 4.588716983795166,
+      "learning_rate": 4.9910781066802415e-06,
+      "loss": 0.2688,
+      "step": 40
+    },
+    {
+      "epoch": 0.1694915254237288,
+      "grad_norm": 12.524947166442871,
+      "learning_rate": 6.238847633350303e-06,
+      "loss": 0.2351,
+      "step": 50
+    },
+    {
+      "epoch": 0.2033898305084746,
+      "grad_norm": 39.708248138427734,
+      "learning_rate": 7.486617160020363e-06,
+      "loss": 0.2264,
+      "step": 60
+    },
+    {
+      "epoch": 0.23728813559322035,
+      "grad_norm": 33.86473083496094,
+      "learning_rate": 8.734386686690424e-06,
+      "loss": 0.1624,
+      "step": 70
+    },
+    {
+      "epoch": 0.2711864406779661,
+      "grad_norm": 30.304136276245117,
+      "learning_rate": 9.982156213360483e-06,
+      "loss": 0.1768,
+      "step": 80
+    },
+    {
+      "epoch": 0.3050847457627119,
+      "grad_norm": 4.641134262084961,
+      "learning_rate": 1.1229925740030544e-05,
+      "loss": 0.3191,
+      "step": 90
+    },
+    {
+      "epoch": 0.3389830508474576,
+      "grad_norm": 3.7959961891174316,
+      "learning_rate": 1.2477695266700606e-05,
+      "loss": 0.3552,
+      "step": 100
+    },
+    {
+      "epoch": 0.3728813559322034,
+      "grad_norm": 0.8196011781692505,
+      "learning_rate": 1.3725464793370665e-05,
+      "loss": 0.2558,
+      "step": 110
+    },
+    {
+      "epoch": 0.4067796610169492,
+      "grad_norm": 27.152273178100586,
+      "learning_rate": 1.4973234320040725e-05,
+      "loss": 0.5833,
+      "step": 120
+    },
+    {
+      "epoch": 0.4406779661016949,
+      "grad_norm": 4.423884391784668,
+      "learning_rate": 1.6221003846710788e-05,
+      "loss": 0.633,
+      "step": 130
+    },
+    {
+      "epoch": 0.4745762711864407,
+      "grad_norm": 39.28899002075195,
+      "learning_rate": 1.7468773373380848e-05,
+      "loss": 0.5682,
+      "step": 140
+    },
+    {
+      "epoch": 0.5084745762711864,
+      "grad_norm": 2.4689102172851562,
+      "learning_rate": 1.8716542900050905e-05,
+      "loss": 0.6854,
+      "step": 150
+    },
+    {
+      "epoch": 0.5423728813559322,
+      "grad_norm": 67.79933166503906,
+      "learning_rate": 1.9964312426720966e-05,
+      "loss": 0.5666,
+      "step": 160
+    },
+    {
+      "epoch": 0.576271186440678,
+      "grad_norm": 26.09642219543457,
+      "learning_rate": 2.121208195339103e-05,
+      "loss": 0.7398,
+      "step": 170
+    },
+    {
+      "epoch": 0.6101694915254238,
+      "grad_norm": 20.591644287109375,
+      "learning_rate": 2.2459851480061087e-05,
+      "loss": 0.5566,
+      "step": 180
+    },
+    {
+      "epoch": 0.6440677966101694,
+      "grad_norm": 54.15541076660156,
+      "learning_rate": 2.3707621006731148e-05,
+      "loss": 0.6932,
+      "step": 190
+    },
+    {
+      "epoch": 0.6779661016949152,
+      "grad_norm": 1.1118764877319336,
+      "learning_rate": 2.4955390533401212e-05,
+      "loss": 0.7307,
+      "step": 200
+    },
+    {
+      "epoch": 0.711864406779661,
+      "grad_norm": 7.498295783996582,
+      "learning_rate": 2.620316006007127e-05,
+      "loss": 0.7541,
+      "step": 210
+    },
+    {
+      "epoch": 0.7457627118644068,
+      "grad_norm": 125.26350402832031,
+      "learning_rate": 2.745092958674133e-05,
+      "loss": 0.6168,
+      "step": 220
+    },
+    {
+      "epoch": 0.7796610169491526,
+      "grad_norm": 64.39082336425781,
+      "learning_rate": 2.869869911341139e-05,
+      "loss": 0.4098,
+      "step": 230
+    },
+    {
+      "epoch": 0.8135593220338984,
+      "grad_norm": 13.032197952270508,
+      "learning_rate": 2.994646864008145e-05,
+      "loss": 0.549,
+      "step": 240
+    },
+    {
+      "epoch": 0.847457627118644,
+      "grad_norm": 47.54188537597656,
+      "learning_rate": 3.119423816675151e-05,
+      "loss": 1.0132,
+      "step": 250
+    },
+    {
+      "epoch": 0.8813559322033898,
+      "grad_norm": 18.0958251953125,
+      "learning_rate": 3.2442007693421575e-05,
+      "loss": 0.8416,
+      "step": 260
+    },
+    {
+      "epoch": 0.9152542372881356,
+      "grad_norm": 25.700082778930664,
+      "learning_rate": 3.368977722009163e-05,
+      "loss": 0.8113,
+      "step": 270
+    },
+    {
+      "epoch": 0.9491525423728814,
+      "grad_norm": 20.261831283569336,
+      "learning_rate": 3.4937546746761697e-05,
+      "loss": 0.9122,
+      "step": 280
+    },
+    {
+      "epoch": 0.9830508474576272,
+      "grad_norm": 12.409477233886719,
+      "learning_rate": 3.6185316273431754e-05,
+      "loss": 0.5287,
+      "step": 290
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 295,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 76304261314560.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 6.238847633350302e-05,
+    "num_train_epochs": 1,
+    "per_device_train_batch_size": 4,
+    "seed": 12
+  }
+}

run-3/checkpoint-295/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9021fc656029eb7ea6cff04ecc68350cb36d8078aeea747ba522f9bdf60c5be7
+size 5112

run-3/checkpoint-295/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-19/config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
+  "_num_labels": 5,
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "finetuning_task": "sentiment-analysis",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "1 star",
+    "1": "2 stars",
+    "2": "3 stars",
+    "3": "4 stars",
+    "4": "5 stars"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "1 star": 0,
+    "2 stars": 1,
+    "3 stars": 2,
+    "4 stars": 3,
+    "5 stars": 4
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 105879
+}

run-4/checkpoint-19/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06e915c1d6cec5db96c3027a05cb12bc940ae21f46565e79faa5754c8a684aa0
+size 669464588

run-4/checkpoint-19/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44dd60d885a10c63785672143b4d8dcc80d5911518f2efb882deb08a9ba56c5d
+size 1339050234