Upload 5 files

Browse files

Files changed (5) hide show

README.md +202 -0
adapter_config.json +33 -0
adapter_model.safetensors +3 -0
trainer_state.json +910 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: llava-hf/llava-1.5-7b-hf
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
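+- **Model type:** LoRA adapter (PEFT) for a causal language model — rank 32, applied to the `q_proj`, `k_proj`, `v_proj` and `o_proj` modules, with `multi_modal_projector` saved in full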
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.13.2

adapter_config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "llava-hf/llava-1.5-7b-hf",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "multi_modal_projector"
+  ],
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:67b3bac7b69aa76ae5be6d23c2d3edd35b199e34fddebd3c92c1bf6132837a5f
+size 118568352

trainer_state.json ADDED Viewed

	@@ -0,0 +1,910 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 17.77459749552773,
+  "eval_steps": 500,
+  "global_step": 1242,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.14311270125223613,
+      "grad_norm": 7.146514892578125,
+      "learning_rate": 0.00039677938808373593,
+      "loss": 3.5751,
+      "step": 10
+    },
+    {
+      "epoch": 0.28622540250447226,
+      "grad_norm": 2.1325223445892334,
+      "learning_rate": 0.00039355877616747184,
+      "loss": 0.939,
+      "step": 20
+    },
+    {
+      "epoch": 0.4293381037567084,
+      "grad_norm": 0.8520782589912415,
+      "learning_rate": 0.00039033816425120774,
+      "loss": 0.2653,
+      "step": 30
+    },
+    {
+      "epoch": 0.5724508050089445,
+      "grad_norm": 0.7653748393058777,
+      "learning_rate": 0.00038711755233494365,
+      "loss": 0.1603,
+      "step": 40
+    },
+    {
+      "epoch": 0.7155635062611807,
+      "grad_norm": 0.661469578742981,
+      "learning_rate": 0.00038389694041867956,
+      "loss": 0.1886,
+      "step": 50
+    },
+    {
+      "epoch": 0.8586762075134168,
+      "grad_norm": 0.39610955119132996,
+      "learning_rate": 0.00038067632850241547,
+      "loss": 0.1859,
+      "step": 60
+    },
+    {
+      "epoch": 1.0017889087656529,
+      "grad_norm": 0.4488755464553833,
+      "learning_rate": 0.0003774557165861514,
+      "loss": 0.1538,
+      "step": 70
+    },
+    {
+      "epoch": 1.144901610017889,
+      "grad_norm": 0.2944377362728119,
+      "learning_rate": 0.00037423510466988734,
+      "loss": 0.1195,
+      "step": 80
+    },
+    {
+      "epoch": 1.2880143112701252,
+      "grad_norm": 0.29124024510383606,
+      "learning_rate": 0.0003710144927536232,
+      "loss": 0.1271,
+      "step": 90
+    },
+    {
+      "epoch": 1.4311270125223614,
+      "grad_norm": 0.42328736186027527,
+      "learning_rate": 0.0003677938808373591,
+      "loss": 0.1018,
+      "step": 100
+    },
+    {
+      "epoch": 1.5742397137745976,
+      "grad_norm": 0.3259565234184265,
+      "learning_rate": 0.00036457326892109506,
+      "loss": 0.0848,
+      "step": 110
+    },
+    {
+      "epoch": 1.7173524150268338,
+      "grad_norm": 0.479124516248703,
+      "learning_rate": 0.0003613526570048309,
+      "loss": 0.106,
+      "step": 120
+    },
+    {
+      "epoch": 1.8604651162790697,
+      "grad_norm": 0.40788090229034424,
+      "learning_rate": 0.0003581320450885668,
+      "loss": 0.0969,
+      "step": 130
+    },
+    {
+      "epoch": 2.0035778175313057,
+      "grad_norm": 0.3574964106082916,
+      "learning_rate": 0.0003549114331723028,
+      "loss": 0.124,
+      "step": 140
+    },
+    {
+      "epoch": 2.146690518783542,
+      "grad_norm": 0.37805065512657166,
+      "learning_rate": 0.0003516908212560387,
+      "loss": 0.0491,
+      "step": 150
+    },
+    {
+      "epoch": 2.289803220035778,
+      "grad_norm": 0.25937220454216003,
+      "learning_rate": 0.00034847020933977455,
+      "loss": 0.0669,
+      "step": 160
+    },
+    {
+      "epoch": 2.4329159212880143,
+      "grad_norm": 0.34056201577186584,
+      "learning_rate": 0.00034524959742351046,
+      "loss": 0.0595,
+      "step": 170
+    },
+    {
+      "epoch": 2.5760286225402504,
+      "grad_norm": 0.30211707949638367,
+      "learning_rate": 0.0003420289855072464,
+      "loss": 0.0648,
+      "step": 180
+    },
+    {
+      "epoch": 2.7191413237924866,
+      "grad_norm": 0.18458786606788635,
+      "learning_rate": 0.0003388083735909823,
+      "loss": 0.0545,
+      "step": 190
+    },
+    {
+      "epoch": 2.862254025044723,
+      "grad_norm": 0.27384912967681885,
+      "learning_rate": 0.0003355877616747182,
+      "loss": 0.0684,
+      "step": 200
+    },
+    {
+      "epoch": 3.005366726296959,
+      "grad_norm": 0.16877304017543793,
+      "learning_rate": 0.00033236714975845414,
+      "loss": 0.0695,
+      "step": 210
+    },
+    {
+      "epoch": 3.148479427549195,
+      "grad_norm": 0.07739146798849106,
+      "learning_rate": 0.00032914653784219005,
+      "loss": 0.0256,
+      "step": 220
+    },
+    {
+      "epoch": 3.2915921288014314,
+      "grad_norm": 0.2832132577896118,
+      "learning_rate": 0.0003259259259259259,
+      "loss": 0.0263,
+      "step": 230
+    },
+    {
+      "epoch": 3.434704830053667,
+      "grad_norm": 0.21412289142608643,
+      "learning_rate": 0.00032270531400966187,
+      "loss": 0.0287,
+      "step": 240
+    },
+    {
+      "epoch": 3.5778175313059033,
+      "grad_norm": 0.1840696483850479,
+      "learning_rate": 0.0003194847020933978,
+      "loss": 0.0469,
+      "step": 250
+    },
+    {
+      "epoch": 3.7209302325581395,
+      "grad_norm": 0.34246236085891724,
+      "learning_rate": 0.00031626409017713363,
+      "loss": 0.0243,
+      "step": 260
+    },
+    {
+      "epoch": 3.8640429338103757,
+      "grad_norm": 0.056173525750637054,
+      "learning_rate": 0.0003130434782608696,
+      "loss": 0.0252,
+      "step": 270
+    },
+    {
+      "epoch": 4.007155635062611,
+      "grad_norm": 0.09256428480148315,
+      "learning_rate": 0.0003098228663446055,
+      "loss": 0.0216,
+      "step": 280
+    },
+    {
+      "epoch": 4.150268336314848,
+      "grad_norm": 0.20085078477859497,
+      "learning_rate": 0.0003066022544283414,
+      "loss": 0.0102,
+      "step": 290
+    },
+    {
+      "epoch": 4.293381037567084,
+      "grad_norm": 0.021982286125421524,
+      "learning_rate": 0.0003033816425120773,
+      "loss": 0.0131,
+      "step": 300
+    },
+    {
+      "epoch": 4.43649373881932,
+      "grad_norm": 0.054368916898965836,
+      "learning_rate": 0.0003001610305958132,
+      "loss": 0.0145,
+      "step": 310
+    },
+    {
+      "epoch": 4.579606440071556,
+      "grad_norm": 0.0868581086397171,
+      "learning_rate": 0.00029694041867954913,
+      "loss": 0.0181,
+      "step": 320
+    },
+    {
+      "epoch": 4.722719141323792,
+      "grad_norm": 0.24308475852012634,
+      "learning_rate": 0.00029371980676328504,
+      "loss": 0.0125,
+      "step": 330
+    },
+    {
+      "epoch": 4.8658318425760285,
+      "grad_norm": 0.14394602179527283,
+      "learning_rate": 0.00029049919484702095,
+      "loss": 0.0149,
+      "step": 340
+    },
+    {
+      "epoch": 5.008944543828265,
+      "grad_norm": 0.05040862783789635,
+      "learning_rate": 0.00028727858293075686,
+      "loss": 0.0096,
+      "step": 350
+    },
+    {
+      "epoch": 5.152057245080501,
+      "grad_norm": 0.28047820925712585,
+      "learning_rate": 0.00028405797101449276,
+      "loss": 0.0032,
+      "step": 360
+    },
+    {
+      "epoch": 5.295169946332737,
+      "grad_norm": 0.07502233237028122,
+      "learning_rate": 0.0002808373590982287,
+      "loss": 0.0038,
+      "step": 370
+    },
+    {
+      "epoch": 5.438282647584973,
+      "grad_norm": 0.8537871837615967,
+      "learning_rate": 0.0002776167471819646,
+      "loss": 0.0073,
+      "step": 380
+    },
+    {
+      "epoch": 5.5813953488372094,
+      "grad_norm": 0.005727715790271759,
+      "learning_rate": 0.0002743961352657005,
+      "loss": 0.0106,
+      "step": 390
+    },
+    {
+      "epoch": 5.724508050089446,
+      "grad_norm": 0.04042937234044075,
+      "learning_rate": 0.0002711755233494364,
+      "loss": 0.0041,
+      "step": 400
+    },
+    {
+      "epoch": 5.867620751341682,
+      "grad_norm": 0.11248348653316498,
+      "learning_rate": 0.0002679549114331723,
+      "loss": 0.003,
+      "step": 410
+    },
+    {
+      "epoch": 6.010733452593918,
+      "grad_norm": 0.014976495876908302,
+      "learning_rate": 0.00026473429951690827,
+      "loss": 0.0067,
+      "step": 420
+    },
+    {
+      "epoch": 6.153846153846154,
+      "grad_norm": 0.15070898830890656,
+      "learning_rate": 0.0002615136876006441,
+      "loss": 0.0035,
+      "step": 430
+    },
+    {
+      "epoch": 6.29695885509839,
+      "grad_norm": 0.0066925715655088425,
+      "learning_rate": 0.00025829307568438003,
+      "loss": 0.0027,
+      "step": 440
+    },
+    {
+      "epoch": 6.440071556350626,
+      "grad_norm": 0.015314973890781403,
+      "learning_rate": 0.00025507246376811594,
+      "loss": 0.0045,
+      "step": 450
+    },
+    {
+      "epoch": 6.583184257602863,
+      "grad_norm": 0.030470581725239754,
+      "learning_rate": 0.00025185185185185185,
+      "loss": 0.0029,
+      "step": 460
+    },
+    {
+      "epoch": 6.726296958855098,
+      "grad_norm": 0.0092542115598917,
+      "learning_rate": 0.00024863123993558775,
+      "loss": 0.0043,
+      "step": 470
+    },
+    {
+      "epoch": 6.869409660107334,
+      "grad_norm": 0.016118695959448814,
+      "learning_rate": 0.00024541062801932366,
+      "loss": 0.0011,
+      "step": 480
+    },
+    {
+      "epoch": 7.01252236135957,
+      "grad_norm": 0.01760493591427803,
+      "learning_rate": 0.0002421900161030596,
+      "loss": 0.004,
+      "step": 490
+    },
+    {
+      "epoch": 7.155635062611807,
+      "grad_norm": 0.0077838534489274025,
+      "learning_rate": 0.0002389694041867955,
+      "loss": 0.0004,
+      "step": 500
+    },
+    {
+      "epoch": 7.298747763864043,
+      "grad_norm": 0.006766254547983408,
+      "learning_rate": 0.00023574879227053139,
+      "loss": 0.0015,
+      "step": 510
+    },
+    {
+      "epoch": 7.441860465116279,
+      "grad_norm": 0.009164445102214813,
+      "learning_rate": 0.00023252818035426732,
+      "loss": 0.0006,
+      "step": 520
+    },
+    {
+      "epoch": 7.584973166368515,
+      "grad_norm": 0.002783432835713029,
+      "learning_rate": 0.00022930756843800323,
+      "loss": 0.0005,
+      "step": 530
+    },
+    {
+      "epoch": 7.728085867620751,
+      "grad_norm": 0.15486985445022583,
+      "learning_rate": 0.0002260869565217391,
+      "loss": 0.0036,
+      "step": 540
+    },
+    {
+      "epoch": 7.8711985688729875,
+      "grad_norm": 0.042650897055864334,
+      "learning_rate": 0.00022286634460547507,
+      "loss": 0.0006,
+      "step": 550
+    },
+    {
+      "epoch": 8.014311270125223,
+      "grad_norm": 0.0018309111474081874,
+      "learning_rate": 0.00021964573268921095,
+      "loss": 0.0011,
+      "step": 560
+    },
+    {
+      "epoch": 8.15742397137746,
+      "grad_norm": 0.005560223013162613,
+      "learning_rate": 0.00021642512077294686,
+      "loss": 0.001,
+      "step": 570
+    },
+    {
+      "epoch": 8.300536672629695,
+      "grad_norm": 0.03727242350578308,
+      "learning_rate": 0.0002132045088566828,
+      "loss": 0.0015,
+      "step": 580
+    },
+    {
+      "epoch": 8.443649373881932,
+      "grad_norm": 0.34321093559265137,
+      "learning_rate": 0.00020998389694041868,
+      "loss": 0.0031,
+      "step": 590
+    },
+    {
+      "epoch": 8.586762075134168,
+      "grad_norm": 0.020942572504281998,
+      "learning_rate": 0.00020676328502415459,
+      "loss": 0.0022,
+      "step": 600
+    },
+    {
+      "epoch": 8.729874776386405,
+      "grad_norm": 0.010578208602964878,
+      "learning_rate": 0.00020354267310789052,
+      "loss": 0.0006,
+      "step": 610
+    },
+    {
+      "epoch": 8.87298747763864,
+      "grad_norm": 0.004323468543589115,
+      "learning_rate": 0.00020032206119162643,
+      "loss": 0.0005,
+      "step": 620
+    },
+    {
+      "epoch": 9.016100178890877,
+      "grad_norm": 0.1614646017551422,
+      "learning_rate": 0.00019710144927536234,
+      "loss": 0.0011,
+      "step": 630
+    },
+    {
+      "epoch": 9.159212880143112,
+      "grad_norm": 0.0012326347641646862,
+      "learning_rate": 0.00019388083735909825,
+      "loss": 0.0005,
+      "step": 640
+    },
+    {
+      "epoch": 9.30232558139535,
+      "grad_norm": 0.006543063558638096,
+      "learning_rate": 0.00019066022544283415,
+      "loss": 0.0002,
+      "step": 650
+    },
+    {
+      "epoch": 9.445438282647585,
+      "grad_norm": 0.21280421316623688,
+      "learning_rate": 0.00018743961352657006,
+      "loss": 0.0003,
+      "step": 660
+    },
+    {
+      "epoch": 9.588550983899822,
+      "grad_norm": 0.006327577400952578,
+      "learning_rate": 0.00018421900161030597,
+      "loss": 0.0002,
+      "step": 670
+    },
+    {
+      "epoch": 9.731663685152057,
+      "grad_norm": 0.0025285291485488415,
+      "learning_rate": 0.00018099838969404188,
+      "loss": 0.0001,
+      "step": 680
+    },
+    {
+      "epoch": 9.874776386404294,
+      "grad_norm": 0.0014309959951788187,
+      "learning_rate": 0.00017777777777777779,
+      "loss": 0.0001,
+      "step": 690
+    },
+    {
+      "epoch": 10.01788908765653,
+      "grad_norm": 0.0024150668177753687,
+      "learning_rate": 0.0001745571658615137,
+      "loss": 0.0001,
+      "step": 700
+    },
+    {
+      "epoch": 10.161001788908766,
+      "grad_norm": 0.0025761763099581003,
+      "learning_rate": 0.0001713365539452496,
+      "loss": 0.0001,
+      "step": 710
+    },
+    {
+      "epoch": 10.304114490161002,
+      "grad_norm": 0.0017020882805809379,
+      "learning_rate": 0.0001681159420289855,
+      "loss": 0.0001,
+      "step": 720
+    },
+    {
+      "epoch": 10.447227191413237,
+      "grad_norm": 0.0028596080373972654,
+      "learning_rate": 0.00016489533011272142,
+      "loss": 0.0001,
+      "step": 730
+    },
+    {
+      "epoch": 10.590339892665474,
+      "grad_norm": 0.0019378801807761192,
+      "learning_rate": 0.00016167471819645735,
+      "loss": 0.0001,
+      "step": 740
+    },
+    {
+      "epoch": 10.73345259391771,
+      "grad_norm": 0.001211544731631875,
+      "learning_rate": 0.00015845410628019323,
+      "loss": 0.0001,
+      "step": 750
+    },
+    {
+      "epoch": 10.876565295169947,
+      "grad_norm": 0.0033484594896435738,
+      "learning_rate": 0.00015523349436392914,
+      "loss": 0.0001,
+      "step": 760
+    },
+    {
+      "epoch": 11.019677996422182,
+      "grad_norm": 0.001493943389505148,
+      "learning_rate": 0.00015201288244766508,
+      "loss": 0.0001,
+      "step": 770
+    },
+    {
+      "epoch": 11.162790697674419,
+      "grad_norm": 0.0019909776747226715,
+      "learning_rate": 0.00014879227053140096,
+      "loss": 0.0001,
+      "step": 780
+    },
+    {
+      "epoch": 11.305903398926654,
+      "grad_norm": 0.0011982638388872147,
+      "learning_rate": 0.0001455716586151369,
+      "loss": 0.0001,
+      "step": 790
+    },
+    {
+      "epoch": 11.449016100178891,
+      "grad_norm": 0.0015958467265591025,
+      "learning_rate": 0.0001423510466988728,
+      "loss": 0.0001,
+      "step": 800
+    },
+    {
+      "epoch": 11.592128801431127,
+      "grad_norm": 0.0008461058023385704,
+      "learning_rate": 0.0001391304347826087,
+      "loss": 0.0001,
+      "step": 810
+    },
+    {
+      "epoch": 11.735241502683364,
+      "grad_norm": 0.0005576548865064979,
+      "learning_rate": 0.00013590982286634462,
+      "loss": 0.0001,
+      "step": 820
+    },
+    {
+      "epoch": 11.878354203935599,
+      "grad_norm": 0.0017713948618620634,
+      "learning_rate": 0.0001326892109500805,
+      "loss": 0.0001,
+      "step": 830
+    },
+    {
+      "epoch": 12.021466905187836,
+      "grad_norm": 0.001206880551762879,
+      "learning_rate": 0.00012946859903381643,
+      "loss": 0.0001,
+      "step": 840
+    },
+    {
+      "epoch": 12.164579606440071,
+      "grad_norm": 0.0013083606027066708,
+      "learning_rate": 0.00012624798711755234,
+      "loss": 0.0001,
+      "step": 850
+    },
+    {
+      "epoch": 12.307692307692308,
+      "grad_norm": 0.0008201482123695314,
+      "learning_rate": 0.00012302737520128825,
+      "loss": 0.0001,
+      "step": 860
+    },
+    {
+      "epoch": 12.450805008944544,
+      "grad_norm": 0.0006802495336160064,
+      "learning_rate": 0.00011980676328502416,
+      "loss": 0.0001,
+      "step": 870
+    },
+    {
+      "epoch": 12.59391771019678,
+      "grad_norm": 0.0017911783652380109,
+      "learning_rate": 0.00011658615136876008,
+      "loss": 0.0001,
+      "step": 880
+    },
+    {
+      "epoch": 12.737030411449016,
+      "grad_norm": 0.0007388959056697786,
+      "learning_rate": 0.00011336553945249598,
+      "loss": 0.0,
+      "step": 890
+    },
+    {
+      "epoch": 12.880143112701251,
+      "grad_norm": 0.0007727427291683853,
+      "learning_rate": 0.00011014492753623188,
+      "loss": 0.0001,
+      "step": 900
+    },
+    {
+      "epoch": 13.023255813953488,
+      "grad_norm": 0.0008818788919597864,
+      "learning_rate": 0.0001069243156199678,
+      "loss": 0.0001,
+      "step": 910
+    },
+    {
+      "epoch": 13.166368515205724,
+      "grad_norm": 0.0005572364898398519,
+      "learning_rate": 0.0001037037037037037,
+      "loss": 0.0,
+      "step": 920
+    },
+    {
+      "epoch": 13.30948121645796,
+      "grad_norm": 0.0009758470696397126,
+      "learning_rate": 0.00010048309178743962,
+      "loss": 0.0001,
+      "step": 930
+    },
+    {
+      "epoch": 13.452593917710196,
+      "grad_norm": 0.0003166435344610363,
+      "learning_rate": 9.726247987117553e-05,
+      "loss": 0.0001,
+      "step": 940
+    },
+    {
+      "epoch": 13.595706618962433,
+      "grad_norm": 0.0005005749990232289,
+      "learning_rate": 9.404186795491144e-05,
+      "loss": 0.0,
+      "step": 950
+    },
+    {
+      "epoch": 13.738819320214668,
+      "grad_norm": 0.0003304154670331627,
+      "learning_rate": 9.082125603864735e-05,
+      "loss": 0.0001,
+      "step": 960
+    },
+    {
+      "epoch": 13.881932021466906,
+      "grad_norm": 0.0005377003108151257,
+      "learning_rate": 8.760064412238325e-05,
+      "loss": 0.0001,
+      "step": 970
+    },
+    {
+      "epoch": 14.02504472271914,
+      "grad_norm": 0.0015913191018626094,
+      "learning_rate": 8.438003220611916e-05,
+      "loss": 0.0001,
+      "step": 980
+    },
+    {
+      "epoch": 14.168157423971378,
+      "grad_norm": 0.000676720985211432,
+      "learning_rate": 8.115942028985508e-05,
+      "loss": 0.0001,
+      "step": 990
+    },
+    {
+      "epoch": 14.311270125223613,
+      "grad_norm": 0.0007494900492019951,
+      "learning_rate": 7.793880837359099e-05,
+      "loss": 0.0001,
+      "step": 1000
+    },
+    {
+      "epoch": 14.45438282647585,
+      "grad_norm": 0.0015422647120431066,
+      "learning_rate": 7.47181964573269e-05,
+      "loss": 0.0,
+      "step": 1010
+    },
+    {
+      "epoch": 14.597495527728086,
+      "grad_norm": 0.0005012313486076891,
+      "learning_rate": 7.14975845410628e-05,
+      "loss": 0.0,
+      "step": 1020
+    },
+    {
+      "epoch": 14.740608228980323,
+      "grad_norm": 0.0008338551269844174,
+      "learning_rate": 6.827697262479872e-05,
+      "loss": 0.0,
+      "step": 1030
+    },
+    {
+      "epoch": 14.883720930232558,
+      "grad_norm": 0.0006810138584114611,
+      "learning_rate": 6.505636070853462e-05,
+      "loss": 0.0001,
+      "step": 1040
+    },
+    {
+      "epoch": 15.026833631484795,
+      "grad_norm": 0.00043299293611198664,
+      "learning_rate": 6.183574879227053e-05,
+      "loss": 0.0,
+      "step": 1050
+    },
+    {
+      "epoch": 15.16994633273703,
+      "grad_norm": 0.0005277034360915422,
+      "learning_rate": 5.861513687600645e-05,
+      "loss": 0.0,
+      "step": 1060
+    },
+    {
+      "epoch": 15.313059033989267,
+      "grad_norm": 0.0006858156993985176,
+      "learning_rate": 5.5394524959742355e-05,
+      "loss": 0.0001,
+      "step": 1070
+    },
+    {
+      "epoch": 15.456171735241503,
+      "grad_norm": 0.0008438636432401836,
+      "learning_rate": 5.217391304347826e-05,
+      "loss": 0.0,
+      "step": 1080
+    },
+    {
+      "epoch": 15.59928443649374,
+      "grad_norm": 0.0012173138093203306,
+      "learning_rate": 4.895330112721417e-05,
+      "loss": 0.0001,
+      "step": 1090
+    },
+    {
+      "epoch": 15.742397137745975,
+      "grad_norm": 0.002290137577801943,
+      "learning_rate": 4.573268921095008e-05,
+      "loss": 0.0,
+      "step": 1100
+    },
+    {
+      "epoch": 15.88550983899821,
+      "grad_norm": 0.0005496228695847094,
+      "learning_rate": 4.2512077294685994e-05,
+      "loss": 0.0,
+      "step": 1110
+    },
+    {
+      "epoch": 16.028622540250446,
+      "grad_norm": 0.0018827420426532626,
+      "learning_rate": 3.92914653784219e-05,
+      "loss": 0.0,
+      "step": 1120
+    },
+    {
+      "epoch": 16.171735241502684,
+      "grad_norm": 0.00045006562140770257,
+      "learning_rate": 3.607085346215781e-05,
+      "loss": 0.0001,
+      "step": 1130
+    },
+    {
+      "epoch": 16.31484794275492,
+      "grad_norm": 0.0005126325413584709,
+      "learning_rate": 3.2850241545893725e-05,
+      "loss": 0.0,
+      "step": 1140
+    },
+    {
+      "epoch": 16.457960644007155,
+      "grad_norm": 0.00035093360929749906,
+      "learning_rate": 2.962962962962963e-05,
+      "loss": 0.0,
+      "step": 1150
+    },
+    {
+      "epoch": 16.60107334525939,
+      "grad_norm": 0.0010109692811965942,
+      "learning_rate": 2.640901771336554e-05,
+      "loss": 0.0001,
+      "step": 1160
+    },
+    {
+      "epoch": 16.74418604651163,
+      "grad_norm": 0.0006910230731591582,
+      "learning_rate": 2.318840579710145e-05,
+      "loss": 0.0,
+      "step": 1170
+    },
+    {
+      "epoch": 16.887298747763865,
+      "grad_norm": 0.0004351095121819526,
+      "learning_rate": 1.996779388083736e-05,
+      "loss": 0.0,
+      "step": 1180
+    },
+    {
+      "epoch": 17.0304114490161,
+      "grad_norm": 0.0006468660430982709,
+      "learning_rate": 1.674718196457327e-05,
+      "loss": 0.0,
+      "step": 1190
+    },
+    {
+      "epoch": 17.173524150268335,
+      "grad_norm": 0.0002576902334112674,
+      "learning_rate": 1.3526570048309179e-05,
+      "loss": 0.0,
+      "step": 1200
+    },
+    {
+      "epoch": 17.316636851520574,
+      "grad_norm": 0.0010522498050704598,
+      "learning_rate": 1.0305958132045089e-05,
+      "loss": 0.0,
+      "step": 1210
+    },
+    {
+      "epoch": 17.45974955277281,
+      "grad_norm": 0.0007789513911120594,
+      "learning_rate": 7.0853462157809985e-06,
+      "loss": 0.0001,
+      "step": 1220
+    },
+    {
+      "epoch": 17.602862254025045,
+      "grad_norm": 0.0009570368565618992,
+      "learning_rate": 3.864734299516908e-06,
+      "loss": 0.0,
+      "step": 1230
+    },
+    {
+      "epoch": 17.74597495527728,
+      "grad_norm": 0.0009920781012624502,
+      "learning_rate": 6.44122383252818e-07,
+      "loss": 0.0,
+      "step": 1240
+    },
+    {
+      "epoch": 17.77459749552773,
+      "step": 1242,
+      "total_flos": 6.807580263736934e+16,
+      "train_loss": 0.056659956144777014,
+      "train_runtime": 9770.217,
+      "train_samples_per_second": 6.177,
+      "train_steps_per_second": 0.127
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1242,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 18,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6.807580263736934e+16,
+  "train_batch_size": 6,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ecb5c4e77493d6b6fd2ca53e6f13eaecbb140ab06a0f0c44421186f01576fb69
+size 6776