{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9763241396143519, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "accuracy": 19.3848, "active_queue_size": 16384.0, "cl_loss": 143.9761, "doc_norm": 8.3934, "encoder_q-embeddings": 37437.1328, "encoder_q-layer.0": 37769.1289, "encoder_q-layer.1": 36251.6484, "encoder_q-layer.10": 131884.6094, "encoder_q-layer.11": 85479.7734, "encoder_q-layer.2": 42067.3477, "encoder_q-layer.3": 49364.9062, "encoder_q-layer.4": 61561.1133, "encoder_q-layer.5": 73419.1719, "encoder_q-layer.6": 100834.1094, "encoder_q-layer.7": 115997.6562, "encoder_q-layer.8": 147833.3125, "encoder_q-layer.9": 116039.5859, "epoch": 0.0, "inbatch_neg_score": 41.1677, "inbatch_pos_score": 50.125, "learning_rate": 5.000000000000001e-07, "loss": 143.9761, "norm_diff": 0.5342, "norm_loss": 0.0, "num_token_doc": 66.8438, "num_token_overlap": 17.8607, "num_token_query": 52.4162, "num_token_union": 73.8019, "num_word_context": 202.6037, "num_word_doc": 49.888, "num_word_query": 39.9875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 119809.051, "preclip_grad_norm_avg": 0.0011, "q@queue_neg_score": 41.25, "query_norm": 7.8592, "queue_k_norm": 8.4228, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4162, "sent_len_1": 66.8438, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6337, "stdk": 0.1794, "stdq": 0.1711, "stdqueue_k": 0.1804, "stdqueue_q": 0.0, "step": 100 }, { "accuracy": 20.5566, "active_queue_size": 16384.0, "cl_loss": 87.1614, "doc_norm": 8.3461, "encoder_q-embeddings": 7572.73, "encoder_q-layer.0": 6348.5815, "encoder_q-layer.1": 7499.3281, "encoder_q-layer.10": 24949.7227, "encoder_q-layer.11": 32268.1035, "encoder_q-layer.2": 8739.1758, "encoder_q-layer.3": 10144.6309, "encoder_q-layer.4": 10954.3916, "encoder_q-layer.5": 12374.4561, "encoder_q-layer.6": 15309.6885, "encoder_q-layer.7": 16770.2031, "encoder_q-layer.8": 21894.332, "encoder_q-layer.9": 17483.3652, "epoch": 0.0, "inbatch_neg_score": 35.8475, "inbatch_pos_score": 41.0, "learning_rate": 1.0000000000000002e-06, "loss": 87.1614, "norm_diff": 1.4092, "norm_loss": 0.0, "num_token_doc": 66.7478, "num_token_overlap": 17.8232, "num_token_query": 52.2447, "num_token_union": 73.7002, "num_word_context": 202.2404, "num_word_doc": 49.7925, "num_word_query": 39.8357, "postclip_grad_norm": 1.0, "preclip_grad_norm": 24215.9167, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 35.875, "query_norm": 6.9369, "queue_k_norm": 8.3337, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2447, "sent_len_1": 66.7478, "sent_len_max_0": 128.0, "sent_len_max_1": 211.3075, "stdk": 0.1783, "stdq": 0.12, "stdqueue_k": 0.1779, "stdqueue_q": 0.0, "step": 200 }, { "accuracy": 23.8281, "active_queue_size": 16384.0, "cl_loss": 54.78, "doc_norm": 8.1626, "encoder_q-embeddings": 5941.9263, "encoder_q-layer.0": 5246.1064, "encoder_q-layer.1": 6168.3667, "encoder_q-layer.10": 14029.9209, "encoder_q-layer.11": 23951.3613, "encoder_q-layer.2": 7189.0957, "encoder_q-layer.3": 7299.7451, "encoder_q-layer.4": 7483.7778, "encoder_q-layer.5": 7494.1094, "encoder_q-layer.6": 9546.1211, "encoder_q-layer.7": 9878.3848, "encoder_q-layer.8": 11207.6436, "encoder_q-layer.9": 9195.1523, "epoch": 0.0, "inbatch_neg_score": 32.2067, "inbatch_pos_score": 35.8125, "learning_rate": 1.5e-06, "loss": 54.78, "norm_diff": 1.5687, "norm_loss": 0.0, "num_token_doc": 66.8102, "num_token_overlap": 17.8063, "num_token_query": 52.0737, "num_token_union": 73.606, "num_word_context": 202.0414, "num_word_doc": 49.8404, "num_word_query": 39.7122, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16796.3611, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 32.2188, "query_norm": 6.5939, "queue_k_norm": 8.1813, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0737, "sent_len_1": 66.8102, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1225, "stdk": 0.1723, "stdq": 0.1037, "stdqueue_k": 0.1725, "stdqueue_q": 0.0, "step": 300 }, { "accuracy": 26.5137, "active_queue_size": 16384.0, "cl_loss": 37.3909, "doc_norm": 7.9936, "encoder_q-embeddings": 5052.5112, "encoder_q-layer.0": 3884.4167, "encoder_q-layer.1": 4603.7891, "encoder_q-layer.10": 11303.5146, "encoder_q-layer.11": 16221.3223, "encoder_q-layer.2": 5112.3647, "encoder_q-layer.3": 5908.2852, "encoder_q-layer.4": 6894.2666, "encoder_q-layer.5": 7143.8584, "encoder_q-layer.6": 7916.3433, "encoder_q-layer.7": 8298.9678, "encoder_q-layer.8": 9349.4482, "encoder_q-layer.9": 8540.0742, "epoch": 0.0, "inbatch_neg_score": 27.5097, "inbatch_pos_score": 30.0, "learning_rate": 2.0000000000000003e-06, "loss": 37.3909, "norm_diff": 1.6667, "norm_loss": 0.0, "num_token_doc": 66.8151, "num_token_overlap": 17.8514, "num_token_query": 52.3432, "num_token_union": 73.7487, "num_word_context": 202.2909, "num_word_doc": 49.8592, "num_word_query": 39.9008, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11750.1482, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 27.5312, "query_norm": 6.3269, "queue_k_norm": 7.9984, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3432, "sent_len_1": 66.8151, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7962, "stdk": 0.1667, "stdq": 0.0961, "stdqueue_k": 0.1663, "stdqueue_q": 0.0, "step": 400 }, { "accuracy": 29.1504, "active_queue_size": 16384.0, "cl_loss": 27.6966, "doc_norm": 7.7957, "encoder_q-embeddings": 7151.5161, "encoder_q-layer.0": 6244.5361, "encoder_q-layer.1": 7181.3252, "encoder_q-layer.10": 11974.2598, "encoder_q-layer.11": 15312.0352, "encoder_q-layer.2": 7842.9033, "encoder_q-layer.3": 9008.2139, "encoder_q-layer.4": 9710.9688, "encoder_q-layer.5": 11171.6055, "encoder_q-layer.6": 10388.9004, "encoder_q-layer.7": 9452.6279, "encoder_q-layer.8": 9617.9961, "encoder_q-layer.9": 7036.9673, "epoch": 0.0, "inbatch_neg_score": 23.1329, "inbatch_pos_score": 25.1719, "learning_rate": 2.5e-06, "loss": 27.6966, "norm_diff": 1.8394, "norm_loss": 0.0, "num_token_doc": 66.6141, "num_token_overlap": 17.7845, "num_token_query": 52.2252, "num_token_union": 73.576, "num_word_context": 202.0131, "num_word_doc": 49.6743, "num_word_query": 39.8295, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13777.8779, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 23.0938, "query_norm": 5.9564, "queue_k_norm": 7.8151, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2252, "sent_len_1": 66.6141, "sent_len_max_0": 128.0, "sent_len_max_1": 210.29, "stdk": 0.16, "stdq": 0.0923, "stdqueue_k": 0.1614, "stdqueue_q": 0.0, "step": 500 }, { "accuracy": 26.416, "active_queue_size": 16384.0, "cl_loss": 23.1615, "doc_norm": 7.6096, "encoder_q-embeddings": 5079.4897, "encoder_q-layer.0": 4487.584, "encoder_q-layer.1": 5139.3457, "encoder_q-layer.10": 9901.4082, "encoder_q-layer.11": 15366.9668, "encoder_q-layer.2": 6123.3584, "encoder_q-layer.3": 6817.7905, "encoder_q-layer.4": 7747.9355, "encoder_q-layer.5": 7976.3379, "encoder_q-layer.6": 8152.5684, "encoder_q-layer.7": 7683.3281, "encoder_q-layer.8": 8188.4731, "encoder_q-layer.9": 6660.2754, "epoch": 0.01, "inbatch_neg_score": 19.8884, "inbatch_pos_score": 21.5938, "learning_rate": 3e-06, "loss": 23.1615, "norm_diff": 2.2312, "norm_loss": 0.0, "num_token_doc": 66.7901, "num_token_overlap": 17.7871, "num_token_query": 52.1305, "num_token_union": 73.6341, "num_word_context": 202.122, "num_word_doc": 49.8496, "num_word_query": 39.736, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11390.1015, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 19.8906, "query_norm": 5.3784, "queue_k_norm": 7.6228, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1305, "sent_len_1": 66.7901, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6062, "stdk": 0.1548, "stdq": 0.0878, "stdqueue_k": 0.1554, "stdqueue_q": 0.0, "step": 600 }, { "accuracy": 26.0742, "active_queue_size": 16384.0, "cl_loss": 19.8919, "doc_norm": 7.4096, "encoder_q-embeddings": 5866.0142, "encoder_q-layer.0": 4761.9956, "encoder_q-layer.1": 5924.4878, "encoder_q-layer.10": 7740.1807, "encoder_q-layer.11": 10437.1172, "encoder_q-layer.2": 6763.1626, "encoder_q-layer.3": 7132.272, "encoder_q-layer.4": 7661.1763, "encoder_q-layer.5": 7683.3428, "encoder_q-layer.6": 7238.6553, "encoder_q-layer.7": 6234.5728, "encoder_q-layer.8": 6348.8833, "encoder_q-layer.9": 4747.5034, "epoch": 0.01, "inbatch_neg_score": 15.5062, "inbatch_pos_score": 16.9219, "learning_rate": 3.5000000000000004e-06, "loss": 19.8919, "norm_diff": 2.7011, "norm_loss": 0.0, "num_token_doc": 66.7165, "num_token_overlap": 17.7919, "num_token_query": 52.2371, "num_token_union": 73.6538, "num_word_context": 202.1831, "num_word_doc": 49.7898, "num_word_query": 39.8536, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9882.2675, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 15.4844, "query_norm": 4.7085, "queue_k_norm": 7.4284, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2371, "sent_len_1": 66.7165, "sent_len_max_0": 128.0, "sent_len_max_1": 208.635, "stdk": 0.1481, "stdq": 0.0842, "stdqueue_k": 0.1496, "stdqueue_q": 0.0, "step": 700 }, { "accuracy": 26.3184, "active_queue_size": 16384.0, "cl_loss": 16.6507, "doc_norm": 7.2424, "encoder_q-embeddings": 5894.1279, "encoder_q-layer.0": 4658.0156, "encoder_q-layer.1": 6013.0273, "encoder_q-layer.10": 8516.5518, "encoder_q-layer.11": 12082.0176, "encoder_q-layer.2": 6832.7402, "encoder_q-layer.3": 7812.3994, "encoder_q-layer.4": 8436.9814, "encoder_q-layer.5": 7858.5479, "encoder_q-layer.6": 6800.4604, "encoder_q-layer.7": 5584.2158, "encoder_q-layer.8": 6087.9404, "encoder_q-layer.9": 4428.3652, "epoch": 0.01, "inbatch_neg_score": 10.9545, "inbatch_pos_score": 12.0859, "learning_rate": 4.000000000000001e-06, "loss": 16.6507, "norm_diff": 3.4383, "norm_loss": 0.0, "num_token_doc": 66.6348, "num_token_overlap": 17.7467, "num_token_query": 52.2238, "num_token_union": 73.6288, "num_word_context": 202.012, "num_word_doc": 49.7013, "num_word_query": 39.8492, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10449.3388, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 10.9453, "query_norm": 3.8041, "queue_k_norm": 7.2452, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2238, "sent_len_1": 66.6348, "sent_len_max_0": 128.0, "sent_len_max_1": 211.6225, "stdk": 0.1436, "stdq": 0.0763, "stdqueue_k": 0.1442, "stdqueue_q": 0.0, "step": 800 }, { "accuracy": 27.0508, "active_queue_size": 16384.0, "cl_loss": 13.8582, "doc_norm": 7.0462, "encoder_q-embeddings": 6980.6421, "encoder_q-layer.0": 6239.7925, "encoder_q-layer.1": 7277.9478, "encoder_q-layer.10": 12947.7178, "encoder_q-layer.11": 14239.6318, "encoder_q-layer.2": 8207.1797, "encoder_q-layer.3": 8684.5518, "encoder_q-layer.4": 9305.8154, "encoder_q-layer.5": 10050.7666, "encoder_q-layer.6": 8847.3965, "encoder_q-layer.7": 7737.2046, "encoder_q-layer.8": 7510.4761, "encoder_q-layer.9": 5483.3467, "epoch": 0.01, "inbatch_neg_score": 6.4045, "inbatch_pos_score": 7.3828, "learning_rate": 4.5e-06, "loss": 13.8582, "norm_diff": 4.1359, "norm_loss": 0.0, "num_token_doc": 66.511, "num_token_overlap": 17.7935, "num_token_query": 52.3168, "num_token_union": 73.5923, "num_word_context": 201.9228, "num_word_doc": 49.6375, "num_word_query": 39.926, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12994.3727, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 6.4062, "query_norm": 2.9103, "queue_k_norm": 7.0651, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3168, "sent_len_1": 66.511, "sent_len_max_0": 128.0, "sent_len_max_1": 206.7975, "stdk": 0.1377, "stdq": 0.0675, "stdqueue_k": 0.1384, "stdqueue_q": 0.0, "step": 900 }, { "accuracy": 25.4395, "active_queue_size": 16384.0, "cl_loss": 11.5561, "doc_norm": 6.8916, "encoder_q-embeddings": 6925.0566, "encoder_q-layer.0": 5469.7061, "encoder_q-layer.1": 6838.3701, "encoder_q-layer.10": 9847.0195, "encoder_q-layer.11": 12938.9336, "encoder_q-layer.2": 7929.1401, "encoder_q-layer.3": 8519.292, "encoder_q-layer.4": 9550.7705, "encoder_q-layer.5": 9664.623, "encoder_q-layer.6": 8440.3848, "encoder_q-layer.7": 6166.6138, "encoder_q-layer.8": 6698.5425, "encoder_q-layer.9": 4001.8887, "epoch": 0.01, "inbatch_neg_score": 3.8311, "inbatch_pos_score": 4.6094, "learning_rate": 5e-06, "loss": 11.5561, "norm_diff": 4.6812, "norm_loss": 0.0, "num_token_doc": 66.9162, "num_token_overlap": 17.8404, "num_token_query": 52.2656, "num_token_union": 73.7705, "num_word_context": 202.4473, "num_word_doc": 49.9301, "num_word_query": 39.8648, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11857.5712, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 3.8242, "query_norm": 2.2104, "queue_k_norm": 6.8865, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2656, "sent_len_1": 66.9162, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2413, "stdk": 0.1323, "stdq": 0.0588, "stdqueue_k": 0.1324, "stdqueue_q": 0.0, "step": 1000 }, { "accuracy": 25.4883, "active_queue_size": 16384.0, "cl_loss": 10.0946, "doc_norm": 6.7195, "encoder_q-embeddings": 6948.8242, "encoder_q-layer.0": 6005.979, "encoder_q-layer.1": 7405.2637, "encoder_q-layer.10": 8701.6348, "encoder_q-layer.11": 12370.917, "encoder_q-layer.2": 8749.8037, "encoder_q-layer.3": 9889.0322, "encoder_q-layer.4": 13377.2617, "encoder_q-layer.5": 15220.0889, "encoder_q-layer.6": 14424.0078, "encoder_q-layer.7": 11545.4229, "encoder_q-layer.8": 12800.7988, "encoder_q-layer.9": 3909.9026, "epoch": 0.01, "inbatch_neg_score": 3.4733, "inbatch_pos_score": 4.1797, "learning_rate": 5.500000000000001e-06, "loss": 10.0946, "norm_diff": 4.7476, "norm_loss": 0.0, "num_token_doc": 66.644, "num_token_overlap": 17.8685, "num_token_query": 52.3583, "num_token_union": 73.6515, "num_word_context": 202.0601, "num_word_doc": 49.7134, "num_word_query": 39.9271, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15070.1505, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 3.4727, "query_norm": 1.972, "queue_k_norm": 6.7367, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3583, "sent_len_1": 66.644, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8438, "stdk": 0.1269, "stdq": 0.0551, "stdqueue_k": 0.1277, "stdqueue_q": 0.0, "step": 1100 }, { "accuracy": 25.4883, "active_queue_size": 16384.0, "cl_loss": 9.2818, "doc_norm": 6.5984, "encoder_q-embeddings": 3917.2444, "encoder_q-layer.0": 3093.9143, "encoder_q-layer.1": 3860.46, "encoder_q-layer.10": 5650.1943, "encoder_q-layer.11": 10105.0664, "encoder_q-layer.2": 4481.7446, "encoder_q-layer.3": 5140.1616, "encoder_q-layer.4": 5553.0405, "encoder_q-layer.5": 6188.9268, "encoder_q-layer.6": 5088.3892, "encoder_q-layer.7": 3802.8469, "encoder_q-layer.8": 3695.5798, "encoder_q-layer.9": 2833.7637, "epoch": 0.01, "inbatch_neg_score": 1.6214, "inbatch_pos_score": 2.2852, "learning_rate": 6e-06, "loss": 9.2818, "norm_diff": 4.8057, "norm_loss": 0.0, "num_token_doc": 66.8744, "num_token_overlap": 17.826, "num_token_query": 52.3217, "num_token_union": 73.789, "num_word_context": 202.4602, "num_word_doc": 49.8845, "num_word_query": 39.8761, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7462.2106, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.626, "query_norm": 1.7927, "queue_k_norm": 6.5862, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3217, "sent_len_1": 66.8744, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5825, "stdk": 0.1232, "stdq": 0.053, "stdqueue_k": 0.1223, "stdqueue_q": 0.0, "step": 1200 }, { "accuracy": 24.9512, "active_queue_size": 16384.0, "cl_loss": 8.5997, "doc_norm": 6.4494, "encoder_q-embeddings": 5559.5957, "encoder_q-layer.0": 4792.5049, "encoder_q-layer.1": 6218.1216, "encoder_q-layer.10": 6282.2954, "encoder_q-layer.11": 10978.1494, "encoder_q-layer.2": 7633.1777, "encoder_q-layer.3": 8255.0967, "encoder_q-layer.4": 9658.3008, "encoder_q-layer.5": 11655.4951, "encoder_q-layer.6": 10862.668, "encoder_q-layer.7": 9465.708, "encoder_q-layer.8": 11291.1523, "encoder_q-layer.9": 3448.0962, "epoch": 0.01, "inbatch_neg_score": 2.6785, "inbatch_pos_score": 3.3281, "learning_rate": 6.5000000000000004e-06, "loss": 8.5997, "norm_diff": 4.6974, "norm_loss": 0.0, "num_token_doc": 66.869, "num_token_overlap": 17.8392, "num_token_query": 52.3123, "num_token_union": 73.7347, "num_word_context": 202.3513, "num_word_doc": 49.9147, "num_word_query": 39.9114, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12065.8826, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 2.6758, "query_norm": 1.7519, "queue_k_norm": 6.4503, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3123, "sent_len_1": 66.869, "sent_len_max_0": 128.0, "sent_len_max_1": 209.525, "stdk": 0.1174, "stdq": 0.0487, "stdqueue_k": 0.1174, "stdqueue_q": 0.0, "step": 1300 }, { "accuracy": 27.6855, "active_queue_size": 16384.0, "cl_loss": 8.2026, "doc_norm": 6.3224, "encoder_q-embeddings": 5855.3208, "encoder_q-layer.0": 4928.1792, "encoder_q-layer.1": 5779.1182, "encoder_q-layer.10": 6941.9683, "encoder_q-layer.11": 10208.1484, "encoder_q-layer.2": 6962.9624, "encoder_q-layer.3": 7630.0498, "encoder_q-layer.4": 8217.7197, "encoder_q-layer.5": 9300.6279, "encoder_q-layer.6": 7395.001, "encoder_q-layer.7": 6806.9722, "encoder_q-layer.8": 7554.8618, "encoder_q-layer.9": 3626.4343, "epoch": 0.01, "inbatch_neg_score": 1.7818, "inbatch_pos_score": 2.4297, "learning_rate": 7.000000000000001e-06, "loss": 8.2026, "norm_diff": 4.6247, "norm_loss": 0.0, "num_token_doc": 66.6371, "num_token_overlap": 17.7943, "num_token_query": 52.2892, "num_token_union": 73.6287, "num_word_context": 202.13, "num_word_doc": 49.708, "num_word_query": 39.8775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10270.8736, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.7754, "query_norm": 1.6976, "queue_k_norm": 6.3314, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2892, "sent_len_1": 66.6371, "sent_len_max_0": 128.0, "sent_len_max_1": 209.57, "stdk": 0.1124, "stdq": 0.0488, "stdqueue_k": 0.1133, "stdqueue_q": 0.0, "step": 1400 }, { "accuracy": 28.418, "active_queue_size": 16384.0, "cl_loss": 7.8807, "doc_norm": 6.1885, "encoder_q-embeddings": 4571.6919, "encoder_q-layer.0": 3579.1438, "encoder_q-layer.1": 4640.0117, "encoder_q-layer.10": 5031.2803, "encoder_q-layer.11": 8562.7236, "encoder_q-layer.2": 5440.8467, "encoder_q-layer.3": 5928.0352, "encoder_q-layer.4": 6808.3511, "encoder_q-layer.5": 6512.3672, "encoder_q-layer.6": 4942.7783, "encoder_q-layer.7": 3938.5625, "encoder_q-layer.8": 3597.6855, "encoder_q-layer.9": 2724.0229, "epoch": 0.01, "inbatch_neg_score": 1.0136, "inbatch_pos_score": 1.625, "learning_rate": 7.5e-06, "loss": 7.8807, "norm_diff": 4.523, "norm_loss": 0.0, "num_token_doc": 66.7865, "num_token_overlap": 17.8542, "num_token_query": 52.399, "num_token_union": 73.7693, "num_word_context": 202.4445, "num_word_doc": 49.8336, "num_word_query": 39.9593, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7552.4651, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.0156, "query_norm": 1.6655, "queue_k_norm": 6.2035, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.399, "sent_len_1": 66.7865, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5163, "stdk": 0.108, "stdq": 0.0482, "stdqueue_k": 0.1082, "stdqueue_q": 0.0, "step": 1500 }, { "accuracy": 30.3223, "active_queue_size": 16384.0, "cl_loss": 7.5946, "doc_norm": 6.0778, "encoder_q-embeddings": 4983.5713, "encoder_q-layer.0": 3880.0967, "encoder_q-layer.1": 4635.4131, "encoder_q-layer.10": 3973.6309, "encoder_q-layer.11": 7561.71, "encoder_q-layer.2": 5517.2764, "encoder_q-layer.3": 5841.021, "encoder_q-layer.4": 6182.0205, "encoder_q-layer.5": 6138.5483, "encoder_q-layer.6": 5180.8804, "encoder_q-layer.7": 4632.3618, "encoder_q-layer.8": 4864.0718, "encoder_q-layer.9": 2600.9751, "epoch": 0.02, "inbatch_neg_score": 1.4434, "inbatch_pos_score": 2.0645, "learning_rate": 8.000000000000001e-06, "loss": 7.5946, "norm_diff": 4.4243, "norm_loss": 0.0, "num_token_doc": 66.8431, "num_token_overlap": 17.8041, "num_token_query": 52.2555, "num_token_union": 73.72, "num_word_context": 202.3706, "num_word_doc": 49.8682, "num_word_query": 39.8425, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7601.4295, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4482, "query_norm": 1.6536, "queue_k_norm": 6.0802, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2555, "sent_len_1": 66.8431, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0125, "stdk": 0.1041, "stdq": 0.0464, "stdqueue_k": 0.1038, "stdqueue_q": 0.0, "step": 1600 }, { "accuracy": 27.1973, "active_queue_size": 16384.0, "cl_loss": 7.4558, "doc_norm": 5.9407, "encoder_q-embeddings": 5814.751, "encoder_q-layer.0": 4883.6411, "encoder_q-layer.1": 5808.7832, "encoder_q-layer.10": 8694.6602, "encoder_q-layer.11": 11267.9023, "encoder_q-layer.2": 6769.4385, "encoder_q-layer.3": 7192.9419, "encoder_q-layer.4": 7553.1406, "encoder_q-layer.5": 8174.5723, "encoder_q-layer.6": 6006.9336, "encoder_q-layer.7": 4990.3154, "encoder_q-layer.8": 4792.9268, "encoder_q-layer.9": 5095.0166, "epoch": 0.02, "inbatch_neg_score": 0.4478, "inbatch_pos_score": 1.0205, "learning_rate": 8.500000000000002e-06, "loss": 7.4558, "norm_diff": 4.2921, "norm_loss": 0.0, "num_token_doc": 66.8173, "num_token_overlap": 17.7754, "num_token_query": 52.2165, "num_token_union": 73.7347, "num_word_context": 202.3079, "num_word_doc": 49.8414, "num_word_query": 39.8314, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9695.5757, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4468, "query_norm": 1.6485, "queue_k_norm": 5.9597, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2165, "sent_len_1": 66.8173, "sent_len_max_0": 128.0, "sent_len_max_1": 208.615, "stdk": 0.0995, "stdq": 0.0463, "stdqueue_k": 0.1002, "stdqueue_q": 0.0, "step": 1700 }, { "accuracy": 29.6387, "active_queue_size": 16384.0, "cl_loss": 7.3194, "doc_norm": 5.8217, "encoder_q-embeddings": 5825.3701, "encoder_q-layer.0": 4623.6348, "encoder_q-layer.1": 5611.2749, "encoder_q-layer.10": 5625.2266, "encoder_q-layer.11": 8248.5, "encoder_q-layer.2": 6283.001, "encoder_q-layer.3": 6518.7139, "encoder_q-layer.4": 7222.0186, "encoder_q-layer.5": 7975.2314, "encoder_q-layer.6": 7407.3882, "encoder_q-layer.7": 6946.1294, "encoder_q-layer.8": 6984.1709, "encoder_q-layer.9": 3592.012, "epoch": 0.02, "inbatch_neg_score": 0.0974, "inbatch_pos_score": 0.6758, "learning_rate": 9e-06, "loss": 7.3194, "norm_diff": 4.1666, "norm_loss": 0.0, "num_token_doc": 66.6446, "num_token_overlap": 17.7478, "num_token_query": 52.2005, "num_token_union": 73.6008, "num_word_context": 201.7966, "num_word_doc": 49.7198, "num_word_query": 39.8093, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9234.896, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0941, "query_norm": 1.6551, "queue_k_norm": 5.8287, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2005, "sent_len_1": 66.6446, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2438, "stdk": 0.0958, "stdq": 0.047, "stdqueue_k": 0.0964, "stdqueue_q": 0.0, "step": 1800 }, { "accuracy": 30.3223, "active_queue_size": 16384.0, "cl_loss": 7.0781, "doc_norm": 5.6869, "encoder_q-embeddings": 3047.5493, "encoder_q-layer.0": 2385.3447, "encoder_q-layer.1": 2850.2722, "encoder_q-layer.10": 6561.7236, "encoder_q-layer.11": 8623.8818, "encoder_q-layer.2": 3347.7217, "encoder_q-layer.3": 3693.5754, "encoder_q-layer.4": 4060.8708, "encoder_q-layer.5": 4385.3091, "encoder_q-layer.6": 3490.7144, "encoder_q-layer.7": 3419.145, "encoder_q-layer.8": 3793.5037, "encoder_q-layer.9": 3699.417, "epoch": 0.02, "inbatch_neg_score": 0.6749, "inbatch_pos_score": 1.2588, "learning_rate": 9.5e-06, "loss": 7.0781, "norm_diff": 4.0536, "norm_loss": 0.0, "num_token_doc": 66.6425, "num_token_overlap": 17.7583, "num_token_query": 52.1424, "num_token_union": 73.6188, "num_word_context": 202.1937, "num_word_doc": 49.7561, "num_word_query": 39.7814, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5998.8252, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6782, "query_norm": 1.6333, "queue_k_norm": 5.6798, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1424, "sent_len_1": 66.6425, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6087, "stdk": 0.0929, "stdq": 0.047, "stdqueue_k": 0.0926, "stdqueue_q": 0.0, "step": 1900 }, { "accuracy": 31.543, "active_queue_size": 16384.0, "cl_loss": 6.892, "doc_norm": 5.5241, "encoder_q-embeddings": 2832.614, "encoder_q-layer.0": 2114.5664, "encoder_q-layer.1": 2650.0244, "encoder_q-layer.10": 3504.0867, "encoder_q-layer.11": 5775.0459, "encoder_q-layer.2": 3335.4194, "encoder_q-layer.3": 3507.0459, "encoder_q-layer.4": 4165.9937, "encoder_q-layer.5": 4652.3765, "encoder_q-layer.6": 5222.8462, "encoder_q-layer.7": 5460.3354, "encoder_q-layer.8": 5023.9321, "encoder_q-layer.9": 2592.27, "epoch": 0.02, "inbatch_neg_score": 0.3027, "inbatch_pos_score": 0.8701, "learning_rate": 1e-05, "loss": 6.892, "norm_diff": 3.8635, "norm_loss": 0.0, "num_token_doc": 66.7768, "num_token_overlap": 17.8041, "num_token_query": 52.2643, "num_token_union": 73.7036, "num_word_context": 202.4628, "num_word_doc": 49.8095, "num_word_query": 39.849, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5723.7852, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3, "query_norm": 1.6606, "queue_k_norm": 5.5196, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2643, "sent_len_1": 66.7768, "sent_len_max_0": 128.0, "sent_len_max_1": 209.995, "stdk": 0.0884, "stdq": 0.0483, "stdqueue_k": 0.089, "stdqueue_q": 0.0, "step": 2000 }, { "accuracy": 31.2012, "active_queue_size": 16384.0, "cl_loss": 6.8581, "doc_norm": 5.3254, "encoder_q-embeddings": 6215.8467, "encoder_q-layer.0": 5227.5713, "encoder_q-layer.1": 6725.0181, "encoder_q-layer.10": 12987.4512, "encoder_q-layer.11": 16159.6865, "encoder_q-layer.2": 8306.0732, "encoder_q-layer.3": 10903.998, "encoder_q-layer.4": 15267.0918, "encoder_q-layer.5": 19880.4805, "encoder_q-layer.6": 22481.0938, "encoder_q-layer.7": 25129.834, "encoder_q-layer.8": 23500.166, "encoder_q-layer.9": 14162.5449, "epoch": 0.02, "inbatch_neg_score": 1.1904, "inbatch_pos_score": 1.7812, "learning_rate": 1.05e-05, "loss": 6.8581, "norm_diff": 3.6116, "norm_loss": 0.0, "num_token_doc": 66.8092, "num_token_overlap": 17.8098, "num_token_query": 52.2369, "num_token_union": 73.6773, "num_word_context": 201.979, "num_word_doc": 49.8326, "num_word_query": 39.8165, "postclip_grad_norm": 1.0, "preclip_grad_norm": 21659.9258, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 1.1865, "query_norm": 1.7137, "queue_k_norm": 5.3432, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2369, "sent_len_1": 66.8092, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0637, "stdk": 0.085, "stdq": 0.0492, "stdqueue_k": 0.0857, "stdqueue_q": 0.0, "step": 2100 }, { "accuracy": 32.3242, "active_queue_size": 16384.0, "cl_loss": 6.7216, "doc_norm": 5.1276, "encoder_q-embeddings": 3851.1951, "encoder_q-layer.0": 3126.9343, "encoder_q-layer.1": 3908.6709, "encoder_q-layer.10": 23959.1484, "encoder_q-layer.11": 24493.4785, "encoder_q-layer.2": 5178.6401, "encoder_q-layer.3": 6328.1045, "encoder_q-layer.4": 8932.7607, "encoder_q-layer.5": 11598.2217, "encoder_q-layer.6": 14876.3496, "encoder_q-layer.7": 17911.2695, "encoder_q-layer.8": 19108.3145, "encoder_q-layer.9": 17509.9121, "epoch": 0.02, "inbatch_neg_score": 0.4622, "inbatch_pos_score": 1.0322, "learning_rate": 1.1000000000000001e-05, "loss": 6.7216, "norm_diff": 3.4432, "norm_loss": 0.0, "num_token_doc": 66.7536, "num_token_overlap": 17.8166, "num_token_query": 52.2789, "num_token_union": 73.723, "num_word_context": 202.4563, "num_word_doc": 49.8399, "num_word_query": 39.8658, "postclip_grad_norm": 1.0, "preclip_grad_norm": 18980.6168, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.46, "query_norm": 1.6844, "queue_k_norm": 5.146, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2789, "sent_len_1": 66.7536, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9837, "stdk": 0.0823, "stdq": 0.0492, "stdqueue_k": 0.0828, "stdqueue_q": 0.0, "step": 2200 }, { "accuracy": 30.4688, "active_queue_size": 16384.0, "cl_loss": 6.4371, "doc_norm": 4.9234, "encoder_q-embeddings": 4051.0078, "encoder_q-layer.0": 3396.6489, "encoder_q-layer.1": 3844.5347, "encoder_q-layer.10": 12879.5771, "encoder_q-layer.11": 13298.3799, "encoder_q-layer.2": 4692.3311, "encoder_q-layer.3": 5359.041, "encoder_q-layer.4": 7052.5342, "encoder_q-layer.5": 9082.5244, "encoder_q-layer.6": 11437.7051, "encoder_q-layer.7": 13338.8223, "encoder_q-layer.8": 14028.3027, "encoder_q-layer.9": 11617.9365, "epoch": 0.02, "inbatch_neg_score": 0.3313, "inbatch_pos_score": 0.8691, "learning_rate": 1.1500000000000002e-05, "loss": 6.4371, "norm_diff": 3.2581, "norm_loss": 0.0, "num_token_doc": 66.879, "num_token_overlap": 17.8183, "num_token_query": 52.4096, "num_token_union": 73.8881, "num_word_context": 202.6816, "num_word_doc": 49.9293, "num_word_query": 39.9835, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12980.7664, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3306, "query_norm": 1.6652, "queue_k_norm": 4.9422, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4096, "sent_len_1": 66.879, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8775, "stdk": 0.0796, "stdq": 0.049, "stdqueue_k": 0.0798, "stdqueue_q": 0.0, "step": 2300 }, { "accuracy": 33.1543, "active_queue_size": 16384.0, "cl_loss": 6.2334, "doc_norm": 4.7045, "encoder_q-embeddings": 2665.3843, "encoder_q-layer.0": 2191.7581, "encoder_q-layer.1": 2971.9331, "encoder_q-layer.10": 13075.0449, "encoder_q-layer.11": 13306.8633, "encoder_q-layer.2": 3876.8401, "encoder_q-layer.3": 4614.0254, "encoder_q-layer.4": 6346.1416, "encoder_q-layer.5": 8266.5264, "encoder_q-layer.6": 10053.1758, "encoder_q-layer.7": 12138.5107, "encoder_q-layer.8": 12210.1416, "encoder_q-layer.9": 11369.3916, "epoch": 0.02, "inbatch_neg_score": 0.425, "inbatch_pos_score": 0.9888, "learning_rate": 1.2e-05, "loss": 6.2334, "norm_diff": 3.0221, "norm_loss": 0.0, "num_token_doc": 66.8896, "num_token_overlap": 17.827, "num_token_query": 52.3263, "num_token_union": 73.7654, "num_word_context": 202.1414, "num_word_doc": 49.8838, "num_word_query": 39.9152, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11796.6842, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4253, "query_norm": 1.6824, "queue_k_norm": 4.7314, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3263, "sent_len_1": 66.8896, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3438, "stdk": 0.0775, "stdq": 0.0494, "stdqueue_k": 0.0775, "stdqueue_q": 0.0, "step": 2400 }, { "accuracy": 34.0332, "active_queue_size": 16384.0, "cl_loss": 6.3035, "doc_norm": 4.5143, "encoder_q-embeddings": 2957.7427, "encoder_q-layer.0": 2541.1594, "encoder_q-layer.1": 3580.313, "encoder_q-layer.10": 15054.6504, "encoder_q-layer.11": 15391.0879, "encoder_q-layer.2": 5029.8745, "encoder_q-layer.3": 5969.3765, "encoder_q-layer.4": 8731.4863, "encoder_q-layer.5": 11397.125, "encoder_q-layer.6": 15404.3125, "encoder_q-layer.7": 18686.2988, "encoder_q-layer.8": 19523.7598, "encoder_q-layer.9": 16491.3164, "epoch": 0.02, "inbatch_neg_score": 0.6818, "inbatch_pos_score": 1.2441, "learning_rate": 1.25e-05, "loss": 6.3035, "norm_diff": 2.8141, "norm_loss": 0.0, "num_token_doc": 66.6353, "num_token_overlap": 17.7978, "num_token_query": 52.259, "num_token_union": 73.6443, "num_word_context": 202.0455, "num_word_doc": 49.7275, "num_word_query": 39.8859, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16655.2242, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.6777, "query_norm": 1.7002, "queue_k_norm": 4.5107, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.259, "sent_len_1": 66.6353, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8787, "stdk": 0.0746, "stdq": 0.0494, "stdqueue_k": 0.0753, "stdqueue_q": 0.0, "step": 2500 }, { "accuracy": 31.6406, "active_queue_size": 16384.0, "cl_loss": 6.0593, "doc_norm": 4.2743, "encoder_q-embeddings": 2996.9902, "encoder_q-layer.0": 2643.3542, "encoder_q-layer.1": 3950.7659, "encoder_q-layer.10": 38908.0039, "encoder_q-layer.11": 32495.9648, "encoder_q-layer.2": 5455.1929, "encoder_q-layer.3": 6870.2012, "encoder_q-layer.4": 10079.4873, "encoder_q-layer.5": 14182.3281, "encoder_q-layer.6": 19444.2637, "encoder_q-layer.7": 24582.2051, "encoder_q-layer.8": 26918.043, "encoder_q-layer.9": 29444.8594, "epoch": 0.03, "inbatch_neg_score": 0.699, "inbatch_pos_score": 1.2432, "learning_rate": 1.3000000000000001e-05, "loss": 6.0593, "norm_diff": 2.6056, "norm_loss": 0.0, "num_token_doc": 66.9714, "num_token_overlap": 17.8105, "num_token_query": 52.295, "num_token_union": 73.842, "num_word_context": 202.5249, "num_word_doc": 49.9588, "num_word_query": 39.9162, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26718.7908, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.6978, "query_norm": 1.6687, "queue_k_norm": 4.3058, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.295, "sent_len_1": 66.9714, "sent_len_max_0": 128.0, "sent_len_max_1": 211.3925, "stdk": 0.0723, "stdq": 0.0493, "stdqueue_k": 0.0729, "stdqueue_q": 0.0, "step": 2600 }, { "accuracy": 33.0078, "active_queue_size": 16384.0, "cl_loss": 5.8914, "doc_norm": 4.1054, "encoder_q-embeddings": 2847.5325, "encoder_q-layer.0": 2477.2297, "encoder_q-layer.1": 3450.3152, "encoder_q-layer.10": 21713.1074, "encoder_q-layer.11": 18043.4043, "encoder_q-layer.2": 4899.5161, "encoder_q-layer.3": 5688.6128, "encoder_q-layer.4": 8283.0996, "encoder_q-layer.5": 11439.5371, "encoder_q-layer.6": 14934.8047, "encoder_q-layer.7": 18457.8457, "encoder_q-layer.8": 19402.7734, "encoder_q-layer.9": 19730.2266, "epoch": 0.03, "inbatch_neg_score": 0.4985, "inbatch_pos_score": 1.0518, "learning_rate": 1.3500000000000001e-05, "loss": 5.8914, "norm_diff": 2.425, "norm_loss": 0.0, "num_token_doc": 66.7617, "num_token_overlap": 17.8258, "num_token_query": 52.3563, "num_token_union": 73.7332, "num_word_context": 202.2718, "num_word_doc": 49.7892, "num_word_query": 39.9214, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17979.8742, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.5005, "query_norm": 1.6804, "queue_k_norm": 4.11, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3563, "sent_len_1": 66.7617, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3587, "stdk": 0.0715, "stdq": 0.0499, "stdqueue_k": 0.0709, "stdqueue_q": 0.0, "step": 2700 }, { "accuracy": 32.2266, "active_queue_size": 16384.0, "cl_loss": 5.7814, "doc_norm": 3.904, "encoder_q-embeddings": 2816.0876, "encoder_q-layer.0": 2451.3533, "encoder_q-layer.1": 3736.3918, "encoder_q-layer.10": 26450.8047, "encoder_q-layer.11": 22455.375, "encoder_q-layer.2": 5276.7441, "encoder_q-layer.3": 6828.8042, "encoder_q-layer.4": 10190.3701, "encoder_q-layer.5": 14302.002, "encoder_q-layer.6": 19348.1387, "encoder_q-layer.7": 24084.9609, "encoder_q-layer.8": 24889.4141, "encoder_q-layer.9": 24377.5801, "epoch": 0.03, "inbatch_neg_score": 0.7, "inbatch_pos_score": 1.2383, "learning_rate": 1.4000000000000001e-05, "loss": 5.7814, "norm_diff": 2.2154, "norm_loss": 0.0, "num_token_doc": 66.6407, "num_token_overlap": 17.7912, "num_token_query": 52.266, "num_token_union": 73.6734, "num_word_context": 202.3062, "num_word_doc": 49.7462, "num_word_query": 39.8728, "postclip_grad_norm": 1.0, "preclip_grad_norm": 22656.8947, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.6973, "query_norm": 1.6886, "queue_k_norm": 3.9174, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.266, "sent_len_1": 66.6407, "sent_len_max_0": 128.0, "sent_len_max_1": 208.535, "stdk": 0.0689, "stdq": 0.0489, "stdqueue_k": 0.0693, "stdqueue_q": 0.0, "step": 2800 }, { "accuracy": 36.1328, "active_queue_size": 16384.0, "cl_loss": 5.742, "doc_norm": 3.735, "encoder_q-embeddings": 1911.0933, "encoder_q-layer.0": 1601.4347, "encoder_q-layer.1": 1995.5969, "encoder_q-layer.10": 8157.0444, "encoder_q-layer.11": 9689.3428, "encoder_q-layer.2": 2580.8628, "encoder_q-layer.3": 2932.1438, "encoder_q-layer.4": 3900.0732, "encoder_q-layer.5": 4908.4536, "encoder_q-layer.6": 6089.6938, "encoder_q-layer.7": 6396.1055, "encoder_q-layer.8": 6857.498, "encoder_q-layer.9": 6522.7632, "epoch": 0.03, "inbatch_neg_score": 0.5313, "inbatch_pos_score": 1.1035, "learning_rate": 1.45e-05, "loss": 5.742, "norm_diff": 2.0062, "norm_loss": 0.0, "num_token_doc": 66.6978, "num_token_overlap": 17.7642, "num_token_query": 52.155, "num_token_union": 73.6426, "num_word_context": 202.0571, "num_word_doc": 49.7621, "num_word_query": 39.7731, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7481.408, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5283, "query_norm": 1.7288, "queue_k_norm": 3.7391, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.155, "sent_len_1": 66.6978, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2625, "stdk": 0.0673, "stdq": 0.0508, "stdqueue_k": 0.0677, "stdqueue_q": 0.0, "step": 2900 }, { "accuracy": 33.7402, "active_queue_size": 16384.0, "cl_loss": 5.8604, "doc_norm": 3.5843, "encoder_q-embeddings": 4181.1938, "encoder_q-layer.0": 3603.9624, "encoder_q-layer.1": 5071.792, "encoder_q-layer.10": 40466.2266, "encoder_q-layer.11": 30196.8105, "encoder_q-layer.2": 7164.1982, "encoder_q-layer.3": 8426.4043, "encoder_q-layer.4": 12303.3975, "encoder_q-layer.5": 17532.6758, "encoder_q-layer.6": 23881.1016, "encoder_q-layer.7": 31186.0801, "encoder_q-layer.8": 33972.6523, "encoder_q-layer.9": 34927.9766, "epoch": 0.03, "inbatch_neg_score": 0.4177, "inbatch_pos_score": 0.9683, "learning_rate": 1.5e-05, "loss": 5.8604, "norm_diff": 1.8636, "norm_loss": 0.0, "num_token_doc": 66.7205, "num_token_overlap": 17.7605, "num_token_query": 52.2485, "num_token_union": 73.7607, "num_word_context": 202.4155, "num_word_doc": 49.8149, "num_word_query": 39.8685, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30833.6514, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.4148, "query_norm": 1.7206, "queue_k_norm": 3.5768, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2485, "sent_len_1": 66.7205, "sent_len_max_0": 128.0, "sent_len_max_1": 206.6513, "stdk": 0.0659, "stdq": 0.0501, "stdqueue_k": 0.0662, "stdqueue_q": 0.0, "step": 3000 }, { "accuracy": 35.791, "active_queue_size": 16384.0, "cl_loss": 5.7415, "doc_norm": 3.4275, "encoder_q-embeddings": 3936.364, "encoder_q-layer.0": 3533.0015, "encoder_q-layer.1": 5146.835, "encoder_q-layer.10": 38081.4062, "encoder_q-layer.11": 28515.0039, "encoder_q-layer.2": 7657.2256, "encoder_q-layer.3": 9308.7969, "encoder_q-layer.4": 13392.8477, "encoder_q-layer.5": 19531.4219, "encoder_q-layer.6": 27102.1699, "encoder_q-layer.7": 35091.6328, "encoder_q-layer.8": 37827.9727, "encoder_q-layer.9": 37079.2695, "epoch": 0.03, "inbatch_neg_score": 0.4669, "inbatch_pos_score": 1.0391, "learning_rate": 1.55e-05, "loss": 5.7415, "norm_diff": 1.6628, "norm_loss": 0.0, "num_token_doc": 66.7162, "num_token_overlap": 17.7944, "num_token_query": 52.3582, "num_token_union": 73.734, "num_word_context": 202.3965, "num_word_doc": 49.7863, "num_word_query": 39.9346, "postclip_grad_norm": 1.0, "preclip_grad_norm": 32647.2003, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.4663, "query_norm": 1.7647, "queue_k_norm": 3.4264, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3582, "sent_len_1": 66.7162, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4913, "stdk": 0.0648, "stdq": 0.0513, "stdqueue_k": 0.0651, "stdqueue_q": 0.0, "step": 3100 }, { "accuracy": 35.6445, "active_queue_size": 16384.0, "cl_loss": 5.7174, "doc_norm": 3.2913, "encoder_q-embeddings": 4262.0518, "encoder_q-layer.0": 4135.5698, "encoder_q-layer.1": 6200.4346, "encoder_q-layer.10": 54128.9336, "encoder_q-layer.11": 39806.0703, "encoder_q-layer.2": 9517.9502, "encoder_q-layer.3": 11451.3721, "encoder_q-layer.4": 16764.0059, "encoder_q-layer.5": 24477.3945, "encoder_q-layer.6": 32865.7383, "encoder_q-layer.7": 42106.3594, "encoder_q-layer.8": 45781.1992, "encoder_q-layer.9": 48417.7812, "epoch": 0.03, "inbatch_neg_score": 0.8683, "inbatch_pos_score": 1.4512, "learning_rate": 1.6000000000000003e-05, "loss": 5.7174, "norm_diff": 1.5314, "norm_loss": 0.0, "num_token_doc": 66.6713, "num_token_overlap": 17.7587, "num_token_query": 52.2722, "num_token_union": 73.638, "num_word_context": 202.0429, "num_word_doc": 49.7152, "num_word_query": 39.8597, "postclip_grad_norm": 1.0, "preclip_grad_norm": 41584.8065, "preclip_grad_norm_avg": 0.0004, "q@queue_neg_score": 0.8687, "query_norm": 1.76, "queue_k_norm": 3.2934, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2722, "sent_len_1": 66.6713, "sent_len_max_0": 128.0, "sent_len_max_1": 209.665, "stdk": 0.0639, "stdq": 0.0516, "stdqueue_k": 0.0639, "stdqueue_q": 0.0, "step": 3200 }, { "accuracy": 35.1562, "active_queue_size": 16384.0, "cl_loss": 5.6526, "doc_norm": 3.1537, "encoder_q-embeddings": 3142.7322, "encoder_q-layer.0": 2747.6816, "encoder_q-layer.1": 3854.345, "encoder_q-layer.10": 25457.5781, "encoder_q-layer.11": 17431.4473, "encoder_q-layer.2": 5617.2969, "encoder_q-layer.3": 6341.6523, "encoder_q-layer.4": 8690.624, "encoder_q-layer.5": 12682.4258, "encoder_q-layer.6": 16603.5566, "encoder_q-layer.7": 20931.6348, "encoder_q-layer.8": 22678.0801, "encoder_q-layer.9": 23035.5781, "epoch": 0.03, "inbatch_neg_score": 0.5226, "inbatch_pos_score": 1.0742, "learning_rate": 1.65e-05, "loss": 5.6526, "norm_diff": 1.3817, "norm_loss": 0.0, "num_token_doc": 66.751, "num_token_overlap": 17.8036, "num_token_query": 52.1533, "num_token_union": 73.5942, "num_word_context": 202.1857, "num_word_doc": 49.8252, "num_word_query": 39.7775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 20316.9678, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.5215, "query_norm": 1.772, "queue_k_norm": 3.1578, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1533, "sent_len_1": 66.751, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6287, "stdk": 0.0629, "stdq": 0.0513, "stdqueue_k": 0.063, "stdqueue_q": 0.0, "step": 3300 }, { "accuracy": 36.1816, "active_queue_size": 16384.0, "cl_loss": 5.5808, "doc_norm": 3.0429, "encoder_q-embeddings": 4624.7754, "encoder_q-layer.0": 4363.9731, "encoder_q-layer.1": 6423.2905, "encoder_q-layer.10": 63763.6406, "encoder_q-layer.11": 46041.0625, "encoder_q-layer.2": 9636.083, "encoder_q-layer.3": 11731.332, "encoder_q-layer.4": 16911.8828, "encoder_q-layer.5": 24856.1211, "encoder_q-layer.6": 34507.2344, "encoder_q-layer.7": 45196.2695, "encoder_q-layer.8": 51135.082, "encoder_q-layer.9": 54889.9688, "epoch": 0.03, "inbatch_neg_score": 0.517, "inbatch_pos_score": 1.0811, "learning_rate": 1.7000000000000003e-05, "loss": 5.5808, "norm_diff": 1.2659, "norm_loss": 0.0, "num_token_doc": 66.8657, "num_token_overlap": 17.8241, "num_token_query": 52.2818, "num_token_union": 73.7428, "num_word_context": 202.3677, "num_word_doc": 49.8917, "num_word_query": 39.8832, "postclip_grad_norm": 1.0, "preclip_grad_norm": 46482.7217, "preclip_grad_norm_avg": 0.0004, "q@queue_neg_score": 0.5161, "query_norm": 1.7769, "queue_k_norm": 3.0446, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2818, "sent_len_1": 66.8657, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0712, "stdk": 0.0622, "stdq": 0.0515, "stdqueue_k": 0.0622, "stdqueue_q": 0.0, "step": 3400 }, { "accuracy": 35.6934, "active_queue_size": 16384.0, "cl_loss": 5.4962, "doc_norm": 2.9191, "encoder_q-embeddings": 3581.2793, "encoder_q-layer.0": 3506.2725, "encoder_q-layer.1": 5281.2217, "encoder_q-layer.10": 61527.2734, "encoder_q-layer.11": 43693.082, "encoder_q-layer.2": 8058.2681, "encoder_q-layer.3": 10071.6934, "encoder_q-layer.4": 14761.1738, "encoder_q-layer.5": 21626.4941, "encoder_q-layer.6": 30058.4316, "encoder_q-layer.7": 40291.0391, "encoder_q-layer.8": 46049.9648, "encoder_q-layer.9": 51478.0586, "epoch": 0.03, "inbatch_neg_score": 0.9005, "inbatch_pos_score": 1.4766, "learning_rate": 1.75e-05, "loss": 5.4962, "norm_diff": 1.1422, "norm_loss": 0.0, "num_token_doc": 66.8013, "num_token_overlap": 17.8615, "num_token_query": 52.4106, "num_token_union": 73.8075, "num_word_context": 202.4096, "num_word_doc": 49.8576, "num_word_query": 39.9905, "postclip_grad_norm": 1.0, "preclip_grad_norm": 42564.1591, "preclip_grad_norm_avg": 0.0004, "q@queue_neg_score": 0.9019, "query_norm": 1.777, "queue_k_norm": 2.9334, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4106, "sent_len_1": 66.8013, "sent_len_max_0": 128.0, "sent_len_max_1": 207.7138, "stdk": 0.0611, "stdq": 0.0513, "stdqueue_k": 0.0613, "stdqueue_q": 0.0, "step": 3500 }, { "accuracy": 35.5957, "active_queue_size": 16384.0, "cl_loss": 5.4766, "doc_norm": 2.8338, "encoder_q-embeddings": 2524.3059, "encoder_q-layer.0": 2197.0862, "encoder_q-layer.1": 3028.6787, "encoder_q-layer.10": 26943.998, "encoder_q-layer.11": 20070.1191, "encoder_q-layer.2": 4221.8604, "encoder_q-layer.3": 4963.8594, "encoder_q-layer.4": 7016.354, "encoder_q-layer.5": 10246.5527, "encoder_q-layer.6": 13654.373, "encoder_q-layer.7": 17804.9746, "encoder_q-layer.8": 20927.7891, "encoder_q-layer.9": 22939.6367, "epoch": 0.04, "inbatch_neg_score": 0.6274, "inbatch_pos_score": 1.1689, "learning_rate": 1.8e-05, "loss": 5.4766, "norm_diff": 1.0756, "norm_loss": 0.0, "num_token_doc": 66.7321, "num_token_overlap": 17.7524, "num_token_query": 52.1626, "num_token_union": 73.6467, "num_word_context": 202.0648, "num_word_doc": 49.7748, "num_word_query": 39.7923, "postclip_grad_norm": 1.0, "preclip_grad_norm": 19276.1053, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.6289, "query_norm": 1.7582, "queue_k_norm": 2.8254, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1626, "sent_len_1": 66.7321, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8562, "stdk": 0.0609, "stdq": 0.0512, "stdqueue_k": 0.0606, "stdqueue_q": 0.0, "step": 3600 }, { "accuracy": 36.1328, "active_queue_size": 16384.0, "cl_loss": 5.3432, "doc_norm": 2.7306, "encoder_q-embeddings": 1940.938, "encoder_q-layer.0": 1728.3113, "encoder_q-layer.1": 2443.2625, "encoder_q-layer.10": 28814.6387, "encoder_q-layer.11": 21358.1055, "encoder_q-layer.2": 3551.2188, "encoder_q-layer.3": 4279.5278, "encoder_q-layer.4": 6325.6641, "encoder_q-layer.5": 9269.0244, "encoder_q-layer.6": 12787.0283, "encoder_q-layer.7": 17557.332, "encoder_q-layer.8": 20658.666, "encoder_q-layer.9": 23338.9375, "epoch": 0.04, "inbatch_neg_score": 0.5513, "inbatch_pos_score": 1.1104, "learning_rate": 1.85e-05, "loss": 5.3432, "norm_diff": 0.9862, "norm_loss": 0.0, "num_token_doc": 66.7947, "num_token_overlap": 17.8274, "num_token_query": 52.3066, "num_token_union": 73.7267, "num_word_context": 202.5141, "num_word_doc": 49.872, "num_word_query": 39.8979, "postclip_grad_norm": 1.0, "preclip_grad_norm": 19448.2529, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.5513, "query_norm": 1.7444, "queue_k_norm": 2.7318, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3066, "sent_len_1": 66.7947, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0337, "stdk": 0.0602, "stdq": 0.0499, "stdqueue_k": 0.0598, "stdqueue_q": 0.0, "step": 3700 }, { "accuracy": 35.498, "active_queue_size": 16384.0, "cl_loss": 5.2839, "doc_norm": 2.6456, "encoder_q-embeddings": 3219.2358, "encoder_q-layer.0": 2909.1289, "encoder_q-layer.1": 4384.6748, "encoder_q-layer.10": 58390.3789, "encoder_q-layer.11": 44168.5508, "encoder_q-layer.2": 6643.6265, "encoder_q-layer.3": 7857.0498, "encoder_q-layer.4": 11356.7422, "encoder_q-layer.5": 17242.8516, "encoder_q-layer.6": 24051.8672, "encoder_q-layer.7": 33140.2461, "encoder_q-layer.8": 38936.3594, "encoder_q-layer.9": 45578.3789, "epoch": 0.04, "inbatch_neg_score": 0.9001, "inbatch_pos_score": 1.4668, "learning_rate": 1.9e-05, "loss": 5.2839, "norm_diff": 0.8827, "norm_loss": 0.0, "num_token_doc": 67.0024, "num_token_overlap": 17.814, "num_token_query": 52.417, "num_token_union": 73.9285, "num_word_context": 202.8075, "num_word_doc": 49.9609, "num_word_query": 39.9817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 37932.5461, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.8955, "query_norm": 1.7629, "queue_k_norm": 2.6465, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.417, "sent_len_1": 67.0024, "sent_len_max_0": 128.0, "sent_len_max_1": 210.17, "stdk": 0.0591, "stdq": 0.0501, "stdqueue_k": 0.0594, "stdqueue_q": 0.0, "step": 3800 }, { "accuracy": 38.916, "active_queue_size": 16384.0, "cl_loss": 5.1788, "doc_norm": 2.5564, "encoder_q-embeddings": 2381.9268, "encoder_q-layer.0": 2125.0713, "encoder_q-layer.1": 2422.2551, "encoder_q-layer.10": 20465.6309, "encoder_q-layer.11": 15379.8086, "encoder_q-layer.2": 2973.8096, "encoder_q-layer.3": 3250.7852, "encoder_q-layer.4": 4270.1807, "encoder_q-layer.5": 6398.2759, "encoder_q-layer.6": 8516.1543, "encoder_q-layer.7": 11490.9766, "encoder_q-layer.8": 13239.2861, "encoder_q-layer.9": 15730.25, "epoch": 0.04, "inbatch_neg_score": 0.5883, "inbatch_pos_score": 1.1602, "learning_rate": 1.9500000000000003e-05, "loss": 5.1788, "norm_diff": 0.8214, "norm_loss": 0.0, "num_token_doc": 66.8172, "num_token_overlap": 17.8488, "num_token_query": 52.3636, "num_token_union": 73.7554, "num_word_context": 202.397, "num_word_doc": 49.8579, "num_word_query": 39.9388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13393.6519, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5869, "query_norm": 1.7351, "queue_k_norm": 2.5581, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3636, "sent_len_1": 66.8172, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9812, "stdk": 0.0592, "stdq": 0.05, "stdqueue_k": 0.0589, "stdqueue_q": 0.0, "step": 3900 }, { "accuracy": 39.0625, "active_queue_size": 16384.0, "cl_loss": 5.0063, "doc_norm": 2.4751, "encoder_q-embeddings": 1936.1016, "encoder_q-layer.0": 1719.1364, "encoder_q-layer.1": 2179.8525, "encoder_q-layer.10": 19634.9941, "encoder_q-layer.11": 17065.7891, "encoder_q-layer.2": 2908.0972, "encoder_q-layer.3": 3356.9446, "encoder_q-layer.4": 4496.8428, "encoder_q-layer.5": 6443.0308, "encoder_q-layer.6": 9194.8975, "encoder_q-layer.7": 12808.7129, "encoder_q-layer.8": 14550.7627, "encoder_q-layer.9": 15960.6611, "epoch": 0.04, "inbatch_neg_score": 0.6709, "inbatch_pos_score": 1.2324, "learning_rate": 2e-05, "loss": 5.0063, "norm_diff": 0.7853, "norm_loss": 0.0, "num_token_doc": 67.0482, "num_token_overlap": 17.8454, "num_token_query": 52.3171, "num_token_union": 73.8594, "num_word_context": 202.6795, "num_word_doc": 50.0493, "num_word_query": 39.9193, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13920.028, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.668, "query_norm": 1.6898, "queue_k_norm": 2.4743, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3171, "sent_len_1": 67.0482, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8313, "stdk": 0.058, "stdq": 0.0494, "stdqueue_k": 0.0582, "stdqueue_q": 0.0, "step": 4000 }, { "accuracy": 39.3555, "active_queue_size": 16384.0, "cl_loss": 4.8788, "doc_norm": 2.3935, "encoder_q-embeddings": 3527.4817, "encoder_q-layer.0": 3117.8015, "encoder_q-layer.1": 4400.0107, "encoder_q-layer.10": 55974.6055, "encoder_q-layer.11": 47996.5117, "encoder_q-layer.2": 6230.4448, "encoder_q-layer.3": 7314.7363, "encoder_q-layer.4": 10614.3779, "encoder_q-layer.5": 16050.2256, "encoder_q-layer.6": 22352.1211, "encoder_q-layer.7": 30952.7695, "encoder_q-layer.8": 37004.4883, "encoder_q-layer.9": 44665.2539, "epoch": 0.04, "inbatch_neg_score": 0.6734, "inbatch_pos_score": 1.2471, "learning_rate": 2.05e-05, "loss": 4.8788, "norm_diff": 0.7156, "norm_loss": 0.0, "num_token_doc": 66.8402, "num_token_overlap": 17.8238, "num_token_query": 52.2073, "num_token_union": 73.6849, "num_word_context": 202.1354, "num_word_doc": 49.8678, "num_word_query": 39.8029, "postclip_grad_norm": 1.0, "preclip_grad_norm": 37438.2463, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.6724, "query_norm": 1.6779, "queue_k_norm": 2.399, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2073, "sent_len_1": 66.8402, "sent_len_max_0": 128.0, "sent_len_max_1": 207.0975, "stdk": 0.0577, "stdq": 0.0483, "stdqueue_k": 0.0577, "stdqueue_q": 0.0, "step": 4100 }, { "accuracy": 40.0879, "active_queue_size": 16384.0, "cl_loss": 4.8338, "doc_norm": 2.3316, "encoder_q-embeddings": 4392.3398, "encoder_q-layer.0": 3894.4331, "encoder_q-layer.1": 5400.21, "encoder_q-layer.10": 61710.3633, "encoder_q-layer.11": 51779.3203, "encoder_q-layer.2": 7313.4766, "encoder_q-layer.3": 8674.6533, "encoder_q-layer.4": 12295.8369, "encoder_q-layer.5": 19405.8828, "encoder_q-layer.6": 26414.4746, "encoder_q-layer.7": 35791.7383, "encoder_q-layer.8": 42280.8125, "encoder_q-layer.9": 49045.6602, "epoch": 0.04, "inbatch_neg_score": 0.5847, "inbatch_pos_score": 1.1484, "learning_rate": 2.1e-05, "loss": 4.8338, "norm_diff": 0.6653, "norm_loss": 0.0, "num_token_doc": 66.8028, "num_token_overlap": 17.7955, "num_token_query": 52.352, "num_token_union": 73.7857, "num_word_context": 202.4143, "num_word_doc": 49.8781, "num_word_query": 39.9381, "postclip_grad_norm": 1.0, "preclip_grad_norm": 42076.3335, "preclip_grad_norm_avg": 0.0004, "q@queue_neg_score": 0.582, "query_norm": 1.6663, "queue_k_norm": 2.3292, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.352, "sent_len_1": 66.8028, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1113, "stdk": 0.0572, "stdq": 0.0482, "stdqueue_k": 0.0573, "stdqueue_q": 0.0, "step": 4200 }, { "accuracy": 41.9922, "active_queue_size": 16384.0, "cl_loss": 4.7352, "doc_norm": 2.2596, "encoder_q-embeddings": 2626.4822, "encoder_q-layer.0": 2136.4575, "encoder_q-layer.1": 2500.6079, "encoder_q-layer.10": 24156.7715, "encoder_q-layer.11": 22179.457, "encoder_q-layer.2": 3028.3777, "encoder_q-layer.3": 3297.134, "encoder_q-layer.4": 3808.8184, "encoder_q-layer.5": 4699.0273, "encoder_q-layer.6": 6027.8862, "encoder_q-layer.7": 9429.4619, "encoder_q-layer.8": 12968.5059, "encoder_q-layer.9": 18132.3945, "epoch": 0.04, "inbatch_neg_score": 0.7029, "inbatch_pos_score": 1.2842, "learning_rate": 2.15e-05, "loss": 4.7352, "norm_diff": 0.5883, "norm_loss": 0.0, "num_token_doc": 66.8967, "num_token_overlap": 17.7886, "num_token_query": 52.2455, "num_token_union": 73.7865, "num_word_context": 202.3161, "num_word_doc": 49.9029, "num_word_query": 39.8463, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15295.4241, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7007, "query_norm": 1.6713, "queue_k_norm": 2.2622, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2455, "sent_len_1": 66.8967, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5613, "stdk": 0.0567, "stdq": 0.0488, "stdqueue_k": 0.0568, "stdqueue_q": 0.0, "step": 4300 }, { "accuracy": 42.5293, "active_queue_size": 16384.0, "cl_loss": 4.6092, "doc_norm": 2.2001, "encoder_q-embeddings": 5181.561, "encoder_q-layer.0": 4179.2739, "encoder_q-layer.1": 4948.4771, "encoder_q-layer.10": 27309.6133, "encoder_q-layer.11": 26768.7324, "encoder_q-layer.2": 6279.7173, "encoder_q-layer.3": 6887.7363, "encoder_q-layer.4": 8134.6602, "encoder_q-layer.5": 12156.2188, "encoder_q-layer.6": 16088.8164, "encoder_q-layer.7": 20103.6641, "encoder_q-layer.8": 22315.9043, "encoder_q-layer.9": 23306.627, "epoch": 0.04, "inbatch_neg_score": 0.579, "inbatch_pos_score": 1.1582, "learning_rate": 2.2000000000000003e-05, "loss": 4.6092, "norm_diff": 0.5569, "norm_loss": 0.0, "num_token_doc": 67.0009, "num_token_overlap": 17.8805, "num_token_query": 52.27, "num_token_union": 73.7961, "num_word_context": 202.6364, "num_word_doc": 50.0092, "num_word_query": 39.8687, "postclip_grad_norm": 1.0, "preclip_grad_norm": 21982.9023, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.5762, "query_norm": 1.6432, "queue_k_norm": 2.1987, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.27, "sent_len_1": 67.0009, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1525, "stdk": 0.0562, "stdq": 0.0486, "stdqueue_k": 0.0563, "stdqueue_q": 0.0, "step": 4400 }, { "accuracy": 44.2871, "active_queue_size": 16384.0, "cl_loss": 4.4921, "doc_norm": 2.1336, "encoder_q-embeddings": 2262.3696, "encoder_q-layer.0": 1834.944, "encoder_q-layer.1": 2173.9807, "encoder_q-layer.10": 14389.2988, "encoder_q-layer.11": 13485.6572, "encoder_q-layer.2": 2676.7212, "encoder_q-layer.3": 2945.6973, "encoder_q-layer.4": 3310.0886, "encoder_q-layer.5": 4358.9502, "encoder_q-layer.6": 5718.9795, "encoder_q-layer.7": 8118.0688, "encoder_q-layer.8": 10245.0254, "encoder_q-layer.9": 12584.6426, "epoch": 0.04, "inbatch_neg_score": 0.542, "inbatch_pos_score": 1.1191, "learning_rate": 2.25e-05, "loss": 4.4921, "norm_diff": 0.5251, "norm_loss": 0.0, "num_token_doc": 66.6767, "num_token_overlap": 17.7848, "num_token_query": 52.3052, "num_token_union": 73.6976, "num_word_context": 202.3518, "num_word_doc": 49.7367, "num_word_query": 39.8923, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10466.5184, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.542, "query_norm": 1.6085, "queue_k_norm": 2.1381, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3052, "sent_len_1": 66.6767, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1875, "stdk": 0.0556, "stdq": 0.0476, "stdqueue_k": 0.0557, "stdqueue_q": 0.0, "step": 4500 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.3362, "doc_norm": 2.0839, "encoder_q-embeddings": 2215.5498, "encoder_q-layer.0": 1610.77, "encoder_q-layer.1": 1951.4933, "encoder_q-layer.10": 15485.2363, "encoder_q-layer.11": 16651.832, "encoder_q-layer.2": 2617.3364, "encoder_q-layer.3": 2954.0156, "encoder_q-layer.4": 3841.5769, "encoder_q-layer.5": 5541.8457, "encoder_q-layer.6": 7356.5049, "encoder_q-layer.7": 9826.375, "encoder_q-layer.8": 11034.7344, "encoder_q-layer.9": 12359.6094, "epoch": 0.04, "inbatch_neg_score": 0.607, "inbatch_pos_score": 1.1797, "learning_rate": 2.3000000000000003e-05, "loss": 4.3362, "norm_diff": 0.493, "norm_loss": 0.0, "num_token_doc": 66.9329, "num_token_overlap": 17.8651, "num_token_query": 52.3443, "num_token_union": 73.8313, "num_word_context": 202.6934, "num_word_doc": 49.948, "num_word_query": 39.922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11791.4017, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6084, "query_norm": 1.5909, "queue_k_norm": 2.0796, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3443, "sent_len_1": 66.9329, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1625, "stdk": 0.0554, "stdq": 0.0466, "stdqueue_k": 0.0551, "stdqueue_q": 0.0, "step": 4600 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.2363, "doc_norm": 2.0227, "encoder_q-embeddings": 1750.8395, "encoder_q-layer.0": 1311.3773, "encoder_q-layer.1": 1456.052, "encoder_q-layer.10": 4559.3501, "encoder_q-layer.11": 7222.0405, "encoder_q-layer.2": 1694.5696, "encoder_q-layer.3": 1872.5104, "encoder_q-layer.4": 1976.2885, "encoder_q-layer.5": 2168.2866, "encoder_q-layer.6": 2597.7339, "encoder_q-layer.7": 2605.2012, "encoder_q-layer.8": 3094.0315, "encoder_q-layer.9": 3111.1213, "epoch": 0.05, "inbatch_neg_score": 0.5453, "inbatch_pos_score": 1.1113, "learning_rate": 2.35e-05, "loss": 4.2363, "norm_diff": 0.4478, "norm_loss": 0.0, "num_token_doc": 66.802, "num_token_overlap": 17.8636, "num_token_query": 52.4269, "num_token_union": 73.7559, "num_word_context": 202.2911, "num_word_doc": 49.8553, "num_word_query": 39.9989, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4428.3513, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5459, "query_norm": 1.5749, "queue_k_norm": 2.0238, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4269, "sent_len_1": 66.802, "sent_len_max_0": 128.0, "sent_len_max_1": 209.19, "stdk": 0.0544, "stdq": 0.0456, "stdqueue_k": 0.0545, "stdqueue_q": 0.0, "step": 4700 }, { "accuracy": 44.9707, "active_queue_size": 16384.0, "cl_loss": 4.202, "doc_norm": 1.9755, "encoder_q-embeddings": 2343.9106, "encoder_q-layer.0": 1929.6405, "encoder_q-layer.1": 2173.7283, "encoder_q-layer.10": 8983.5137, "encoder_q-layer.11": 11362.5352, "encoder_q-layer.2": 2489.4326, "encoder_q-layer.3": 2599.1121, "encoder_q-layer.4": 3007.5347, "encoder_q-layer.5": 4018.9243, "encoder_q-layer.6": 5477.0088, "encoder_q-layer.7": 6961.2129, "encoder_q-layer.8": 7808.0249, "encoder_q-layer.9": 7280.1299, "epoch": 0.05, "inbatch_neg_score": 0.5881, "inbatch_pos_score": 1.1582, "learning_rate": 2.4e-05, "loss": 4.202, "norm_diff": 0.4161, "norm_loss": 0.0, "num_token_doc": 66.6902, "num_token_overlap": 17.7669, "num_token_query": 52.1899, "num_token_union": 73.6647, "num_word_context": 202.2949, "num_word_doc": 49.7765, "num_word_query": 39.8187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8158.8427, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5889, "query_norm": 1.5594, "queue_k_norm": 1.9766, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1899, "sent_len_1": 66.6902, "sent_len_max_0": 128.0, "sent_len_max_1": 206.3638, "stdk": 0.0539, "stdq": 0.045, "stdqueue_k": 0.0541, "stdqueue_q": 0.0, "step": 4800 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 4.099, "doc_norm": 1.9292, "encoder_q-embeddings": 1964.7076, "encoder_q-layer.0": 1503.8083, "encoder_q-layer.1": 1687.9723, "encoder_q-layer.10": 6467.2354, "encoder_q-layer.11": 8189.4121, "encoder_q-layer.2": 2000.644, "encoder_q-layer.3": 2017.4161, "encoder_q-layer.4": 2097.4683, "encoder_q-layer.5": 2668.543, "encoder_q-layer.6": 3413.9189, "encoder_q-layer.7": 4599.5693, "encoder_q-layer.8": 5762.0371, "encoder_q-layer.9": 6077.5728, "epoch": 0.05, "inbatch_neg_score": 0.6212, "inbatch_pos_score": 1.2236, "learning_rate": 2.45e-05, "loss": 4.099, "norm_diff": 0.3221, "norm_loss": 0.0, "num_token_doc": 66.9253, "num_token_overlap": 17.8436, "num_token_query": 52.3401, "num_token_union": 73.8154, "num_word_context": 202.5564, "num_word_doc": 49.9327, "num_word_query": 39.9174, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5946.5186, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6201, "query_norm": 1.6071, "queue_k_norm": 1.9299, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3401, "sent_len_1": 66.9253, "sent_len_max_0": 128.0, "sent_len_max_1": 206.6775, "stdk": 0.0531, "stdq": 0.0464, "stdqueue_k": 0.0533, "stdqueue_q": 0.0, "step": 4900 }, { "accuracy": 44.4824, "active_queue_size": 16384.0, "cl_loss": 4.06, "doc_norm": 1.8922, "encoder_q-embeddings": 1471.5769, "encoder_q-layer.0": 1003.3736, "encoder_q-layer.1": 1093.9606, "encoder_q-layer.10": 4856.6133, "encoder_q-layer.11": 6949.8091, "encoder_q-layer.2": 1260.9398, "encoder_q-layer.3": 1425.4484, "encoder_q-layer.4": 1590.129, "encoder_q-layer.5": 1686.814, "encoder_q-layer.6": 1822.8987, "encoder_q-layer.7": 2124.0535, "encoder_q-layer.8": 2844.1865, "encoder_q-layer.9": 3014.0737, "epoch": 0.05, "inbatch_neg_score": 0.5697, "inbatch_pos_score": 1.1602, "learning_rate": 2.5e-05, "loss": 4.06, "norm_diff": 0.3012, "norm_loss": 0.0, "num_token_doc": 66.7798, "num_token_overlap": 17.7967, "num_token_query": 52.2738, "num_token_union": 73.716, "num_word_context": 202.3459, "num_word_doc": 49.8165, "num_word_query": 39.8594, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4036.9243, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5718, "query_norm": 1.591, "queue_k_norm": 1.8935, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2738, "sent_len_1": 66.7798, "sent_len_max_0": 128.0, "sent_len_max_1": 210.6475, "stdk": 0.0528, "stdq": 0.0459, "stdqueue_k": 0.0529, "stdqueue_q": 0.0, "step": 5000 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.0053, "doc_norm": 1.8581, "encoder_q-embeddings": 1610.392, "encoder_q-layer.0": 1044.3082, "encoder_q-layer.1": 1223.2954, "encoder_q-layer.10": 5455.3086, "encoder_q-layer.11": 8129.894, "encoder_q-layer.2": 1466.2543, "encoder_q-layer.3": 1592.8883, "encoder_q-layer.4": 1730.8719, "encoder_q-layer.5": 1959.2346, "encoder_q-layer.6": 2536.0754, "encoder_q-layer.7": 3180.6084, "encoder_q-layer.8": 4128.8906, "encoder_q-layer.9": 4326.8525, "epoch": 0.05, "inbatch_neg_score": 0.6213, "inbatch_pos_score": 1.1885, "learning_rate": 2.5500000000000003e-05, "loss": 4.0053, "norm_diff": 0.2781, "norm_loss": 0.0, "num_token_doc": 66.6373, "num_token_overlap": 17.8033, "num_token_query": 52.2627, "num_token_union": 73.6273, "num_word_context": 202.1996, "num_word_doc": 49.7172, "num_word_query": 39.8485, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5019.3375, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6191, "query_norm": 1.5799, "queue_k_norm": 1.8626, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2627, "sent_len_1": 66.6373, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6062, "stdk": 0.0522, "stdq": 0.0448, "stdqueue_k": 0.0524, "stdqueue_q": 0.0, "step": 5100 }, { "accuracy": 46.8262, "active_queue_size": 16384.0, "cl_loss": 3.9573, "doc_norm": 1.8353, "encoder_q-embeddings": 1568.4686, "encoder_q-layer.0": 1012.1459, "encoder_q-layer.1": 1200.114, "encoder_q-layer.10": 4697.3511, "encoder_q-layer.11": 7303.166, "encoder_q-layer.2": 1431.1674, "encoder_q-layer.3": 1618.9133, "encoder_q-layer.4": 1895.3325, "encoder_q-layer.5": 2385.2717, "encoder_q-layer.6": 3110.4268, "encoder_q-layer.7": 3956.27, "encoder_q-layer.8": 4367.3989, "encoder_q-layer.9": 4225.6353, "epoch": 0.05, "inbatch_neg_score": 0.6414, "inbatch_pos_score": 1.2383, "learning_rate": 2.6000000000000002e-05, "loss": 3.9573, "norm_diff": 0.2453, "norm_loss": 0.0, "num_token_doc": 66.7756, "num_token_overlap": 17.8011, "num_token_query": 52.2766, "num_token_union": 73.722, "num_word_context": 202.1698, "num_word_doc": 49.8346, "num_word_query": 39.8564, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4828.0343, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6401, "query_norm": 1.59, "queue_k_norm": 1.8374, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2766, "sent_len_1": 66.7756, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2725, "stdk": 0.0519, "stdq": 0.0457, "stdqueue_k": 0.052, "stdqueue_q": 0.0, "step": 5200 }, { "accuracy": 47.4121, "active_queue_size": 16384.0, "cl_loss": 3.9193, "doc_norm": 1.8177, "encoder_q-embeddings": 1836.5778, "encoder_q-layer.0": 1281.7701, "encoder_q-layer.1": 1480.6615, "encoder_q-layer.10": 4085.2485, "encoder_q-layer.11": 7141.4976, "encoder_q-layer.2": 1662.6976, "encoder_q-layer.3": 1806.5233, "encoder_q-layer.4": 1783.8577, "encoder_q-layer.5": 2078.1604, "encoder_q-layer.6": 2482.3926, "encoder_q-layer.7": 2606.7231, "encoder_q-layer.8": 3242.7805, "encoder_q-layer.9": 3408.2622, "epoch": 0.05, "inbatch_neg_score": 0.6353, "inbatch_pos_score": 1.2266, "learning_rate": 2.6500000000000004e-05, "loss": 3.9193, "norm_diff": 0.224, "norm_loss": 0.0, "num_token_doc": 66.6777, "num_token_overlap": 17.8412, "num_token_query": 52.2657, "num_token_union": 73.6032, "num_word_context": 202.136, "num_word_doc": 49.7897, "num_word_query": 39.873, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4341.8495, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6357, "query_norm": 1.5937, "queue_k_norm": 1.8147, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2657, "sent_len_1": 66.6777, "sent_len_max_0": 128.0, "sent_len_max_1": 205.7738, "stdk": 0.0517, "stdq": 0.045, "stdqueue_k": 0.0516, "stdqueue_q": 0.0, "step": 5300 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.8904, "doc_norm": 1.7962, "encoder_q-embeddings": 1555.0726, "encoder_q-layer.0": 1169.8129, "encoder_q-layer.1": 1344.0837, "encoder_q-layer.10": 4614.8853, "encoder_q-layer.11": 6386.3696, "encoder_q-layer.2": 1544.5385, "encoder_q-layer.3": 1580.1956, "encoder_q-layer.4": 1736.5739, "encoder_q-layer.5": 2185.6565, "encoder_q-layer.6": 2903.5051, "encoder_q-layer.7": 3660.8738, "encoder_q-layer.8": 4267.9751, "encoder_q-layer.9": 4020.343, "epoch": 0.05, "inbatch_neg_score": 0.6731, "inbatch_pos_score": 1.291, "learning_rate": 2.7000000000000002e-05, "loss": 3.8904, "norm_diff": 0.1783, "norm_loss": 0.0, "num_token_doc": 66.7991, "num_token_overlap": 17.8001, "num_token_query": 52.3195, "num_token_union": 73.7755, "num_word_context": 202.5673, "num_word_doc": 49.8528, "num_word_query": 39.8997, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4476.1388, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6729, "query_norm": 1.6179, "queue_k_norm": 1.7954, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3195, "sent_len_1": 66.7991, "sent_len_max_0": 128.0, "sent_len_max_1": 207.2663, "stdk": 0.0512, "stdq": 0.0464, "stdqueue_k": 0.0512, "stdqueue_q": 0.0, "step": 5400 }, { "accuracy": 44.873, "active_queue_size": 16384.0, "cl_loss": 3.8407, "doc_norm": 1.782, "encoder_q-embeddings": 1598.9094, "encoder_q-layer.0": 1132.7207, "encoder_q-layer.1": 1245.7561, "encoder_q-layer.10": 5825.2598, "encoder_q-layer.11": 8270.6221, "encoder_q-layer.2": 1491.9635, "encoder_q-layer.3": 1590.9912, "encoder_q-layer.4": 1865.5386, "encoder_q-layer.5": 2395.2095, "encoder_q-layer.6": 3201.1389, "encoder_q-layer.7": 3973.0305, "encoder_q-layer.8": 4439.3057, "encoder_q-layer.9": 4185.6768, "epoch": 0.05, "inbatch_neg_score": 0.6541, "inbatch_pos_score": 1.2402, "learning_rate": 2.7500000000000004e-05, "loss": 3.8407, "norm_diff": 0.1781, "norm_loss": 0.0, "num_token_doc": 66.9188, "num_token_overlap": 17.8298, "num_token_query": 52.332, "num_token_union": 73.8132, "num_word_context": 202.4227, "num_word_doc": 49.8977, "num_word_query": 39.9145, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5304.1932, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6523, "query_norm": 1.6039, "queue_k_norm": 1.7835, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.332, "sent_len_1": 66.9188, "sent_len_max_0": 128.0, "sent_len_max_1": 210.2788, "stdk": 0.0509, "stdq": 0.0458, "stdqueue_k": 0.051, "stdqueue_q": 0.0, "step": 5500 }, { "accuracy": 48.584, "active_queue_size": 16384.0, "cl_loss": 3.818, "doc_norm": 1.7693, "encoder_q-embeddings": 1647.0924, "encoder_q-layer.0": 1134.6381, "encoder_q-layer.1": 1291.5099, "encoder_q-layer.10": 6028.9189, "encoder_q-layer.11": 7340.252, "encoder_q-layer.2": 1537.4836, "encoder_q-layer.3": 1644.752, "encoder_q-layer.4": 1959.5785, "encoder_q-layer.5": 2613.6599, "encoder_q-layer.6": 3423.5747, "encoder_q-layer.7": 4435.9893, "encoder_q-layer.8": 5130.0356, "encoder_q-layer.9": 5152.4482, "epoch": 0.05, "inbatch_neg_score": 0.683, "inbatch_pos_score": 1.2734, "learning_rate": 2.8000000000000003e-05, "loss": 3.818, "norm_diff": 0.1652, "norm_loss": 0.0, "num_token_doc": 66.9255, "num_token_overlap": 17.8388, "num_token_query": 52.3309, "num_token_union": 73.8192, "num_word_context": 202.4578, "num_word_doc": 49.9497, "num_word_query": 39.8989, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5358.8048, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6797, "query_norm": 1.6042, "queue_k_norm": 1.7709, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3309, "sent_len_1": 66.9255, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3963, "stdk": 0.0506, "stdq": 0.0454, "stdqueue_k": 0.0507, "stdqueue_q": 0.0, "step": 5600 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.8072, "doc_norm": 1.7582, "encoder_q-embeddings": 1526.3226, "encoder_q-layer.0": 1058.3661, "encoder_q-layer.1": 1175.5549, "encoder_q-layer.10": 4045.2957, "encoder_q-layer.11": 5939.0234, "encoder_q-layer.2": 1403.0663, "encoder_q-layer.3": 1410.6194, "encoder_q-layer.4": 1583.3746, "encoder_q-layer.5": 1776.2323, "encoder_q-layer.6": 2207.958, "encoder_q-layer.7": 2515.1851, "encoder_q-layer.8": 3185.2305, "encoder_q-layer.9": 3171.5867, "epoch": 0.06, "inbatch_neg_score": 0.6868, "inbatch_pos_score": 1.2969, "learning_rate": 2.8499999999999998e-05, "loss": 3.8072, "norm_diff": 0.1103, "norm_loss": 0.0, "num_token_doc": 66.7449, "num_token_overlap": 17.776, "num_token_query": 52.2917, "num_token_union": 73.7594, "num_word_context": 202.3008, "num_word_doc": 49.7955, "num_word_query": 39.8912, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3817.7809, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.686, "query_norm": 1.6479, "queue_k_norm": 1.7602, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2917, "sent_len_1": 66.7449, "sent_len_max_0": 128.0, "sent_len_max_1": 207.0325, "stdk": 0.0503, "stdq": 0.0472, "stdqueue_k": 0.0504, "stdqueue_q": 0.0, "step": 5700 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.7665, "doc_norm": 1.7469, "encoder_q-embeddings": 1745.978, "encoder_q-layer.0": 1258.9341, "encoder_q-layer.1": 1259.5861, "encoder_q-layer.10": 3482.2759, "encoder_q-layer.11": 5690.4653, "encoder_q-layer.2": 1361.326, "encoder_q-layer.3": 1401.9158, "encoder_q-layer.4": 1464.8293, "encoder_q-layer.5": 1452.5734, "encoder_q-layer.6": 1593.0314, "encoder_q-layer.7": 1933.0913, "encoder_q-layer.8": 2430.665, "encoder_q-layer.9": 2636.6301, "epoch": 0.06, "inbatch_neg_score": 0.6731, "inbatch_pos_score": 1.2715, "learning_rate": 2.9e-05, "loss": 3.7665, "norm_diff": 0.1359, "norm_loss": 0.0, "num_token_doc": 66.8254, "num_token_overlap": 17.7725, "num_token_query": 52.1457, "num_token_union": 73.6901, "num_word_context": 202.211, "num_word_doc": 49.8451, "num_word_query": 39.753, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3535.0714, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6724, "query_norm": 1.611, "queue_k_norm": 1.7529, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1457, "sent_len_1": 66.8254, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8913, "stdk": 0.05, "stdq": 0.0457, "stdqueue_k": 0.0502, "stdqueue_q": 0.0, "step": 5800 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7412, "doc_norm": 1.7446, "encoder_q-embeddings": 1548.2627, "encoder_q-layer.0": 1082.9391, "encoder_q-layer.1": 1280.6543, "encoder_q-layer.10": 3471.9482, "encoder_q-layer.11": 5391.0981, "encoder_q-layer.2": 1534.8849, "encoder_q-layer.3": 1675.0371, "encoder_q-layer.4": 1901.0038, "encoder_q-layer.5": 2429.3906, "encoder_q-layer.6": 2951.7976, "encoder_q-layer.7": 3408.9062, "encoder_q-layer.8": 3502.7678, "encoder_q-layer.9": 3191.7051, "epoch": 0.06, "inbatch_neg_score": 0.7017, "inbatch_pos_score": 1.3154, "learning_rate": 2.95e-05, "loss": 3.7412, "norm_diff": 0.0769, "norm_loss": 0.0, "num_token_doc": 66.5688, "num_token_overlap": 17.7903, "num_token_query": 52.2585, "num_token_union": 73.6262, "num_word_context": 202.0915, "num_word_doc": 49.6745, "num_word_query": 39.8348, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3980.2151, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7007, "query_norm": 1.6677, "queue_k_norm": 1.7462, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2585, "sent_len_1": 66.5688, "sent_len_max_0": 128.0, "sent_len_max_1": 207.82, "stdk": 0.05, "stdq": 0.0466, "stdqueue_k": 0.0501, "stdqueue_q": 0.0, "step": 5900 }, { "accuracy": 49.4629, "active_queue_size": 16384.0, "cl_loss": 3.7177, "doc_norm": 1.7341, "encoder_q-embeddings": 1543.8623, "encoder_q-layer.0": 1135.613, "encoder_q-layer.1": 1243.1074, "encoder_q-layer.10": 2970.1558, "encoder_q-layer.11": 5465.9805, "encoder_q-layer.2": 1457.9022, "encoder_q-layer.3": 1525.1432, "encoder_q-layer.4": 1607.4967, "encoder_q-layer.5": 1700.024, "encoder_q-layer.6": 1813.978, "encoder_q-layer.7": 2103.7239, "encoder_q-layer.8": 2480.6033, "encoder_q-layer.9": 2358.2993, "epoch": 0.06, "inbatch_neg_score": 0.6509, "inbatch_pos_score": 1.2725, "learning_rate": 3e-05, "loss": 3.7177, "norm_diff": 0.1027, "norm_loss": 0.0, "num_token_doc": 66.7059, "num_token_overlap": 17.8267, "num_token_query": 52.3475, "num_token_union": 73.6922, "num_word_context": 202.3393, "num_word_doc": 49.7844, "num_word_query": 39.9301, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3436.5238, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6519, "query_norm": 1.6315, "queue_k_norm": 1.7374, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3475, "sent_len_1": 66.7059, "sent_len_max_0": 128.0, "sent_len_max_1": 208.145, "stdk": 0.0497, "stdq": 0.0459, "stdqueue_k": 0.0498, "stdqueue_q": 0.0, "step": 6000 }, { "accuracy": 47.6074, "active_queue_size": 16384.0, "cl_loss": 3.6991, "doc_norm": 1.7287, "encoder_q-embeddings": 3143.658, "encoder_q-layer.0": 2145.6179, "encoder_q-layer.1": 2360.709, "encoder_q-layer.10": 7492.7954, "encoder_q-layer.11": 11850.3477, "encoder_q-layer.2": 2758.1973, "encoder_q-layer.3": 2975.1953, "encoder_q-layer.4": 3176.1963, "encoder_q-layer.5": 3499.936, "encoder_q-layer.6": 4493.3242, "encoder_q-layer.7": 5665.0952, "encoder_q-layer.8": 6931.0688, "encoder_q-layer.9": 6820.2793, "epoch": 0.06, "inbatch_neg_score": 0.6016, "inbatch_pos_score": 1.1875, "learning_rate": 3.05e-05, "loss": 3.6991, "norm_diff": 0.13, "norm_loss": 0.0, "num_token_doc": 66.8494, "num_token_overlap": 17.8013, "num_token_query": 52.238, "num_token_union": 73.7167, "num_word_context": 202.1573, "num_word_doc": 49.9209, "num_word_query": 39.8458, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7719.7449, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6016, "query_norm": 1.5987, "queue_k_norm": 1.7327, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.238, "sent_len_1": 66.8494, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7587, "stdk": 0.0495, "stdq": 0.0447, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 6100 }, { "accuracy": 49.9512, "active_queue_size": 16384.0, "cl_loss": 3.6786, "doc_norm": 1.7239, "encoder_q-embeddings": 2580.7747, "encoder_q-layer.0": 1758.2665, "encoder_q-layer.1": 1866.033, "encoder_q-layer.10": 5901.8765, "encoder_q-layer.11": 10351.4307, "encoder_q-layer.2": 2144.2893, "encoder_q-layer.3": 2247.1667, "encoder_q-layer.4": 2373.9653, "encoder_q-layer.5": 2505.0928, "encoder_q-layer.6": 2921.051, "encoder_q-layer.7": 3551.0088, "encoder_q-layer.8": 4090.3726, "encoder_q-layer.9": 4013.7776, "epoch": 0.06, "inbatch_neg_score": 0.5963, "inbatch_pos_score": 1.2178, "learning_rate": 3.1e-05, "loss": 3.6786, "norm_diff": 0.0866, "norm_loss": 0.0, "num_token_doc": 66.9008, "num_token_overlap": 17.8, "num_token_query": 52.3081, "num_token_union": 73.8226, "num_word_context": 202.4117, "num_word_doc": 49.925, "num_word_query": 39.8627, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6143.1705, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5942, "query_norm": 1.6373, "queue_k_norm": 1.7241, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3081, "sent_len_1": 66.9008, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1125, "stdk": 0.0495, "stdq": 0.046, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 6200 }, { "accuracy": 49.7559, "active_queue_size": 16384.0, "cl_loss": 3.6613, "doc_norm": 1.7183, "encoder_q-embeddings": 3810.1562, "encoder_q-layer.0": 2778.8552, "encoder_q-layer.1": 3263.7368, "encoder_q-layer.10": 7948.7588, "encoder_q-layer.11": 11120.7061, "encoder_q-layer.2": 3962.6365, "encoder_q-layer.3": 3810.447, "encoder_q-layer.4": 4108.2231, "encoder_q-layer.5": 5090.9863, "encoder_q-layer.6": 6280.9414, "encoder_q-layer.7": 7711.1919, "encoder_q-layer.8": 8799.5176, "encoder_q-layer.9": 7513.3496, "epoch": 0.06, "inbatch_neg_score": 0.588, "inbatch_pos_score": 1.2383, "learning_rate": 3.15e-05, "loss": 3.6613, "norm_diff": 0.0517, "norm_loss": 0.0, "num_token_doc": 66.7841, "num_token_overlap": 17.8114, "num_token_query": 52.2449, "num_token_union": 73.6876, "num_word_context": 202.3466, "num_word_doc": 49.8212, "num_word_query": 39.8552, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8718.8383, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5859, "query_norm": 1.6666, "queue_k_norm": 1.7175, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2449, "sent_len_1": 66.7841, "sent_len_max_0": 128.0, "sent_len_max_1": 209.025, "stdk": 0.0494, "stdq": 0.0474, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 6300 }, { "accuracy": 49.6582, "active_queue_size": 16384.0, "cl_loss": 3.6431, "doc_norm": 1.7115, "encoder_q-embeddings": 3356.3967, "encoder_q-layer.0": 2577.8862, "encoder_q-layer.1": 2673.6907, "encoder_q-layer.10": 6258.9702, "encoder_q-layer.11": 9241.125, "encoder_q-layer.2": 3068.5349, "encoder_q-layer.3": 3098.0293, "encoder_q-layer.4": 3447.0037, "encoder_q-layer.5": 4105.749, "encoder_q-layer.6": 5119.9272, "encoder_q-layer.7": 6341.6011, "encoder_q-layer.8": 7120.7568, "encoder_q-layer.9": 6540.0698, "epoch": 0.06, "inbatch_neg_score": 0.5835, "inbatch_pos_score": 1.1973, "learning_rate": 3.2000000000000005e-05, "loss": 3.6431, "norm_diff": 0.0752, "norm_loss": 0.0, "num_token_doc": 66.7698, "num_token_overlap": 17.7808, "num_token_query": 52.3183, "num_token_union": 73.7574, "num_word_context": 202.3034, "num_word_doc": 49.8284, "num_word_query": 39.9216, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7389.7727, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.584, "query_norm": 1.6363, "queue_k_norm": 1.7097, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3183, "sent_len_1": 66.7698, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0625, "stdk": 0.0493, "stdq": 0.0454, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 6400 }, { "accuracy": 51.123, "active_queue_size": 16384.0, "cl_loss": 3.6096, "doc_norm": 1.7013, "encoder_q-embeddings": 2433.075, "encoder_q-layer.0": 1592.6221, "encoder_q-layer.1": 1766.9277, "encoder_q-layer.10": 5757.5732, "encoder_q-layer.11": 9932.2705, "encoder_q-layer.2": 2070.1255, "encoder_q-layer.3": 2264.4326, "encoder_q-layer.4": 2290.76, "encoder_q-layer.5": 2285.8628, "encoder_q-layer.6": 2742.0352, "encoder_q-layer.7": 3320.4893, "encoder_q-layer.8": 4172.647, "encoder_q-layer.9": 3804.8691, "epoch": 0.06, "inbatch_neg_score": 0.5395, "inbatch_pos_score": 1.167, "learning_rate": 3.2500000000000004e-05, "loss": 3.6096, "norm_diff": 0.0782, "norm_loss": 0.0, "num_token_doc": 67.0028, "num_token_overlap": 17.857, "num_token_query": 52.3139, "num_token_union": 73.8162, "num_word_context": 202.477, "num_word_doc": 49.9798, "num_word_query": 39.8998, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5794.2095, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5381, "query_norm": 1.6231, "queue_k_norm": 1.7032, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3139, "sent_len_1": 67.0028, "sent_len_max_0": 128.0, "sent_len_max_1": 211.8, "stdk": 0.0491, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 6500 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.6083, "doc_norm": 1.6901, "encoder_q-embeddings": 3094.5879, "encoder_q-layer.0": 2287.7832, "encoder_q-layer.1": 2525.311, "encoder_q-layer.10": 9680.915, "encoder_q-layer.11": 16261.3047, "encoder_q-layer.2": 3082.4775, "encoder_q-layer.3": 2995.8606, "encoder_q-layer.4": 3405.6135, "encoder_q-layer.5": 4331.6108, "encoder_q-layer.6": 5729.854, "encoder_q-layer.7": 7587.3481, "encoder_q-layer.8": 8834.4619, "encoder_q-layer.9": 8341.9746, "epoch": 0.06, "inbatch_neg_score": 0.5513, "inbatch_pos_score": 1.1719, "learning_rate": 3.3e-05, "loss": 3.6083, "norm_diff": 0.0358, "norm_loss": 0.0, "num_token_doc": 66.7741, "num_token_overlap": 17.793, "num_token_query": 52.2861, "num_token_union": 73.7499, "num_word_context": 202.4269, "num_word_doc": 49.8019, "num_word_query": 39.8778, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9990.0338, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5518, "query_norm": 1.6543, "queue_k_norm": 1.6955, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2861, "sent_len_1": 66.7741, "sent_len_max_0": 128.0, "sent_len_max_1": 211.0825, "stdk": 0.049, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 6600 }, { "accuracy": 51.123, "active_queue_size": 16384.0, "cl_loss": 3.5888, "doc_norm": 1.6865, "encoder_q-embeddings": 2496.8469, "encoder_q-layer.0": 1863.2004, "encoder_q-layer.1": 1939.5812, "encoder_q-layer.10": 5872.2368, "encoder_q-layer.11": 10477.3115, "encoder_q-layer.2": 2195.1646, "encoder_q-layer.3": 2372.4038, "encoder_q-layer.4": 2577.249, "encoder_q-layer.5": 2867.7798, "encoder_q-layer.6": 3561.594, "encoder_q-layer.7": 4181.897, "encoder_q-layer.8": 4492.2573, "encoder_q-layer.9": 3894.3765, "epoch": 0.07, "inbatch_neg_score": 0.514, "inbatch_pos_score": 1.1543, "learning_rate": 3.35e-05, "loss": 3.5888, "norm_diff": 0.07, "norm_loss": 0.0, "num_token_doc": 66.6511, "num_token_overlap": 17.7934, "num_token_query": 52.2529, "num_token_union": 73.6529, "num_word_context": 202.1863, "num_word_doc": 49.7398, "num_word_query": 39.8522, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6183.8165, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5137, "query_norm": 1.6165, "queue_k_norm": 1.6885, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2529, "sent_len_1": 66.6511, "sent_len_max_0": 128.0, "sent_len_max_1": 210.055, "stdk": 0.049, "stdq": 0.0462, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 6700 }, { "accuracy": 48.877, "active_queue_size": 16384.0, "cl_loss": 3.5628, "doc_norm": 1.6756, "encoder_q-embeddings": 2541.6836, "encoder_q-layer.0": 1618.7639, "encoder_q-layer.1": 1874.6846, "encoder_q-layer.10": 4790.4346, "encoder_q-layer.11": 8891.4375, "encoder_q-layer.2": 2163.9873, "encoder_q-layer.3": 2269.9978, "encoder_q-layer.4": 2433.3352, "encoder_q-layer.5": 2516.9297, "encoder_q-layer.6": 3092.4509, "encoder_q-layer.7": 3405.8501, "encoder_q-layer.8": 4079.1306, "encoder_q-layer.9": 3673.6765, "epoch": 0.07, "inbatch_neg_score": 0.5023, "inbatch_pos_score": 1.1143, "learning_rate": 3.4000000000000007e-05, "loss": 3.5628, "norm_diff": 0.0723, "norm_loss": 0.0, "num_token_doc": 66.8796, "num_token_overlap": 17.8061, "num_token_query": 52.2947, "num_token_union": 73.7792, "num_word_context": 202.4695, "num_word_doc": 49.9306, "num_word_query": 39.8843, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5505.3379, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4998, "query_norm": 1.6033, "queue_k_norm": 1.6787, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2947, "sent_len_1": 66.8796, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9575, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 6800 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.5481, "doc_norm": 1.6659, "encoder_q-embeddings": 3272.2678, "encoder_q-layer.0": 2270.511, "encoder_q-layer.1": 2439.0532, "encoder_q-layer.10": 4377.9614, "encoder_q-layer.11": 8232.75, "encoder_q-layer.2": 2710.8477, "encoder_q-layer.3": 2730.0625, "encoder_q-layer.4": 2734.3704, "encoder_q-layer.5": 2671.8762, "encoder_q-layer.6": 2910.7952, "encoder_q-layer.7": 3262.6812, "encoder_q-layer.8": 4176.6099, "encoder_q-layer.9": 3716.7246, "epoch": 0.07, "inbatch_neg_score": 0.4834, "inbatch_pos_score": 1.1104, "learning_rate": 3.45e-05, "loss": 3.5481, "norm_diff": 0.0642, "norm_loss": 0.0, "num_token_doc": 66.8204, "num_token_overlap": 17.8456, "num_token_query": 52.3722, "num_token_union": 73.7824, "num_word_context": 202.2966, "num_word_doc": 49.8892, "num_word_query": 39.9456, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5613.7156, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4824, "query_norm": 1.6017, "queue_k_norm": 1.6719, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3722, "sent_len_1": 66.8204, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0825, "stdk": 0.0487, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 6900 }, { "accuracy": 50.8301, "active_queue_size": 16384.0, "cl_loss": 3.5434, "doc_norm": 1.6662, "encoder_q-embeddings": 2453.3176, "encoder_q-layer.0": 1624.791, "encoder_q-layer.1": 1820.0649, "encoder_q-layer.10": 4677.2568, "encoder_q-layer.11": 9062.7139, "encoder_q-layer.2": 2091.7603, "encoder_q-layer.3": 2261.6968, "encoder_q-layer.4": 2350.5693, "encoder_q-layer.5": 2335.5967, "encoder_q-layer.6": 2663.2969, "encoder_q-layer.7": 3013.8252, "encoder_q-layer.8": 3770.7224, "encoder_q-layer.9": 3609.5347, "epoch": 0.07, "inbatch_neg_score": 0.4659, "inbatch_pos_score": 1.0869, "learning_rate": 3.5e-05, "loss": 3.5434, "norm_diff": 0.0932, "norm_loss": 0.0, "num_token_doc": 66.7535, "num_token_overlap": 17.776, "num_token_query": 52.3102, "num_token_union": 73.7588, "num_word_context": 202.5238, "num_word_doc": 49.8175, "num_word_query": 39.8868, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5377.2767, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4653, "query_norm": 1.573, "queue_k_norm": 1.6636, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3102, "sent_len_1": 66.7535, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6337, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 7000 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.5327, "doc_norm": 1.6554, "encoder_q-embeddings": 2678.9897, "encoder_q-layer.0": 1758.7148, "encoder_q-layer.1": 1875.6698, "encoder_q-layer.10": 4364.2773, "encoder_q-layer.11": 8079.5942, "encoder_q-layer.2": 2047.431, "encoder_q-layer.3": 2199.386, "encoder_q-layer.4": 2273.1047, "encoder_q-layer.5": 2303.3733, "encoder_q-layer.6": 2555.3103, "encoder_q-layer.7": 2781.4902, "encoder_q-layer.8": 3407.9663, "encoder_q-layer.9": 3171.1021, "epoch": 0.07, "inbatch_neg_score": 0.4696, "inbatch_pos_score": 1.0967, "learning_rate": 3.55e-05, "loss": 3.5327, "norm_diff": 0.0667, "norm_loss": 0.0, "num_token_doc": 66.7564, "num_token_overlap": 17.8023, "num_token_query": 52.335, "num_token_union": 73.7319, "num_word_context": 202.4114, "num_word_doc": 49.8422, "num_word_query": 39.9109, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5147.4765, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4683, "query_norm": 1.5887, "queue_k_norm": 1.6536, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.335, "sent_len_1": 66.7564, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6687, "stdk": 0.0488, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 7100 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.5235, "doc_norm": 1.6507, "encoder_q-embeddings": 2386.5068, "encoder_q-layer.0": 1578.6993, "encoder_q-layer.1": 1813.7742, "encoder_q-layer.10": 4508.1997, "encoder_q-layer.11": 8288.2412, "encoder_q-layer.2": 2098.2588, "encoder_q-layer.3": 2203.7952, "encoder_q-layer.4": 2391.7163, "encoder_q-layer.5": 2404.0488, "encoder_q-layer.6": 2636.7791, "encoder_q-layer.7": 2762.2068, "encoder_q-layer.8": 3402.5581, "encoder_q-layer.9": 3167.0068, "epoch": 0.07, "inbatch_neg_score": 0.4746, "inbatch_pos_score": 1.1211, "learning_rate": 3.6e-05, "loss": 3.5235, "norm_diff": 0.0673, "norm_loss": 0.0, "num_token_doc": 66.9293, "num_token_overlap": 17.8203, "num_token_query": 52.3359, "num_token_union": 73.8119, "num_word_context": 202.3706, "num_word_doc": 49.9109, "num_word_query": 39.9293, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5132.8668, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4744, "query_norm": 1.5834, "queue_k_norm": 1.6466, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3359, "sent_len_1": 66.9293, "sent_len_max_0": 128.0, "sent_len_max_1": 212.0225, "stdk": 0.049, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 7200 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.512, "doc_norm": 1.6345, "encoder_q-embeddings": 2459.3782, "encoder_q-layer.0": 1592.6765, "encoder_q-layer.1": 1698.1794, "encoder_q-layer.10": 4685.9409, "encoder_q-layer.11": 8985.166, "encoder_q-layer.2": 2064.2808, "encoder_q-layer.3": 2159.3853, "encoder_q-layer.4": 2437.9761, "encoder_q-layer.5": 2520.5757, "encoder_q-layer.6": 2828.8926, "encoder_q-layer.7": 2884.3047, "encoder_q-layer.8": 3642.6624, "encoder_q-layer.9": 3463.3457, "epoch": 0.07, "inbatch_neg_score": 0.4644, "inbatch_pos_score": 1.0967, "learning_rate": 3.65e-05, "loss": 3.512, "norm_diff": 0.0693, "norm_loss": 0.0, "num_token_doc": 66.7146, "num_token_overlap": 17.8283, "num_token_query": 52.268, "num_token_union": 73.6642, "num_word_context": 202.2365, "num_word_doc": 49.8066, "num_word_query": 39.8735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5471.9311, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4624, "query_norm": 1.5652, "queue_k_norm": 1.641, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.268, "sent_len_1": 66.7146, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7138, "stdk": 0.0485, "stdq": 0.0456, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 7300 }, { "accuracy": 52.3926, "active_queue_size": 16384.0, "cl_loss": 3.4951, "doc_norm": 1.6298, "encoder_q-embeddings": 2520.7473, "encoder_q-layer.0": 1734.8285, "encoder_q-layer.1": 1907.9573, "encoder_q-layer.10": 4719.4873, "encoder_q-layer.11": 8875.6133, "encoder_q-layer.2": 2207.3477, "encoder_q-layer.3": 2253.6631, "encoder_q-layer.4": 2251.2112, "encoder_q-layer.5": 2382.0618, "encoder_q-layer.6": 2683.1919, "encoder_q-layer.7": 2927.0046, "encoder_q-layer.8": 3842.5847, "encoder_q-layer.9": 3778.5259, "epoch": 0.07, "inbatch_neg_score": 0.4709, "inbatch_pos_score": 1.0908, "learning_rate": 3.7e-05, "loss": 3.4951, "norm_diff": 0.0944, "norm_loss": 0.0, "num_token_doc": 66.8488, "num_token_overlap": 17.8804, "num_token_query": 52.3851, "num_token_union": 73.7578, "num_word_context": 202.2355, "num_word_doc": 49.8643, "num_word_query": 39.9358, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5431.7041, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4697, "query_norm": 1.5355, "queue_k_norm": 1.6321, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3851, "sent_len_1": 66.8488, "sent_len_max_0": 128.0, "sent_len_max_1": 211.1413, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 7400 }, { "accuracy": 49.8535, "active_queue_size": 16384.0, "cl_loss": 3.4805, "doc_norm": 1.6216, "encoder_q-embeddings": 3019.7271, "encoder_q-layer.0": 2242.9583, "encoder_q-layer.1": 2573.0962, "encoder_q-layer.10": 3928.4268, "encoder_q-layer.11": 7840.2202, "encoder_q-layer.2": 2899.7312, "encoder_q-layer.3": 3020.468, "encoder_q-layer.4": 3021.1985, "encoder_q-layer.5": 2741.709, "encoder_q-layer.6": 2671.8643, "encoder_q-layer.7": 2684.6843, "encoder_q-layer.8": 3062.6995, "encoder_q-layer.9": 2895.6284, "epoch": 0.07, "inbatch_neg_score": 0.4651, "inbatch_pos_score": 1.084, "learning_rate": 3.7500000000000003e-05, "loss": 3.4805, "norm_diff": 0.0631, "norm_loss": 0.0, "num_token_doc": 66.8379, "num_token_overlap": 17.82, "num_token_query": 52.2508, "num_token_union": 73.7162, "num_word_context": 202.0807, "num_word_doc": 49.8547, "num_word_query": 39.8418, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5277.9152, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4619, "query_norm": 1.5585, "queue_k_norm": 1.6237, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2508, "sent_len_1": 66.8379, "sent_len_max_0": 128.0, "sent_len_max_1": 210.7862, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 7500 }, { "accuracy": 51.5137, "active_queue_size": 16384.0, "cl_loss": 3.4636, "doc_norm": 1.6163, "encoder_q-embeddings": 2158.9998, "encoder_q-layer.0": 1452.6938, "encoder_q-layer.1": 1594.5996, "encoder_q-layer.10": 4984.5332, "encoder_q-layer.11": 9045.1025, "encoder_q-layer.2": 1844.6598, "encoder_q-layer.3": 2032.0441, "encoder_q-layer.4": 2112.603, "encoder_q-layer.5": 2178.2515, "encoder_q-layer.6": 2687.937, "encoder_q-layer.7": 3057.9722, "encoder_q-layer.8": 3547.6328, "encoder_q-layer.9": 3547.6101, "epoch": 0.07, "inbatch_neg_score": 0.4519, "inbatch_pos_score": 1.0742, "learning_rate": 3.8e-05, "loss": 3.4636, "norm_diff": 0.0817, "norm_loss": 0.0, "num_token_doc": 66.7705, "num_token_overlap": 17.8265, "num_token_query": 52.3307, "num_token_union": 73.7213, "num_word_context": 202.0704, "num_word_doc": 49.7938, "num_word_query": 39.889, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5140.3766, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4519, "query_norm": 1.5345, "queue_k_norm": 1.618, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3307, "sent_len_1": 66.7705, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6362, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 7600 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.4473, "doc_norm": 1.6072, "encoder_q-embeddings": 2454.8203, "encoder_q-layer.0": 1637.1006, "encoder_q-layer.1": 1798.3866, "encoder_q-layer.10": 4134.1382, "encoder_q-layer.11": 7960.481, "encoder_q-layer.2": 2041.2661, "encoder_q-layer.3": 2269.3313, "encoder_q-layer.4": 2326.053, "encoder_q-layer.5": 2265.0593, "encoder_q-layer.6": 2392.4583, "encoder_q-layer.7": 2651.0859, "encoder_q-layer.8": 3214.4424, "encoder_q-layer.9": 3355.6946, "epoch": 0.08, "inbatch_neg_score": 0.4566, "inbatch_pos_score": 1.1064, "learning_rate": 3.85e-05, "loss": 3.4473, "norm_diff": 0.0381, "norm_loss": 0.0, "num_token_doc": 67.0922, "num_token_overlap": 17.8532, "num_token_query": 52.3175, "num_token_union": 73.9044, "num_word_context": 202.6182, "num_word_doc": 50.0727, "num_word_query": 39.903, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4840.6223, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4565, "query_norm": 1.5691, "queue_k_norm": 1.6106, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3175, "sent_len_1": 67.0922, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4613, "stdk": 0.0486, "stdq": 0.0464, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 7700 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.4389, "doc_norm": 1.6086, "encoder_q-embeddings": 2180.2651, "encoder_q-layer.0": 1372.9933, "encoder_q-layer.1": 1518.0212, "encoder_q-layer.10": 4058.1028, "encoder_q-layer.11": 8210.4365, "encoder_q-layer.2": 1724.9608, "encoder_q-layer.3": 1845.4457, "encoder_q-layer.4": 2054.7034, "encoder_q-layer.5": 2085.4009, "encoder_q-layer.6": 2410.5198, "encoder_q-layer.7": 2549.1321, "encoder_q-layer.8": 3126.4458, "encoder_q-layer.9": 2943.6838, "epoch": 0.08, "inbatch_neg_score": 0.462, "inbatch_pos_score": 1.0967, "learning_rate": 3.9000000000000006e-05, "loss": 3.4389, "norm_diff": 0.0359, "norm_loss": 0.0, "num_token_doc": 66.6675, "num_token_overlap": 17.7834, "num_token_query": 52.2386, "num_token_union": 73.6627, "num_word_context": 202.3263, "num_word_doc": 49.7778, "num_word_query": 39.8582, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4761.417, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4614, "query_norm": 1.5727, "queue_k_norm": 1.6054, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2386, "sent_len_1": 66.6675, "sent_len_max_0": 128.0, "sent_len_max_1": 206.2475, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 7800 }, { "accuracy": 51.5137, "active_queue_size": 16384.0, "cl_loss": 3.4364, "doc_norm": 1.597, "encoder_q-embeddings": 2609.0222, "encoder_q-layer.0": 1772.9128, "encoder_q-layer.1": 2079.6509, "encoder_q-layer.10": 4536.8477, "encoder_q-layer.11": 9706.4062, "encoder_q-layer.2": 2366.9932, "encoder_q-layer.3": 2355.4539, "encoder_q-layer.4": 2376.8296, "encoder_q-layer.5": 2418.1792, "encoder_q-layer.6": 2634.1106, "encoder_q-layer.7": 2727.0454, "encoder_q-layer.8": 3259.6326, "encoder_q-layer.9": 3170.5149, "epoch": 0.08, "inbatch_neg_score": 0.4862, "inbatch_pos_score": 1.1006, "learning_rate": 3.9500000000000005e-05, "loss": 3.4364, "norm_diff": 0.035, "norm_loss": 0.0, "num_token_doc": 66.8484, "num_token_overlap": 17.8515, "num_token_query": 52.3982, "num_token_union": 73.8211, "num_word_context": 202.5496, "num_word_doc": 49.8894, "num_word_query": 39.9476, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5627.3718, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4836, "query_norm": 1.562, "queue_k_norm": 1.5983, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3982, "sent_len_1": 66.8484, "sent_len_max_0": 128.0, "sent_len_max_1": 206.1525, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 7900 }, { "accuracy": 48.4863, "active_queue_size": 16384.0, "cl_loss": 3.4186, "doc_norm": 1.5932, "encoder_q-embeddings": 4256.6802, "encoder_q-layer.0": 3035.5332, "encoder_q-layer.1": 3288.405, "encoder_q-layer.10": 4368.5049, "encoder_q-layer.11": 9179.9678, "encoder_q-layer.2": 3422.0168, "encoder_q-layer.3": 3085.2505, "encoder_q-layer.4": 2790.5857, "encoder_q-layer.5": 2701.0269, "encoder_q-layer.6": 2911.6057, "encoder_q-layer.7": 2916.0032, "encoder_q-layer.8": 3535.8364, "encoder_q-layer.9": 3284.6248, "epoch": 0.08, "inbatch_neg_score": 0.5029, "inbatch_pos_score": 1.1152, "learning_rate": 4e-05, "loss": 3.4186, "norm_diff": 0.0671, "norm_loss": 0.0, "num_token_doc": 66.808, "num_token_overlap": 17.8309, "num_token_query": 52.4131, "num_token_union": 73.8208, "num_word_context": 202.3479, "num_word_doc": 49.8675, "num_word_query": 39.9874, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6212.5389, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5039, "query_norm": 1.5261, "queue_k_norm": 1.5958, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4131, "sent_len_1": 66.808, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4225, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 8000 }, { "accuracy": 52.5879, "active_queue_size": 16384.0, "cl_loss": 3.4101, "doc_norm": 1.5951, "encoder_q-embeddings": 5169.2095, "encoder_q-layer.0": 3541.009, "encoder_q-layer.1": 4003.2441, "encoder_q-layer.10": 8740.5645, "encoder_q-layer.11": 16119.3506, "encoder_q-layer.2": 4630.9741, "encoder_q-layer.3": 4952.4727, "encoder_q-layer.4": 5241.9565, "encoder_q-layer.5": 4922.6216, "encoder_q-layer.6": 5345.876, "encoder_q-layer.7": 6042.5122, "encoder_q-layer.8": 7464.5962, "encoder_q-layer.9": 7201.8105, "epoch": 0.08, "inbatch_neg_score": 0.4768, "inbatch_pos_score": 1.1191, "learning_rate": 4.05e-05, "loss": 3.4101, "norm_diff": 0.0482, "norm_loss": 0.0, "num_token_doc": 66.7427, "num_token_overlap": 17.8254, "num_token_query": 52.2746, "num_token_union": 73.6704, "num_word_context": 202.4349, "num_word_doc": 49.8018, "num_word_query": 39.8674, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10285.5049, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4766, "query_norm": 1.5469, "queue_k_norm": 1.5921, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2746, "sent_len_1": 66.7427, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4762, "stdk": 0.049, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 8100 }, { "accuracy": 51.8066, "active_queue_size": 16384.0, "cl_loss": 3.3954, "doc_norm": 1.5837, "encoder_q-embeddings": 6630.4604, "encoder_q-layer.0": 4823.9316, "encoder_q-layer.1": 5572.3286, "encoder_q-layer.10": 8110.124, "encoder_q-layer.11": 16007.8662, "encoder_q-layer.2": 6647.6982, "encoder_q-layer.3": 6209.7769, "encoder_q-layer.4": 6505.6641, "encoder_q-layer.5": 5763.5591, "encoder_q-layer.6": 6040.9873, "encoder_q-layer.7": 7016.8677, "encoder_q-layer.8": 8444.6807, "encoder_q-layer.9": 7283.7485, "epoch": 0.08, "inbatch_neg_score": 0.4733, "inbatch_pos_score": 1.1094, "learning_rate": 4.1e-05, "loss": 3.3954, "norm_diff": 0.0594, "norm_loss": 0.0, "num_token_doc": 66.7375, "num_token_overlap": 17.8174, "num_token_query": 52.2992, "num_token_union": 73.6722, "num_word_context": 202.1543, "num_word_doc": 49.8071, "num_word_query": 39.8697, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11363.3646, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4731, "query_norm": 1.5243, "queue_k_norm": 1.5864, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2992, "sent_len_1": 66.7375, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4087, "stdk": 0.0487, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 8200 }, { "accuracy": 52.002, "active_queue_size": 16384.0, "cl_loss": 3.3849, "doc_norm": 1.5794, "encoder_q-embeddings": 4950.1997, "encoder_q-layer.0": 3165.9282, "encoder_q-layer.1": 3441.6108, "encoder_q-layer.10": 10800.751, "encoder_q-layer.11": 20121.2852, "encoder_q-layer.2": 3990.9988, "encoder_q-layer.3": 4525.0088, "encoder_q-layer.4": 4806.0527, "encoder_q-layer.5": 4752.4453, "encoder_q-layer.6": 5509.7383, "encoder_q-layer.7": 6191.2603, "encoder_q-layer.8": 7405.8809, "encoder_q-layer.9": 7137.2363, "epoch": 0.08, "inbatch_neg_score": 0.4731, "inbatch_pos_score": 1.1172, "learning_rate": 4.15e-05, "loss": 3.3849, "norm_diff": 0.0359, "norm_loss": 0.0, "num_token_doc": 66.6679, "num_token_overlap": 17.8076, "num_token_query": 52.3412, "num_token_union": 73.668, "num_word_context": 202.3572, "num_word_doc": 49.776, "num_word_query": 39.9285, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11775.5335, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4746, "query_norm": 1.5434, "queue_k_norm": 1.5836, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3412, "sent_len_1": 66.6679, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1125, "stdk": 0.0487, "stdq": 0.0459, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 8300 }, { "accuracy": 51.9043, "active_queue_size": 16384.0, "cl_loss": 3.3655, "doc_norm": 1.5829, "encoder_q-embeddings": 4133.7266, "encoder_q-layer.0": 2635.4883, "encoder_q-layer.1": 2919.2761, "encoder_q-layer.10": 6695.668, "encoder_q-layer.11": 14051.8906, "encoder_q-layer.2": 3378.8345, "encoder_q-layer.3": 3517.9917, "encoder_q-layer.4": 3809.9094, "encoder_q-layer.5": 3831.4241, "encoder_q-layer.6": 4321.4902, "encoder_q-layer.7": 4696.4507, "encoder_q-layer.8": 5805.2222, "encoder_q-layer.9": 5307.3164, "epoch": 0.08, "inbatch_neg_score": 0.4818, "inbatch_pos_score": 1.1299, "learning_rate": 4.2e-05, "loss": 3.3655, "norm_diff": 0.0511, "norm_loss": 0.0, "num_token_doc": 66.8523, "num_token_overlap": 17.8444, "num_token_query": 52.2693, "num_token_union": 73.7343, "num_word_context": 202.328, "num_word_doc": 49.8759, "num_word_query": 39.8677, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8736.2097, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4819, "query_norm": 1.5318, "queue_k_norm": 1.5786, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2693, "sent_len_1": 66.8523, "sent_len_max_0": 128.0, "sent_len_max_1": 206.9325, "stdk": 0.049, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 8400 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3716, "doc_norm": 1.5741, "encoder_q-embeddings": 4191.1489, "encoder_q-layer.0": 2675.1055, "encoder_q-layer.1": 3005.1509, "encoder_q-layer.10": 7860.8677, "encoder_q-layer.11": 15601.458, "encoder_q-layer.2": 3473.8606, "encoder_q-layer.3": 3828.3696, "encoder_q-layer.4": 4087.3352, "encoder_q-layer.5": 4231.5176, "encoder_q-layer.6": 4758.1196, "encoder_q-layer.7": 5127.1396, "encoder_q-layer.8": 6407.5088, "encoder_q-layer.9": 6201.877, "epoch": 0.08, "inbatch_neg_score": 0.491, "inbatch_pos_score": 1.126, "learning_rate": 4.25e-05, "loss": 3.3716, "norm_diff": 0.0618, "norm_loss": 0.0, "num_token_doc": 66.7151, "num_token_overlap": 17.7956, "num_token_query": 52.2081, "num_token_union": 73.6316, "num_word_context": 202.07, "num_word_doc": 49.7777, "num_word_query": 39.814, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9174.7206, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4912, "query_norm": 1.5123, "queue_k_norm": 1.5761, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2081, "sent_len_1": 66.7151, "sent_len_max_0": 128.0, "sent_len_max_1": 210.345, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 8500 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.3348, "doc_norm": 1.5741, "encoder_q-embeddings": 4855.1279, "encoder_q-layer.0": 3237.9341, "encoder_q-layer.1": 3741.8975, "encoder_q-layer.10": 8142.417, "encoder_q-layer.11": 16866.7852, "encoder_q-layer.2": 4273.5049, "encoder_q-layer.3": 4518.0415, "encoder_q-layer.4": 4528.9053, "encoder_q-layer.5": 4552.2041, "encoder_q-layer.6": 4819.1387, "encoder_q-layer.7": 5276.4609, "encoder_q-layer.8": 6058.647, "encoder_q-layer.9": 5280.2202, "epoch": 0.08, "inbatch_neg_score": 0.4921, "inbatch_pos_score": 1.1475, "learning_rate": 4.3e-05, "loss": 3.3348, "norm_diff": 0.0357, "norm_loss": 0.0, "num_token_doc": 66.9904, "num_token_overlap": 17.905, "num_token_query": 52.3506, "num_token_union": 73.7962, "num_word_context": 202.1383, "num_word_doc": 50.0166, "num_word_query": 39.9349, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10090.2749, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4924, "query_norm": 1.5384, "queue_k_norm": 1.5715, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3506, "sent_len_1": 66.9904, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9412, "stdk": 0.049, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 8600 }, { "accuracy": 52.002, "active_queue_size": 16384.0, "cl_loss": 3.3388, "doc_norm": 1.5708, "encoder_q-embeddings": 4499.5518, "encoder_q-layer.0": 3028.124, "encoder_q-layer.1": 3402.4644, "encoder_q-layer.10": 8342.5342, "encoder_q-layer.11": 15314.7275, "encoder_q-layer.2": 3964.9814, "encoder_q-layer.3": 4253.9863, "encoder_q-layer.4": 4462.1636, "encoder_q-layer.5": 4597.5508, "encoder_q-layer.6": 5404.1553, "encoder_q-layer.7": 5725.6162, "encoder_q-layer.8": 7170.1309, "encoder_q-layer.9": 7054.813, "epoch": 0.08, "inbatch_neg_score": 0.4907, "inbatch_pos_score": 1.126, "learning_rate": 4.35e-05, "loss": 3.3388, "norm_diff": 0.0422, "norm_loss": 0.0, "num_token_doc": 66.6642, "num_token_overlap": 17.7724, "num_token_query": 52.2273, "num_token_union": 73.6124, "num_word_context": 202.2748, "num_word_doc": 49.7097, "num_word_query": 39.8417, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9610.8992, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4902, "query_norm": 1.5285, "queue_k_norm": 1.568, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2273, "sent_len_1": 66.6642, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0163, "stdk": 0.0489, "stdq": 0.0454, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 8700 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.3313, "doc_norm": 1.5672, "encoder_q-embeddings": 5147.1265, "encoder_q-layer.0": 3537.8374, "encoder_q-layer.1": 3807.5757, "encoder_q-layer.10": 9824.3105, "encoder_q-layer.11": 19308.9121, "encoder_q-layer.2": 4504.0376, "encoder_q-layer.3": 4612.6201, "encoder_q-layer.4": 4721.7407, "encoder_q-layer.5": 4633.6196, "encoder_q-layer.6": 5010.6855, "encoder_q-layer.7": 5303.5977, "encoder_q-layer.8": 6463.2427, "encoder_q-layer.9": 6507.6611, "epoch": 0.09, "inbatch_neg_score": 0.4912, "inbatch_pos_score": 1.1191, "learning_rate": 4.4000000000000006e-05, "loss": 3.3313, "norm_diff": 0.0694, "norm_loss": 0.0, "num_token_doc": 66.8089, "num_token_overlap": 17.8197, "num_token_query": 52.2297, "num_token_union": 73.6855, "num_word_context": 202.2917, "num_word_doc": 49.8427, "num_word_query": 39.848, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11224.8485, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4915, "query_norm": 1.4978, "queue_k_norm": 1.5671, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2297, "sent_len_1": 66.8089, "sent_len_max_0": 128.0, "sent_len_max_1": 209.21, "stdk": 0.0489, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 8800 }, { "accuracy": 52.7832, "active_queue_size": 16384.0, "cl_loss": 3.3248, "doc_norm": 1.5626, "encoder_q-embeddings": 5674.0703, "encoder_q-layer.0": 4177.5005, "encoder_q-layer.1": 4327.7876, "encoder_q-layer.10": 8057.874, "encoder_q-layer.11": 15605.8408, "encoder_q-layer.2": 4828.2471, "encoder_q-layer.3": 4662.2939, "encoder_q-layer.4": 4902.2583, "encoder_q-layer.5": 4707.8594, "encoder_q-layer.6": 5243.7993, "encoder_q-layer.7": 5696.1655, "encoder_q-layer.8": 6844.5518, "encoder_q-layer.9": 6063.457, "epoch": 0.09, "inbatch_neg_score": 0.4941, "inbatch_pos_score": 1.1387, "learning_rate": 4.4500000000000004e-05, "loss": 3.3248, "norm_diff": 0.0366, "norm_loss": 0.0, "num_token_doc": 66.9311, "num_token_overlap": 17.7717, "num_token_query": 52.1966, "num_token_union": 73.759, "num_word_context": 202.3016, "num_word_doc": 49.9294, "num_word_query": 39.7982, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10298.8579, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4944, "query_norm": 1.526, "queue_k_norm": 1.561, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1966, "sent_len_1": 66.9311, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4212, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 8900 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2932, "doc_norm": 1.5557, "encoder_q-embeddings": 4047.8474, "encoder_q-layer.0": 2776.6963, "encoder_q-layer.1": 3043.9229, "encoder_q-layer.10": 7802.2725, "encoder_q-layer.11": 14670.0107, "encoder_q-layer.2": 3518.7126, "encoder_q-layer.3": 3809.6299, "encoder_q-layer.4": 4162.9932, "encoder_q-layer.5": 4290.6982, "encoder_q-layer.6": 4962.0649, "encoder_q-layer.7": 5526.4268, "encoder_q-layer.8": 6396.4653, "encoder_q-layer.9": 6024.2217, "epoch": 0.09, "inbatch_neg_score": 0.4924, "inbatch_pos_score": 1.124, "learning_rate": 4.5e-05, "loss": 3.2932, "norm_diff": 0.0371, "norm_loss": 0.0, "num_token_doc": 66.8647, "num_token_overlap": 17.8505, "num_token_query": 52.2911, "num_token_union": 73.714, "num_word_context": 202.3975, "num_word_doc": 49.8823, "num_word_query": 39.9105, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9106.4494, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4922, "query_norm": 1.5186, "queue_k_norm": 1.5617, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2911, "sent_len_1": 66.8647, "sent_len_max_0": 128.0, "sent_len_max_1": 209.36, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 9000 }, { "accuracy": 52.9785, "active_queue_size": 16384.0, "cl_loss": 3.3014, "doc_norm": 1.5599, "encoder_q-embeddings": 4653.127, "encoder_q-layer.0": 3009.7871, "encoder_q-layer.1": 3379.1997, "encoder_q-layer.10": 7837.0635, "encoder_q-layer.11": 16503.1582, "encoder_q-layer.2": 3826.5874, "encoder_q-layer.3": 4166.0791, "encoder_q-layer.4": 4338.126, "encoder_q-layer.5": 4223.8447, "encoder_q-layer.6": 4837.7017, "encoder_q-layer.7": 5575.0513, "encoder_q-layer.8": 6808.0757, "encoder_q-layer.9": 6142.8379, "epoch": 0.09, "inbatch_neg_score": 0.482, "inbatch_pos_score": 1.1113, "learning_rate": 4.55e-05, "loss": 3.3014, "norm_diff": 0.0661, "norm_loss": 0.0, "num_token_doc": 66.7025, "num_token_overlap": 17.7928, "num_token_query": 52.2529, "num_token_union": 73.6901, "num_word_context": 202.2961, "num_word_doc": 49.7973, "num_word_query": 39.8588, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9794.2388, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4827, "query_norm": 1.4938, "queue_k_norm": 1.5558, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2529, "sent_len_1": 66.7025, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5475, "stdk": 0.0489, "stdq": 0.0442, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 9100 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.2945, "doc_norm": 1.5576, "encoder_q-embeddings": 3869.6597, "encoder_q-layer.0": 2464.7134, "encoder_q-layer.1": 2682.0203, "encoder_q-layer.10": 6643.8149, "encoder_q-layer.11": 13378.4658, "encoder_q-layer.2": 3018.1514, "encoder_q-layer.3": 3356.5759, "encoder_q-layer.4": 3480.8494, "encoder_q-layer.5": 3762.2397, "encoder_q-layer.6": 4119.7051, "encoder_q-layer.7": 4455.2998, "encoder_q-layer.8": 5633.9468, "encoder_q-layer.9": 5457.21, "epoch": 0.09, "inbatch_neg_score": 0.4842, "inbatch_pos_score": 1.167, "learning_rate": 4.600000000000001e-05, "loss": 3.2945, "norm_diff": 0.0139, "norm_loss": 0.0, "num_token_doc": 66.7989, "num_token_overlap": 17.779, "num_token_query": 52.3391, "num_token_union": 73.81, "num_word_context": 202.5399, "num_word_doc": 49.8596, "num_word_query": 39.9135, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8209.8563, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4846, "query_norm": 1.5437, "queue_k_norm": 1.5534, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3391, "sent_len_1": 66.7989, "sent_len_max_0": 128.0, "sent_len_max_1": 205.915, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 9200 }, { "accuracy": 54.9316, "active_queue_size": 16384.0, "cl_loss": 3.286, "doc_norm": 1.5531, "encoder_q-embeddings": 4140.1597, "encoder_q-layer.0": 2645.0662, "encoder_q-layer.1": 2958.0747, "encoder_q-layer.10": 6335.0996, "encoder_q-layer.11": 13702.3906, "encoder_q-layer.2": 3284.165, "encoder_q-layer.3": 3686.196, "encoder_q-layer.4": 3882.8528, "encoder_q-layer.5": 3875.2351, "encoder_q-layer.6": 4445.666, "encoder_q-layer.7": 4714.957, "encoder_q-layer.8": 5482.6538, "encoder_q-layer.9": 5131.8452, "epoch": 0.09, "inbatch_neg_score": 0.4972, "inbatch_pos_score": 1.1602, "learning_rate": 4.6500000000000005e-05, "loss": 3.286, "norm_diff": 0.0319, "norm_loss": 0.0, "num_token_doc": 66.8446, "num_token_overlap": 17.812, "num_token_query": 52.2535, "num_token_union": 73.7348, "num_word_context": 202.2576, "num_word_doc": 49.8885, "num_word_query": 39.8673, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8428.4457, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4966, "query_norm": 1.5212, "queue_k_norm": 1.553, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2535, "sent_len_1": 66.8446, "sent_len_max_0": 128.0, "sent_len_max_1": 209.22, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 9300 }, { "accuracy": 54.834, "active_queue_size": 16384.0, "cl_loss": 3.2784, "doc_norm": 1.546, "encoder_q-embeddings": 3893.9509, "encoder_q-layer.0": 2507.5793, "encoder_q-layer.1": 2777.8286, "encoder_q-layer.10": 5923.5215, "encoder_q-layer.11": 12844.4883, "encoder_q-layer.2": 3252.457, "encoder_q-layer.3": 3462.8132, "encoder_q-layer.4": 3708.8767, "encoder_q-layer.5": 3821.6321, "encoder_q-layer.6": 4080.5774, "encoder_q-layer.7": 4424.0718, "encoder_q-layer.8": 5398.7749, "encoder_q-layer.9": 4907.4175, "epoch": 0.09, "inbatch_neg_score": 0.4764, "inbatch_pos_score": 1.1416, "learning_rate": 4.7e-05, "loss": 3.2784, "norm_diff": 0.0362, "norm_loss": 0.0, "num_token_doc": 66.679, "num_token_overlap": 17.7734, "num_token_query": 52.2507, "num_token_union": 73.651, "num_word_context": 202.2855, "num_word_doc": 49.7547, "num_word_query": 39.8488, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8015.2752, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4758, "query_norm": 1.5098, "queue_k_norm": 1.5478, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2507, "sent_len_1": 66.679, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2113, "stdk": 0.0486, "stdq": 0.0457, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 9400 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.2653, "doc_norm": 1.5461, "encoder_q-embeddings": 3870.2952, "encoder_q-layer.0": 2512.5027, "encoder_q-layer.1": 2713.5386, "encoder_q-layer.10": 6291.6245, "encoder_q-layer.11": 12500.8027, "encoder_q-layer.2": 3060.5928, "encoder_q-layer.3": 3231.1216, "encoder_q-layer.4": 3680.7893, "encoder_q-layer.5": 3911.7761, "encoder_q-layer.6": 4919.9932, "encoder_q-layer.7": 4875.5977, "encoder_q-layer.8": 5749.7051, "encoder_q-layer.9": 5315.9009, "epoch": 0.09, "inbatch_neg_score": 0.4812, "inbatch_pos_score": 1.1514, "learning_rate": 4.75e-05, "loss": 3.2653, "norm_diff": 0.005, "norm_loss": 0.0, "num_token_doc": 66.8362, "num_token_overlap": 17.8077, "num_token_query": 52.2457, "num_token_union": 73.7641, "num_word_context": 202.2146, "num_word_doc": 49.8653, "num_word_query": 39.8485, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7869.3518, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.481, "query_norm": 1.5427, "queue_k_norm": 1.5453, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2457, "sent_len_1": 66.8362, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0712, "stdk": 0.0487, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 9500 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.2673, "doc_norm": 1.5428, "encoder_q-embeddings": 3713.7119, "encoder_q-layer.0": 2361.7395, "encoder_q-layer.1": 2546.2588, "encoder_q-layer.10": 5938.0044, "encoder_q-layer.11": 12481.75, "encoder_q-layer.2": 2922.1033, "encoder_q-layer.3": 3249.9673, "encoder_q-layer.4": 3544.2917, "encoder_q-layer.5": 3526.8987, "encoder_q-layer.6": 4014.2603, "encoder_q-layer.7": 4279.3721, "encoder_q-layer.8": 5246.0874, "encoder_q-layer.9": 4896.5137, "epoch": 0.09, "inbatch_neg_score": 0.473, "inbatch_pos_score": 1.1201, "learning_rate": 4.8e-05, "loss": 3.2673, "norm_diff": 0.0317, "norm_loss": 0.0, "num_token_doc": 66.8614, "num_token_overlap": 17.786, "num_token_query": 52.2552, "num_token_union": 73.7887, "num_word_context": 202.3711, "num_word_doc": 49.9055, "num_word_query": 39.8746, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7652.3433, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4731, "query_norm": 1.5111, "queue_k_norm": 1.543, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2552, "sent_len_1": 66.8614, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1125, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 9600 }, { "accuracy": 55.4199, "active_queue_size": 16384.0, "cl_loss": 3.2551, "doc_norm": 1.5388, "encoder_q-embeddings": 7724.4307, "encoder_q-layer.0": 5798.7349, "encoder_q-layer.1": 6940.4751, "encoder_q-layer.10": 6481.3599, "encoder_q-layer.11": 12488.2607, "encoder_q-layer.2": 8283.7422, "encoder_q-layer.3": 8049.4185, "encoder_q-layer.4": 7450.3135, "encoder_q-layer.5": 6740.9302, "encoder_q-layer.6": 6038.6987, "encoder_q-layer.7": 5036.415, "encoder_q-layer.8": 5166.4131, "encoder_q-layer.9": 4644.9702, "epoch": 0.09, "inbatch_neg_score": 0.4658, "inbatch_pos_score": 1.1377, "learning_rate": 4.85e-05, "loss": 3.2551, "norm_diff": 0.0196, "norm_loss": 0.0, "num_token_doc": 66.6609, "num_token_overlap": 17.7925, "num_token_query": 52.2538, "num_token_union": 73.6688, "num_word_context": 202.1316, "num_word_doc": 49.7745, "num_word_query": 39.8557, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10998.0695, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4663, "query_norm": 1.5192, "queue_k_norm": 1.5395, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2538, "sent_len_1": 66.6609, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5, "stdk": 0.0486, "stdq": 0.0458, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 9700 }, { "accuracy": 56.0059, "active_queue_size": 16384.0, "cl_loss": 3.2415, "doc_norm": 1.5374, "encoder_q-embeddings": 5290.8843, "encoder_q-layer.0": 3426.771, "encoder_q-layer.1": 3692.9055, "encoder_q-layer.10": 6360.1763, "encoder_q-layer.11": 12994.9717, "encoder_q-layer.2": 4144.4541, "encoder_q-layer.3": 4278.5229, "encoder_q-layer.4": 4580.1929, "encoder_q-layer.5": 4503.9351, "encoder_q-layer.6": 4764.2495, "encoder_q-layer.7": 5300.2295, "encoder_q-layer.8": 6052.4985, "encoder_q-layer.9": 5148.9478, "epoch": 0.1, "inbatch_neg_score": 0.4735, "inbatch_pos_score": 1.1523, "learning_rate": 4.9e-05, "loss": 3.2415, "norm_diff": 0.0072, "norm_loss": 0.0, "num_token_doc": 66.7966, "num_token_overlap": 17.832, "num_token_query": 52.3474, "num_token_union": 73.7453, "num_word_context": 202.3178, "num_word_doc": 49.8288, "num_word_query": 39.915, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8795.0762, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4736, "query_norm": 1.5365, "queue_k_norm": 1.5379, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3474, "sent_len_1": 66.7966, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7937, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 9800 }, { "accuracy": 55.0293, "active_queue_size": 16384.0, "cl_loss": 3.2577, "doc_norm": 1.536, "encoder_q-embeddings": 3965.0312, "encoder_q-layer.0": 2720.4705, "encoder_q-layer.1": 2899.3098, "encoder_q-layer.10": 6573.1543, "encoder_q-layer.11": 13619.1641, "encoder_q-layer.2": 3301.9797, "encoder_q-layer.3": 3603.7283, "encoder_q-layer.4": 3915.8186, "encoder_q-layer.5": 3987.7395, "encoder_q-layer.6": 4464.6328, "encoder_q-layer.7": 4853.52, "encoder_q-layer.8": 5757.2568, "encoder_q-layer.9": 4971.25, "epoch": 0.1, "inbatch_neg_score": 0.4876, "inbatch_pos_score": 1.1455, "learning_rate": 4.9500000000000004e-05, "loss": 3.2577, "norm_diff": 0.0099, "norm_loss": 0.0, "num_token_doc": 66.5552, "num_token_overlap": 17.7669, "num_token_query": 52.2705, "num_token_union": 73.6306, "num_word_context": 202.1532, "num_word_doc": 49.6725, "num_word_query": 39.8703, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8333.8754, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4871, "query_norm": 1.5263, "queue_k_norm": 1.5373, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2705, "sent_len_1": 66.5552, "sent_len_max_0": 128.0, "sent_len_max_1": 209.385, "stdk": 0.0486, "stdq": 0.0454, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 9900 }, { "accuracy": 55.127, "active_queue_size": 16384.0, "cl_loss": 3.2372, "doc_norm": 1.5317, "encoder_q-embeddings": 4066.665, "encoder_q-layer.0": 2531.5667, "encoder_q-layer.1": 2764.9553, "encoder_q-layer.10": 6023.729, "encoder_q-layer.11": 13082.6074, "encoder_q-layer.2": 3219.3904, "encoder_q-layer.3": 3436.3438, "encoder_q-layer.4": 3732.7102, "encoder_q-layer.5": 3792.7502, "encoder_q-layer.6": 4317.8643, "encoder_q-layer.7": 4409.5298, "encoder_q-layer.8": 5125.1157, "encoder_q-layer.9": 4647.499, "epoch": 0.1, "inbatch_neg_score": 0.4712, "inbatch_pos_score": 1.1279, "learning_rate": 5e-05, "loss": 3.2372, "norm_diff": 0.0302, "norm_loss": 0.0, "num_token_doc": 66.7345, "num_token_overlap": 17.7997, "num_token_query": 52.3007, "num_token_union": 73.7359, "num_word_context": 202.532, "num_word_doc": 49.8078, "num_word_query": 39.8835, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8111.3208, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4707, "query_norm": 1.5015, "queue_k_norm": 1.5349, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3007, "sent_len_1": 66.7345, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3125, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 10000 }, { "dev_runtime": 26.2352, "dev_samples_per_second": 1.22, "dev_steps_per_second": 0.038, "epoch": 0.1, "step": 10000, "test_accuracy": 92.02880859375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4619826078414917, "test_doc_norm": 1.4461114406585693, "test_inbatch_neg_score": 0.7083711624145508, "test_inbatch_pos_score": 1.5512254238128662, "test_loss": 0.4619826078414917, "test_loss_align": 1.1766282320022583, "test_loss_unif": 3.569481611251831, "test_loss_unif_q@queue": 3.56948184967041, "test_norm_diff": 0.02744072675704956, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.4624329209327698, "test_query_norm": 1.4735522270202637, "test_queue_k_norm": 1.5347042083740234, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.040990859270095825, "test_stdq": 0.04000476375222206, "test_stdqueue_k": 0.04863972216844559, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.2352, "dev_samples_per_second": 1.22, "dev_steps_per_second": 0.038, "epoch": 0.1, "eval_beir-arguana_ndcg@10": 0.37537, "eval_beir-arguana_recall@10": 0.63371, "eval_beir-arguana_recall@100": 0.93385, "eval_beir-arguana_recall@20": 0.76031, "eval_beir-avg_ndcg@10": 0.31246633333333335, "eval_beir-avg_recall@10": 0.38181783333333336, "eval_beir-avg_recall@100": 0.5680568333333335, "eval_beir-avg_recall@20": 0.44207175000000004, "eval_beir-cqadupstack_ndcg@10": 0.22453333333333336, "eval_beir-cqadupstack_recall@10": 0.3083483333333333, "eval_beir-cqadupstack_recall@100": 0.5327883333333333, "eval_beir-cqadupstack_recall@20": 0.3701275000000001, "eval_beir-fiqa_ndcg@10": 0.18573, "eval_beir-fiqa_recall@10": 0.24068, "eval_beir-fiqa_recall@100": 0.50827, "eval_beir-fiqa_recall@20": 0.3229, "eval_beir-nfcorpus_ndcg@10": 0.23252, "eval_beir-nfcorpus_recall@10": 0.10981, "eval_beir-nfcorpus_recall@100": 0.22973, "eval_beir-nfcorpus_recall@20": 0.13932, "eval_beir-nq_ndcg@10": 0.19993, "eval_beir-nq_recall@10": 0.3421, "eval_beir-nq_recall@100": 0.68902, "eval_beir-nq_recall@20": 0.45162, "eval_beir-quora_ndcg@10": 0.62799, "eval_beir-quora_recall@10": 0.75657, "eval_beir-quora_recall@100": 0.90869, "eval_beir-quora_recall@20": 0.81208, "eval_beir-scidocs_ndcg@10": 0.12779, "eval_beir-scidocs_recall@10": 0.13453, "eval_beir-scidocs_recall@100": 0.323, "eval_beir-scidocs_recall@20": 0.18382, "eval_beir-scifact_ndcg@10": 0.59309, "eval_beir-scifact_recall@10": 0.73706, "eval_beir-scifact_recall@100": 0.88922, "eval_beir-scifact_recall@20": 0.80011, "eval_beir-trec-covid_ndcg@10": 0.42538, "eval_beir-trec-covid_recall@10": 0.454, "eval_beir-trec-covid_recall@100": 0.3134, "eval_beir-trec-covid_recall@20": 0.405, "eval_beir-webis-touche2020_ndcg@10": 0.13233, "eval_beir-webis-touche2020_recall@10": 0.10137, "eval_beir-webis-touche2020_recall@100": 0.3526, "eval_beir-webis-touche2020_recall@20": 0.17543, "eval_senteval-avg_sts": 0.6995125183256627, "eval_senteval-sickr_spearman": 0.6436366921778064, "eval_senteval-stsb_spearman": 0.755388344473519, "step": 10000, "test_accuracy": 92.02880859375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4619826078414917, "test_doc_norm": 1.4461114406585693, "test_inbatch_neg_score": 0.7083711624145508, "test_inbatch_pos_score": 1.5512254238128662, "test_loss": 0.4619826078414917, "test_loss_align": 1.1766282320022583, "test_loss_unif": 3.569481611251831, "test_loss_unif_q@queue": 3.56948184967041, "test_norm_diff": 0.02744072675704956, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.4624329209327698, "test_query_norm": 1.4735522270202637, "test_queue_k_norm": 1.5347042083740234, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.040990859270095825, "test_stdq": 0.04000476375222206, "test_stdqueue_k": 0.04863972216844559, "test_stdqueue_q": 0.0 }, { "accuracy": 55.5176, "active_queue_size": 16384.0, "cl_loss": 3.2385, "doc_norm": 1.5343, "encoder_q-embeddings": 8470.5273, "encoder_q-layer.0": 5490.019, "encoder_q-layer.1": 5911.4995, "encoder_q-layer.10": 12871.6562, "encoder_q-layer.11": 28118.8301, "encoder_q-layer.2": 6954.519, "encoder_q-layer.3": 7697.2964, "encoder_q-layer.4": 8407.5605, "encoder_q-layer.5": 8632.2305, "encoder_q-layer.6": 9359.3359, "encoder_q-layer.7": 9962.6348, "encoder_q-layer.8": 11720.1309, "encoder_q-layer.9": 10250.666, "epoch": 0.1, "inbatch_neg_score": 0.482, "inbatch_pos_score": 1.1436, "learning_rate": 4.994444444444445e-05, "loss": 3.2385, "norm_diff": 0.0229, "norm_loss": 0.0, "num_token_doc": 66.7887, "num_token_overlap": 17.7719, "num_token_query": 52.2827, "num_token_union": 73.747, "num_word_context": 202.5575, "num_word_doc": 49.8304, "num_word_query": 39.8756, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17786.4353, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4807, "query_norm": 1.5114, "queue_k_norm": 1.5338, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2827, "sent_len_1": 66.7887, "sent_len_max_0": 128.0, "sent_len_max_1": 210.84, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 10100 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.2085, "doc_norm": 1.5304, "encoder_q-embeddings": 15437.1025, "encoder_q-layer.0": 11995.5469, "encoder_q-layer.1": 13397.4863, "encoder_q-layer.10": 10364.4297, "encoder_q-layer.11": 22633.2285, "encoder_q-layer.2": 16519.2852, "encoder_q-layer.3": 17921.5723, "encoder_q-layer.4": 16960.8301, "encoder_q-layer.5": 17457.0371, "encoder_q-layer.6": 16842.6738, "encoder_q-layer.7": 13036.3594, "encoder_q-layer.8": 9877.0605, "encoder_q-layer.9": 8476.8281, "epoch": 0.1, "inbatch_neg_score": 0.4978, "inbatch_pos_score": 1.165, "learning_rate": 4.9888888888888894e-05, "loss": 3.2085, "norm_diff": 0.0075, "norm_loss": 0.0, "num_token_doc": 66.8726, "num_token_overlap": 17.8272, "num_token_query": 52.3381, "num_token_union": 73.764, "num_word_context": 202.4125, "num_word_doc": 49.8915, "num_word_query": 39.9282, "postclip_grad_norm": 1.0, "preclip_grad_norm": 22760.1301, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4966, "query_norm": 1.5274, "queue_k_norm": 1.531, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3381, "sent_len_1": 66.8726, "sent_len_max_0": 128.0, "sent_len_max_1": 207.695, "stdk": 0.0485, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 10200 }, { "accuracy": 53.7598, "active_queue_size": 16384.0, "cl_loss": 3.2204, "doc_norm": 1.5292, "encoder_q-embeddings": 8644.0732, "encoder_q-layer.0": 5376.8887, "encoder_q-layer.1": 5923.0962, "encoder_q-layer.10": 11562.1777, "encoder_q-layer.11": 27141.2578, "encoder_q-layer.2": 6875.666, "encoder_q-layer.3": 7705.6274, "encoder_q-layer.4": 8681.9297, "encoder_q-layer.5": 8682.8447, "encoder_q-layer.6": 9569.0498, "encoder_q-layer.7": 10445.0986, "encoder_q-layer.8": 11218.917, "encoder_q-layer.9": 9758.1357, "epoch": 0.1, "inbatch_neg_score": 0.4922, "inbatch_pos_score": 1.1289, "learning_rate": 4.9833333333333336e-05, "loss": 3.2204, "norm_diff": 0.0386, "norm_loss": 0.0, "num_token_doc": 66.6409, "num_token_overlap": 17.7526, "num_token_query": 52.1155, "num_token_union": 73.5784, "num_word_context": 201.8637, "num_word_doc": 49.7347, "num_word_query": 39.7388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16829.6107, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4915, "query_norm": 1.4906, "queue_k_norm": 1.5324, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1155, "sent_len_1": 66.6409, "sent_len_max_0": 128.0, "sent_len_max_1": 206.665, "stdk": 0.0484, "stdq": 0.0442, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 10300 }, { "accuracy": 52.6855, "active_queue_size": 16384.0, "cl_loss": 3.2094, "doc_norm": 1.5284, "encoder_q-embeddings": 8269.5605, "encoder_q-layer.0": 5545.7461, "encoder_q-layer.1": 6050.1499, "encoder_q-layer.10": 13476.2646, "encoder_q-layer.11": 29706.7949, "encoder_q-layer.2": 7052.3135, "encoder_q-layer.3": 7627.2329, "encoder_q-layer.4": 8480.082, "encoder_q-layer.5": 8475.3496, "encoder_q-layer.6": 9363.0127, "encoder_q-layer.7": 9742.6201, "encoder_q-layer.8": 11832.1055, "encoder_q-layer.9": 10446.4414, "epoch": 0.1, "inbatch_neg_score": 0.4781, "inbatch_pos_score": 1.1396, "learning_rate": 4.977777777777778e-05, "loss": 3.2094, "norm_diff": 0.0091, "norm_loss": 0.0, "num_token_doc": 66.8288, "num_token_overlap": 17.8091, "num_token_query": 52.2753, "num_token_union": 73.7725, "num_word_context": 202.4213, "num_word_doc": 49.887, "num_word_query": 39.8613, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17364.471, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4773, "query_norm": 1.5257, "queue_k_norm": 1.531, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2753, "sent_len_1": 66.8288, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1262, "stdk": 0.0484, "stdq": 0.0459, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 10400 }, { "accuracy": 56.5918, "active_queue_size": 16384.0, "cl_loss": 3.1873, "doc_norm": 1.5283, "encoder_q-embeddings": 9520.8887, "encoder_q-layer.0": 6438.916, "encoder_q-layer.1": 7159.6338, "encoder_q-layer.10": 11955.4521, "encoder_q-layer.11": 25433.1465, "encoder_q-layer.2": 8234.4268, "encoder_q-layer.3": 8943.3887, "encoder_q-layer.4": 9235.6699, "encoder_q-layer.5": 9482.1113, "encoder_q-layer.6": 10522.6299, "encoder_q-layer.7": 9705.4873, "encoder_q-layer.8": 10710.7588, "encoder_q-layer.9": 9395.2061, "epoch": 0.1, "inbatch_neg_score": 0.4688, "inbatch_pos_score": 1.1455, "learning_rate": 4.972222222222223e-05, "loss": 3.1873, "norm_diff": 0.028, "norm_loss": 0.0, "num_token_doc": 66.916, "num_token_overlap": 17.8518, "num_token_query": 52.3059, "num_token_union": 73.7948, "num_word_context": 202.2587, "num_word_doc": 49.9419, "num_word_query": 39.8845, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17321.5833, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4692, "query_norm": 1.5002, "queue_k_norm": 1.5308, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3059, "sent_len_1": 66.916, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9175, "stdk": 0.0484, "stdq": 0.045, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 10500 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.1972, "doc_norm": 1.5287, "encoder_q-embeddings": 8398.6084, "encoder_q-layer.0": 5642.4243, "encoder_q-layer.1": 6198.1152, "encoder_q-layer.10": 10895.0879, "encoder_q-layer.11": 23814.0156, "encoder_q-layer.2": 6877.6807, "encoder_q-layer.3": 7345.4053, "encoder_q-layer.4": 7979.2344, "encoder_q-layer.5": 8148.5913, "encoder_q-layer.6": 8842.6123, "encoder_q-layer.7": 8917.3184, "encoder_q-layer.8": 10531.8496, "encoder_q-layer.9": 9299.917, "epoch": 0.1, "inbatch_neg_score": 0.4764, "inbatch_pos_score": 1.1582, "learning_rate": 4.966666666666667e-05, "loss": 3.1972, "norm_diff": 0.0165, "norm_loss": 0.0, "num_token_doc": 66.9634, "num_token_overlap": 17.8153, "num_token_query": 52.3073, "num_token_union": 73.8281, "num_word_context": 202.5545, "num_word_doc": 49.9656, "num_word_query": 39.9109, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15795.3986, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4751, "query_norm": 1.5122, "queue_k_norm": 1.5301, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3073, "sent_len_1": 66.9634, "sent_len_max_0": 128.0, "sent_len_max_1": 211.0863, "stdk": 0.0484, "stdq": 0.0456, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 10600 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1918, "doc_norm": 1.5302, "encoder_q-embeddings": 8856.1777, "encoder_q-layer.0": 5542.5254, "encoder_q-layer.1": 6113.9985, "encoder_q-layer.10": 10984.4619, "encoder_q-layer.11": 24576.9648, "encoder_q-layer.2": 7011.0415, "encoder_q-layer.3": 7185.2178, "encoder_q-layer.4": 7479.4814, "encoder_q-layer.5": 7401.2896, "encoder_q-layer.6": 8027.1562, "encoder_q-layer.7": 8021.5415, "encoder_q-layer.8": 9423.3008, "encoder_q-layer.9": 8691.0742, "epoch": 0.1, "inbatch_neg_score": 0.4762, "inbatch_pos_score": 1.1377, "learning_rate": 4.961111111111111e-05, "loss": 3.1918, "norm_diff": 0.0428, "norm_loss": 0.0, "num_token_doc": 66.8344, "num_token_overlap": 17.777, "num_token_query": 52.2408, "num_token_union": 73.7408, "num_word_context": 202.2788, "num_word_doc": 49.8818, "num_word_query": 39.8406, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15287.6303, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4758, "query_norm": 1.4875, "queue_k_norm": 1.5307, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2408, "sent_len_1": 66.8344, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0175, "stdk": 0.0485, "stdq": 0.0447, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 10700 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.1901, "doc_norm": 1.5334, "encoder_q-embeddings": 9370.0537, "encoder_q-layer.0": 6074.3223, "encoder_q-layer.1": 7213.0273, "encoder_q-layer.10": 11191.6953, "encoder_q-layer.11": 27096.2031, "encoder_q-layer.2": 8459.8955, "encoder_q-layer.3": 9414.4971, "encoder_q-layer.4": 10813.7617, "encoder_q-layer.5": 9913.9072, "encoder_q-layer.6": 9967.9844, "encoder_q-layer.7": 9923.6943, "encoder_q-layer.8": 11694.4053, "encoder_q-layer.9": 10473.168, "epoch": 0.11, "inbatch_neg_score": 0.4705, "inbatch_pos_score": 1.1357, "learning_rate": 4.955555555555556e-05, "loss": 3.1901, "norm_diff": 0.0535, "norm_loss": 0.0, "num_token_doc": 66.7398, "num_token_overlap": 17.8203, "num_token_query": 52.3391, "num_token_union": 73.7161, "num_word_context": 202.2966, "num_word_doc": 49.8184, "num_word_query": 39.9345, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17350.6304, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4712, "query_norm": 1.4799, "queue_k_norm": 1.5284, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3391, "sent_len_1": 66.7398, "sent_len_max_0": 128.0, "sent_len_max_1": 207.555, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 10800 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.1735, "doc_norm": 1.5323, "encoder_q-embeddings": 9616.8682, "encoder_q-layer.0": 6677.2119, "encoder_q-layer.1": 8348.751, "encoder_q-layer.10": 10766.1113, "encoder_q-layer.11": 25198.6777, "encoder_q-layer.2": 9445.7578, "encoder_q-layer.3": 10584.2383, "encoder_q-layer.4": 10242.2363, "encoder_q-layer.5": 10374.0078, "encoder_q-layer.6": 10642.2998, "encoder_q-layer.7": 11304.9727, "encoder_q-layer.8": 11811.2891, "encoder_q-layer.9": 9518.6289, "epoch": 0.11, "inbatch_neg_score": 0.4783, "inbatch_pos_score": 1.1426, "learning_rate": 4.9500000000000004e-05, "loss": 3.1735, "norm_diff": 0.0447, "norm_loss": 0.0, "num_token_doc": 66.859, "num_token_overlap": 17.816, "num_token_query": 52.2495, "num_token_union": 73.7469, "num_word_context": 202.363, "num_word_doc": 49.9163, "num_word_query": 39.8723, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17362.0332, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4773, "query_norm": 1.4876, "queue_k_norm": 1.5293, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2495, "sent_len_1": 66.859, "sent_len_max_0": 128.0, "sent_len_max_1": 205.82, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 10900 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.1663, "doc_norm": 1.5287, "encoder_q-embeddings": 15831.6494, "encoder_q-layer.0": 10952.8643, "encoder_q-layer.1": 12128.1016, "encoder_q-layer.10": 10585.2656, "encoder_q-layer.11": 24324.4004, "encoder_q-layer.2": 14930.709, "encoder_q-layer.3": 16268.5264, "encoder_q-layer.4": 16066.8125, "encoder_q-layer.5": 15304.8008, "encoder_q-layer.6": 15659.2383, "encoder_q-layer.7": 12642.6758, "encoder_q-layer.8": 11797.5303, "encoder_q-layer.9": 9241.4463, "epoch": 0.11, "inbatch_neg_score": 0.4857, "inbatch_pos_score": 1.1318, "learning_rate": 4.9444444444444446e-05, "loss": 3.1663, "norm_diff": 0.0527, "norm_loss": 0.0, "num_token_doc": 66.867, "num_token_overlap": 17.8073, "num_token_query": 52.269, "num_token_union": 73.7777, "num_word_context": 202.4007, "num_word_doc": 49.8965, "num_word_query": 39.8676, "postclip_grad_norm": 1.0, "preclip_grad_norm": 21782.7937, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4854, "query_norm": 1.476, "queue_k_norm": 1.5283, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.269, "sent_len_1": 66.867, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4325, "stdk": 0.0485, "stdq": 0.0441, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 11000 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.1578, "doc_norm": 1.5288, "encoder_q-embeddings": 5245.0195, "encoder_q-layer.0": 3389.897, "encoder_q-layer.1": 3896.2329, "encoder_q-layer.10": 5666.1733, "encoder_q-layer.11": 11671.3154, "encoder_q-layer.2": 4600.2295, "encoder_q-layer.3": 5005.1846, "encoder_q-layer.4": 5403.165, "encoder_q-layer.5": 5541.7261, "encoder_q-layer.6": 5739.4951, "encoder_q-layer.7": 5129.2358, "encoder_q-layer.8": 5542.6909, "encoder_q-layer.9": 4878.8335, "epoch": 0.11, "inbatch_neg_score": 0.4666, "inbatch_pos_score": 1.1299, "learning_rate": 4.938888888888889e-05, "loss": 3.1578, "norm_diff": 0.045, "norm_loss": 0.0, "num_token_doc": 66.7316, "num_token_overlap": 17.8326, "num_token_query": 52.3544, "num_token_union": 73.7056, "num_word_context": 202.213, "num_word_doc": 49.7645, "num_word_query": 39.9445, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8626.9586, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4673, "query_norm": 1.4838, "queue_k_norm": 1.5284, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3544, "sent_len_1": 66.7316, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8137, "stdk": 0.0485, "stdq": 0.0447, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 11100 }, { "accuracy": 55.4199, "active_queue_size": 16384.0, "cl_loss": 3.1572, "doc_norm": 1.5294, "encoder_q-embeddings": 6365.7549, "encoder_q-layer.0": 4739.0581, "encoder_q-layer.1": 5321.228, "encoder_q-layer.10": 5221.3032, "encoder_q-layer.11": 10932.9902, "encoder_q-layer.2": 6316.3955, "encoder_q-layer.3": 6807.5537, "encoder_q-layer.4": 7164.3823, "encoder_q-layer.5": 7514.7104, "encoder_q-layer.6": 8032.2188, "encoder_q-layer.7": 7586.4531, "encoder_q-layer.8": 7247.811, "encoder_q-layer.9": 5271.1118, "epoch": 0.11, "inbatch_neg_score": 0.4602, "inbatch_pos_score": 1.1221, "learning_rate": 4.933333333333334e-05, "loss": 3.1572, "norm_diff": 0.0577, "norm_loss": 0.0, "num_token_doc": 66.809, "num_token_overlap": 17.8399, "num_token_query": 52.3297, "num_token_union": 73.7396, "num_word_context": 202.1031, "num_word_doc": 49.8471, "num_word_query": 39.9048, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10292.0115, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4609, "query_norm": 1.4717, "queue_k_norm": 1.5283, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3297, "sent_len_1": 66.809, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2225, "stdk": 0.0485, "stdq": 0.0447, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 11200 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.159, "doc_norm": 1.5256, "encoder_q-embeddings": 4956.9556, "encoder_q-layer.0": 3443.97, "encoder_q-layer.1": 3892.8638, "encoder_q-layer.10": 5295.043, "encoder_q-layer.11": 13928.624, "encoder_q-layer.2": 4544.0439, "encoder_q-layer.3": 5030.6982, "encoder_q-layer.4": 5220.188, "encoder_q-layer.5": 5599.0991, "encoder_q-layer.6": 5943.248, "encoder_q-layer.7": 6408.2705, "encoder_q-layer.8": 8612.0605, "encoder_q-layer.9": 6287.3154, "epoch": 0.11, "inbatch_neg_score": 0.4452, "inbatch_pos_score": 1.0957, "learning_rate": 4.927777777777778e-05, "loss": 3.159, "norm_diff": 0.0619, "norm_loss": 0.0, "num_token_doc": 66.6845, "num_token_overlap": 17.7669, "num_token_query": 52.1976, "num_token_union": 73.6574, "num_word_context": 202.0927, "num_word_doc": 49.7515, "num_word_query": 39.8046, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9492.839, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4446, "query_norm": 1.4637, "queue_k_norm": 1.5303, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1976, "sent_len_1": 66.6845, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6312, "stdk": 0.0484, "stdq": 0.0442, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 11300 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.1611, "doc_norm": 1.529, "encoder_q-embeddings": 3044.8621, "encoder_q-layer.0": 2151.5347, "encoder_q-layer.1": 2423.0085, "encoder_q-layer.10": 2646.8672, "encoder_q-layer.11": 6090.3403, "encoder_q-layer.2": 2877.8752, "encoder_q-layer.3": 3073.2515, "encoder_q-layer.4": 3271.7444, "encoder_q-layer.5": 3456.3152, "encoder_q-layer.6": 3430.105, "encoder_q-layer.7": 2896.2219, "encoder_q-layer.8": 3045.4187, "encoder_q-layer.9": 2415.5693, "epoch": 0.11, "inbatch_neg_score": 0.436, "inbatch_pos_score": 1.1182, "learning_rate": 4.922222222222222e-05, "loss": 3.1611, "norm_diff": 0.0494, "norm_loss": 0.0, "num_token_doc": 66.6523, "num_token_overlap": 17.7656, "num_token_query": 52.206, "num_token_union": 73.614, "num_word_context": 202.2453, "num_word_doc": 49.7342, "num_word_query": 39.8264, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4750.7947, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4368, "query_norm": 1.4796, "queue_k_norm": 1.5313, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.206, "sent_len_1": 66.6523, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3013, "stdk": 0.0485, "stdq": 0.0456, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 11400 }, { "accuracy": 53.3691, "active_queue_size": 16384.0, "cl_loss": 3.1445, "doc_norm": 1.5337, "encoder_q-embeddings": 2587.4407, "encoder_q-layer.0": 1752.8766, "encoder_q-layer.1": 2130.4663, "encoder_q-layer.10": 2727.9504, "encoder_q-layer.11": 5730.7422, "encoder_q-layer.2": 2421.5835, "encoder_q-layer.3": 2591.4065, "encoder_q-layer.4": 2723.4182, "encoder_q-layer.5": 2844.3115, "encoder_q-layer.6": 2886.7053, "encoder_q-layer.7": 2687.0303, "encoder_q-layer.8": 2686.9619, "encoder_q-layer.9": 2420.5352, "epoch": 0.11, "inbatch_neg_score": 0.4325, "inbatch_pos_score": 1.0996, "learning_rate": 4.9166666666666665e-05, "loss": 3.1445, "norm_diff": 0.0462, "norm_loss": 0.0, "num_token_doc": 66.7912, "num_token_overlap": 17.8374, "num_token_query": 52.3669, "num_token_union": 73.7455, "num_word_context": 202.5442, "num_word_doc": 49.8562, "num_word_query": 39.9251, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4254.8911, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4326, "query_norm": 1.4876, "queue_k_norm": 1.5294, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3669, "sent_len_1": 66.7912, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0425, "stdk": 0.0487, "stdq": 0.0455, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 11500 }, { "accuracy": 54.9316, "active_queue_size": 16384.0, "cl_loss": 3.1417, "doc_norm": 1.5285, "encoder_q-embeddings": 3186.9944, "encoder_q-layer.0": 2179.0491, "encoder_q-layer.1": 2593.3979, "encoder_q-layer.10": 2555.0513, "encoder_q-layer.11": 6040.0449, "encoder_q-layer.2": 2871.6821, "encoder_q-layer.3": 3168.5762, "encoder_q-layer.4": 3393.793, "encoder_q-layer.5": 3501.1311, "encoder_q-layer.6": 3101.4697, "encoder_q-layer.7": 2845.3088, "encoder_q-layer.8": 2824.4897, "encoder_q-layer.9": 2304.5923, "epoch": 0.11, "inbatch_neg_score": 0.4377, "inbatch_pos_score": 1.0889, "learning_rate": 4.9111111111111114e-05, "loss": 3.1417, "norm_diff": 0.063, "norm_loss": 0.0, "num_token_doc": 66.8522, "num_token_overlap": 17.859, "num_token_query": 52.3146, "num_token_union": 73.7419, "num_word_context": 202.3521, "num_word_doc": 49.8702, "num_word_query": 39.8967, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4674.8737, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4377, "query_norm": 1.4655, "queue_k_norm": 1.5294, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3146, "sent_len_1": 66.8522, "sent_len_max_0": 128.0, "sent_len_max_1": 210.805, "stdk": 0.0485, "stdq": 0.0442, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 11600 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.1386, "doc_norm": 1.5318, "encoder_q-embeddings": 5889.0513, "encoder_q-layer.0": 4487.6084, "encoder_q-layer.1": 5038.7729, "encoder_q-layer.10": 2552.2932, "encoder_q-layer.11": 5537.3276, "encoder_q-layer.2": 5991.6357, "encoder_q-layer.3": 6432.623, "encoder_q-layer.4": 6850.2095, "encoder_q-layer.5": 7061.2456, "encoder_q-layer.6": 6000.2656, "encoder_q-layer.7": 4876.1963, "encoder_q-layer.8": 4974.2847, "encoder_q-layer.9": 2507.9534, "epoch": 0.11, "inbatch_neg_score": 0.4189, "inbatch_pos_score": 1.0703, "learning_rate": 4.905555555555556e-05, "loss": 3.1386, "norm_diff": 0.0825, "norm_loss": 0.0, "num_token_doc": 66.8229, "num_token_overlap": 17.8657, "num_token_query": 52.3646, "num_token_union": 73.7369, "num_word_context": 202.5553, "num_word_doc": 49.869, "num_word_query": 39.9448, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7958.1763, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4172, "query_norm": 1.4493, "queue_k_norm": 1.5319, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3646, "sent_len_1": 66.8229, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8512, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 11700 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.141, "doc_norm": 1.5284, "encoder_q-embeddings": 3845.1978, "encoder_q-layer.0": 2876.7109, "encoder_q-layer.1": 3293.5925, "encoder_q-layer.10": 2504.4863, "encoder_q-layer.11": 5191.311, "encoder_q-layer.2": 3700.0571, "encoder_q-layer.3": 3805.4873, "encoder_q-layer.4": 4113.8726, "encoder_q-layer.5": 4025.7346, "encoder_q-layer.6": 3600.9175, "encoder_q-layer.7": 3019.2632, "encoder_q-layer.8": 3216.2244, "encoder_q-layer.9": 2368.835, "epoch": 0.12, "inbatch_neg_score": 0.4361, "inbatch_pos_score": 1.1025, "learning_rate": 4.9e-05, "loss": 3.141, "norm_diff": 0.0589, "norm_loss": 0.0, "num_token_doc": 66.8986, "num_token_overlap": 17.8144, "num_token_query": 52.3227, "num_token_union": 73.8267, "num_word_context": 202.5345, "num_word_doc": 49.9236, "num_word_query": 39.9102, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5273.155, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4351, "query_norm": 1.4695, "queue_k_norm": 1.531, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3227, "sent_len_1": 66.8986, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8837, "stdk": 0.0485, "stdq": 0.0447, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 11800 }, { "accuracy": 54.6387, "active_queue_size": 16384.0, "cl_loss": 3.1275, "doc_norm": 1.5325, "encoder_q-embeddings": 2921.3667, "encoder_q-layer.0": 2094.1204, "encoder_q-layer.1": 2392.2642, "encoder_q-layer.10": 2293.6104, "encoder_q-layer.11": 5278.3745, "encoder_q-layer.2": 2755.1228, "encoder_q-layer.3": 2895.8723, "encoder_q-layer.4": 2992.0339, "encoder_q-layer.5": 3031.9934, "encoder_q-layer.6": 2834.2361, "encoder_q-layer.7": 2689.8882, "encoder_q-layer.8": 2685.2578, "encoder_q-layer.9": 2212.9983, "epoch": 0.12, "inbatch_neg_score": 0.4182, "inbatch_pos_score": 1.0762, "learning_rate": 4.894444444444445e-05, "loss": 3.1275, "norm_diff": 0.0671, "norm_loss": 0.0, "num_token_doc": 66.8906, "num_token_overlap": 17.8206, "num_token_query": 52.2739, "num_token_union": 73.7537, "num_word_context": 202.2004, "num_word_doc": 49.9034, "num_word_query": 39.8681, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4259.4479, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.418, "query_norm": 1.4653, "queue_k_norm": 1.5278, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2739, "sent_len_1": 66.8906, "sent_len_max_0": 128.0, "sent_len_max_1": 208.595, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 11900 }, { "accuracy": 55.127, "active_queue_size": 16384.0, "cl_loss": 3.1172, "doc_norm": 1.5268, "encoder_q-embeddings": 3416.394, "encoder_q-layer.0": 2768.3765, "encoder_q-layer.1": 3101.2859, "encoder_q-layer.10": 1397.6925, "encoder_q-layer.11": 2851.5286, "encoder_q-layer.2": 3440.7361, "encoder_q-layer.3": 3391.9463, "encoder_q-layer.4": 3422.7673, "encoder_q-layer.5": 3419.4053, "encoder_q-layer.6": 3103.9763, "encoder_q-layer.7": 3012.189, "encoder_q-layer.8": 2591.7593, "encoder_q-layer.9": 1602.4058, "epoch": 0.12, "inbatch_neg_score": 0.4227, "inbatch_pos_score": 1.0859, "learning_rate": 4.888888888888889e-05, "loss": 3.1172, "norm_diff": 0.0589, "norm_loss": 0.0, "num_token_doc": 66.8474, "num_token_overlap": 17.8825, "num_token_query": 52.3671, "num_token_union": 73.7363, "num_word_context": 202.2923, "num_word_doc": 49.9032, "num_word_query": 39.9451, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4364.2333, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4216, "query_norm": 1.4678, "queue_k_norm": 1.529, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3671, "sent_len_1": 66.8474, "sent_len_max_0": 128.0, "sent_len_max_1": 207.7887, "stdk": 0.0485, "stdq": 0.0446, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 12000 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.127, "doc_norm": 1.5304, "encoder_q-embeddings": 1800.1279, "encoder_q-layer.0": 1247.6676, "encoder_q-layer.1": 1477.9283, "encoder_q-layer.10": 1301.9867, "encoder_q-layer.11": 2828.5225, "encoder_q-layer.2": 1730.3669, "encoder_q-layer.3": 1793.207, "encoder_q-layer.4": 1810.785, "encoder_q-layer.5": 1835.3987, "encoder_q-layer.6": 1752.9932, "encoder_q-layer.7": 1482.6123, "encoder_q-layer.8": 1367.4419, "encoder_q-layer.9": 1172.5098, "epoch": 0.12, "inbatch_neg_score": 0.4143, "inbatch_pos_score": 1.1006, "learning_rate": 4.883333333333334e-05, "loss": 3.127, "norm_diff": 0.0489, "norm_loss": 0.0, "num_token_doc": 66.8571, "num_token_overlap": 17.8339, "num_token_query": 52.288, "num_token_union": 73.7554, "num_word_context": 202.4945, "num_word_doc": 49.8818, "num_word_query": 39.8838, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2479.3489, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4143, "query_norm": 1.4815, "queue_k_norm": 1.5279, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.288, "sent_len_1": 66.8571, "sent_len_max_0": 128.0, "sent_len_max_1": 209.81, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 12100 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.1469, "doc_norm": 1.5304, "encoder_q-embeddings": 2617.1101, "encoder_q-layer.0": 1877.4193, "encoder_q-layer.1": 2161.9988, "encoder_q-layer.10": 1200.3258, "encoder_q-layer.11": 2984.5427, "encoder_q-layer.2": 2634.2737, "encoder_q-layer.3": 2762.0112, "encoder_q-layer.4": 2847.4392, "encoder_q-layer.5": 2713.0967, "encoder_q-layer.6": 2362.7581, "encoder_q-layer.7": 1631.5819, "encoder_q-layer.8": 1395.6871, "encoder_q-layer.9": 1139.1963, "epoch": 0.12, "inbatch_neg_score": 0.4028, "inbatch_pos_score": 1.082, "learning_rate": 4.8777777777777775e-05, "loss": 3.1469, "norm_diff": 0.0638, "norm_loss": 0.0, "num_token_doc": 66.6628, "num_token_overlap": 17.7863, "num_token_query": 52.2771, "num_token_union": 73.6651, "num_word_context": 202.3471, "num_word_doc": 49.7238, "num_word_query": 39.8736, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3357.4773, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4031, "query_norm": 1.4665, "queue_k_norm": 1.5283, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2771, "sent_len_1": 66.6628, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5425, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 12200 }, { "accuracy": 54.248, "active_queue_size": 16384.0, "cl_loss": 3.1326, "doc_norm": 1.5239, "encoder_q-embeddings": 2681.9387, "encoder_q-layer.0": 2032.9773, "encoder_q-layer.1": 2396.1858, "encoder_q-layer.10": 1261.2302, "encoder_q-layer.11": 2876.0896, "encoder_q-layer.2": 2941.5767, "encoder_q-layer.3": 2661.4861, "encoder_q-layer.4": 2405.1135, "encoder_q-layer.5": 2369.7056, "encoder_q-layer.6": 2211.4155, "encoder_q-layer.7": 1597.7296, "encoder_q-layer.8": 1409.2609, "encoder_q-layer.9": 1180.9, "epoch": 0.12, "inbatch_neg_score": 0.4144, "inbatch_pos_score": 1.0889, "learning_rate": 4.8722222222222224e-05, "loss": 3.1326, "norm_diff": 0.0312, "norm_loss": 0.0, "num_token_doc": 66.9099, "num_token_overlap": 17.8402, "num_token_query": 52.3625, "num_token_union": 73.8201, "num_word_context": 202.3114, "num_word_doc": 49.9079, "num_word_query": 39.9457, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3366.8043, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4133, "query_norm": 1.4927, "queue_k_norm": 1.5251, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3625, "sent_len_1": 66.9099, "sent_len_max_0": 128.0, "sent_len_max_1": 210.53, "stdk": 0.0485, "stdq": 0.0457, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 12300 }, { "accuracy": 56.2012, "active_queue_size": 16384.0, "cl_loss": 3.1418, "doc_norm": 1.5252, "encoder_q-embeddings": 3418.9812, "encoder_q-layer.0": 2482.231, "encoder_q-layer.1": 3005.9182, "encoder_q-layer.10": 1241.3077, "encoder_q-layer.11": 2955.7793, "encoder_q-layer.2": 3466.5522, "encoder_q-layer.3": 3961.925, "encoder_q-layer.4": 3652.4609, "encoder_q-layer.5": 3463.4934, "encoder_q-layer.6": 3063.8618, "encoder_q-layer.7": 2339.0308, "encoder_q-layer.8": 1757.8164, "encoder_q-layer.9": 1235.3314, "epoch": 0.12, "inbatch_neg_score": 0.3888, "inbatch_pos_score": 1.0801, "learning_rate": 4.866666666666667e-05, "loss": 3.1418, "norm_diff": 0.0534, "norm_loss": 0.0, "num_token_doc": 66.7722, "num_token_overlap": 17.8121, "num_token_query": 52.297, "num_token_union": 73.7268, "num_word_context": 202.1849, "num_word_doc": 49.8049, "num_word_query": 39.8714, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4290.4855, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3882, "query_norm": 1.4719, "queue_k_norm": 1.5215, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.297, "sent_len_1": 66.7722, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8575, "stdk": 0.0486, "stdq": 0.0458, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 12400 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.1347, "doc_norm": 1.5198, "encoder_q-embeddings": 14147.8721, "encoder_q-layer.0": 12699.6064, "encoder_q-layer.1": 13612.668, "encoder_q-layer.10": 1375.9296, "encoder_q-layer.11": 3467.7456, "encoder_q-layer.2": 15775.583, "encoder_q-layer.3": 15017.8174, "encoder_q-layer.4": 14415.4434, "encoder_q-layer.5": 14884.9131, "encoder_q-layer.6": 13470.6875, "encoder_q-layer.7": 11873.0342, "encoder_q-layer.8": 8528.3486, "encoder_q-layer.9": 2468.0012, "epoch": 0.12, "inbatch_neg_score": 0.3805, "inbatch_pos_score": 1.042, "learning_rate": 4.8611111111111115e-05, "loss": 3.1347, "norm_diff": 0.0551, "norm_loss": 0.0, "num_token_doc": 66.8828, "num_token_overlap": 17.8544, "num_token_query": 52.2252, "num_token_union": 73.7234, "num_word_context": 202.3692, "num_word_doc": 49.931, "num_word_query": 39.8292, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17878.4044, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3784, "query_norm": 1.4647, "queue_k_norm": 1.5174, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2252, "sent_len_1": 66.8828, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0475, "stdk": 0.0484, "stdq": 0.0451, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 12500 }, { "accuracy": 55.3223, "active_queue_size": 16384.0, "cl_loss": 3.1353, "doc_norm": 1.5162, "encoder_q-embeddings": 2441.4155, "encoder_q-layer.0": 1760.5878, "encoder_q-layer.1": 2065.0251, "encoder_q-layer.10": 1260.2494, "encoder_q-layer.11": 2844.4729, "encoder_q-layer.2": 2457.1794, "encoder_q-layer.3": 2571.6848, "encoder_q-layer.4": 2649.6257, "encoder_q-layer.5": 3025.9919, "encoder_q-layer.6": 2717.0676, "encoder_q-layer.7": 2189.8972, "encoder_q-layer.8": 1718.7522, "encoder_q-layer.9": 1241.0837, "epoch": 0.12, "inbatch_neg_score": 0.3097, "inbatch_pos_score": 1.002, "learning_rate": 4.855555555555556e-05, "loss": 3.1353, "norm_diff": 0.0478, "norm_loss": 0.0, "num_token_doc": 66.7667, "num_token_overlap": 17.7874, "num_token_query": 52.3703, "num_token_union": 73.7676, "num_word_context": 202.5679, "num_word_doc": 49.8364, "num_word_query": 39.9517, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3304.6277, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3103, "query_norm": 1.4684, "queue_k_norm": 1.5167, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3703, "sent_len_1": 66.7667, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6275, "stdk": 0.0484, "stdq": 0.046, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 12600 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.1122, "doc_norm": 1.5139, "encoder_q-embeddings": 2285.0217, "encoder_q-layer.0": 1595.8903, "encoder_q-layer.1": 1691.7, "encoder_q-layer.10": 1220.8265, "encoder_q-layer.11": 2727.3257, "encoder_q-layer.2": 2023.9152, "encoder_q-layer.3": 2205.0981, "encoder_q-layer.4": 2407.6484, "encoder_q-layer.5": 2593.0686, "encoder_q-layer.6": 2709.6111, "encoder_q-layer.7": 2202.2224, "encoder_q-layer.8": 1826.8951, "encoder_q-layer.9": 1184.9089, "epoch": 0.12, "inbatch_neg_score": 0.3437, "inbatch_pos_score": 0.9922, "learning_rate": 4.85e-05, "loss": 3.1122, "norm_diff": 0.069, "norm_loss": 0.0, "num_token_doc": 66.7376, "num_token_overlap": 17.8273, "num_token_query": 52.3609, "num_token_union": 73.7259, "num_word_context": 202.4189, "num_word_doc": 49.8118, "num_word_query": 39.947, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3078.8234, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3435, "query_norm": 1.4448, "queue_k_norm": 1.5149, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3609, "sent_len_1": 66.7376, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9737, "stdk": 0.0483, "stdq": 0.0449, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 12700 }, { "accuracy": 55.4199, "active_queue_size": 16384.0, "cl_loss": 3.1191, "doc_norm": 1.512, "encoder_q-embeddings": 2573.0122, "encoder_q-layer.0": 1784.2512, "encoder_q-layer.1": 2184.9136, "encoder_q-layer.10": 1221.2296, "encoder_q-layer.11": 2311.6724, "encoder_q-layer.2": 2741.8213, "encoder_q-layer.3": 3053.5823, "encoder_q-layer.4": 3399.0542, "encoder_q-layer.5": 3006.1641, "encoder_q-layer.6": 2871.1987, "encoder_q-layer.7": 2030.6932, "encoder_q-layer.8": 1695.0886, "encoder_q-layer.9": 1203.1268, "epoch": 0.12, "inbatch_neg_score": 0.3288, "inbatch_pos_score": 1.0078, "learning_rate": 4.844444444444445e-05, "loss": 3.1191, "norm_diff": 0.0493, "norm_loss": 0.0, "num_token_doc": 66.7136, "num_token_overlap": 17.7897, "num_token_query": 52.1939, "num_token_union": 73.6744, "num_word_context": 201.9426, "num_word_doc": 49.8064, "num_word_query": 39.8161, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3513.276, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3286, "query_norm": 1.4627, "queue_k_norm": 1.5129, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1939, "sent_len_1": 66.7136, "sent_len_max_0": 128.0, "sent_len_max_1": 206.54, "stdk": 0.0482, "stdq": 0.0453, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 12800 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.1106, "doc_norm": 1.5063, "encoder_q-embeddings": 1566.6166, "encoder_q-layer.0": 1122.4679, "encoder_q-layer.1": 1192.5088, "encoder_q-layer.10": 659.5461, "encoder_q-layer.11": 1234.1831, "encoder_q-layer.2": 1392.3782, "encoder_q-layer.3": 1470.782, "encoder_q-layer.4": 1448.4641, "encoder_q-layer.5": 1582.8909, "encoder_q-layer.6": 1433.3016, "encoder_q-layer.7": 1174.9772, "encoder_q-layer.8": 896.5245, "encoder_q-layer.9": 618.5817, "epoch": 0.13, "inbatch_neg_score": 0.3414, "inbatch_pos_score": 1.0234, "learning_rate": 4.838888888888889e-05, "loss": 3.1106, "norm_diff": 0.0236, "norm_loss": 0.0, "num_token_doc": 66.6315, "num_token_overlap": 17.8114, "num_token_query": 52.3023, "num_token_union": 73.6373, "num_word_context": 202.28, "num_word_doc": 49.7366, "num_word_query": 39.882, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1837.4836, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3406, "query_norm": 1.4827, "queue_k_norm": 1.5102, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3023, "sent_len_1": 66.6315, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1775, "stdk": 0.0481, "stdq": 0.046, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 12900 }, { "accuracy": 56.1035, "active_queue_size": 16384.0, "cl_loss": 3.1097, "doc_norm": 1.5106, "encoder_q-embeddings": 4233.082, "encoder_q-layer.0": 3220.1582, "encoder_q-layer.1": 3616.9607, "encoder_q-layer.10": 607.9471, "encoder_q-layer.11": 1219.5592, "encoder_q-layer.2": 4676.2969, "encoder_q-layer.3": 4623.436, "encoder_q-layer.4": 5015.0713, "encoder_q-layer.5": 3658.9973, "encoder_q-layer.6": 2853.4734, "encoder_q-layer.7": 2701.4983, "encoder_q-layer.8": 1832.0573, "encoder_q-layer.9": 727.521, "epoch": 0.13, "inbatch_neg_score": 0.3345, "inbatch_pos_score": 1.0234, "learning_rate": 4.8333333333333334e-05, "loss": 3.1097, "norm_diff": 0.0071, "norm_loss": 0.0, "num_token_doc": 66.8521, "num_token_overlap": 17.8145, "num_token_query": 52.1913, "num_token_union": 73.7117, "num_word_context": 202.4049, "num_word_doc": 49.9012, "num_word_query": 39.8314, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5094.2, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3352, "query_norm": 1.5049, "queue_k_norm": 1.5095, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1913, "sent_len_1": 66.8521, "sent_len_max_0": 128.0, "sent_len_max_1": 207.985, "stdk": 0.0483, "stdq": 0.0464, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 13000 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.1059, "doc_norm": 1.512, "encoder_q-embeddings": 2064.1128, "encoder_q-layer.0": 1556.8123, "encoder_q-layer.1": 1704.5872, "encoder_q-layer.10": 587.3082, "encoder_q-layer.11": 1142.5902, "encoder_q-layer.2": 1990.354, "encoder_q-layer.3": 2285.6323, "encoder_q-layer.4": 2688.3694, "encoder_q-layer.5": 2509.8198, "encoder_q-layer.6": 2354.8914, "encoder_q-layer.7": 1823.047, "encoder_q-layer.8": 1352.0824, "encoder_q-layer.9": 716.1687, "epoch": 0.13, "inbatch_neg_score": 0.3377, "inbatch_pos_score": 1.0273, "learning_rate": 4.8277777777777776e-05, "loss": 3.1059, "norm_diff": 0.0467, "norm_loss": 0.0, "num_token_doc": 66.7283, "num_token_overlap": 17.8398, "num_token_query": 52.2732, "num_token_union": 73.6852, "num_word_context": 202.3354, "num_word_doc": 49.8234, "num_word_query": 39.8813, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2726.5229, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3391, "query_norm": 1.4653, "queue_k_norm": 1.5098, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2732, "sent_len_1": 66.7283, "sent_len_max_0": 128.0, "sent_len_max_1": 207.985, "stdk": 0.0484, "stdq": 0.0448, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 13100 }, { "accuracy": 56.2988, "active_queue_size": 16384.0, "cl_loss": 3.104, "doc_norm": 1.508, "encoder_q-embeddings": 3233.7439, "encoder_q-layer.0": 2236.3477, "encoder_q-layer.1": 2595.2554, "encoder_q-layer.10": 548.4468, "encoder_q-layer.11": 1178.2587, "encoder_q-layer.2": 3044.9358, "encoder_q-layer.3": 3369.3843, "encoder_q-layer.4": 3396.6895, "encoder_q-layer.5": 3365.2095, "encoder_q-layer.6": 3224.1416, "encoder_q-layer.7": 3075.531, "encoder_q-layer.8": 2571.1287, "encoder_q-layer.9": 767.5817, "epoch": 0.13, "inbatch_neg_score": 0.3646, "inbatch_pos_score": 1.0439, "learning_rate": 4.8222222222222225e-05, "loss": 3.104, "norm_diff": 0.0343, "norm_loss": 0.0, "num_token_doc": 66.7773, "num_token_overlap": 17.7619, "num_token_query": 52.3277, "num_token_union": 73.7903, "num_word_context": 202.3485, "num_word_doc": 49.8298, "num_word_query": 39.9118, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4092.1675, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.363, "query_norm": 1.4737, "queue_k_norm": 1.5098, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3277, "sent_len_1": 66.7773, "sent_len_max_0": 128.0, "sent_len_max_1": 206.9475, "stdk": 0.0483, "stdq": 0.0452, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 13200 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.103, "doc_norm": 1.5081, "encoder_q-embeddings": 4819.9653, "encoder_q-layer.0": 3738.5786, "encoder_q-layer.1": 4112.1768, "encoder_q-layer.10": 609.6918, "encoder_q-layer.11": 1258.6583, "encoder_q-layer.2": 4976.1343, "encoder_q-layer.3": 5283.0562, "encoder_q-layer.4": 5166.6885, "encoder_q-layer.5": 5091.6323, "encoder_q-layer.6": 4037.4219, "encoder_q-layer.7": 2621.072, "encoder_q-layer.8": 1256.5381, "encoder_q-layer.9": 686.9518, "epoch": 0.13, "inbatch_neg_score": 0.3575, "inbatch_pos_score": 1.0352, "learning_rate": 4.8166666666666674e-05, "loss": 3.103, "norm_diff": 0.0229, "norm_loss": 0.0, "num_token_doc": 66.6635, "num_token_overlap": 17.8188, "num_token_query": 52.3158, "num_token_union": 73.6782, "num_word_context": 202.2551, "num_word_doc": 49.7517, "num_word_query": 39.8744, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5667.8084, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3567, "query_norm": 1.4852, "queue_k_norm": 1.5066, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3158, "sent_len_1": 66.6635, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4038, "stdk": 0.0484, "stdq": 0.0459, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 13300 }, { "accuracy": 54.248, "active_queue_size": 16384.0, "cl_loss": 3.0961, "doc_norm": 1.5014, "encoder_q-embeddings": 2813.0393, "encoder_q-layer.0": 1965.0183, "encoder_q-layer.1": 2104.8242, "encoder_q-layer.10": 573.8886, "encoder_q-layer.11": 1293.5685, "encoder_q-layer.2": 2406.655, "encoder_q-layer.3": 2676.761, "encoder_q-layer.4": 2839.573, "encoder_q-layer.5": 3023.8811, "encoder_q-layer.6": 2238.1401, "encoder_q-layer.7": 1501.0217, "encoder_q-layer.8": 980.7302, "encoder_q-layer.9": 578.1656, "epoch": 0.13, "inbatch_neg_score": 0.3731, "inbatch_pos_score": 1.0439, "learning_rate": 4.811111111111111e-05, "loss": 3.0961, "norm_diff": 0.0113, "norm_loss": 0.0, "num_token_doc": 66.5931, "num_token_overlap": 17.8417, "num_token_query": 52.3823, "num_token_union": 73.6481, "num_word_context": 202.436, "num_word_doc": 49.7113, "num_word_query": 39.9736, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3122.6047, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3726, "query_norm": 1.4914, "queue_k_norm": 1.5028, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3823, "sent_len_1": 66.5931, "sent_len_max_0": 128.0, "sent_len_max_1": 210.805, "stdk": 0.0482, "stdq": 0.0454, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 13400 }, { "accuracy": 56.5918, "active_queue_size": 16384.0, "cl_loss": 3.1085, "doc_norm": 1.502, "encoder_q-embeddings": 2127.6292, "encoder_q-layer.0": 1586.5671, "encoder_q-layer.1": 1746.8689, "encoder_q-layer.10": 569.3246, "encoder_q-layer.11": 1281.6547, "encoder_q-layer.2": 2138.9526, "encoder_q-layer.3": 2254.5894, "encoder_q-layer.4": 2412.1929, "encoder_q-layer.5": 2212.5044, "encoder_q-layer.6": 2003.4904, "encoder_q-layer.7": 1266.1362, "encoder_q-layer.8": 813.3386, "encoder_q-layer.9": 554.475, "epoch": 0.13, "inbatch_neg_score": 0.3818, "inbatch_pos_score": 1.0625, "learning_rate": 4.805555555555556e-05, "loss": 3.1085, "norm_diff": 0.0136, "norm_loss": 0.0, "num_token_doc": 66.8867, "num_token_overlap": 17.7928, "num_token_query": 52.1558, "num_token_union": 73.7543, "num_word_context": 202.252, "num_word_doc": 49.9249, "num_word_query": 39.8024, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2587.8299, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3813, "query_norm": 1.4884, "queue_k_norm": 1.5008, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1558, "sent_len_1": 66.8867, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4925, "stdk": 0.0483, "stdq": 0.0449, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 13500 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1271, "doc_norm": 1.4939, "encoder_q-embeddings": 3121.0852, "encoder_q-layer.0": 2055.937, "encoder_q-layer.1": 2390.6487, "encoder_q-layer.10": 638.313, "encoder_q-layer.11": 1373.2356, "encoder_q-layer.2": 2808.5361, "encoder_q-layer.3": 3201.3564, "encoder_q-layer.4": 3397.2898, "encoder_q-layer.5": 3112.043, "encoder_q-layer.6": 2714.9211, "encoder_q-layer.7": 1951.676, "encoder_q-layer.8": 1316.3708, "encoder_q-layer.9": 627.3073, "epoch": 0.13, "inbatch_neg_score": 0.3677, "inbatch_pos_score": 1.0303, "learning_rate": 4.8e-05, "loss": 3.1271, "norm_diff": 0.0221, "norm_loss": 0.0, "num_token_doc": 66.8791, "num_token_overlap": 17.8291, "num_token_query": 52.358, "num_token_union": 73.8013, "num_word_context": 202.3422, "num_word_doc": 49.9129, "num_word_query": 39.9394, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3571.8492, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3677, "query_norm": 1.4718, "queue_k_norm": 1.4947, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.358, "sent_len_1": 66.8791, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1825, "stdk": 0.048, "stdq": 0.0449, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 13600 }, { "accuracy": 54.0527, "active_queue_size": 16384.0, "cl_loss": 3.1077, "doc_norm": 1.4933, "encoder_q-embeddings": 2575.2903, "encoder_q-layer.0": 1888.9448, "encoder_q-layer.1": 2286.7798, "encoder_q-layer.10": 582.2852, "encoder_q-layer.11": 1281.1689, "encoder_q-layer.2": 2799.5916, "encoder_q-layer.3": 2816.7791, "encoder_q-layer.4": 2656.5244, "encoder_q-layer.5": 2349.5461, "encoder_q-layer.6": 1912.1196, "encoder_q-layer.7": 1106.4957, "encoder_q-layer.8": 665.6257, "encoder_q-layer.9": 529.8287, "epoch": 0.13, "inbatch_neg_score": 0.3802, "inbatch_pos_score": 1.04, "learning_rate": 4.794444444444445e-05, "loss": 3.1077, "norm_diff": 0.0211, "norm_loss": 0.0, "num_token_doc": 66.9031, "num_token_overlap": 17.8163, "num_token_query": 52.2019, "num_token_union": 73.7192, "num_word_context": 202.2169, "num_word_doc": 49.885, "num_word_query": 39.8122, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2992.5396, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3811, "query_norm": 1.4722, "queue_k_norm": 1.4931, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2019, "sent_len_1": 66.9031, "sent_len_max_0": 128.0, "sent_len_max_1": 210.8375, "stdk": 0.0481, "stdq": 0.0446, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 13700 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.1171, "doc_norm": 1.4892, "encoder_q-embeddings": 1851.6925, "encoder_q-layer.0": 1308.2056, "encoder_q-layer.1": 1554.7643, "encoder_q-layer.10": 741.0724, "encoder_q-layer.11": 1529.5801, "encoder_q-layer.2": 1874.9747, "encoder_q-layer.3": 2129.3152, "encoder_q-layer.4": 2303.4897, "encoder_q-layer.5": 2460.978, "encoder_q-layer.6": 1711.8699, "encoder_q-layer.7": 1199.2979, "encoder_q-layer.8": 900.2332, "encoder_q-layer.9": 712.7906, "epoch": 0.13, "inbatch_neg_score": 0.368, "inbatch_pos_score": 1.0723, "learning_rate": 4.7888888888888886e-05, "loss": 3.1171, "norm_diff": 0.0262, "norm_loss": 0.0, "num_token_doc": 66.8412, "num_token_overlap": 17.7677, "num_token_query": 52.193, "num_token_union": 73.7563, "num_word_context": 202.5149, "num_word_doc": 49.8867, "num_word_query": 39.7909, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2418.4788, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3689, "query_norm": 1.5146, "queue_k_norm": 1.4894, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.193, "sent_len_1": 66.8412, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8375, "stdk": 0.048, "stdq": 0.0467, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 13800 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0965, "doc_norm": 1.4876, "encoder_q-embeddings": 4090.9707, "encoder_q-layer.0": 2768.262, "encoder_q-layer.1": 3201.3132, "encoder_q-layer.10": 580.4832, "encoder_q-layer.11": 1463.459, "encoder_q-layer.2": 3761.447, "encoder_q-layer.3": 4199.5498, "encoder_q-layer.4": 4599.9653, "encoder_q-layer.5": 4229.9419, "encoder_q-layer.6": 3000.0898, "encoder_q-layer.7": 2117.0083, "encoder_q-layer.8": 1293.6323, "encoder_q-layer.9": 696.3948, "epoch": 0.14, "inbatch_neg_score": 0.3781, "inbatch_pos_score": 1.0527, "learning_rate": 4.7833333333333335e-05, "loss": 3.0965, "norm_diff": 0.0152, "norm_loss": 0.0, "num_token_doc": 66.8258, "num_token_overlap": 17.8336, "num_token_query": 52.3842, "num_token_union": 73.7472, "num_word_context": 202.2737, "num_word_doc": 49.8404, "num_word_query": 39.9499, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4591.898, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3767, "query_norm": 1.5006, "queue_k_norm": 1.487, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3842, "sent_len_1": 66.8258, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3975, "stdk": 0.048, "stdq": 0.0456, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 13900 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.148, "doc_norm": 1.4807, "encoder_q-embeddings": 1431.5521, "encoder_q-layer.0": 1075.4169, "encoder_q-layer.1": 1208.0631, "encoder_q-layer.10": 620.9399, "encoder_q-layer.11": 1970.4237, "encoder_q-layer.2": 1468.5435, "encoder_q-layer.3": 1619.9114, "encoder_q-layer.4": 1602.4318, "encoder_q-layer.5": 1351.478, "encoder_q-layer.6": 1112.7437, "encoder_q-layer.7": 839.5068, "encoder_q-layer.8": 740.226, "encoder_q-layer.9": 551.9702, "epoch": 0.14, "inbatch_neg_score": 0.4125, "inbatch_pos_score": 1.0898, "learning_rate": 4.7777777777777784e-05, "loss": 3.148, "norm_diff": 0.0791, "norm_loss": 0.0, "num_token_doc": 66.7159, "num_token_overlap": 17.7957, "num_token_query": 52.2416, "num_token_union": 73.6701, "num_word_context": 202.0938, "num_word_doc": 49.7866, "num_word_query": 39.8268, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1869.5218, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4114, "query_norm": 1.5598, "queue_k_norm": 1.4837, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2416, "sent_len_1": 66.7159, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8363, "stdk": 0.0478, "stdq": 0.0457, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 14000 }, { "accuracy": 54.6387, "active_queue_size": 16384.0, "cl_loss": 3.1222, "doc_norm": 1.4861, "encoder_q-embeddings": 2440.8606, "encoder_q-layer.0": 1658.1492, "encoder_q-layer.1": 1879.3768, "encoder_q-layer.10": 581.7808, "encoder_q-layer.11": 1550.5854, "encoder_q-layer.2": 2077.9492, "encoder_q-layer.3": 2189.6799, "encoder_q-layer.4": 2182.6169, "encoder_q-layer.5": 2014.5494, "encoder_q-layer.6": 1755.1063, "encoder_q-layer.7": 1268.8123, "encoder_q-layer.8": 946.7007, "encoder_q-layer.9": 622.07, "epoch": 0.14, "inbatch_neg_score": 0.3901, "inbatch_pos_score": 1.0664, "learning_rate": 4.7722222222222226e-05, "loss": 3.1222, "norm_diff": 0.0273, "norm_loss": 0.0, "num_token_doc": 66.8514, "num_token_overlap": 17.8462, "num_token_query": 52.317, "num_token_union": 73.7466, "num_word_context": 202.3613, "num_word_doc": 49.8725, "num_word_query": 39.9074, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2627.6777, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3899, "query_norm": 1.5135, "queue_k_norm": 1.4817, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.317, "sent_len_1": 66.8514, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3787, "stdk": 0.0481, "stdq": 0.0455, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 14100 }, { "accuracy": 55.3223, "active_queue_size": 16384.0, "cl_loss": 3.1111, "doc_norm": 1.476, "encoder_q-embeddings": 5440.4683, "encoder_q-layer.0": 4165.3706, "encoder_q-layer.1": 4379.4448, "encoder_q-layer.10": 576.1895, "encoder_q-layer.11": 1269.214, "encoder_q-layer.2": 4779.0518, "encoder_q-layer.3": 5511.8008, "encoder_q-layer.4": 6166.0229, "encoder_q-layer.5": 4434.4404, "encoder_q-layer.6": 3201.6042, "encoder_q-layer.7": 2254.5168, "encoder_q-layer.8": 1117.6492, "encoder_q-layer.9": 619.7675, "epoch": 0.14, "inbatch_neg_score": 0.3447, "inbatch_pos_score": 0.9985, "learning_rate": 4.766666666666667e-05, "loss": 3.1111, "norm_diff": 0.0371, "norm_loss": 0.0, "num_token_doc": 66.8631, "num_token_overlap": 17.8412, "num_token_query": 52.3975, "num_token_union": 73.8125, "num_word_context": 202.4998, "num_word_doc": 49.9001, "num_word_query": 39.9729, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5856.209, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.343, "query_norm": 1.4388, "queue_k_norm": 1.4787, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3975, "sent_len_1": 66.8631, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9075, "stdk": 0.0478, "stdq": 0.0442, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 14200 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.1292, "doc_norm": 1.4794, "encoder_q-embeddings": 2002.4219, "encoder_q-layer.0": 1411.865, "encoder_q-layer.1": 1526.6392, "encoder_q-layer.10": 531.4731, "encoder_q-layer.11": 1127.6426, "encoder_q-layer.2": 1720.4834, "encoder_q-layer.3": 1802.9833, "encoder_q-layer.4": 1894.3379, "encoder_q-layer.5": 1893.4955, "encoder_q-layer.6": 1768.1675, "encoder_q-layer.7": 1248.4474, "encoder_q-layer.8": 763.097, "encoder_q-layer.9": 564.893, "epoch": 0.14, "inbatch_neg_score": 0.3252, "inbatch_pos_score": 1.0127, "learning_rate": 4.761111111111111e-05, "loss": 3.1292, "norm_diff": 0.0117, "norm_loss": 0.0, "num_token_doc": 66.7819, "num_token_overlap": 17.7814, "num_token_query": 52.196, "num_token_union": 73.7018, "num_word_context": 202.1446, "num_word_doc": 49.827, "num_word_query": 39.7964, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2240.0436, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3257, "query_norm": 1.4687, "queue_k_norm": 1.4766, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.196, "sent_len_1": 66.7819, "sent_len_max_0": 128.0, "sent_len_max_1": 210.22, "stdk": 0.0481, "stdq": 0.0456, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 14300 }, { "accuracy": 55.8105, "active_queue_size": 16384.0, "cl_loss": 3.0942, "doc_norm": 1.4736, "encoder_q-embeddings": 2419.4294, "encoder_q-layer.0": 1760.1619, "encoder_q-layer.1": 1933.0579, "encoder_q-layer.10": 573.0908, "encoder_q-layer.11": 1188.6954, "encoder_q-layer.2": 2212.0168, "encoder_q-layer.3": 2421.5103, "encoder_q-layer.4": 2384.0374, "encoder_q-layer.5": 2348.2395, "encoder_q-layer.6": 2211.188, "encoder_q-layer.7": 1462.7124, "encoder_q-layer.8": 881.3624, "encoder_q-layer.9": 550.6721, "epoch": 0.14, "inbatch_neg_score": 0.3335, "inbatch_pos_score": 1.0273, "learning_rate": 4.755555555555556e-05, "loss": 3.0942, "norm_diff": 0.0136, "norm_loss": 0.0, "num_token_doc": 66.8782, "num_token_overlap": 17.8246, "num_token_query": 52.2887, "num_token_union": 73.7643, "num_word_context": 202.2116, "num_word_doc": 49.8963, "num_word_query": 39.8654, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2796.7018, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3335, "query_norm": 1.4851, "queue_k_norm": 1.4718, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2887, "sent_len_1": 66.8782, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5037, "stdk": 0.048, "stdq": 0.0461, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 14400 }, { "accuracy": 54.7363, "active_queue_size": 16384.0, "cl_loss": 3.1169, "doc_norm": 1.4689, "encoder_q-embeddings": 3343.2944, "encoder_q-layer.0": 2143.9458, "encoder_q-layer.1": 2503.6636, "encoder_q-layer.10": 605.0542, "encoder_q-layer.11": 1303.71, "encoder_q-layer.2": 2898.3586, "encoder_q-layer.3": 3130.6868, "encoder_q-layer.4": 3197.6082, "encoder_q-layer.5": 2875.27, "encoder_q-layer.6": 2864.6741, "encoder_q-layer.7": 1682.2997, "encoder_q-layer.8": 968.3528, "encoder_q-layer.9": 619.7616, "epoch": 0.14, "inbatch_neg_score": 0.327, "inbatch_pos_score": 0.9966, "learning_rate": 4.75e-05, "loss": 3.1169, "norm_diff": 0.0242, "norm_loss": 0.0, "num_token_doc": 66.7095, "num_token_overlap": 17.7939, "num_token_query": 52.1976, "num_token_union": 73.6327, "num_word_context": 202.2649, "num_word_doc": 49.8046, "num_word_query": 39.7967, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3612.6465, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3274, "query_norm": 1.4447, "queue_k_norm": 1.4692, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1976, "sent_len_1": 66.7095, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7788, "stdk": 0.0479, "stdq": 0.0448, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 14500 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1092, "doc_norm": 1.4706, "encoder_q-embeddings": 3612.2717, "encoder_q-layer.0": 2595.2976, "encoder_q-layer.1": 3111.3066, "encoder_q-layer.10": 586.6301, "encoder_q-layer.11": 1202.5724, "encoder_q-layer.2": 3444.2458, "encoder_q-layer.3": 3503.9668, "encoder_q-layer.4": 3401.9529, "encoder_q-layer.5": 3104.6702, "encoder_q-layer.6": 3183.4873, "encoder_q-layer.7": 2463.4321, "encoder_q-layer.8": 1443.196, "encoder_q-layer.9": 606.4993, "epoch": 0.14, "inbatch_neg_score": 0.3411, "inbatch_pos_score": 1.0186, "learning_rate": 4.7444444444444445e-05, "loss": 3.1092, "norm_diff": 0.0148, "norm_loss": 0.0, "num_token_doc": 66.6994, "num_token_overlap": 17.7962, "num_token_query": 52.2405, "num_token_union": 73.645, "num_word_context": 201.9579, "num_word_doc": 49.7522, "num_word_query": 39.8181, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4178.1953, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3416, "query_norm": 1.4854, "queue_k_norm": 1.4655, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2405, "sent_len_1": 66.6994, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2287, "stdk": 0.048, "stdq": 0.0457, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 14600 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.1033, "doc_norm": 1.4631, "encoder_q-embeddings": 4919.8076, "encoder_q-layer.0": 3429.8914, "encoder_q-layer.1": 3900.9934, "encoder_q-layer.10": 607.5361, "encoder_q-layer.11": 1349.6677, "encoder_q-layer.2": 4903.9707, "encoder_q-layer.3": 5621.917, "encoder_q-layer.4": 5832.165, "encoder_q-layer.5": 4593.8867, "encoder_q-layer.6": 3585.7004, "encoder_q-layer.7": 3178.6375, "encoder_q-layer.8": 1944.448, "encoder_q-layer.9": 760.371, "epoch": 0.14, "inbatch_neg_score": 0.3669, "inbatch_pos_score": 1.0303, "learning_rate": 4.7388888888888894e-05, "loss": 3.1033, "norm_diff": 0.0115, "norm_loss": 0.0, "num_token_doc": 66.5757, "num_token_overlap": 17.7632, "num_token_query": 52.2514, "num_token_union": 73.597, "num_word_context": 201.925, "num_word_doc": 49.6472, "num_word_query": 39.8191, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5711.209, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3669, "query_norm": 1.4715, "queue_k_norm": 1.4658, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2514, "sent_len_1": 66.5757, "sent_len_max_0": 128.0, "sent_len_max_1": 210.9437, "stdk": 0.0478, "stdq": 0.045, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 14700 }, { "accuracy": 55.7129, "active_queue_size": 16384.0, "cl_loss": 3.121, "doc_norm": 1.4661, "encoder_q-embeddings": 4280.665, "encoder_q-layer.0": 3196.7864, "encoder_q-layer.1": 3718.9819, "encoder_q-layer.10": 705.7745, "encoder_q-layer.11": 1294.7999, "encoder_q-layer.2": 4144.8394, "encoder_q-layer.3": 4277.3457, "encoder_q-layer.4": 4087.7808, "encoder_q-layer.5": 4049.4714, "encoder_q-layer.6": 4087.2732, "encoder_q-layer.7": 2566.3877, "encoder_q-layer.8": 1340.3143, "encoder_q-layer.9": 705.1943, "epoch": 0.14, "inbatch_neg_score": 0.3429, "inbatch_pos_score": 1.0098, "learning_rate": 4.7333333333333336e-05, "loss": 3.121, "norm_diff": 0.022, "norm_loss": 0.0, "num_token_doc": 66.8945, "num_token_overlap": 17.7891, "num_token_query": 52.2624, "num_token_union": 73.8058, "num_word_context": 202.4589, "num_word_doc": 49.8995, "num_word_query": 39.8639, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4906.2338, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3435, "query_norm": 1.4881, "queue_k_norm": 1.4627, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2624, "sent_len_1": 66.8945, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8338, "stdk": 0.0479, "stdq": 0.045, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 14800 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.0963, "doc_norm": 1.4598, "encoder_q-embeddings": 4242.8823, "encoder_q-layer.0": 2904.7261, "encoder_q-layer.1": 3326.2642, "encoder_q-layer.10": 1166.9825, "encoder_q-layer.11": 2698.3264, "encoder_q-layer.2": 3806.9011, "encoder_q-layer.3": 4072.7839, "encoder_q-layer.4": 4114.2812, "encoder_q-layer.5": 3834.4521, "encoder_q-layer.6": 3585.8457, "encoder_q-layer.7": 2885.7322, "encoder_q-layer.8": 1800.2924, "encoder_q-layer.9": 1123.6002, "epoch": 0.15, "inbatch_neg_score": 0.3306, "inbatch_pos_score": 1.0068, "learning_rate": 4.727777777777778e-05, "loss": 3.0963, "norm_diff": 0.0455, "norm_loss": 0.0, "num_token_doc": 66.6962, "num_token_overlap": 17.8343, "num_token_query": 52.4065, "num_token_union": 73.717, "num_word_context": 202.3299, "num_word_doc": 49.7847, "num_word_query": 39.9761, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4789.1848, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3296, "query_norm": 1.5053, "queue_k_norm": 1.4598, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4065, "sent_len_1": 66.6962, "sent_len_max_0": 128.0, "sent_len_max_1": 208.61, "stdk": 0.0477, "stdq": 0.0459, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 14900 }, { "accuracy": 55.6152, "active_queue_size": 16384.0, "cl_loss": 3.1057, "doc_norm": 1.4623, "encoder_q-embeddings": 2886.9514, "encoder_q-layer.0": 2043.7367, "encoder_q-layer.1": 2497.3433, "encoder_q-layer.10": 1236.9838, "encoder_q-layer.11": 2305.3083, "encoder_q-layer.2": 2790.646, "encoder_q-layer.3": 3059.0322, "encoder_q-layer.4": 3469.959, "encoder_q-layer.5": 3738.5688, "encoder_q-layer.6": 3965.5789, "encoder_q-layer.7": 3635.0723, "encoder_q-layer.8": 2633.6301, "encoder_q-layer.9": 1195.9498, "epoch": 0.15, "inbatch_neg_score": 0.2921, "inbatch_pos_score": 0.9624, "learning_rate": 4.722222222222222e-05, "loss": 3.1057, "norm_diff": 0.0585, "norm_loss": 0.0, "num_token_doc": 66.8416, "num_token_overlap": 17.7781, "num_token_query": 52.1954, "num_token_union": 73.7277, "num_word_context": 202.3655, "num_word_doc": 49.8706, "num_word_query": 39.8001, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4124.997, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.291, "query_norm": 1.5208, "queue_k_norm": 1.4607, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1954, "sent_len_1": 66.8416, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3113, "stdk": 0.0479, "stdq": 0.0457, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 15000 }, { "accuracy": 56.8848, "active_queue_size": 16384.0, "cl_loss": 3.0909, "doc_norm": 1.4579, "encoder_q-embeddings": 4143.3228, "encoder_q-layer.0": 2767.9792, "encoder_q-layer.1": 3069.0259, "encoder_q-layer.10": 1246.1105, "encoder_q-layer.11": 2749.979, "encoder_q-layer.2": 3578.8682, "encoder_q-layer.3": 4205.5894, "encoder_q-layer.4": 4544.5508, "encoder_q-layer.5": 5033.0376, "encoder_q-layer.6": 5329.9341, "encoder_q-layer.7": 4049.5391, "encoder_q-layer.8": 2230.6887, "encoder_q-layer.9": 1187.3547, "epoch": 0.15, "inbatch_neg_score": 0.3173, "inbatch_pos_score": 0.999, "learning_rate": 4.716666666666667e-05, "loss": 3.0909, "norm_diff": 0.031, "norm_loss": 0.0, "num_token_doc": 66.9491, "num_token_overlap": 17.8046, "num_token_query": 52.2522, "num_token_union": 73.8415, "num_word_context": 202.4305, "num_word_doc": 49.979, "num_word_query": 39.8721, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5346.6576, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3174, "query_norm": 1.4889, "queue_k_norm": 1.4575, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2522, "sent_len_1": 66.9491, "sent_len_max_0": 128.0, "sent_len_max_1": 207.0625, "stdk": 0.0478, "stdq": 0.0457, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 15100 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.0917, "doc_norm": 1.4548, "encoder_q-embeddings": 8208.1748, "encoder_q-layer.0": 5680.1768, "encoder_q-layer.1": 6276.7017, "encoder_q-layer.10": 1049.882, "encoder_q-layer.11": 2384.4956, "encoder_q-layer.2": 7501.5225, "encoder_q-layer.3": 8077.7646, "encoder_q-layer.4": 8150.0894, "encoder_q-layer.5": 9397.3633, "encoder_q-layer.6": 8896.7832, "encoder_q-layer.7": 6105.0083, "encoder_q-layer.8": 2852.6975, "encoder_q-layer.9": 1150.0483, "epoch": 0.15, "inbatch_neg_score": 0.3104, "inbatch_pos_score": 0.9834, "learning_rate": 4.711111111111111e-05, "loss": 3.0917, "norm_diff": 0.0423, "norm_loss": 0.0, "num_token_doc": 66.8514, "num_token_overlap": 17.8242, "num_token_query": 52.2745, "num_token_union": 73.739, "num_word_context": 202.394, "num_word_doc": 49.8568, "num_word_query": 39.8612, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9752.7411, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3113, "query_norm": 1.4971, "queue_k_norm": 1.4544, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2745, "sent_len_1": 66.8514, "sent_len_max_0": 128.0, "sent_len_max_1": 211.13, "stdk": 0.0478, "stdq": 0.0454, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 15200 }, { "accuracy": 54.4434, "active_queue_size": 16384.0, "cl_loss": 3.1079, "doc_norm": 1.4536, "encoder_q-embeddings": 2200.0991, "encoder_q-layer.0": 1497.626, "encoder_q-layer.1": 1709.5465, "encoder_q-layer.10": 1103.0933, "encoder_q-layer.11": 2166.3713, "encoder_q-layer.2": 1863.0106, "encoder_q-layer.3": 1999.8591, "encoder_q-layer.4": 1973.8645, "encoder_q-layer.5": 1769.4701, "encoder_q-layer.6": 1825.4762, "encoder_q-layer.7": 1487.4448, "encoder_q-layer.8": 1399.9948, "encoder_q-layer.9": 1035.3771, "epoch": 0.15, "inbatch_neg_score": 0.2881, "inbatch_pos_score": 0.9434, "learning_rate": 4.7055555555555555e-05, "loss": 3.1079, "norm_diff": 0.0899, "norm_loss": 0.0, "num_token_doc": 66.735, "num_token_overlap": 17.7848, "num_token_query": 52.2861, "num_token_union": 73.7457, "num_word_context": 202.3207, "num_word_doc": 49.8072, "num_word_query": 39.881, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2549.7173, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2886, "query_norm": 1.5435, "queue_k_norm": 1.4522, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2861, "sent_len_1": 66.735, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1587, "stdk": 0.0478, "stdq": 0.0448, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 15300 }, { "accuracy": 55.0293, "active_queue_size": 16384.0, "cl_loss": 3.1109, "doc_norm": 1.4437, "encoder_q-embeddings": 965.5519, "encoder_q-layer.0": 666.4868, "encoder_q-layer.1": 856.0638, "encoder_q-layer.10": 535.1511, "encoder_q-layer.11": 1145.8536, "encoder_q-layer.2": 1066.7041, "encoder_q-layer.3": 1109.4865, "encoder_q-layer.4": 1076.7642, "encoder_q-layer.5": 1099.8364, "encoder_q-layer.6": 1095.6036, "encoder_q-layer.7": 829.6399, "encoder_q-layer.8": 619.302, "encoder_q-layer.9": 533.4446, "epoch": 0.15, "inbatch_neg_score": 0.3097, "inbatch_pos_score": 0.9951, "learning_rate": 4.7e-05, "loss": 3.1109, "norm_diff": 0.1496, "norm_loss": 0.0, "num_token_doc": 66.6494, "num_token_overlap": 17.779, "num_token_query": 52.2651, "num_token_union": 73.6884, "num_word_context": 202.1666, "num_word_doc": 49.7252, "num_word_query": 39.8572, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1335.7674, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3096, "query_norm": 1.5933, "queue_k_norm": 1.4491, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2651, "sent_len_1": 66.6494, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3762, "stdk": 0.0475, "stdq": 0.0459, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 15400 }, { "accuracy": 55.2246, "active_queue_size": 16384.0, "cl_loss": 3.0944, "doc_norm": 1.4485, "encoder_q-embeddings": 594.6689, "encoder_q-layer.0": 384.631, "encoder_q-layer.1": 433.2507, "encoder_q-layer.10": 544.9697, "encoder_q-layer.11": 1204.1052, "encoder_q-layer.2": 494.4194, "encoder_q-layer.3": 556.1273, "encoder_q-layer.4": 582.5511, "encoder_q-layer.5": 619.4827, "encoder_q-layer.6": 623.5941, "encoder_q-layer.7": 601.93, "encoder_q-layer.8": 594.7734, "encoder_q-layer.9": 513.4236, "epoch": 0.15, "inbatch_neg_score": 0.3194, "inbatch_pos_score": 1.0, "learning_rate": 4.6944444444444446e-05, "loss": 3.0944, "norm_diff": 0.1791, "norm_loss": 0.0, "num_token_doc": 66.8158, "num_token_overlap": 17.7921, "num_token_query": 52.3228, "num_token_union": 73.7559, "num_word_context": 202.2901, "num_word_doc": 49.8387, "num_word_query": 39.9155, "postclip_grad_norm": 1.0, "preclip_grad_norm": 892.6576, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3191, "query_norm": 1.6275, "queue_k_norm": 1.4489, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3228, "sent_len_1": 66.8158, "sent_len_max_0": 128.0, "sent_len_max_1": 210.5838, "stdk": 0.0477, "stdq": 0.0457, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 15500 }, { "accuracy": 56.3965, "active_queue_size": 16384.0, "cl_loss": 3.0808, "doc_norm": 1.4452, "encoder_q-embeddings": 1397.5522, "encoder_q-layer.0": 925.0747, "encoder_q-layer.1": 1000.437, "encoder_q-layer.10": 551.2416, "encoder_q-layer.11": 1170.9346, "encoder_q-layer.2": 1172.5759, "encoder_q-layer.3": 1255.05, "encoder_q-layer.4": 1240.0184, "encoder_q-layer.5": 1024.675, "encoder_q-layer.6": 979.2985, "encoder_q-layer.7": 695.5445, "encoder_q-layer.8": 590.9091, "encoder_q-layer.9": 491.0156, "epoch": 0.15, "inbatch_neg_score": 0.342, "inbatch_pos_score": 1.0156, "learning_rate": 4.6888888888888895e-05, "loss": 3.0808, "norm_diff": 0.1963, "norm_loss": 0.0, "num_token_doc": 66.8261, "num_token_overlap": 17.8104, "num_token_query": 52.3194, "num_token_union": 73.7361, "num_word_context": 202.3507, "num_word_doc": 49.8214, "num_word_query": 39.9142, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1493.975, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3411, "query_norm": 1.6415, "queue_k_norm": 1.4469, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3194, "sent_len_1": 66.8261, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9812, "stdk": 0.0477, "stdq": 0.045, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 15600 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.0735, "doc_norm": 1.4505, "encoder_q-embeddings": 4573.3726, "encoder_q-layer.0": 3405.2852, "encoder_q-layer.1": 3401.2476, "encoder_q-layer.10": 584.0751, "encoder_q-layer.11": 1274.4475, "encoder_q-layer.2": 3939.8203, "encoder_q-layer.3": 4048.1689, "encoder_q-layer.4": 3609.8772, "encoder_q-layer.5": 3790.0566, "encoder_q-layer.6": 3752.842, "encoder_q-layer.7": 3797.5796, "encoder_q-layer.8": 2447.9749, "encoder_q-layer.9": 605.4104, "epoch": 0.15, "inbatch_neg_score": 0.3598, "inbatch_pos_score": 1.0391, "learning_rate": 4.683333333333334e-05, "loss": 3.0735, "norm_diff": 0.2491, "norm_loss": 0.0, "num_token_doc": 66.8587, "num_token_overlap": 17.825, "num_token_query": 52.3131, "num_token_union": 73.7295, "num_word_context": 202.408, "num_word_doc": 49.8955, "num_word_query": 39.8852, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5031.1765, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3584, "query_norm": 1.6996, "queue_k_norm": 1.446, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3131, "sent_len_1": 66.8587, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3187, "stdk": 0.0479, "stdq": 0.0462, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 15700 }, { "accuracy": 55.6152, "active_queue_size": 16384.0, "cl_loss": 3.0703, "doc_norm": 1.4439, "encoder_q-embeddings": 832.3023, "encoder_q-layer.0": 536.0534, "encoder_q-layer.1": 625.1378, "encoder_q-layer.10": 644.717, "encoder_q-layer.11": 1304.0416, "encoder_q-layer.2": 637.1049, "encoder_q-layer.3": 699.2086, "encoder_q-layer.4": 731.9185, "encoder_q-layer.5": 737.2835, "encoder_q-layer.6": 732.686, "encoder_q-layer.7": 718.2636, "encoder_q-layer.8": 739.8898, "encoder_q-layer.9": 595.9796, "epoch": 0.15, "inbatch_neg_score": 0.4071, "inbatch_pos_score": 1.0488, "learning_rate": 4.677777777777778e-05, "loss": 3.0703, "norm_diff": 0.3191, "norm_loss": 0.0, "num_token_doc": 66.8684, "num_token_overlap": 17.8115, "num_token_query": 52.2643, "num_token_union": 73.7959, "num_word_context": 202.4607, "num_word_doc": 49.8937, "num_word_query": 39.8576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1077.6055, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4045, "query_norm": 1.7629, "queue_k_norm": 1.4437, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2643, "sent_len_1": 66.8684, "sent_len_max_0": 128.0, "sent_len_max_1": 210.63, "stdk": 0.0476, "stdq": 0.0443, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 15800 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0822, "doc_norm": 1.4458, "encoder_q-embeddings": 1106.7332, "encoder_q-layer.0": 722.8094, "encoder_q-layer.1": 848.0064, "encoder_q-layer.10": 539.9369, "encoder_q-layer.11": 1108.4779, "encoder_q-layer.2": 1017.7246, "encoder_q-layer.3": 1133.1345, "encoder_q-layer.4": 1189.9481, "encoder_q-layer.5": 1222.8451, "encoder_q-layer.6": 1275.5558, "encoder_q-layer.7": 1167.9498, "encoder_q-layer.8": 866.8259, "encoder_q-layer.9": 493.2969, "epoch": 0.16, "inbatch_neg_score": 0.4228, "inbatch_pos_score": 1.1074, "learning_rate": 4.672222222222222e-05, "loss": 3.0822, "norm_diff": 0.3101, "norm_loss": 0.0, "num_token_doc": 66.7662, "num_token_overlap": 17.8106, "num_token_query": 52.274, "num_token_union": 73.6903, "num_word_context": 202.2018, "num_word_doc": 49.7968, "num_word_query": 39.8541, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1480.6514, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4221, "query_norm": 1.7559, "queue_k_norm": 1.4465, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.274, "sent_len_1": 66.7662, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3675, "stdk": 0.0476, "stdq": 0.0465, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 15900 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0826, "doc_norm": 1.4456, "encoder_q-embeddings": 1064.9799, "encoder_q-layer.0": 724.5314, "encoder_q-layer.1": 782.3876, "encoder_q-layer.10": 574.9747, "encoder_q-layer.11": 1041.5978, "encoder_q-layer.2": 883.3203, "encoder_q-layer.3": 941.5323, "encoder_q-layer.4": 952.0574, "encoder_q-layer.5": 874.9368, "encoder_q-layer.6": 897.9813, "encoder_q-layer.7": 852.3672, "encoder_q-layer.8": 757.2916, "encoder_q-layer.9": 518.1257, "epoch": 0.16, "inbatch_neg_score": 0.4605, "inbatch_pos_score": 1.126, "learning_rate": 4.666666666666667e-05, "loss": 3.0826, "norm_diff": 0.2772, "norm_loss": 0.0, "num_token_doc": 66.8155, "num_token_overlap": 17.8286, "num_token_query": 52.2802, "num_token_union": 73.7129, "num_word_context": 202.201, "num_word_doc": 49.863, "num_word_query": 39.8869, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1252.0124, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4583, "query_norm": 1.7228, "queue_k_norm": 1.4485, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2802, "sent_len_1": 66.8155, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3288, "stdk": 0.0476, "stdq": 0.0454, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 16000 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.0933, "doc_norm": 1.4512, "encoder_q-embeddings": 1293.1431, "encoder_q-layer.0": 849.4103, "encoder_q-layer.1": 978.7183, "encoder_q-layer.10": 562.129, "encoder_q-layer.11": 1209.2097, "encoder_q-layer.2": 1093.0424, "encoder_q-layer.3": 1165.5465, "encoder_q-layer.4": 1250.1301, "encoder_q-layer.5": 1286.5581, "encoder_q-layer.6": 1288.3448, "encoder_q-layer.7": 983.1445, "encoder_q-layer.8": 709.6675, "encoder_q-layer.9": 542.0948, "epoch": 0.16, "inbatch_neg_score": 0.4754, "inbatch_pos_score": 1.1738, "learning_rate": 4.6611111111111114e-05, "loss": 3.0933, "norm_diff": 0.3022, "norm_loss": 0.0, "num_token_doc": 66.805, "num_token_overlap": 17.8187, "num_token_query": 52.3421, "num_token_union": 73.783, "num_word_context": 202.4398, "num_word_doc": 49.8649, "num_word_query": 39.9352, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1531.3766, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4736, "query_norm": 1.7534, "queue_k_norm": 1.4509, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3421, "sent_len_1": 66.805, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5025, "stdk": 0.0477, "stdq": 0.047, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 16100 }, { "accuracy": 56.6895, "active_queue_size": 16384.0, "cl_loss": 3.0824, "doc_norm": 1.4542, "encoder_q-embeddings": 1233.4163, "encoder_q-layer.0": 819.0531, "encoder_q-layer.1": 914.5851, "encoder_q-layer.10": 508.4724, "encoder_q-layer.11": 1281.4116, "encoder_q-layer.2": 1047.0581, "encoder_q-layer.3": 1187.3555, "encoder_q-layer.4": 1181.7848, "encoder_q-layer.5": 1261.8951, "encoder_q-layer.6": 1228.2152, "encoder_q-layer.7": 959.7764, "encoder_q-layer.8": 704.7484, "encoder_q-layer.9": 497.8902, "epoch": 0.16, "inbatch_neg_score": 0.5075, "inbatch_pos_score": 1.1787, "learning_rate": 4.6555555555555556e-05, "loss": 3.0824, "norm_diff": 0.2444, "norm_loss": 0.0, "num_token_doc": 66.7951, "num_token_overlap": 17.9051, "num_token_query": 52.3755, "num_token_union": 73.7438, "num_word_context": 202.3616, "num_word_doc": 49.8642, "num_word_query": 39.9557, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1510.2245, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5063, "query_norm": 1.6986, "queue_k_norm": 1.4534, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3755, "sent_len_1": 66.7951, "sent_len_max_0": 128.0, "sent_len_max_1": 206.78, "stdk": 0.0477, "stdq": 0.0456, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 16200 }, { "accuracy": 54.9316, "active_queue_size": 16384.0, "cl_loss": 3.1179, "doc_norm": 1.462, "encoder_q-embeddings": 1807.3304, "encoder_q-layer.0": 1314.0244, "encoder_q-layer.1": 1553.3912, "encoder_q-layer.10": 611.0566, "encoder_q-layer.11": 1400.3561, "encoder_q-layer.2": 1873.6017, "encoder_q-layer.3": 2077.948, "encoder_q-layer.4": 2248.2395, "encoder_q-layer.5": 2379.895, "encoder_q-layer.6": 2427.9995, "encoder_q-layer.7": 2289.2668, "encoder_q-layer.8": 1225.2007, "encoder_q-layer.9": 613.3046, "epoch": 0.16, "inbatch_neg_score": 0.5172, "inbatch_pos_score": 1.1973, "learning_rate": 4.6500000000000005e-05, "loss": 3.1179, "norm_diff": 0.2157, "norm_loss": 0.0, "num_token_doc": 66.8749, "num_token_overlap": 17.8149, "num_token_query": 52.2192, "num_token_union": 73.7397, "num_word_context": 202.4365, "num_word_doc": 49.9067, "num_word_query": 39.8039, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2598.5315, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5171, "query_norm": 1.6777, "queue_k_norm": 1.4584, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2192, "sent_len_1": 66.8749, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6075, "stdk": 0.0478, "stdq": 0.0465, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 16300 }, { "accuracy": 56.1035, "active_queue_size": 16384.0, "cl_loss": 3.1231, "doc_norm": 1.4636, "encoder_q-embeddings": 1475.3333, "encoder_q-layer.0": 1004.947, "encoder_q-layer.1": 1041.304, "encoder_q-layer.10": 512.7552, "encoder_q-layer.11": 1308.6438, "encoder_q-layer.2": 1255.6035, "encoder_q-layer.3": 1403.0906, "encoder_q-layer.4": 1396.5695, "encoder_q-layer.5": 1402.9877, "encoder_q-layer.6": 1363.1227, "encoder_q-layer.7": 1070.3529, "encoder_q-layer.8": 730.2403, "encoder_q-layer.9": 514.6284, "epoch": 0.16, "inbatch_neg_score": 0.495, "inbatch_pos_score": 1.1758, "learning_rate": 4.644444444444445e-05, "loss": 3.1231, "norm_diff": 0.148, "norm_loss": 0.0, "num_token_doc": 66.7049, "num_token_overlap": 17.8, "num_token_query": 52.2465, "num_token_union": 73.6527, "num_word_context": 202.1829, "num_word_doc": 49.7809, "num_word_query": 39.8273, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1712.3454, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4951, "query_norm": 1.6116, "queue_k_norm": 1.462, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2465, "sent_len_1": 66.7049, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4525, "stdk": 0.0478, "stdq": 0.0463, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 16400 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.0892, "doc_norm": 1.4647, "encoder_q-embeddings": 1605.3804, "encoder_q-layer.0": 1074.5787, "encoder_q-layer.1": 1243.0248, "encoder_q-layer.10": 586.9182, "encoder_q-layer.11": 1582.0211, "encoder_q-layer.2": 1454.8235, "encoder_q-layer.3": 1384.0919, "encoder_q-layer.4": 1309.9598, "encoder_q-layer.5": 1328.9366, "encoder_q-layer.6": 1217.1696, "encoder_q-layer.7": 853.2006, "encoder_q-layer.8": 710.3555, "encoder_q-layer.9": 553.7396, "epoch": 0.16, "inbatch_neg_score": 0.4755, "inbatch_pos_score": 1.1309, "learning_rate": 4.638888888888889e-05, "loss": 3.0892, "norm_diff": 0.086, "norm_loss": 0.0, "num_token_doc": 66.7844, "num_token_overlap": 17.8266, "num_token_query": 52.2545, "num_token_union": 73.6523, "num_word_context": 202.3704, "num_word_doc": 49.8349, "num_word_query": 39.8384, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1793.318, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4746, "query_norm": 1.5507, "queue_k_norm": 1.4665, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2545, "sent_len_1": 66.7844, "sent_len_max_0": 128.0, "sent_len_max_1": 211.3887, "stdk": 0.0477, "stdq": 0.0453, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 16500 }, { "accuracy": 55.2246, "active_queue_size": 16384.0, "cl_loss": 3.0945, "doc_norm": 1.4657, "encoder_q-embeddings": 1851.2383, "encoder_q-layer.0": 1314.7947, "encoder_q-layer.1": 1580.1134, "encoder_q-layer.10": 564.2521, "encoder_q-layer.11": 1349.3713, "encoder_q-layer.2": 1822.9171, "encoder_q-layer.3": 2049.1196, "encoder_q-layer.4": 1935.3672, "encoder_q-layer.5": 1976.0593, "encoder_q-layer.6": 1811.4126, "encoder_q-layer.7": 1288.4867, "encoder_q-layer.8": 820.0121, "encoder_q-layer.9": 525.3834, "epoch": 0.16, "inbatch_neg_score": 0.4429, "inbatch_pos_score": 1.1123, "learning_rate": 4.633333333333333e-05, "loss": 3.0945, "norm_diff": 0.0775, "norm_loss": 0.0, "num_token_doc": 66.6915, "num_token_overlap": 17.7608, "num_token_query": 52.1407, "num_token_union": 73.6339, "num_word_context": 202.2178, "num_word_doc": 49.7761, "num_word_query": 39.764, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2251.037, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4424, "query_norm": 1.5432, "queue_k_norm": 1.469, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1407, "sent_len_1": 66.6915, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9363, "stdk": 0.0477, "stdq": 0.0462, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 16600 }, { "accuracy": 55.7129, "active_queue_size": 16384.0, "cl_loss": 3.0932, "doc_norm": 1.47, "encoder_q-embeddings": 4426.5952, "encoder_q-layer.0": 3051.2349, "encoder_q-layer.1": 3295.4626, "encoder_q-layer.10": 564.0757, "encoder_q-layer.11": 1358.183, "encoder_q-layer.2": 3721.791, "encoder_q-layer.3": 3882.3564, "encoder_q-layer.4": 4163.5879, "encoder_q-layer.5": 4043.1555, "encoder_q-layer.6": 3492.6807, "encoder_q-layer.7": 1885.9763, "encoder_q-layer.8": 873.7946, "encoder_q-layer.9": 562.5757, "epoch": 0.16, "inbatch_neg_score": 0.419, "inbatch_pos_score": 1.0859, "learning_rate": 4.627777777777778e-05, "loss": 3.0932, "norm_diff": 0.0432, "norm_loss": 0.0, "num_token_doc": 66.8081, "num_token_overlap": 17.8162, "num_token_query": 52.3083, "num_token_union": 73.7329, "num_word_context": 202.1784, "num_word_doc": 49.8661, "num_word_query": 39.8927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4643.2546, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4187, "query_norm": 1.5132, "queue_k_norm": 1.4699, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3083, "sent_len_1": 66.8081, "sent_len_max_0": 128.0, "sent_len_max_1": 208.92, "stdk": 0.0478, "stdq": 0.0454, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 16700 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.069, "doc_norm": 1.4695, "encoder_q-embeddings": 1320.0695, "encoder_q-layer.0": 958.5123, "encoder_q-layer.1": 1086.2407, "encoder_q-layer.10": 594.162, "encoder_q-layer.11": 1297.8002, "encoder_q-layer.2": 1248.8505, "encoder_q-layer.3": 1345.9999, "encoder_q-layer.4": 1302.0553, "encoder_q-layer.5": 1204.7278, "encoder_q-layer.6": 1217.78, "encoder_q-layer.7": 910.3162, "encoder_q-layer.8": 692.5202, "encoder_q-layer.9": 537.6296, "epoch": 0.16, "inbatch_neg_score": 0.4048, "inbatch_pos_score": 1.0723, "learning_rate": 4.6222222222222224e-05, "loss": 3.069, "norm_diff": 0.042, "norm_loss": 0.0, "num_token_doc": 66.8009, "num_token_overlap": 17.8636, "num_token_query": 52.4315, "num_token_union": 73.7527, "num_word_context": 202.3428, "num_word_doc": 49.803, "num_word_query": 39.9903, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1589.183, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4048, "query_norm": 1.5116, "queue_k_norm": 1.4698, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4315, "sent_len_1": 66.8009, "sent_len_max_0": 128.0, "sent_len_max_1": 210.8963, "stdk": 0.0478, "stdq": 0.0452, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 16800 }, { "accuracy": 57.666, "active_queue_size": 16384.0, "cl_loss": 3.0664, "doc_norm": 1.4717, "encoder_q-embeddings": 1739.5962, "encoder_q-layer.0": 1182.3384, "encoder_q-layer.1": 1395.5498, "encoder_q-layer.10": 544.4627, "encoder_q-layer.11": 1113.7601, "encoder_q-layer.2": 1453.0846, "encoder_q-layer.3": 1650.5281, "encoder_q-layer.4": 1727.5521, "encoder_q-layer.5": 1735.8201, "encoder_q-layer.6": 1268.8257, "encoder_q-layer.7": 780.5443, "encoder_q-layer.8": 639.9957, "encoder_q-layer.9": 520.2263, "epoch": 0.16, "inbatch_neg_score": 0.383, "inbatch_pos_score": 1.0713, "learning_rate": 4.6166666666666666e-05, "loss": 3.0664, "norm_diff": 0.0438, "norm_loss": 0.0, "num_token_doc": 67.0516, "num_token_overlap": 17.829, "num_token_query": 52.2399, "num_token_union": 73.8403, "num_word_context": 202.5865, "num_word_doc": 50.0221, "num_word_query": 39.8418, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1913.2653, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3838, "query_norm": 1.5155, "queue_k_norm": 1.4702, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2399, "sent_len_1": 67.0516, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1925, "stdk": 0.0479, "stdq": 0.0458, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 16900 }, { "accuracy": 55.6152, "active_queue_size": 16384.0, "cl_loss": 3.0529, "doc_norm": 1.467, "encoder_q-embeddings": 2553.2761, "encoder_q-layer.0": 1967.7743, "encoder_q-layer.1": 1874.8845, "encoder_q-layer.10": 514.6971, "encoder_q-layer.11": 1274.8781, "encoder_q-layer.2": 1830.5427, "encoder_q-layer.3": 1396.6841, "encoder_q-layer.4": 1181.9965, "encoder_q-layer.5": 1044.1302, "encoder_q-layer.6": 926.0981, "encoder_q-layer.7": 705.7833, "encoder_q-layer.8": 667.916, "encoder_q-layer.9": 522.2153, "epoch": 0.17, "inbatch_neg_score": 0.3725, "inbatch_pos_score": 1.0479, "learning_rate": 4.6111111111111115e-05, "loss": 3.0529, "norm_diff": 0.0431, "norm_loss": 0.0, "num_token_doc": 66.7235, "num_token_overlap": 17.8007, "num_token_query": 52.2598, "num_token_union": 73.7087, "num_word_context": 202.0811, "num_word_doc": 49.8211, "num_word_query": 39.8582, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2288.4313, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3711, "query_norm": 1.5101, "queue_k_norm": 1.4689, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2598, "sent_len_1": 66.7235, "sent_len_max_0": 128.0, "sent_len_max_1": 208.07, "stdk": 0.0477, "stdq": 0.0459, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 17000 }, { "accuracy": 54.9316, "active_queue_size": 16384.0, "cl_loss": 3.0835, "doc_norm": 1.469, "encoder_q-embeddings": 885.0573, "encoder_q-layer.0": 648.5664, "encoder_q-layer.1": 692.8525, "encoder_q-layer.10": 506.3506, "encoder_q-layer.11": 1195.9244, "encoder_q-layer.2": 685.8518, "encoder_q-layer.3": 687.1282, "encoder_q-layer.4": 719.9574, "encoder_q-layer.5": 735.6149, "encoder_q-layer.6": 734.1187, "encoder_q-layer.7": 683.4611, "encoder_q-layer.8": 597.7191, "encoder_q-layer.9": 494.1255, "epoch": 0.17, "inbatch_neg_score": 0.3593, "inbatch_pos_score": 1.0273, "learning_rate": 4.605555555555556e-05, "loss": 3.0835, "norm_diff": 0.0264, "norm_loss": 0.0, "num_token_doc": 66.9437, "num_token_overlap": 17.8302, "num_token_query": 52.3391, "num_token_union": 73.8027, "num_word_context": 202.4738, "num_word_doc": 49.9473, "num_word_query": 39.9111, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1071.3432, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3589, "query_norm": 1.4954, "queue_k_norm": 1.469, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3391, "sent_len_1": 66.9437, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3925, "stdk": 0.0478, "stdq": 0.0453, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 17100 }, { "accuracy": 55.5176, "active_queue_size": 16384.0, "cl_loss": 3.0631, "doc_norm": 1.4617, "encoder_q-embeddings": 28767.9492, "encoder_q-layer.0": 21355.0449, "encoder_q-layer.1": 23931.8008, "encoder_q-layer.10": 742.8406, "encoder_q-layer.11": 1220.7925, "encoder_q-layer.2": 26659.873, "encoder_q-layer.3": 29123.6992, "encoder_q-layer.4": 28362.6953, "encoder_q-layer.5": 22254.9414, "encoder_q-layer.6": 19317.9395, "encoder_q-layer.7": 16331.5303, "encoder_q-layer.8": 8922.4131, "encoder_q-layer.9": 1885.8082, "epoch": 0.17, "inbatch_neg_score": 0.3538, "inbatch_pos_score": 1.0469, "learning_rate": 4.600000000000001e-05, "loss": 3.0631, "norm_diff": 0.0548, "norm_loss": 0.0, "num_token_doc": 66.7863, "num_token_overlap": 17.7619, "num_token_query": 52.1762, "num_token_union": 73.7541, "num_word_context": 202.1023, "num_word_doc": 49.818, "num_word_query": 39.7792, "postclip_grad_norm": 1.0, "preclip_grad_norm": 31912.461, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.3535, "query_norm": 1.5165, "queue_k_norm": 1.4674, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1762, "sent_len_1": 66.7863, "sent_len_max_0": 128.0, "sent_len_max_1": 210.425, "stdk": 0.0476, "stdq": 0.0465, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 17200 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.0559, "doc_norm": 1.4673, "encoder_q-embeddings": 1066.7754, "encoder_q-layer.0": 776.986, "encoder_q-layer.1": 843.0379, "encoder_q-layer.10": 620.3893, "encoder_q-layer.11": 1197.7218, "encoder_q-layer.2": 917.9984, "encoder_q-layer.3": 972.2526, "encoder_q-layer.4": 1052.5592, "encoder_q-layer.5": 977.4799, "encoder_q-layer.6": 914.6619, "encoder_q-layer.7": 817.3245, "encoder_q-layer.8": 728.0279, "encoder_q-layer.9": 608.5592, "epoch": 0.17, "inbatch_neg_score": 0.3666, "inbatch_pos_score": 1.0537, "learning_rate": 4.594444444444444e-05, "loss": 3.0559, "norm_diff": 0.0411, "norm_loss": 0.0, "num_token_doc": 66.7451, "num_token_overlap": 17.8349, "num_token_query": 52.325, "num_token_union": 73.6845, "num_word_context": 202.2598, "num_word_doc": 49.8276, "num_word_query": 39.9018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1314.0452, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3665, "query_norm": 1.5085, "queue_k_norm": 1.4647, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.325, "sent_len_1": 66.7451, "sent_len_max_0": 128.0, "sent_len_max_1": 207.2837, "stdk": 0.0478, "stdq": 0.046, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 17300 }, { "accuracy": 55.6152, "active_queue_size": 16384.0, "cl_loss": 3.0678, "doc_norm": 1.4687, "encoder_q-embeddings": 2645.7241, "encoder_q-layer.0": 1813.7657, "encoder_q-layer.1": 2015.248, "encoder_q-layer.10": 1086.803, "encoder_q-layer.11": 2143.8896, "encoder_q-layer.2": 2402.8984, "encoder_q-layer.3": 2635.2427, "encoder_q-layer.4": 2771.4663, "encoder_q-layer.5": 2649.1931, "encoder_q-layer.6": 2450.0642, "encoder_q-layer.7": 1709.2408, "encoder_q-layer.8": 1287.5077, "encoder_q-layer.9": 1019.0382, "epoch": 0.17, "inbatch_neg_score": 0.3809, "inbatch_pos_score": 1.0596, "learning_rate": 4.588888888888889e-05, "loss": 3.0678, "norm_diff": 0.0492, "norm_loss": 0.0, "num_token_doc": 66.5544, "num_token_overlap": 17.8363, "num_token_query": 52.3491, "num_token_union": 73.5789, "num_word_context": 202.1936, "num_word_doc": 49.6554, "num_word_query": 39.8987, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3191.7799, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3801, "query_norm": 1.5179, "queue_k_norm": 1.4643, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3491, "sent_len_1": 66.5544, "sent_len_max_0": 128.0, "sent_len_max_1": 208.945, "stdk": 0.0479, "stdq": 0.0459, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 17400 }, { "accuracy": 57.1777, "active_queue_size": 16384.0, "cl_loss": 3.0546, "doc_norm": 1.4637, "encoder_q-embeddings": 7795.1577, "encoder_q-layer.0": 5641.1631, "encoder_q-layer.1": 6337.5176, "encoder_q-layer.10": 1065.9275, "encoder_q-layer.11": 2192.9138, "encoder_q-layer.2": 7549.668, "encoder_q-layer.3": 7336.9229, "encoder_q-layer.4": 8887.0049, "encoder_q-layer.5": 7709.335, "encoder_q-layer.6": 6713.0063, "encoder_q-layer.7": 4606.5728, "encoder_q-layer.8": 1822.281, "encoder_q-layer.9": 1079.5122, "epoch": 0.17, "inbatch_neg_score": 0.3608, "inbatch_pos_score": 1.0654, "learning_rate": 4.5833333333333334e-05, "loss": 3.0546, "norm_diff": 0.0535, "norm_loss": 0.0, "num_token_doc": 66.6817, "num_token_overlap": 17.837, "num_token_query": 52.428, "num_token_union": 73.7134, "num_word_context": 202.2527, "num_word_doc": 49.734, "num_word_query": 40.008, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8794.8235, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3601, "query_norm": 1.5171, "queue_k_norm": 1.4657, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.428, "sent_len_1": 66.6817, "sent_len_max_0": 128.0, "sent_len_max_1": 210.4212, "stdk": 0.0477, "stdq": 0.0468, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 17500 }, { "accuracy": 56.7871, "active_queue_size": 16384.0, "cl_loss": 3.0532, "doc_norm": 1.4676, "encoder_q-embeddings": 2083.9395, "encoder_q-layer.0": 1543.7332, "encoder_q-layer.1": 1872.446, "encoder_q-layer.10": 1004.665, "encoder_q-layer.11": 2051.4375, "encoder_q-layer.2": 1989.6499, "encoder_q-layer.3": 2047.7083, "encoder_q-layer.4": 2078.3542, "encoder_q-layer.5": 1889.731, "encoder_q-layer.6": 1995.3745, "encoder_q-layer.7": 1771.6935, "encoder_q-layer.8": 1098.9115, "encoder_q-layer.9": 914.3431, "epoch": 0.17, "inbatch_neg_score": 0.375, "inbatch_pos_score": 1.0674, "learning_rate": 4.577777777777778e-05, "loss": 3.0532, "norm_diff": 0.0359, "norm_loss": 0.0, "num_token_doc": 66.6273, "num_token_overlap": 17.7946, "num_token_query": 52.3367, "num_token_union": 73.7017, "num_word_context": 202.1861, "num_word_doc": 49.7032, "num_word_query": 39.91, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2706.8961, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3745, "query_norm": 1.5036, "queue_k_norm": 1.4641, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3367, "sent_len_1": 66.6273, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8713, "stdk": 0.0479, "stdq": 0.046, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 17600 }, { "accuracy": 56.9824, "active_queue_size": 16384.0, "cl_loss": 3.0613, "doc_norm": 1.4657, "encoder_q-embeddings": 2932.104, "encoder_q-layer.0": 2056.1892, "encoder_q-layer.1": 2556.625, "encoder_q-layer.10": 1015.4135, "encoder_q-layer.11": 2125.9858, "encoder_q-layer.2": 2942.8201, "encoder_q-layer.3": 3109.6746, "encoder_q-layer.4": 3150.7734, "encoder_q-layer.5": 2726.5259, "encoder_q-layer.6": 2199.7749, "encoder_q-layer.7": 1623.5958, "encoder_q-layer.8": 1122.4036, "encoder_q-layer.9": 921.2403, "epoch": 0.17, "inbatch_neg_score": 0.369, "inbatch_pos_score": 1.0518, "learning_rate": 4.572222222222222e-05, "loss": 3.0613, "norm_diff": 0.0143, "norm_loss": 0.0, "num_token_doc": 66.8117, "num_token_overlap": 17.8171, "num_token_query": 52.2796, "num_token_union": 73.7016, "num_word_context": 202.3067, "num_word_doc": 49.8339, "num_word_query": 39.8656, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3464.4255, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3691, "query_norm": 1.4775, "queue_k_norm": 1.4625, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2796, "sent_len_1": 66.8117, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6238, "stdk": 0.0479, "stdq": 0.0454, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 17700 }, { "accuracy": 54.541, "active_queue_size": 16384.0, "cl_loss": 3.0536, "doc_norm": 1.4643, "encoder_q-embeddings": 4363.1401, "encoder_q-layer.0": 2854.5308, "encoder_q-layer.1": 3160.4558, "encoder_q-layer.10": 1024.6837, "encoder_q-layer.11": 2254.7351, "encoder_q-layer.2": 3495.0759, "encoder_q-layer.3": 3649.9285, "encoder_q-layer.4": 3241.5491, "encoder_q-layer.5": 2956.3203, "encoder_q-layer.6": 2763.0071, "encoder_q-layer.7": 1979.1677, "encoder_q-layer.8": 1390.1576, "encoder_q-layer.9": 1041.744, "epoch": 0.17, "inbatch_neg_score": 0.3667, "inbatch_pos_score": 1.0449, "learning_rate": 4.566666666666667e-05, "loss": 3.0536, "norm_diff": 0.0197, "norm_loss": 0.0, "num_token_doc": 66.9009, "num_token_overlap": 17.841, "num_token_query": 52.3335, "num_token_union": 73.7723, "num_word_context": 202.2966, "num_word_doc": 49.8825, "num_word_query": 39.9236, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4337.444, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3662, "query_norm": 1.484, "queue_k_norm": 1.4624, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3335, "sent_len_1": 66.9009, "sent_len_max_0": 128.0, "sent_len_max_1": 212.4288, "stdk": 0.0479, "stdq": 0.0457, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 17800 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.0517, "doc_norm": 1.463, "encoder_q-embeddings": 3477.2222, "encoder_q-layer.0": 2521.7656, "encoder_q-layer.1": 2926.814, "encoder_q-layer.10": 1350.6628, "encoder_q-layer.11": 2570.2366, "encoder_q-layer.2": 3541.5613, "encoder_q-layer.3": 3703.429, "encoder_q-layer.4": 4261.4395, "encoder_q-layer.5": 3956.7004, "encoder_q-layer.6": 3658.8865, "encoder_q-layer.7": 2761.8364, "encoder_q-layer.8": 1935.3427, "encoder_q-layer.9": 1304.272, "epoch": 0.17, "inbatch_neg_score": 0.3259, "inbatch_pos_score": 1.0215, "learning_rate": 4.561111111111112e-05, "loss": 3.0517, "norm_diff": 0.0234, "norm_loss": 0.0, "num_token_doc": 66.9895, "num_token_overlap": 17.8512, "num_token_query": 52.3687, "num_token_union": 73.8845, "num_word_context": 202.4191, "num_word_doc": 49.9729, "num_word_query": 39.9265, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4449.9234, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3262, "query_norm": 1.4863, "queue_k_norm": 1.4621, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3687, "sent_len_1": 66.9895, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5888, "stdk": 0.0478, "stdq": 0.0473, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 17900 }, { "accuracy": 56.4941, "active_queue_size": 16384.0, "cl_loss": 3.0631, "doc_norm": 1.4584, "encoder_q-embeddings": 5856.123, "encoder_q-layer.0": 3976.9553, "encoder_q-layer.1": 4658.5566, "encoder_q-layer.10": 1124.2648, "encoder_q-layer.11": 2333.9949, "encoder_q-layer.2": 5188.1089, "encoder_q-layer.3": 5499.8188, "encoder_q-layer.4": 5416.7056, "encoder_q-layer.5": 5309.2046, "encoder_q-layer.6": 3884.9758, "encoder_q-layer.7": 2546.8684, "encoder_q-layer.8": 1778.5402, "encoder_q-layer.9": 1106.8147, "epoch": 0.18, "inbatch_neg_score": 0.3294, "inbatch_pos_score": 1.0098, "learning_rate": 4.555555555555556e-05, "loss": 3.0631, "norm_diff": 0.0132, "norm_loss": 0.0, "num_token_doc": 66.7337, "num_token_overlap": 17.7788, "num_token_query": 52.1137, "num_token_union": 73.6388, "num_word_context": 202.302, "num_word_doc": 49.8106, "num_word_query": 39.7698, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6228.376, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3291, "query_norm": 1.4716, "queue_k_norm": 1.4597, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1137, "sent_len_1": 66.7337, "sent_len_max_0": 128.0, "sent_len_max_1": 206.86, "stdk": 0.0477, "stdq": 0.0462, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 18000 }, { "accuracy": 56.3965, "active_queue_size": 16384.0, "cl_loss": 3.0383, "doc_norm": 1.4576, "encoder_q-embeddings": 2081.2168, "encoder_q-layer.0": 1406.1272, "encoder_q-layer.1": 1615.7365, "encoder_q-layer.10": 1074.6371, "encoder_q-layer.11": 2257.5967, "encoder_q-layer.2": 1978.2324, "encoder_q-layer.3": 2151.8088, "encoder_q-layer.4": 2286.3728, "encoder_q-layer.5": 2274.1887, "encoder_q-layer.6": 2053.7695, "encoder_q-layer.7": 1541.5846, "encoder_q-layer.8": 1409.9912, "encoder_q-layer.9": 1046.9346, "epoch": 0.18, "inbatch_neg_score": 0.3237, "inbatch_pos_score": 1.0176, "learning_rate": 4.55e-05, "loss": 3.0383, "norm_diff": 0.0204, "norm_loss": 0.0, "num_token_doc": 66.7686, "num_token_overlap": 17.849, "num_token_query": 52.4219, "num_token_union": 73.7804, "num_word_context": 202.4969, "num_word_doc": 49.8153, "num_word_query": 39.9808, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2677.4131, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3242, "query_norm": 1.478, "queue_k_norm": 1.4577, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4219, "sent_len_1": 66.7686, "sent_len_max_0": 128.0, "sent_len_max_1": 208.945, "stdk": 0.0478, "stdq": 0.0464, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 18100 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.0507, "doc_norm": 1.4563, "encoder_q-embeddings": 11991.8115, "encoder_q-layer.0": 8862.0049, "encoder_q-layer.1": 12243.6631, "encoder_q-layer.10": 1088.9805, "encoder_q-layer.11": 2319.8979, "encoder_q-layer.2": 11457.2061, "encoder_q-layer.3": 12436.7012, "encoder_q-layer.4": 12774.1338, "encoder_q-layer.5": 13057.4209, "encoder_q-layer.6": 14417.4971, "encoder_q-layer.7": 11236.332, "encoder_q-layer.8": 6476.189, "encoder_q-layer.9": 1820.006, "epoch": 0.18, "inbatch_neg_score": 0.3559, "inbatch_pos_score": 1.0342, "learning_rate": 4.5444444444444444e-05, "loss": 3.0507, "norm_diff": 0.021, "norm_loss": 0.0, "num_token_doc": 66.8896, "num_token_overlap": 17.8265, "num_token_query": 52.2751, "num_token_union": 73.7632, "num_word_context": 202.4105, "num_word_doc": 49.8819, "num_word_query": 39.8879, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15238.6923, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3555, "query_norm": 1.4773, "queue_k_norm": 1.4556, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2751, "sent_len_1": 66.8896, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3262, "stdk": 0.0478, "stdq": 0.0456, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 18200 }, { "accuracy": 56.2988, "active_queue_size": 16384.0, "cl_loss": 3.0363, "doc_norm": 1.4572, "encoder_q-embeddings": 6160.9761, "encoder_q-layer.0": 4619.8784, "encoder_q-layer.1": 5267.2944, "encoder_q-layer.10": 982.4326, "encoder_q-layer.11": 2089.9731, "encoder_q-layer.2": 6165.0894, "encoder_q-layer.3": 6240.8643, "encoder_q-layer.4": 6651.6392, "encoder_q-layer.5": 6344.4248, "encoder_q-layer.6": 6445.4126, "encoder_q-layer.7": 4846.8516, "encoder_q-layer.8": 1925.6786, "encoder_q-layer.9": 1026.5853, "epoch": 0.18, "inbatch_neg_score": 0.338, "inbatch_pos_score": 1.0225, "learning_rate": 4.538888888888889e-05, "loss": 3.0363, "norm_diff": 0.0063, "norm_loss": 0.0, "num_token_doc": 66.7524, "num_token_overlap": 17.8587, "num_token_query": 52.3295, "num_token_union": 73.659, "num_word_context": 202.2751, "num_word_doc": 49.7955, "num_word_query": 39.9127, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7506.7718, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3364, "query_norm": 1.4561, "queue_k_norm": 1.4543, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3295, "sent_len_1": 66.7524, "sent_len_max_0": 128.0, "sent_len_max_1": 211.3512, "stdk": 0.0478, "stdq": 0.0454, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 18300 }, { "accuracy": 55.6152, "active_queue_size": 16384.0, "cl_loss": 3.0382, "doc_norm": 1.4542, "encoder_q-embeddings": 3082.4478, "encoder_q-layer.0": 2292.5127, "encoder_q-layer.1": 2709.7998, "encoder_q-layer.10": 1025.6354, "encoder_q-layer.11": 2174.4736, "encoder_q-layer.2": 3063.5037, "encoder_q-layer.3": 3383.0715, "encoder_q-layer.4": 3920.3318, "encoder_q-layer.5": 3636.4905, "encoder_q-layer.6": 3617.1389, "encoder_q-layer.7": 3154.9558, "encoder_q-layer.8": 1966.4843, "encoder_q-layer.9": 1088.4919, "epoch": 0.18, "inbatch_neg_score": 0.3341, "inbatch_pos_score": 0.999, "learning_rate": 4.5333333333333335e-05, "loss": 3.0382, "norm_diff": 0.0157, "norm_loss": 0.0, "num_token_doc": 66.616, "num_token_overlap": 17.8078, "num_token_query": 52.3471, "num_token_union": 73.7012, "num_word_context": 202.2709, "num_word_doc": 49.7321, "num_word_query": 39.9512, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4167.0172, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.333, "query_norm": 1.4438, "queue_k_norm": 1.4554, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3471, "sent_len_1": 66.616, "sent_len_max_0": 128.0, "sent_len_max_1": 206.3887, "stdk": 0.0477, "stdq": 0.0449, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 18400 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.0337, "doc_norm": 1.456, "encoder_q-embeddings": 1823.7726, "encoder_q-layer.0": 1248.9364, "encoder_q-layer.1": 1431.2513, "encoder_q-layer.10": 1102.7592, "encoder_q-layer.11": 2301.6079, "encoder_q-layer.2": 1709.2817, "encoder_q-layer.3": 1867.632, "encoder_q-layer.4": 2025.0795, "encoder_q-layer.5": 2100.1768, "encoder_q-layer.6": 2171.6616, "encoder_q-layer.7": 1847.4475, "encoder_q-layer.8": 1707.8744, "encoder_q-layer.9": 1140.7688, "epoch": 0.18, "inbatch_neg_score": 0.333, "inbatch_pos_score": 1.0195, "learning_rate": 4.527777777777778e-05, "loss": 3.0337, "norm_diff": 0.018, "norm_loss": 0.0, "num_token_doc": 66.8307, "num_token_overlap": 17.8265, "num_token_query": 52.224, "num_token_union": 73.6579, "num_word_context": 202.0853, "num_word_doc": 49.8529, "num_word_query": 39.8387, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2590.5153, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.335, "query_norm": 1.474, "queue_k_norm": 1.4548, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.224, "sent_len_1": 66.8307, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5362, "stdk": 0.0479, "stdq": 0.0465, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 18500 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.0421, "doc_norm": 1.4571, "encoder_q-embeddings": 6573.6992, "encoder_q-layer.0": 4626.2432, "encoder_q-layer.1": 5386.2329, "encoder_q-layer.10": 1091.0062, "encoder_q-layer.11": 2342.4514, "encoder_q-layer.2": 5946.7383, "encoder_q-layer.3": 6528.0298, "encoder_q-layer.4": 7155.2612, "encoder_q-layer.5": 7410.6392, "encoder_q-layer.6": 6696.5142, "encoder_q-layer.7": 4286.626, "encoder_q-layer.8": 2228.7947, "encoder_q-layer.9": 1255.9324, "epoch": 0.18, "inbatch_neg_score": 0.3356, "inbatch_pos_score": 1.0098, "learning_rate": 4.522222222222223e-05, "loss": 3.0421, "norm_diff": 0.0171, "norm_loss": 0.0, "num_token_doc": 66.7275, "num_token_overlap": 17.8162, "num_token_query": 52.3017, "num_token_union": 73.715, "num_word_context": 202.1974, "num_word_doc": 49.7801, "num_word_query": 39.9009, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7776.9523, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3369, "query_norm": 1.44, "queue_k_norm": 1.4527, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3017, "sent_len_1": 66.7275, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6413, "stdk": 0.048, "stdq": 0.0447, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 18600 }, { "accuracy": 56.3965, "active_queue_size": 16384.0, "cl_loss": 3.0325, "doc_norm": 1.4551, "encoder_q-embeddings": 4468.5962, "encoder_q-layer.0": 3175.4329, "encoder_q-layer.1": 3837.1323, "encoder_q-layer.10": 990.4596, "encoder_q-layer.11": 2095.2183, "encoder_q-layer.2": 4312.7534, "encoder_q-layer.3": 4239.6792, "encoder_q-layer.4": 4071.2488, "encoder_q-layer.5": 4539.8325, "encoder_q-layer.6": 4203.1631, "encoder_q-layer.7": 2853.7849, "encoder_q-layer.8": 1637.3187, "encoder_q-layer.9": 1109.2208, "epoch": 0.18, "inbatch_neg_score": 0.3327, "inbatch_pos_score": 1.0166, "learning_rate": 4.516666666666667e-05, "loss": 3.0325, "norm_diff": 0.0108, "norm_loss": 0.0, "num_token_doc": 66.8766, "num_token_overlap": 17.871, "num_token_query": 52.2506, "num_token_union": 73.6737, "num_word_context": 202.3712, "num_word_doc": 49.8652, "num_word_query": 39.8374, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5130.6177, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3337, "query_norm": 1.4636, "queue_k_norm": 1.452, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2506, "sent_len_1": 66.8766, "sent_len_max_0": 128.0, "sent_len_max_1": 209.7175, "stdk": 0.0479, "stdq": 0.046, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 18700 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0383, "doc_norm": 1.4507, "encoder_q-embeddings": 4140.8433, "encoder_q-layer.0": 2907.0593, "encoder_q-layer.1": 3239.1047, "encoder_q-layer.10": 1064.448, "encoder_q-layer.11": 2249.7598, "encoder_q-layer.2": 3866.9932, "encoder_q-layer.3": 4303.8828, "encoder_q-layer.4": 4133.8994, "encoder_q-layer.5": 3941.6814, "encoder_q-layer.6": 3803.7117, "encoder_q-layer.7": 2695.1753, "encoder_q-layer.8": 1699.5275, "encoder_q-layer.9": 1052.4651, "epoch": 0.18, "inbatch_neg_score": 0.3375, "inbatch_pos_score": 1.0166, "learning_rate": 4.511111111111112e-05, "loss": 3.0383, "norm_diff": 0.0051, "norm_loss": 0.0, "num_token_doc": 66.7705, "num_token_overlap": 17.8095, "num_token_query": 52.2001, "num_token_union": 73.6966, "num_word_context": 202.5554, "num_word_doc": 49.8529, "num_word_query": 39.8131, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4817.7924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3379, "query_norm": 1.4521, "queue_k_norm": 1.4522, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2001, "sent_len_1": 66.7705, "sent_len_max_0": 128.0, "sent_len_max_1": 207.395, "stdk": 0.0477, "stdq": 0.0454, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 18800 }, { "accuracy": 56.7871, "active_queue_size": 16384.0, "cl_loss": 3.0364, "doc_norm": 1.4503, "encoder_q-embeddings": 3878.1814, "encoder_q-layer.0": 2792.7207, "encoder_q-layer.1": 2952.1133, "encoder_q-layer.10": 1058.7822, "encoder_q-layer.11": 2158.8662, "encoder_q-layer.2": 3352.2126, "encoder_q-layer.3": 3479.9668, "encoder_q-layer.4": 3560.0608, "encoder_q-layer.5": 3819.854, "encoder_q-layer.6": 3699.7915, "encoder_q-layer.7": 2923.7173, "encoder_q-layer.8": 1830.4847, "encoder_q-layer.9": 1070.1831, "epoch": 0.18, "inbatch_neg_score": 0.3343, "inbatch_pos_score": 1.0283, "learning_rate": 4.5055555555555554e-05, "loss": 3.0364, "norm_diff": 0.0132, "norm_loss": 0.0, "num_token_doc": 66.7194, "num_token_overlap": 17.7624, "num_token_query": 52.2088, "num_token_union": 73.6649, "num_word_context": 202.393, "num_word_doc": 49.7766, "num_word_query": 39.8118, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4429.4673, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3335, "query_norm": 1.4609, "queue_k_norm": 1.4504, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2088, "sent_len_1": 66.7194, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5687, "stdk": 0.0477, "stdq": 0.0462, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 18900 }, { "accuracy": 56.1035, "active_queue_size": 16384.0, "cl_loss": 3.0318, "doc_norm": 1.4498, "encoder_q-embeddings": 2543.2803, "encoder_q-layer.0": 1639.1462, "encoder_q-layer.1": 1941.9711, "encoder_q-layer.10": 1129.0187, "encoder_q-layer.11": 2548.0562, "encoder_q-layer.2": 2307.7412, "encoder_q-layer.3": 2569.3826, "encoder_q-layer.4": 2955.9038, "encoder_q-layer.5": 2760.1023, "encoder_q-layer.6": 2803.9512, "encoder_q-layer.7": 2581.6287, "encoder_q-layer.8": 1844.6843, "encoder_q-layer.9": 1132.8583, "epoch": 0.19, "inbatch_neg_score": 0.3639, "inbatch_pos_score": 1.0605, "learning_rate": 4.5e-05, "loss": 3.0318, "norm_diff": 0.0474, "norm_loss": 0.0, "num_token_doc": 66.892, "num_token_overlap": 17.8217, "num_token_query": 52.2901, "num_token_union": 73.7574, "num_word_context": 202.252, "num_word_doc": 49.9241, "num_word_query": 39.887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3343.2078, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3638, "query_norm": 1.4972, "queue_k_norm": 1.4499, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2901, "sent_len_1": 66.892, "sent_len_max_0": 128.0, "sent_len_max_1": 207.705, "stdk": 0.0478, "stdq": 0.047, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 19000 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.0349, "doc_norm": 1.4489, "encoder_q-embeddings": 1463.1489, "encoder_q-layer.0": 1073.0092, "encoder_q-layer.1": 1225.4243, "encoder_q-layer.10": 1059.7365, "encoder_q-layer.11": 2190.7224, "encoder_q-layer.2": 1411.4994, "encoder_q-layer.3": 1493.4905, "encoder_q-layer.4": 1588.2853, "encoder_q-layer.5": 1497.7083, "encoder_q-layer.6": 1506.894, "encoder_q-layer.7": 1378.5532, "encoder_q-layer.8": 1314.2617, "encoder_q-layer.9": 1065.6967, "epoch": 0.19, "inbatch_neg_score": 0.3444, "inbatch_pos_score": 1.0449, "learning_rate": 4.4944444444444445e-05, "loss": 3.0349, "norm_diff": 0.0261, "norm_loss": 0.0, "num_token_doc": 66.7033, "num_token_overlap": 17.8414, "num_token_query": 52.4242, "num_token_union": 73.7123, "num_word_context": 202.3659, "num_word_doc": 49.7823, "num_word_query": 39.9821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2120.5873, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3447, "query_norm": 1.4749, "queue_k_norm": 1.4509, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4242, "sent_len_1": 66.7033, "sent_len_max_0": 128.0, "sent_len_max_1": 209.9038, "stdk": 0.0477, "stdq": 0.0467, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 19100 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0488, "doc_norm": 1.4518, "encoder_q-embeddings": 3002.0879, "encoder_q-layer.0": 2228.4951, "encoder_q-layer.1": 2481.949, "encoder_q-layer.10": 1012.5816, "encoder_q-layer.11": 2172.5471, "encoder_q-layer.2": 3026.1619, "encoder_q-layer.3": 3229.0752, "encoder_q-layer.4": 3084.9646, "encoder_q-layer.5": 3276.3005, "encoder_q-layer.6": 3209.2104, "encoder_q-layer.7": 2568.9783, "encoder_q-layer.8": 1551.9635, "encoder_q-layer.9": 1015.523, "epoch": 0.19, "inbatch_neg_score": 0.3504, "inbatch_pos_score": 1.0303, "learning_rate": 4.4888888888888894e-05, "loss": 3.0488, "norm_diff": 0.0168, "norm_loss": 0.0, "num_token_doc": 66.6967, "num_token_overlap": 17.7255, "num_token_query": 52.1633, "num_token_union": 73.646, "num_word_context": 202.1829, "num_word_doc": 49.7616, "num_word_query": 39.7805, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3803.3635, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3506, "query_norm": 1.4686, "queue_k_norm": 1.4507, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1633, "sent_len_1": 66.6967, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5325, "stdk": 0.0478, "stdq": 0.0462, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 19200 }, { "accuracy": 55.0293, "active_queue_size": 16384.0, "cl_loss": 3.0311, "doc_norm": 1.451, "encoder_q-embeddings": 2851.2236, "encoder_q-layer.0": 1905.8187, "encoder_q-layer.1": 2202.9146, "encoder_q-layer.10": 1050.9177, "encoder_q-layer.11": 2207.7769, "encoder_q-layer.2": 2539.7183, "encoder_q-layer.3": 2800.4907, "encoder_q-layer.4": 2904.6013, "encoder_q-layer.5": 2608.2859, "encoder_q-layer.6": 2722.9829, "encoder_q-layer.7": 2541.8511, "encoder_q-layer.8": 1676.8649, "encoder_q-layer.9": 1021.5643, "epoch": 0.19, "inbatch_neg_score": 0.3327, "inbatch_pos_score": 1.0107, "learning_rate": 4.483333333333333e-05, "loss": 3.0311, "norm_diff": 0.0136, "norm_loss": 0.0, "num_token_doc": 66.7806, "num_token_overlap": 17.8651, "num_token_query": 52.328, "num_token_union": 73.689, "num_word_context": 202.1926, "num_word_doc": 49.827, "num_word_query": 39.9243, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3432.2505, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.333, "query_norm": 1.4615, "queue_k_norm": 1.4501, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.328, "sent_len_1": 66.7806, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0613, "stdk": 0.0478, "stdq": 0.0465, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 19300 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.0455, "doc_norm": 1.4428, "encoder_q-embeddings": 16510.3672, "encoder_q-layer.0": 12508.8281, "encoder_q-layer.1": 13484.7236, "encoder_q-layer.10": 1962.0991, "encoder_q-layer.11": 4153.0259, "encoder_q-layer.2": 16649.957, "encoder_q-layer.3": 16632.2891, "encoder_q-layer.4": 15749.4727, "encoder_q-layer.5": 16235.8252, "encoder_q-layer.6": 14132.3398, "encoder_q-layer.7": 11693.1396, "encoder_q-layer.8": 5755.1577, "encoder_q-layer.9": 2413.9185, "epoch": 0.19, "inbatch_neg_score": 0.3432, "inbatch_pos_score": 1.042, "learning_rate": 4.477777777777778e-05, "loss": 3.0455, "norm_diff": 0.0177, "norm_loss": 0.0, "num_token_doc": 66.7932, "num_token_overlap": 17.8139, "num_token_query": 52.3093, "num_token_union": 73.7279, "num_word_context": 202.2977, "num_word_doc": 49.8132, "num_word_query": 39.8936, "postclip_grad_norm": 1.0, "preclip_grad_norm": 19247.2197, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3447, "query_norm": 1.4605, "queue_k_norm": 1.4487, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3093, "sent_len_1": 66.7932, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5825, "stdk": 0.0475, "stdq": 0.0464, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 19400 }, { "accuracy": 56.3965, "active_queue_size": 16384.0, "cl_loss": 3.0358, "doc_norm": 1.4484, "encoder_q-embeddings": 5561.5864, "encoder_q-layer.0": 3913.0818, "encoder_q-layer.1": 4017.8889, "encoder_q-layer.10": 2127.24, "encoder_q-layer.11": 4371.7329, "encoder_q-layer.2": 4604.0752, "encoder_q-layer.3": 4831.8887, "encoder_q-layer.4": 5167.8188, "encoder_q-layer.5": 4728.4155, "encoder_q-layer.6": 4621.9102, "encoder_q-layer.7": 3976.1282, "encoder_q-layer.8": 2982.1931, "encoder_q-layer.9": 2067.5522, "epoch": 0.19, "inbatch_neg_score": 0.349, "inbatch_pos_score": 1.0391, "learning_rate": 4.472222222222223e-05, "loss": 3.0358, "norm_diff": 0.008, "norm_loss": 0.0, "num_token_doc": 66.7877, "num_token_overlap": 17.7832, "num_token_query": 52.2062, "num_token_union": 73.6966, "num_word_context": 202.3029, "num_word_doc": 49.8464, "num_word_query": 39.8197, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6323.8732, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3489, "query_norm": 1.4499, "queue_k_norm": 1.4481, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2062, "sent_len_1": 66.7877, "sent_len_max_0": 128.0, "sent_len_max_1": 206.0925, "stdk": 0.0477, "stdq": 0.046, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 19500 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.0334, "doc_norm": 1.4499, "encoder_q-embeddings": 6885.4888, "encoder_q-layer.0": 4918.8965, "encoder_q-layer.1": 6233.6343, "encoder_q-layer.10": 2131.5845, "encoder_q-layer.11": 4420.1309, "encoder_q-layer.2": 7669.8779, "encoder_q-layer.3": 8118.52, "encoder_q-layer.4": 9734.2344, "encoder_q-layer.5": 9122.0801, "encoder_q-layer.6": 7708.4873, "encoder_q-layer.7": 5809.8262, "encoder_q-layer.8": 3761.906, "encoder_q-layer.9": 2270.177, "epoch": 0.19, "inbatch_neg_score": 0.3393, "inbatch_pos_score": 1.042, "learning_rate": 4.466666666666667e-05, "loss": 3.0334, "norm_diff": 0.0237, "norm_loss": 0.0, "num_token_doc": 66.6921, "num_token_overlap": 17.772, "num_token_query": 52.1758, "num_token_union": 73.6368, "num_word_context": 202.3652, "num_word_doc": 49.7985, "num_word_query": 39.7889, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9536.1133, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3396, "query_norm": 1.4736, "queue_k_norm": 1.4501, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1758, "sent_len_1": 66.6921, "sent_len_max_0": 128.0, "sent_len_max_1": 207.3338, "stdk": 0.0478, "stdq": 0.047, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 19600 }, { "accuracy": 56.8848, "active_queue_size": 16384.0, "cl_loss": 3.0406, "doc_norm": 1.4476, "encoder_q-embeddings": 9028.3994, "encoder_q-layer.0": 6616.9316, "encoder_q-layer.1": 6773.1973, "encoder_q-layer.10": 1917.3025, "encoder_q-layer.11": 4381.8115, "encoder_q-layer.2": 7346.2725, "encoder_q-layer.3": 7927.3555, "encoder_q-layer.4": 9156.2314, "encoder_q-layer.5": 9259.8418, "encoder_q-layer.6": 6903.1865, "encoder_q-layer.7": 4446.8877, "encoder_q-layer.8": 2537.7305, "encoder_q-layer.9": 1925.0212, "epoch": 0.19, "inbatch_neg_score": 0.3617, "inbatch_pos_score": 1.04, "learning_rate": 4.461111111111111e-05, "loss": 3.0406, "norm_diff": 0.0083, "norm_loss": 0.0, "num_token_doc": 66.696, "num_token_overlap": 17.7943, "num_token_query": 52.2854, "num_token_union": 73.6947, "num_word_context": 202.4517, "num_word_doc": 49.7679, "num_word_query": 39.8868, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9824.3314, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3606, "query_norm": 1.4505, "queue_k_norm": 1.4481, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2854, "sent_len_1": 66.696, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3862, "stdk": 0.0477, "stdq": 0.0457, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 19700 }, { "accuracy": 57.2754, "active_queue_size": 16384.0, "cl_loss": 3.0201, "doc_norm": 1.4507, "encoder_q-embeddings": 7525.4829, "encoder_q-layer.0": 5063.3862, "encoder_q-layer.1": 6170.4575, "encoder_q-layer.10": 1889.8898, "encoder_q-layer.11": 4201.4009, "encoder_q-layer.2": 7398.0513, "encoder_q-layer.3": 8084.4888, "encoder_q-layer.4": 9035.0596, "encoder_q-layer.5": 8353.4678, "encoder_q-layer.6": 7867.8345, "encoder_q-layer.7": 5152.3047, "encoder_q-layer.8": 2493.2227, "encoder_q-layer.9": 1887.465, "epoch": 0.19, "inbatch_neg_score": 0.3413, "inbatch_pos_score": 1.0254, "learning_rate": 4.4555555555555555e-05, "loss": 3.0201, "norm_diff": 0.0169, "norm_loss": 0.0, "num_token_doc": 66.8796, "num_token_overlap": 17.8587, "num_token_query": 52.3765, "num_token_union": 73.7889, "num_word_context": 202.2605, "num_word_doc": 49.9316, "num_word_query": 39.9593, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9250.1486, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3406, "query_norm": 1.4339, "queue_k_norm": 1.4498, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3765, "sent_len_1": 66.8796, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1788, "stdk": 0.0478, "stdq": 0.0455, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 19800 }, { "accuracy": 56.7871, "active_queue_size": 16384.0, "cl_loss": 3.034, "doc_norm": 1.4481, "encoder_q-embeddings": 17206.9512, "encoder_q-layer.0": 12942.8936, "encoder_q-layer.1": 15180.7676, "encoder_q-layer.10": 2078.3477, "encoder_q-layer.11": 4511.0166, "encoder_q-layer.2": 18507.7441, "encoder_q-layer.3": 19474.6113, "encoder_q-layer.4": 19964.1973, "encoder_q-layer.5": 16074.46, "encoder_q-layer.6": 11600.1211, "encoder_q-layer.7": 8319.3369, "encoder_q-layer.8": 3780.6299, "encoder_q-layer.9": 2436.219, "epoch": 0.19, "inbatch_neg_score": 0.3472, "inbatch_pos_score": 1.0537, "learning_rate": 4.4500000000000004e-05, "loss": 3.034, "norm_diff": 0.032, "norm_loss": 0.0, "num_token_doc": 66.6504, "num_token_overlap": 17.7814, "num_token_query": 52.2785, "num_token_union": 73.662, "num_word_context": 202.1173, "num_word_doc": 49.7478, "num_word_query": 39.8768, "postclip_grad_norm": 1.0, "preclip_grad_norm": 20440.4154, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3486, "query_norm": 1.48, "queue_k_norm": 1.4477, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2785, "sent_len_1": 66.6504, "sent_len_max_0": 128.0, "sent_len_max_1": 208.46, "stdk": 0.0477, "stdq": 0.0472, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 19900 }, { "accuracy": 56.3965, "active_queue_size": 16384.0, "cl_loss": 3.016, "doc_norm": 1.4502, "encoder_q-embeddings": 5837.1138, "encoder_q-layer.0": 3856.1248, "encoder_q-layer.1": 4423.5396, "encoder_q-layer.10": 1887.1229, "encoder_q-layer.11": 4159.627, "encoder_q-layer.2": 5412.689, "encoder_q-layer.3": 6009.6519, "encoder_q-layer.4": 6085.1152, "encoder_q-layer.5": 5484.2085, "encoder_q-layer.6": 4840.0151, "encoder_q-layer.7": 3202.7485, "encoder_q-layer.8": 2205.4128, "encoder_q-layer.9": 1802.8774, "epoch": 0.2, "inbatch_neg_score": 0.3353, "inbatch_pos_score": 1.0352, "learning_rate": 4.4444444444444447e-05, "loss": 3.016, "norm_diff": 0.0132, "norm_loss": 0.0, "num_token_doc": 66.6884, "num_token_overlap": 17.7915, "num_token_query": 52.2671, "num_token_union": 73.6527, "num_word_context": 202.1888, "num_word_doc": 49.7519, "num_word_query": 39.8598, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6720.5933, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.335, "query_norm": 1.4634, "queue_k_norm": 1.4491, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2671, "sent_len_1": 66.6884, "sent_len_max_0": 128.0, "sent_len_max_1": 207.7138, "stdk": 0.0478, "stdq": 0.0468, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 20000 }, { "dev_runtime": 26.6457, "dev_samples_per_second": 1.201, "dev_steps_per_second": 0.038, "epoch": 0.2, "step": 20000, "test_accuracy": 92.2607421875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4412658214569092, "test_doc_norm": 1.3885679244995117, "test_inbatch_neg_score": 0.6010603308677673, "test_inbatch_pos_score": 1.5232863426208496, "test_loss": 0.4412658214569092, "test_loss_align": 1.07114839553833, "test_loss_unif": 3.7676405906677246, "test_loss_unif_q@queue": 3.7676405906677246, "test_norm_diff": 0.08122877031564713, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3309769034385681, "test_query_norm": 1.4697967767715454, "test_queue_k_norm": 1.4493052959442139, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.041342779994010925, "test_stdq": 0.04280583932995796, "test_stdqueue_k": 0.047781091183423996, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.6457, "dev_samples_per_second": 1.201, "dev_steps_per_second": 0.038, "epoch": 0.2, "eval_beir-arguana_ndcg@10": 0.35591, "eval_beir-arguana_recall@10": 0.59886, "eval_beir-arguana_recall@100": 0.90469, "eval_beir-arguana_recall@20": 0.73186, "eval_beir-avg_ndcg@10": 0.3462159166666667, "eval_beir-avg_recall@10": 0.40923333333333334, "eval_beir-avg_recall@100": 0.5903608333333333, "eval_beir-avg_recall@20": 0.4666261666666666, "eval_beir-cqadupstack_ndcg@10": 0.2357591666666666, "eval_beir-cqadupstack_recall@10": 0.3234333333333333, "eval_beir-cqadupstack_recall@100": 0.5447183333333333, "eval_beir-cqadupstack_recall@20": 0.3821916666666667, "eval_beir-fiqa_ndcg@10": 0.20943, "eval_beir-fiqa_recall@10": 0.2614, "eval_beir-fiqa_recall@100": 0.52162, "eval_beir-fiqa_recall@20": 0.34285, "eval_beir-nfcorpus_ndcg@10": 0.25317, "eval_beir-nfcorpus_recall@10": 0.11832, "eval_beir-nfcorpus_recall@100": 0.24365, "eval_beir-nfcorpus_recall@20": 0.1503, "eval_beir-nq_ndcg@10": 0.22718, "eval_beir-nq_recall@10": 0.37836, "eval_beir-nq_recall@100": 0.71693, "eval_beir-nq_recall@20": 0.48506, "eval_beir-quora_ndcg@10": 0.74314, "eval_beir-quora_recall@10": 0.85663, "eval_beir-quora_recall@100": 0.96675, "eval_beir-quora_recall@20": 0.9034, "eval_beir-scidocs_ndcg@10": 0.14109, "eval_beir-scidocs_recall@10": 0.14687, "eval_beir-scidocs_recall@100": 0.34917, "eval_beir-scidocs_recall@20": 0.19667, "eval_beir-scifact_ndcg@10": 0.59047, "eval_beir-scifact_recall@10": 0.74406, "eval_beir-scifact_recall@100": 0.89922, "eval_beir-scifact_recall@20": 0.80333, "eval_beir-trec-covid_ndcg@10": 0.49622, "eval_beir-trec-covid_recall@10": 0.524, "eval_beir-trec-covid_recall@100": 0.3254, "eval_beir-trec-covid_recall@20": 0.467, "eval_beir-webis-touche2020_ndcg@10": 0.20979, "eval_beir-webis-touche2020_recall@10": 0.1404, "eval_beir-webis-touche2020_recall@100": 0.43146, "eval_beir-webis-touche2020_recall@20": 0.2036, "eval_senteval-avg_sts": 0.7435480599156672, "eval_senteval-sickr_spearman": 0.7098955485805813, "eval_senteval-stsb_spearman": 0.7772005712507529, "step": 20000, "test_accuracy": 92.2607421875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4412658214569092, "test_doc_norm": 1.3885679244995117, "test_inbatch_neg_score": 0.6010603308677673, "test_inbatch_pos_score": 1.5232863426208496, "test_loss": 0.4412658214569092, "test_loss_align": 1.07114839553833, "test_loss_unif": 3.7676405906677246, "test_loss_unif_q@queue": 3.7676405906677246, "test_norm_diff": 0.08122877031564713, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3309769034385681, "test_query_norm": 1.4697967767715454, "test_queue_k_norm": 1.4493052959442139, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.041342779994010925, "test_stdq": 0.04280583932995796, "test_stdqueue_k": 0.047781091183423996, "test_stdqueue_q": 0.0 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.0087, "doc_norm": 1.4522, "encoder_q-embeddings": 7648.3076, "encoder_q-layer.0": 5056.4546, "encoder_q-layer.1": 5789.2539, "encoder_q-layer.10": 1967.7886, "encoder_q-layer.11": 4189.4917, "encoder_q-layer.2": 6719.2783, "encoder_q-layer.3": 6795.9858, "encoder_q-layer.4": 7131.3877, "encoder_q-layer.5": 6607.6318, "encoder_q-layer.6": 5660.7334, "encoder_q-layer.7": 3944.1541, "encoder_q-layer.8": 2671.7532, "encoder_q-layer.9": 1917.4792, "epoch": 0.2, "inbatch_neg_score": 0.3294, "inbatch_pos_score": 1.0205, "learning_rate": 4.438888888888889e-05, "loss": 3.0087, "norm_diff": 0.0062, "norm_loss": 0.0, "num_token_doc": 66.6589, "num_token_overlap": 17.8134, "num_token_query": 52.345, "num_token_union": 73.6904, "num_word_context": 202.2591, "num_word_doc": 49.7294, "num_word_query": 39.9184, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8194.3288, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3291, "query_norm": 1.4511, "queue_k_norm": 1.4503, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.345, "sent_len_1": 66.6589, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5225, "stdk": 0.0479, "stdq": 0.046, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 20100 }, { "accuracy": 55.6152, "active_queue_size": 16384.0, "cl_loss": 3.0241, "doc_norm": 1.4486, "encoder_q-embeddings": 9087.5771, "encoder_q-layer.0": 6371.5005, "encoder_q-layer.1": 7563.793, "encoder_q-layer.10": 2112.0515, "encoder_q-layer.11": 4626.5815, "encoder_q-layer.2": 8685.582, "encoder_q-layer.3": 8633.6201, "encoder_q-layer.4": 8841.0801, "encoder_q-layer.5": 9220.8652, "encoder_q-layer.6": 8496.5029, "encoder_q-layer.7": 5870.5557, "encoder_q-layer.8": 3332.9648, "encoder_q-layer.9": 2079.6978, "epoch": 0.2, "inbatch_neg_score": 0.3429, "inbatch_pos_score": 1.0254, "learning_rate": 4.433333333333334e-05, "loss": 3.0241, "norm_diff": 0.0195, "norm_loss": 0.0, "num_token_doc": 66.7583, "num_token_overlap": 17.8277, "num_token_query": 52.3338, "num_token_union": 73.7063, "num_word_context": 202.111, "num_word_doc": 49.8448, "num_word_query": 39.9059, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10557.62, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.343, "query_norm": 1.468, "queue_k_norm": 1.4509, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3338, "sent_len_1": 66.7583, "sent_len_max_0": 128.0, "sent_len_max_1": 207.2038, "stdk": 0.0477, "stdq": 0.0465, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 20200 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 2.9976, "doc_norm": 1.4474, "encoder_q-embeddings": 4934.5679, "encoder_q-layer.0": 3411.3901, "encoder_q-layer.1": 4052.6226, "encoder_q-layer.10": 1841.8643, "encoder_q-layer.11": 4126.6841, "encoder_q-layer.2": 4593.438, "encoder_q-layer.3": 4635.541, "encoder_q-layer.4": 5164.918, "encoder_q-layer.5": 5170.5488, "encoder_q-layer.6": 4237.8228, "encoder_q-layer.7": 3013.6033, "encoder_q-layer.8": 2275.2256, "encoder_q-layer.9": 1851.4608, "epoch": 0.2, "inbatch_neg_score": 0.3321, "inbatch_pos_score": 1.0225, "learning_rate": 4.427777777777778e-05, "loss": 2.9976, "norm_diff": 0.0065, "norm_loss": 0.0, "num_token_doc": 66.7527, "num_token_overlap": 17.8754, "num_token_query": 52.3947, "num_token_union": 73.6753, "num_word_context": 202.2013, "num_word_doc": 49.8191, "num_word_query": 39.9472, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5970.2908, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.333, "query_norm": 1.4455, "queue_k_norm": 1.449, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3947, "sent_len_1": 66.7527, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6062, "stdk": 0.0477, "stdq": 0.0458, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 20300 }, { "accuracy": 56.6895, "active_queue_size": 16384.0, "cl_loss": 3.0259, "doc_norm": 1.4446, "encoder_q-embeddings": 4904.3252, "encoder_q-layer.0": 3371.7114, "encoder_q-layer.1": 4071.3374, "encoder_q-layer.10": 2211.2466, "encoder_q-layer.11": 4659.2671, "encoder_q-layer.2": 4654.4604, "encoder_q-layer.3": 5019.8022, "encoder_q-layer.4": 5016.1543, "encoder_q-layer.5": 4509.4487, "encoder_q-layer.6": 3990.124, "encoder_q-layer.7": 2745.2234, "encoder_q-layer.8": 2477.2786, "encoder_q-layer.9": 2158.9729, "epoch": 0.2, "inbatch_neg_score": 0.3356, "inbatch_pos_score": 1.0264, "learning_rate": 4.422222222222222e-05, "loss": 3.0259, "norm_diff": 0.0204, "norm_loss": 0.0, "num_token_doc": 66.7628, "num_token_overlap": 17.7655, "num_token_query": 52.22, "num_token_union": 73.6892, "num_word_context": 202.5382, "num_word_doc": 49.8045, "num_word_query": 39.8137, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5922.8374, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3357, "query_norm": 1.465, "queue_k_norm": 1.4504, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.22, "sent_len_1": 66.7628, "sent_len_max_0": 128.0, "sent_len_max_1": 210.985, "stdk": 0.0476, "stdq": 0.0465, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 20400 }, { "accuracy": 56.5918, "active_queue_size": 16384.0, "cl_loss": 3.0168, "doc_norm": 1.4488, "encoder_q-embeddings": 3103.9185, "encoder_q-layer.0": 2025.8182, "encoder_q-layer.1": 2440.1826, "encoder_q-layer.10": 1893.9717, "encoder_q-layer.11": 4211.3057, "encoder_q-layer.2": 2843.3962, "encoder_q-layer.3": 2975.5798, "encoder_q-layer.4": 3048.6956, "encoder_q-layer.5": 3009.7703, "encoder_q-layer.6": 2807.5977, "encoder_q-layer.7": 2560.1367, "encoder_q-layer.8": 2435.3301, "encoder_q-layer.9": 1913.3408, "epoch": 0.2, "inbatch_neg_score": 0.341, "inbatch_pos_score": 1.0371, "learning_rate": 4.4166666666666665e-05, "loss": 3.0168, "norm_diff": 0.0259, "norm_loss": 0.0, "num_token_doc": 66.6694, "num_token_overlap": 17.8227, "num_token_query": 52.2927, "num_token_union": 73.6106, "num_word_context": 202.1153, "num_word_doc": 49.757, "num_word_query": 39.8763, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4132.2693, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3416, "query_norm": 1.4748, "queue_k_norm": 1.4496, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2927, "sent_len_1": 66.6694, "sent_len_max_0": 128.0, "sent_len_max_1": 207.61, "stdk": 0.0478, "stdq": 0.0465, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 20500 }, { "accuracy": 55.7129, "active_queue_size": 16384.0, "cl_loss": 3.0231, "doc_norm": 1.4503, "encoder_q-embeddings": 6497.1099, "encoder_q-layer.0": 4684.2612, "encoder_q-layer.1": 5269.9136, "encoder_q-layer.10": 2075.7742, "encoder_q-layer.11": 4933.8091, "encoder_q-layer.2": 6096.2939, "encoder_q-layer.3": 6454.3281, "encoder_q-layer.4": 6732.1133, "encoder_q-layer.5": 6481.6489, "encoder_q-layer.6": 5336.207, "encoder_q-layer.7": 3968.9646, "encoder_q-layer.8": 2807.3281, "encoder_q-layer.9": 2108.5928, "epoch": 0.2, "inbatch_neg_score": 0.3506, "inbatch_pos_score": 1.04, "learning_rate": 4.4111111111111114e-05, "loss": 3.0231, "norm_diff": 0.0336, "norm_loss": 0.0, "num_token_doc": 66.6607, "num_token_overlap": 17.8126, "num_token_query": 52.3201, "num_token_union": 73.6647, "num_word_context": 202.2995, "num_word_doc": 49.7443, "num_word_query": 39.9084, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7642.4211, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3513, "query_norm": 1.4839, "queue_k_norm": 1.448, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3201, "sent_len_1": 66.6607, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6863, "stdk": 0.0478, "stdq": 0.0468, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 20600 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.0166, "doc_norm": 1.4446, "encoder_q-embeddings": 3928.3372, "encoder_q-layer.0": 2777.6162, "encoder_q-layer.1": 3155.759, "encoder_q-layer.10": 2108.3516, "encoder_q-layer.11": 4477.9263, "encoder_q-layer.2": 3771.748, "encoder_q-layer.3": 4306.3193, "encoder_q-layer.4": 4437.9541, "encoder_q-layer.5": 4354.8159, "encoder_q-layer.6": 3867.6233, "encoder_q-layer.7": 3140.4573, "encoder_q-layer.8": 2624.9961, "encoder_q-layer.9": 2024.7045, "epoch": 0.2, "inbatch_neg_score": 0.3383, "inbatch_pos_score": 1.002, "learning_rate": 4.4055555555555557e-05, "loss": 3.0166, "norm_diff": 0.0156, "norm_loss": 0.0, "num_token_doc": 66.7634, "num_token_overlap": 17.8525, "num_token_query": 52.2879, "num_token_union": 73.6701, "num_word_context": 202.5798, "num_word_doc": 49.833, "num_word_query": 39.8779, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5228.5629, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3376, "query_norm": 1.4551, "queue_k_norm": 1.4487, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2879, "sent_len_1": 66.7634, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4913, "stdk": 0.0476, "stdq": 0.0462, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 20700 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0189, "doc_norm": 1.449, "encoder_q-embeddings": 4966.8228, "encoder_q-layer.0": 3325.1826, "encoder_q-layer.1": 3786.7395, "encoder_q-layer.10": 2156.8484, "encoder_q-layer.11": 4484.8799, "encoder_q-layer.2": 4238.1592, "encoder_q-layer.3": 4382.1509, "encoder_q-layer.4": 4580.4971, "encoder_q-layer.5": 4790.3848, "encoder_q-layer.6": 4172.7856, "encoder_q-layer.7": 3311.5447, "encoder_q-layer.8": 2481.5239, "encoder_q-layer.9": 1994.9054, "epoch": 0.2, "inbatch_neg_score": 0.338, "inbatch_pos_score": 1.0127, "learning_rate": 4.4000000000000006e-05, "loss": 3.0189, "norm_diff": 0.0085, "norm_loss": 0.0, "num_token_doc": 66.8669, "num_token_overlap": 17.8313, "num_token_query": 52.3206, "num_token_union": 73.7733, "num_word_context": 202.5247, "num_word_doc": 49.9096, "num_word_query": 39.921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5784.7522, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3386, "query_norm": 1.4427, "queue_k_norm": 1.4493, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3206, "sent_len_1": 66.8669, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1287, "stdk": 0.0478, "stdq": 0.0455, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 20800 }, { "accuracy": 55.9082, "active_queue_size": 16384.0, "cl_loss": 2.9988, "doc_norm": 1.4449, "encoder_q-embeddings": 7954.0977, "encoder_q-layer.0": 5123.5601, "encoder_q-layer.1": 5877.0151, "encoder_q-layer.10": 2063.9712, "encoder_q-layer.11": 4173.9575, "encoder_q-layer.2": 6866.4487, "encoder_q-layer.3": 7059.4082, "encoder_q-layer.4": 7539.5645, "encoder_q-layer.5": 7663.3003, "encoder_q-layer.6": 7762.4214, "encoder_q-layer.7": 6025.9683, "encoder_q-layer.8": 3439.8416, "encoder_q-layer.9": 2003.2874, "epoch": 0.2, "inbatch_neg_score": 0.3391, "inbatch_pos_score": 1.0215, "learning_rate": 4.394444444444445e-05, "loss": 2.9988, "norm_diff": 0.0182, "norm_loss": 0.0, "num_token_doc": 66.8312, "num_token_overlap": 17.8354, "num_token_query": 52.2489, "num_token_union": 73.712, "num_word_context": 202.1757, "num_word_doc": 49.8901, "num_word_query": 39.8555, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9052.9231, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3376, "query_norm": 1.4631, "queue_k_norm": 1.4494, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2489, "sent_len_1": 66.8312, "sent_len_max_0": 128.0, "sent_len_max_1": 210.2175, "stdk": 0.0476, "stdq": 0.0461, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 20900 }, { "accuracy": 56.4941, "active_queue_size": 16384.0, "cl_loss": 3.0144, "doc_norm": 1.4475, "encoder_q-embeddings": 8549.6006, "encoder_q-layer.0": 5819.8584, "encoder_q-layer.1": 6770.5137, "encoder_q-layer.10": 1964.0135, "encoder_q-layer.11": 4246.1548, "encoder_q-layer.2": 7971.9028, "encoder_q-layer.3": 8733.6631, "encoder_q-layer.4": 9566.5352, "encoder_q-layer.5": 9752.8955, "encoder_q-layer.6": 8272.5742, "encoder_q-layer.7": 5450.5244, "encoder_q-layer.8": 2859.4165, "encoder_q-layer.9": 2070.7693, "epoch": 0.21, "inbatch_neg_score": 0.3485, "inbatch_pos_score": 1.041, "learning_rate": 4.388888888888889e-05, "loss": 3.0144, "norm_diff": 0.0215, "norm_loss": 0.0, "num_token_doc": 66.8496, "num_token_overlap": 17.8032, "num_token_query": 52.2175, "num_token_union": 73.7149, "num_word_context": 202.4032, "num_word_doc": 49.8655, "num_word_query": 39.806, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10211.6849, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3481, "query_norm": 1.4666, "queue_k_norm": 1.4498, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2175, "sent_len_1": 66.8496, "sent_len_max_0": 128.0, "sent_len_max_1": 209.44, "stdk": 0.0477, "stdq": 0.0464, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 21000 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0179, "doc_norm": 1.4508, "encoder_q-embeddings": 3658.6824, "encoder_q-layer.0": 2509.7302, "encoder_q-layer.1": 2940.427, "encoder_q-layer.10": 1950.6553, "encoder_q-layer.11": 4193.6274, "encoder_q-layer.2": 3548.0137, "encoder_q-layer.3": 3307.5872, "encoder_q-layer.4": 2673.1082, "encoder_q-layer.5": 2440.8777, "encoder_q-layer.6": 2368.4014, "encoder_q-layer.7": 2382.0132, "encoder_q-layer.8": 2364.5298, "encoder_q-layer.9": 1913.4099, "epoch": 0.21, "inbatch_neg_score": 0.344, "inbatch_pos_score": 1.0156, "learning_rate": 4.383333333333334e-05, "loss": 3.0179, "norm_diff": 0.0098, "norm_loss": 0.0, "num_token_doc": 66.843, "num_token_overlap": 17.8451, "num_token_query": 52.3976, "num_token_union": 73.7558, "num_word_context": 202.5005, "num_word_doc": 49.883, "num_word_query": 39.9665, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4312.8529, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3433, "query_norm": 1.4599, "queue_k_norm": 1.4501, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3976, "sent_len_1": 66.843, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4538, "stdk": 0.0479, "stdq": 0.046, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 21100 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.0132, "doc_norm": 1.4448, "encoder_q-embeddings": 6555.8003, "encoder_q-layer.0": 4495.5747, "encoder_q-layer.1": 4961.0894, "encoder_q-layer.10": 1941.4204, "encoder_q-layer.11": 4114.2549, "encoder_q-layer.2": 5309.3267, "encoder_q-layer.3": 5387.4834, "encoder_q-layer.4": 5171.6147, "encoder_q-layer.5": 4848.9946, "encoder_q-layer.6": 4242.9453, "encoder_q-layer.7": 3627.6421, "encoder_q-layer.8": 3319.0007, "encoder_q-layer.9": 2211.137, "epoch": 0.21, "inbatch_neg_score": 0.3487, "inbatch_pos_score": 1.0039, "learning_rate": 4.377777777777778e-05, "loss": 3.0132, "norm_diff": 0.0237, "norm_loss": 0.0, "num_token_doc": 66.7598, "num_token_overlap": 17.8058, "num_token_query": 52.232, "num_token_union": 73.691, "num_word_context": 202.4036, "num_word_doc": 49.8315, "num_word_query": 39.8256, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6910.9642, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3494, "query_norm": 1.4211, "queue_k_norm": 1.4487, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.232, "sent_len_1": 66.7598, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2163, "stdk": 0.0476, "stdq": 0.0445, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 21200 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.0061, "doc_norm": 1.4513, "encoder_q-embeddings": 3274.0911, "encoder_q-layer.0": 2192.2146, "encoder_q-layer.1": 2810.9993, "encoder_q-layer.10": 1017.1526, "encoder_q-layer.11": 2217.3391, "encoder_q-layer.2": 2688.4563, "encoder_q-layer.3": 2899.0193, "encoder_q-layer.4": 2324.0569, "encoder_q-layer.5": 1706.1493, "encoder_q-layer.6": 1415.1812, "encoder_q-layer.7": 1255.6382, "encoder_q-layer.8": 1190.2013, "encoder_q-layer.9": 987.7615, "epoch": 0.21, "inbatch_neg_score": 0.346, "inbatch_pos_score": 1.0205, "learning_rate": 4.3722222222222224e-05, "loss": 3.0061, "norm_diff": 0.0108, "norm_loss": 0.0, "num_token_doc": 66.7424, "num_token_overlap": 17.8328, "num_token_query": 52.2773, "num_token_union": 73.6741, "num_word_context": 202.2725, "num_word_doc": 49.7852, "num_word_query": 39.8639, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3302.3991, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3469, "query_norm": 1.4412, "queue_k_norm": 1.4502, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2773, "sent_len_1": 66.7424, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3225, "stdk": 0.0479, "stdq": 0.0454, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 21300 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.0135, "doc_norm": 1.4498, "encoder_q-embeddings": 3239.3992, "encoder_q-layer.0": 2255.7095, "encoder_q-layer.1": 2447.7051, "encoder_q-layer.10": 1179.6644, "encoder_q-layer.11": 2412.2886, "encoder_q-layer.2": 2993.803, "encoder_q-layer.3": 3068.7412, "encoder_q-layer.4": 3058.0793, "encoder_q-layer.5": 2601.5852, "encoder_q-layer.6": 2440.8149, "encoder_q-layer.7": 1876.9003, "encoder_q-layer.8": 1378.6626, "encoder_q-layer.9": 1173.12, "epoch": 0.21, "inbatch_neg_score": 0.3384, "inbatch_pos_score": 1.0391, "learning_rate": 4.3666666666666666e-05, "loss": 3.0135, "norm_diff": 0.0139, "norm_loss": 0.0, "num_token_doc": 66.7445, "num_token_overlap": 17.8531, "num_token_query": 52.3795, "num_token_union": 73.7115, "num_word_context": 202.3025, "num_word_doc": 49.819, "num_word_query": 39.9554, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3634.1125, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3394, "query_norm": 1.4635, "queue_k_norm": 1.4507, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3795, "sent_len_1": 66.7445, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0613, "stdk": 0.0478, "stdq": 0.0467, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 21400 }, { "accuracy": 55.4199, "active_queue_size": 16384.0, "cl_loss": 3.0096, "doc_norm": 1.4469, "encoder_q-embeddings": 4616.2681, "encoder_q-layer.0": 3334.9495, "encoder_q-layer.1": 3515.7781, "encoder_q-layer.10": 1094.5999, "encoder_q-layer.11": 2328.7703, "encoder_q-layer.2": 3801.6726, "encoder_q-layer.3": 3916.2227, "encoder_q-layer.4": 3712.259, "encoder_q-layer.5": 3893.3372, "encoder_q-layer.6": 3902.3052, "encoder_q-layer.7": 2618.8179, "encoder_q-layer.8": 1784.5548, "encoder_q-layer.9": 1156.0872, "epoch": 0.21, "inbatch_neg_score": 0.3349, "inbatch_pos_score": 1.0137, "learning_rate": 4.3611111111111116e-05, "loss": 3.0096, "norm_diff": 0.0087, "norm_loss": 0.0, "num_token_doc": 66.763, "num_token_overlap": 17.8389, "num_token_query": 52.2963, "num_token_union": 73.6825, "num_word_context": 202.1902, "num_word_doc": 49.7845, "num_word_query": 39.8862, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4892.0186, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.335, "query_norm": 1.4454, "queue_k_norm": 1.4498, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2963, "sent_len_1": 66.763, "sent_len_max_0": 128.0, "sent_len_max_1": 209.9638, "stdk": 0.0477, "stdq": 0.0462, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 21500 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 2.9992, "doc_norm": 1.4449, "encoder_q-embeddings": 1174.0294, "encoder_q-layer.0": 838.3684, "encoder_q-layer.1": 970.7277, "encoder_q-layer.10": 925.473, "encoder_q-layer.11": 1969.9739, "encoder_q-layer.2": 1045.5178, "encoder_q-layer.3": 1128.7515, "encoder_q-layer.4": 1205.1431, "encoder_q-layer.5": 1107.4276, "encoder_q-layer.6": 1150.6261, "encoder_q-layer.7": 1092.9193, "encoder_q-layer.8": 1087.5159, "encoder_q-layer.9": 935.1888, "epoch": 0.21, "inbatch_neg_score": 0.3307, "inbatch_pos_score": 1.002, "learning_rate": 4.355555555555556e-05, "loss": 2.9992, "norm_diff": 0.0107, "norm_loss": 0.0, "num_token_doc": 66.7369, "num_token_overlap": 17.7923, "num_token_query": 52.3181, "num_token_union": 73.75, "num_word_context": 202.353, "num_word_doc": 49.7828, "num_word_query": 39.8919, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1698.0109, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3303, "query_norm": 1.4357, "queue_k_norm": 1.4505, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3181, "sent_len_1": 66.7369, "sent_len_max_0": 128.0, "sent_len_max_1": 208.51, "stdk": 0.0476, "stdq": 0.046, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 21600 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 3.0035, "doc_norm": 1.4519, "encoder_q-embeddings": 1790.219, "encoder_q-layer.0": 1298.7367, "encoder_q-layer.1": 1536.6973, "encoder_q-layer.10": 1034.2305, "encoder_q-layer.11": 2124.821, "encoder_q-layer.2": 1774.3064, "encoder_q-layer.3": 1926.9365, "encoder_q-layer.4": 2040.3932, "encoder_q-layer.5": 1886.902, "encoder_q-layer.6": 1752.0967, "encoder_q-layer.7": 1525.521, "encoder_q-layer.8": 1226.0709, "encoder_q-layer.9": 1023.0245, "epoch": 0.21, "inbatch_neg_score": 0.3205, "inbatch_pos_score": 1.0254, "learning_rate": 4.35e-05, "loss": 3.0035, "norm_diff": 0.0159, "norm_loss": 0.0, "num_token_doc": 66.856, "num_token_overlap": 17.8318, "num_token_query": 52.3759, "num_token_union": 73.7772, "num_word_context": 202.5705, "num_word_doc": 49.8875, "num_word_query": 39.9488, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2455.7333, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3196, "query_norm": 1.4362, "queue_k_norm": 1.4507, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3759, "sent_len_1": 66.856, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3812, "stdk": 0.0479, "stdq": 0.0465, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 21700 }, { "accuracy": 56.5918, "active_queue_size": 16384.0, "cl_loss": 3.0112, "doc_norm": 1.4477, "encoder_q-embeddings": 1642.6666, "encoder_q-layer.0": 1130.905, "encoder_q-layer.1": 1335.7019, "encoder_q-layer.10": 956.8483, "encoder_q-layer.11": 2098.1279, "encoder_q-layer.2": 1497.2656, "encoder_q-layer.3": 1792.3381, "encoder_q-layer.4": 1879.5712, "encoder_q-layer.5": 1890.1499, "encoder_q-layer.6": 2101.9875, "encoder_q-layer.7": 1627.6434, "encoder_q-layer.8": 1211.2476, "encoder_q-layer.9": 998.5731, "epoch": 0.21, "inbatch_neg_score": 0.3162, "inbatch_pos_score": 1.0029, "learning_rate": 4.344444444444445e-05, "loss": 3.0112, "norm_diff": 0.0183, "norm_loss": 0.0, "num_token_doc": 66.808, "num_token_overlap": 17.8234, "num_token_query": 52.3391, "num_token_union": 73.7531, "num_word_context": 202.6782, "num_word_doc": 49.8468, "num_word_query": 39.9161, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2331.9259, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3149, "query_norm": 1.4294, "queue_k_norm": 1.4476, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3391, "sent_len_1": 66.808, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9688, "stdk": 0.0477, "stdq": 0.046, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 21800 }, { "accuracy": 56.6895, "active_queue_size": 16384.0, "cl_loss": 2.9949, "doc_norm": 1.4499, "encoder_q-embeddings": 1372.9779, "encoder_q-layer.0": 903.3901, "encoder_q-layer.1": 1012.7534, "encoder_q-layer.10": 964.8366, "encoder_q-layer.11": 2103.0791, "encoder_q-layer.2": 1202.6195, "encoder_q-layer.3": 1313.3702, "encoder_q-layer.4": 1469.3732, "encoder_q-layer.5": 1469.9485, "encoder_q-layer.6": 1446.8643, "encoder_q-layer.7": 1333.3768, "encoder_q-layer.8": 1134.504, "encoder_q-layer.9": 936.5585, "epoch": 0.21, "inbatch_neg_score": 0.3117, "inbatch_pos_score": 1.0, "learning_rate": 4.338888888888889e-05, "loss": 2.9949, "norm_diff": 0.015, "norm_loss": 0.0, "num_token_doc": 66.784, "num_token_overlap": 17.82, "num_token_query": 52.2582, "num_token_union": 73.6969, "num_word_context": 202.1685, "num_word_doc": 49.8417, "num_word_query": 39.8406, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1962.8865, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.312, "query_norm": 1.4366, "queue_k_norm": 1.4488, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2582, "sent_len_1": 66.784, "sent_len_max_0": 128.0, "sent_len_max_1": 207.935, "stdk": 0.0479, "stdq": 0.0461, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 21900 }, { "accuracy": 58.6426, "active_queue_size": 16384.0, "cl_loss": 2.9979, "doc_norm": 1.4465, "encoder_q-embeddings": 2808.0916, "encoder_q-layer.0": 2041.1088, "encoder_q-layer.1": 2443.5562, "encoder_q-layer.10": 938.5392, "encoder_q-layer.11": 1945.1184, "encoder_q-layer.2": 2863.8289, "encoder_q-layer.3": 3257.2737, "encoder_q-layer.4": 3255.4709, "encoder_q-layer.5": 2746.2524, "encoder_q-layer.6": 2797.9968, "encoder_q-layer.7": 2256.8167, "encoder_q-layer.8": 1667.9736, "encoder_q-layer.9": 1120.8199, "epoch": 0.21, "inbatch_neg_score": 0.3135, "inbatch_pos_score": 1.0117, "learning_rate": 4.3333333333333334e-05, "loss": 2.9979, "norm_diff": 0.0101, "norm_loss": 0.0, "num_token_doc": 66.7351, "num_token_overlap": 17.8318, "num_token_query": 52.4264, "num_token_union": 73.7805, "num_word_context": 202.3254, "num_word_doc": 49.8163, "num_word_query": 39.9956, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3622.3695, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3132, "query_norm": 1.439, "queue_k_norm": 1.4489, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4264, "sent_len_1": 66.7351, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3975, "stdk": 0.0478, "stdq": 0.0461, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 22000 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 2.9904, "doc_norm": 1.4473, "encoder_q-embeddings": 1943.9537, "encoder_q-layer.0": 1412.3665, "encoder_q-layer.1": 1546.0107, "encoder_q-layer.10": 1110.4662, "encoder_q-layer.11": 2136.7251, "encoder_q-layer.2": 1774.9502, "encoder_q-layer.3": 1871.2881, "encoder_q-layer.4": 1986.8713, "encoder_q-layer.5": 2035.3929, "encoder_q-layer.6": 1977.5852, "encoder_q-layer.7": 1549.1881, "encoder_q-layer.8": 1279.0304, "encoder_q-layer.9": 1060.7292, "epoch": 0.22, "inbatch_neg_score": 0.3056, "inbatch_pos_score": 0.9917, "learning_rate": 4.3277777777777776e-05, "loss": 2.9904, "norm_diff": 0.0107, "norm_loss": 0.0, "num_token_doc": 66.8179, "num_token_overlap": 17.858, "num_token_query": 52.5407, "num_token_union": 73.8332, "num_word_context": 202.5047, "num_word_doc": 49.85, "num_word_query": 40.0837, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2510.8852, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3057, "query_norm": 1.438, "queue_k_norm": 1.4487, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.5407, "sent_len_1": 66.8179, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3225, "stdk": 0.0478, "stdq": 0.046, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 22100 }, { "accuracy": 57.959, "active_queue_size": 16384.0, "cl_loss": 3.0048, "doc_norm": 1.4488, "encoder_q-embeddings": 2862.4021, "encoder_q-layer.0": 1883.397, "encoder_q-layer.1": 2041.3818, "encoder_q-layer.10": 956.9928, "encoder_q-layer.11": 2129.1233, "encoder_q-layer.2": 2249.1174, "encoder_q-layer.3": 2527.0588, "encoder_q-layer.4": 2508.7004, "encoder_q-layer.5": 2282.9353, "encoder_q-layer.6": 2395.5701, "encoder_q-layer.7": 1697.5823, "encoder_q-layer.8": 1252.7767, "encoder_q-layer.9": 956.6752, "epoch": 0.22, "inbatch_neg_score": 0.3075, "inbatch_pos_score": 1.0078, "learning_rate": 4.3222222222222226e-05, "loss": 3.0048, "norm_diff": 0.0142, "norm_loss": 0.0, "num_token_doc": 66.7039, "num_token_overlap": 17.8317, "num_token_query": 52.3049, "num_token_union": 73.6581, "num_word_context": 202.3304, "num_word_doc": 49.7983, "num_word_query": 39.8961, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3128.2873, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3076, "query_norm": 1.4431, "queue_k_norm": 1.4466, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3049, "sent_len_1": 66.7039, "sent_len_max_0": 128.0, "sent_len_max_1": 208.61, "stdk": 0.0479, "stdq": 0.046, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 22200 }, { "accuracy": 57.373, "active_queue_size": 16384.0, "cl_loss": 3.0042, "doc_norm": 1.4476, "encoder_q-embeddings": 2130.2781, "encoder_q-layer.0": 1563.6802, "encoder_q-layer.1": 1881.5074, "encoder_q-layer.10": 1001.362, "encoder_q-layer.11": 2218.6184, "encoder_q-layer.2": 2216.4673, "encoder_q-layer.3": 2615.2295, "encoder_q-layer.4": 2638.0122, "encoder_q-layer.5": 2591.228, "encoder_q-layer.6": 2402.4768, "encoder_q-layer.7": 1917.1736, "encoder_q-layer.8": 1217.6031, "encoder_q-layer.9": 991.3515, "epoch": 0.22, "inbatch_neg_score": 0.3186, "inbatch_pos_score": 1.0127, "learning_rate": 4.316666666666667e-05, "loss": 3.0042, "norm_diff": 0.0176, "norm_loss": 0.0, "num_token_doc": 66.9527, "num_token_overlap": 17.7944, "num_token_query": 52.3111, "num_token_union": 73.8712, "num_word_context": 202.6931, "num_word_doc": 49.9557, "num_word_query": 39.8799, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2994.5869, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3169, "query_norm": 1.4651, "queue_k_norm": 1.4445, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3111, "sent_len_1": 66.9527, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4187, "stdk": 0.0479, "stdq": 0.0465, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 22300 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 2.9879, "doc_norm": 1.4455, "encoder_q-embeddings": 1442.9835, "encoder_q-layer.0": 981.5523, "encoder_q-layer.1": 1116.318, "encoder_q-layer.10": 957.0583, "encoder_q-layer.11": 1953.95, "encoder_q-layer.2": 1353.8724, "encoder_q-layer.3": 1398.4443, "encoder_q-layer.4": 1428.5514, "encoder_q-layer.5": 1432.8667, "encoder_q-layer.6": 1520.1362, "encoder_q-layer.7": 1234.9169, "encoder_q-layer.8": 1133.7399, "encoder_q-layer.9": 972.4309, "epoch": 0.22, "inbatch_neg_score": 0.3019, "inbatch_pos_score": 1.001, "learning_rate": 4.311111111111111e-05, "loss": 2.9879, "norm_diff": 0.0285, "norm_loss": 0.0, "num_token_doc": 66.8625, "num_token_overlap": 17.8351, "num_token_query": 52.4443, "num_token_union": 73.8484, "num_word_context": 202.5391, "num_word_doc": 49.8734, "num_word_query": 40.03, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1940.6616, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.302, "query_norm": 1.4741, "queue_k_norm": 1.4452, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4443, "sent_len_1": 66.8625, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6425, "stdk": 0.0478, "stdq": 0.047, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 22400 }, { "accuracy": 54.9316, "active_queue_size": 16384.0, "cl_loss": 2.9846, "doc_norm": 1.4424, "encoder_q-embeddings": 4794.687, "encoder_q-layer.0": 3679.1917, "encoder_q-layer.1": 4053.5408, "encoder_q-layer.10": 998.5261, "encoder_q-layer.11": 2106.7417, "encoder_q-layer.2": 4964.5317, "encoder_q-layer.3": 5204.4312, "encoder_q-layer.4": 5236.6582, "encoder_q-layer.5": 4663.2573, "encoder_q-layer.6": 3846.116, "encoder_q-layer.7": 2566.7434, "encoder_q-layer.8": 1584.3621, "encoder_q-layer.9": 1175.0394, "epoch": 0.22, "inbatch_neg_score": 0.3049, "inbatch_pos_score": 0.9751, "learning_rate": 4.305555555555556e-05, "loss": 2.9846, "norm_diff": 0.0105, "norm_loss": 0.0, "num_token_doc": 66.9056, "num_token_overlap": 17.8861, "num_token_query": 52.3969, "num_token_union": 73.8002, "num_word_context": 202.3294, "num_word_doc": 49.9325, "num_word_query": 39.9636, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5680.2077, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3054, "query_norm": 1.4461, "queue_k_norm": 1.4471, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3969, "sent_len_1": 66.9056, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1362, "stdk": 0.0477, "stdq": 0.0459, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 22500 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 2.9907, "doc_norm": 1.446, "encoder_q-embeddings": 1431.1722, "encoder_q-layer.0": 1006.4311, "encoder_q-layer.1": 1081.9803, "encoder_q-layer.10": 988.0908, "encoder_q-layer.11": 2110.3752, "encoder_q-layer.2": 1211.4631, "encoder_q-layer.3": 1297.8535, "encoder_q-layer.4": 1436.7424, "encoder_q-layer.5": 1381.0671, "encoder_q-layer.6": 1359.1838, "encoder_q-layer.7": 1148.7585, "encoder_q-layer.8": 1083.6617, "encoder_q-layer.9": 952.2085, "epoch": 0.22, "inbatch_neg_score": 0.3282, "inbatch_pos_score": 1.0078, "learning_rate": 4.3e-05, "loss": 2.9907, "norm_diff": 0.0078, "norm_loss": 0.0, "num_token_doc": 66.7908, "num_token_overlap": 17.835, "num_token_query": 52.375, "num_token_union": 73.7516, "num_word_context": 202.2882, "num_word_doc": 49.8285, "num_word_query": 39.945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1956.2617, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3276, "query_norm": 1.4505, "queue_k_norm": 1.4465, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.375, "sent_len_1": 66.7908, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4825, "stdk": 0.0479, "stdq": 0.0463, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 22600 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 2.9869, "doc_norm": 1.4515, "encoder_q-embeddings": 3122.978, "encoder_q-layer.0": 2110.9602, "encoder_q-layer.1": 2426.4978, "encoder_q-layer.10": 941.353, "encoder_q-layer.11": 2033.8132, "encoder_q-layer.2": 2933.5249, "encoder_q-layer.3": 3316.218, "encoder_q-layer.4": 3553.5898, "encoder_q-layer.5": 2993.6082, "encoder_q-layer.6": 2205.3018, "encoder_q-layer.7": 1602.4758, "encoder_q-layer.8": 1218.0171, "encoder_q-layer.9": 940.8788, "epoch": 0.22, "inbatch_neg_score": 0.3136, "inbatch_pos_score": 0.999, "learning_rate": 4.294444444444445e-05, "loss": 2.9869, "norm_diff": 0.0167, "norm_loss": 0.0, "num_token_doc": 66.7528, "num_token_overlap": 17.8288, "num_token_query": 52.2804, "num_token_union": 73.6965, "num_word_context": 202.1776, "num_word_doc": 49.8366, "num_word_query": 39.9017, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3606.6534, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3135, "query_norm": 1.4347, "queue_k_norm": 1.448, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2804, "sent_len_1": 66.7528, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5625, "stdk": 0.0481, "stdq": 0.046, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 22700 }, { "accuracy": 56.2988, "active_queue_size": 16384.0, "cl_loss": 2.979, "doc_norm": 1.4487, "encoder_q-embeddings": 1054.2184, "encoder_q-layer.0": 686.0527, "encoder_q-layer.1": 776.4977, "encoder_q-layer.10": 969.4739, "encoder_q-layer.11": 2093.3843, "encoder_q-layer.2": 900.8354, "encoder_q-layer.3": 948.4575, "encoder_q-layer.4": 1014.9787, "encoder_q-layer.5": 1000.0281, "encoder_q-layer.6": 1026.223, "encoder_q-layer.7": 1058.4406, "encoder_q-layer.8": 1093.666, "encoder_q-layer.9": 947.6553, "epoch": 0.22, "inbatch_neg_score": 0.3335, "inbatch_pos_score": 1.0176, "learning_rate": 4.2888888888888886e-05, "loss": 2.979, "norm_diff": 0.0146, "norm_loss": 0.0, "num_token_doc": 66.9, "num_token_overlap": 17.8227, "num_token_query": 52.2426, "num_token_union": 73.7639, "num_word_context": 202.3749, "num_word_doc": 49.8989, "num_word_query": 39.8293, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1624.6068, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3337, "query_norm": 1.4366, "queue_k_norm": 1.4476, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2426, "sent_len_1": 66.9, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5387, "stdk": 0.048, "stdq": 0.0458, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 22800 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 2.9773, "doc_norm": 1.4476, "encoder_q-embeddings": 1394.9718, "encoder_q-layer.0": 959.2746, "encoder_q-layer.1": 1108.1683, "encoder_q-layer.10": 970.7237, "encoder_q-layer.11": 1983.0637, "encoder_q-layer.2": 1335.4128, "encoder_q-layer.3": 1371.3995, "encoder_q-layer.4": 1445.8491, "encoder_q-layer.5": 1377.098, "encoder_q-layer.6": 1351.9148, "encoder_q-layer.7": 1288.9749, "encoder_q-layer.8": 1147.4475, "encoder_q-layer.9": 923.2316, "epoch": 0.22, "inbatch_neg_score": 0.3361, "inbatch_pos_score": 1.0098, "learning_rate": 4.2833333333333335e-05, "loss": 2.9773, "norm_diff": 0.0187, "norm_loss": 0.0, "num_token_doc": 66.8698, "num_token_overlap": 17.8369, "num_token_query": 52.2578, "num_token_union": 73.6789, "num_word_context": 202.3355, "num_word_doc": 49.8547, "num_word_query": 39.8458, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1965.4604, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3369, "query_norm": 1.4288, "queue_k_norm": 1.447, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2578, "sent_len_1": 66.8698, "sent_len_max_0": 128.0, "sent_len_max_1": 210.4313, "stdk": 0.0479, "stdq": 0.0454, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 22900 }, { "accuracy": 54.0527, "active_queue_size": 16384.0, "cl_loss": 2.9789, "doc_norm": 1.4483, "encoder_q-embeddings": 1062.0156, "encoder_q-layer.0": 710.3765, "encoder_q-layer.1": 788.0776, "encoder_q-layer.10": 925.9805, "encoder_q-layer.11": 2162.3247, "encoder_q-layer.2": 891.4085, "encoder_q-layer.3": 942.6699, "encoder_q-layer.4": 1011.5992, "encoder_q-layer.5": 1020.49, "encoder_q-layer.6": 1065.6176, "encoder_q-layer.7": 998.3315, "encoder_q-layer.8": 1033.7559, "encoder_q-layer.9": 901.3305, "epoch": 0.22, "inbatch_neg_score": 0.3396, "inbatch_pos_score": 0.9932, "learning_rate": 4.277777777777778e-05, "loss": 2.9789, "norm_diff": 0.0317, "norm_loss": 0.0, "num_token_doc": 66.8154, "num_token_overlap": 17.8254, "num_token_query": 52.4577, "num_token_union": 73.8353, "num_word_context": 202.5017, "num_word_doc": 49.8578, "num_word_query": 40.0088, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1637.512, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3389, "query_norm": 1.4165, "queue_k_norm": 1.4486, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4577, "sent_len_1": 66.8154, "sent_len_max_0": 128.0, "sent_len_max_1": 208.925, "stdk": 0.0479, "stdq": 0.045, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 23000 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 2.974, "doc_norm": 1.4497, "encoder_q-embeddings": 1690.2173, "encoder_q-layer.0": 1212.6946, "encoder_q-layer.1": 1346.6896, "encoder_q-layer.10": 920.3775, "encoder_q-layer.11": 2075.8279, "encoder_q-layer.2": 1611.8782, "encoder_q-layer.3": 1575.6096, "encoder_q-layer.4": 1729.2191, "encoder_q-layer.5": 1701.255, "encoder_q-layer.6": 1546.877, "encoder_q-layer.7": 1327.6355, "encoder_q-layer.8": 1180.097, "encoder_q-layer.9": 968.7928, "epoch": 0.23, "inbatch_neg_score": 0.3421, "inbatch_pos_score": 1.0107, "learning_rate": 4.272222222222223e-05, "loss": 2.974, "norm_diff": 0.0088, "norm_loss": 0.0, "num_token_doc": 66.8501, "num_token_overlap": 17.8289, "num_token_query": 52.3131, "num_token_union": 73.7405, "num_word_context": 202.5565, "num_word_doc": 49.8707, "num_word_query": 39.8908, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2208.9708, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3433, "query_norm": 1.4436, "queue_k_norm": 1.4503, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3131, "sent_len_1": 66.8501, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8587, "stdk": 0.048, "stdq": 0.046, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 23100 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 2.9613, "doc_norm": 1.4536, "encoder_q-embeddings": 1091.9261, "encoder_q-layer.0": 709.6923, "encoder_q-layer.1": 782.2422, "encoder_q-layer.10": 1029.8265, "encoder_q-layer.11": 2003.775, "encoder_q-layer.2": 881.0604, "encoder_q-layer.3": 967.0206, "encoder_q-layer.4": 1043.3914, "encoder_q-layer.5": 1013.1366, "encoder_q-layer.6": 1097.516, "encoder_q-layer.7": 1070.5948, "encoder_q-layer.8": 1166.4646, "encoder_q-layer.9": 1006.6326, "epoch": 0.23, "inbatch_neg_score": 0.3364, "inbatch_pos_score": 1.0176, "learning_rate": 4.266666666666667e-05, "loss": 2.9613, "norm_diff": 0.0094, "norm_loss": 0.0, "num_token_doc": 66.979, "num_token_overlap": 17.9074, "num_token_query": 52.4458, "num_token_union": 73.8297, "num_word_context": 202.4483, "num_word_doc": 49.9944, "num_word_query": 40.0087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1619.6253, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3376, "query_norm": 1.4458, "queue_k_norm": 1.4501, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4458, "sent_len_1": 66.979, "sent_len_max_0": 128.0, "sent_len_max_1": 208.305, "stdk": 0.0482, "stdq": 0.0461, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 23200 }, { "accuracy": 55.8105, "active_queue_size": 16384.0, "cl_loss": 2.9829, "doc_norm": 1.4529, "encoder_q-embeddings": 5732.3896, "encoder_q-layer.0": 3676.9714, "encoder_q-layer.1": 4136.7495, "encoder_q-layer.10": 2260.7485, "encoder_q-layer.11": 4066.0669, "encoder_q-layer.2": 5196.2207, "encoder_q-layer.3": 5569.9634, "encoder_q-layer.4": 6344.9033, "encoder_q-layer.5": 6736.1997, "encoder_q-layer.6": 6620.9312, "encoder_q-layer.7": 3834.8992, "encoder_q-layer.8": 2749.2939, "encoder_q-layer.9": 2065.4553, "epoch": 0.23, "inbatch_neg_score": 0.3365, "inbatch_pos_score": 1.0283, "learning_rate": 4.261111111111111e-05, "loss": 2.9829, "norm_diff": 0.0253, "norm_loss": 0.0, "num_token_doc": 66.7489, "num_token_overlap": 17.7724, "num_token_query": 52.2444, "num_token_union": 73.7092, "num_word_context": 202.2953, "num_word_doc": 49.813, "num_word_query": 39.8432, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7092.8552, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3367, "query_norm": 1.4782, "queue_k_norm": 1.4519, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2444, "sent_len_1": 66.7489, "sent_len_max_0": 128.0, "sent_len_max_1": 207.19, "stdk": 0.0481, "stdq": 0.047, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 23300 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 2.9823, "doc_norm": 1.4515, "encoder_q-embeddings": 13625.8037, "encoder_q-layer.0": 10391.2119, "encoder_q-layer.1": 10902.3838, "encoder_q-layer.10": 2435.158, "encoder_q-layer.11": 4491.8936, "encoder_q-layer.2": 12734.0225, "encoder_q-layer.3": 12504.6143, "encoder_q-layer.4": 14905.6914, "encoder_q-layer.5": 13210.8779, "encoder_q-layer.6": 12535.0566, "encoder_q-layer.7": 8781.9639, "encoder_q-layer.8": 4409.0298, "encoder_q-layer.9": 2508.2236, "epoch": 0.23, "inbatch_neg_score": 0.3549, "inbatch_pos_score": 1.0449, "learning_rate": 4.255555555555556e-05, "loss": 2.9823, "norm_diff": 0.0087, "norm_loss": 0.0, "num_token_doc": 66.8987, "num_token_overlap": 17.7887, "num_token_query": 52.2625, "num_token_union": 73.8142, "num_word_context": 202.3906, "num_word_doc": 49.9126, "num_word_query": 39.8329, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16059.8741, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3552, "query_norm": 1.4547, "queue_k_norm": 1.4537, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2625, "sent_len_1": 66.8987, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6813, "stdk": 0.048, "stdq": 0.0456, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 23400 }, { "accuracy": 56.7871, "active_queue_size": 16384.0, "cl_loss": 2.9779, "doc_norm": 1.452, "encoder_q-embeddings": 7192.5933, "encoder_q-layer.0": 4905.2871, "encoder_q-layer.1": 5680.9976, "encoder_q-layer.10": 1876.4349, "encoder_q-layer.11": 3988.6123, "encoder_q-layer.2": 6584.2349, "encoder_q-layer.3": 6643.0586, "encoder_q-layer.4": 6930.5298, "encoder_q-layer.5": 7513.0488, "encoder_q-layer.6": 5716.8335, "encoder_q-layer.7": 3436.3977, "encoder_q-layer.8": 2448.5945, "encoder_q-layer.9": 1903.2559, "epoch": 0.23, "inbatch_neg_score": 0.3558, "inbatch_pos_score": 1.0508, "learning_rate": 4.25e-05, "loss": 2.9779, "norm_diff": 0.0245, "norm_loss": 0.0, "num_token_doc": 66.655, "num_token_overlap": 17.7914, "num_token_query": 52.3179, "num_token_union": 73.6774, "num_word_context": 202.1653, "num_word_doc": 49.7615, "num_word_query": 39.8954, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8048.5764, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3567, "query_norm": 1.4759, "queue_k_norm": 1.4533, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3179, "sent_len_1": 66.655, "sent_len_max_0": 128.0, "sent_len_max_1": 207.3088, "stdk": 0.048, "stdq": 0.0461, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 23500 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 2.9598, "doc_norm": 1.4579, "encoder_q-embeddings": 4473.9229, "encoder_q-layer.0": 3043.2812, "encoder_q-layer.1": 3289.9507, "encoder_q-layer.10": 1953.4393, "encoder_q-layer.11": 4004.679, "encoder_q-layer.2": 4172.9985, "encoder_q-layer.3": 4574.708, "encoder_q-layer.4": 5028.2881, "encoder_q-layer.5": 4539.5962, "encoder_q-layer.6": 4959.9844, "encoder_q-layer.7": 4017.27, "encoder_q-layer.8": 3246.3474, "encoder_q-layer.9": 2155.2507, "epoch": 0.23, "inbatch_neg_score": 0.3657, "inbatch_pos_score": 1.0352, "learning_rate": 4.2444444444444445e-05, "loss": 2.9598, "norm_diff": 0.0296, "norm_loss": 0.0, "num_token_doc": 66.6423, "num_token_overlap": 17.7965, "num_token_query": 52.0683, "num_token_union": 73.4978, "num_word_context": 201.8802, "num_word_doc": 49.7586, "num_word_query": 39.7112, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5787.1293, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3647, "query_norm": 1.4876, "queue_k_norm": 1.4523, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0683, "sent_len_1": 66.6423, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3512, "stdk": 0.0482, "stdq": 0.0463, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 23600 }, { "accuracy": 58.0566, "active_queue_size": 16384.0, "cl_loss": 2.9714, "doc_norm": 1.4548, "encoder_q-embeddings": 2021.7715, "encoder_q-layer.0": 1360.2948, "encoder_q-layer.1": 1502.3397, "encoder_q-layer.10": 1914.9567, "encoder_q-layer.11": 3720.8948, "encoder_q-layer.2": 1756.1558, "encoder_q-layer.3": 1850.7527, "encoder_q-layer.4": 2010.6361, "encoder_q-layer.5": 1986.8132, "encoder_q-layer.6": 1950.6744, "encoder_q-layer.7": 1979.3259, "encoder_q-layer.8": 2036.7948, "encoder_q-layer.9": 1753.1611, "epoch": 0.23, "inbatch_neg_score": 0.3716, "inbatch_pos_score": 1.0713, "learning_rate": 4.238888888888889e-05, "loss": 2.9714, "norm_diff": 0.0422, "norm_loss": 0.0, "num_token_doc": 66.922, "num_token_overlap": 17.7873, "num_token_query": 52.1185, "num_token_union": 73.7003, "num_word_context": 202.1342, "num_word_doc": 49.9051, "num_word_query": 39.7249, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3030.0552, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3708, "query_norm": 1.497, "queue_k_norm": 1.4558, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1185, "sent_len_1": 66.922, "sent_len_max_0": 128.0, "sent_len_max_1": 211.5137, "stdk": 0.0481, "stdq": 0.0463, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 23700 }, { "accuracy": 58.8379, "active_queue_size": 16384.0, "cl_loss": 2.9602, "doc_norm": 1.4635, "encoder_q-embeddings": 4444.8179, "encoder_q-layer.0": 3048.1208, "encoder_q-layer.1": 3357.7302, "encoder_q-layer.10": 1899.1792, "encoder_q-layer.11": 3772.6489, "encoder_q-layer.2": 3647.5947, "encoder_q-layer.3": 3834.3535, "encoder_q-layer.4": 3962.6821, "encoder_q-layer.5": 3883.354, "encoder_q-layer.6": 3764.2322, "encoder_q-layer.7": 3208.5288, "encoder_q-layer.8": 2618.9844, "encoder_q-layer.9": 1878.5842, "epoch": 0.23, "inbatch_neg_score": 0.3635, "inbatch_pos_score": 1.083, "learning_rate": 4.233333333333334e-05, "loss": 2.9602, "norm_diff": 0.0176, "norm_loss": 0.0, "num_token_doc": 66.7901, "num_token_overlap": 17.8186, "num_token_query": 52.3851, "num_token_union": 73.8064, "num_word_context": 202.6392, "num_word_doc": 49.8299, "num_word_query": 39.9504, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5136.1125, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3643, "query_norm": 1.4811, "queue_k_norm": 1.4584, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3851, "sent_len_1": 66.7901, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9825, "stdk": 0.0484, "stdq": 0.0465, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 23800 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 2.9744, "doc_norm": 1.4586, "encoder_q-embeddings": 3451.8083, "encoder_q-layer.0": 2264.4292, "encoder_q-layer.1": 2679.5999, "encoder_q-layer.10": 1884.9006, "encoder_q-layer.11": 4178.1958, "encoder_q-layer.2": 3044.5813, "encoder_q-layer.3": 3272.3838, "encoder_q-layer.4": 3286.979, "encoder_q-layer.5": 3337.0344, "encoder_q-layer.6": 3344.9185, "encoder_q-layer.7": 2715.8276, "encoder_q-layer.8": 2402.7837, "encoder_q-layer.9": 1867.3721, "epoch": 0.23, "inbatch_neg_score": 0.3807, "inbatch_pos_score": 1.0869, "learning_rate": 4.227777777777778e-05, "loss": 2.9744, "norm_diff": 0.0144, "norm_loss": 0.0, "num_token_doc": 66.7537, "num_token_overlap": 17.8062, "num_token_query": 52.3412, "num_token_union": 73.7266, "num_word_context": 202.5594, "num_word_doc": 49.8373, "num_word_query": 39.9213, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4405.7515, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3813, "query_norm": 1.473, "queue_k_norm": 1.4576, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3412, "sent_len_1": 66.7537, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8363, "stdk": 0.0481, "stdq": 0.046, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 23900 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 2.9578, "doc_norm": 1.4641, "encoder_q-embeddings": 3988.6106, "encoder_q-layer.0": 2648.1792, "encoder_q-layer.1": 2927.4788, "encoder_q-layer.10": 1978.1864, "encoder_q-layer.11": 4293.9292, "encoder_q-layer.2": 3218.8962, "encoder_q-layer.3": 3296.8416, "encoder_q-layer.4": 3594.3296, "encoder_q-layer.5": 3641.8262, "encoder_q-layer.6": 3811.0129, "encoder_q-layer.7": 3520.3628, "encoder_q-layer.8": 2690.9299, "encoder_q-layer.9": 1894.2914, "epoch": 0.23, "inbatch_neg_score": 0.3698, "inbatch_pos_score": 1.0635, "learning_rate": 4.222222222222222e-05, "loss": 2.9578, "norm_diff": 0.0135, "norm_loss": 0.0, "num_token_doc": 66.6679, "num_token_overlap": 17.8285, "num_token_query": 52.3835, "num_token_union": 73.7044, "num_word_context": 202.1869, "num_word_doc": 49.7174, "num_word_query": 39.9592, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4913.1001, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3701, "query_norm": 1.4513, "queue_k_norm": 1.4585, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3835, "sent_len_1": 66.6679, "sent_len_max_0": 128.0, "sent_len_max_1": 207.0813, "stdk": 0.0483, "stdq": 0.0458, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 24000 }, { "accuracy": 58.6426, "active_queue_size": 16384.0, "cl_loss": 2.9651, "doc_norm": 1.4612, "encoder_q-embeddings": 2931.4871, "encoder_q-layer.0": 1930.3723, "encoder_q-layer.1": 2214.575, "encoder_q-layer.10": 2002.1215, "encoder_q-layer.11": 4119.9478, "encoder_q-layer.2": 2583.3286, "encoder_q-layer.3": 2656.5884, "encoder_q-layer.4": 2767.2612, "encoder_q-layer.5": 2719.6504, "encoder_q-layer.6": 2840.0703, "encoder_q-layer.7": 2796.1042, "encoder_q-layer.8": 2347.9585, "encoder_q-layer.9": 1876.1858, "epoch": 0.24, "inbatch_neg_score": 0.3514, "inbatch_pos_score": 1.0469, "learning_rate": 4.216666666666667e-05, "loss": 2.9651, "norm_diff": 0.0268, "norm_loss": 0.0, "num_token_doc": 66.6276, "num_token_overlap": 17.8364, "num_token_query": 52.3829, "num_token_union": 73.6428, "num_word_context": 202.1614, "num_word_doc": 49.7387, "num_word_query": 39.9444, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3888.6864, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3525, "query_norm": 1.4343, "queue_k_norm": 1.4598, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3829, "sent_len_1": 66.6276, "sent_len_max_0": 128.0, "sent_len_max_1": 205.0825, "stdk": 0.0482, "stdq": 0.0456, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 24100 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 2.9725, "doc_norm": 1.4577, "encoder_q-embeddings": 5719.7085, "encoder_q-layer.0": 4149.7256, "encoder_q-layer.1": 5003.3232, "encoder_q-layer.10": 1957.8416, "encoder_q-layer.11": 3950.6536, "encoder_q-layer.2": 5729.2524, "encoder_q-layer.3": 5338.812, "encoder_q-layer.4": 5941.0029, "encoder_q-layer.5": 5531.4102, "encoder_q-layer.6": 5218.1216, "encoder_q-layer.7": 4450.5083, "encoder_q-layer.8": 2890.1238, "encoder_q-layer.9": 1924.0322, "epoch": 0.24, "inbatch_neg_score": 0.3425, "inbatch_pos_score": 1.0361, "learning_rate": 4.211111111111111e-05, "loss": 2.9725, "norm_diff": 0.0121, "norm_loss": 0.0, "num_token_doc": 66.7341, "num_token_overlap": 17.7882, "num_token_query": 52.3722, "num_token_union": 73.734, "num_word_context": 202.337, "num_word_doc": 49.8027, "num_word_query": 39.9512, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6901.1454, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3423, "query_norm": 1.4495, "queue_k_norm": 1.4615, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3722, "sent_len_1": 66.7341, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2587, "stdk": 0.0481, "stdq": 0.0465, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 24200 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.9676, "doc_norm": 1.4623, "encoder_q-embeddings": 2226.5723, "encoder_q-layer.0": 1377.2737, "encoder_q-layer.1": 1611.0551, "encoder_q-layer.10": 2197.7476, "encoder_q-layer.11": 4087.8894, "encoder_q-layer.2": 1909.757, "encoder_q-layer.3": 2027.3755, "encoder_q-layer.4": 2225.8879, "encoder_q-layer.5": 2180.1121, "encoder_q-layer.6": 2328.4924, "encoder_q-layer.7": 2188.6816, "encoder_q-layer.8": 2455.9692, "encoder_q-layer.9": 2084.7075, "epoch": 0.24, "inbatch_neg_score": 0.3271, "inbatch_pos_score": 1.0176, "learning_rate": 4.205555555555556e-05, "loss": 2.9676, "norm_diff": 0.0371, "norm_loss": 0.0, "num_token_doc": 66.5974, "num_token_overlap": 17.7815, "num_token_query": 52.2317, "num_token_union": 73.6109, "num_word_context": 201.8212, "num_word_doc": 49.7046, "num_word_query": 39.8273, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3303.0283, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3279, "query_norm": 1.4252, "queue_k_norm": 1.4586, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2317, "sent_len_1": 66.5974, "sent_len_max_0": 128.0, "sent_len_max_1": 207.2488, "stdk": 0.0483, "stdq": 0.0458, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 24300 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 2.9535, "doc_norm": 1.4637, "encoder_q-embeddings": 3639.7688, "encoder_q-layer.0": 2531.8596, "encoder_q-layer.1": 2646.8506, "encoder_q-layer.10": 1758.6492, "encoder_q-layer.11": 3915.0437, "encoder_q-layer.2": 3103.9036, "encoder_q-layer.3": 3336.2427, "encoder_q-layer.4": 3599.0725, "encoder_q-layer.5": 3750.9949, "encoder_q-layer.6": 3765.4646, "encoder_q-layer.7": 2934.3137, "encoder_q-layer.8": 2152.5881, "encoder_q-layer.9": 1761.2515, "epoch": 0.24, "inbatch_neg_score": 0.3217, "inbatch_pos_score": 1.0215, "learning_rate": 4.2e-05, "loss": 2.9535, "norm_diff": 0.0242, "norm_loss": 0.0, "num_token_doc": 66.7208, "num_token_overlap": 17.8414, "num_token_query": 52.2523, "num_token_union": 73.6214, "num_word_context": 202.1245, "num_word_doc": 49.7841, "num_word_query": 39.8461, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4559.4609, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3228, "query_norm": 1.4395, "queue_k_norm": 1.4573, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2523, "sent_len_1": 66.7208, "sent_len_max_0": 128.0, "sent_len_max_1": 208.495, "stdk": 0.0484, "stdq": 0.0464, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 24400 }, { "accuracy": 55.0293, "active_queue_size": 16384.0, "cl_loss": 2.9671, "doc_norm": 1.4553, "encoder_q-embeddings": 2619.3235, "encoder_q-layer.0": 1778.6995, "encoder_q-layer.1": 2080.4727, "encoder_q-layer.10": 2026.5348, "encoder_q-layer.11": 4213.1504, "encoder_q-layer.2": 2305.2998, "encoder_q-layer.3": 2535.1865, "encoder_q-layer.4": 2770.4102, "encoder_q-layer.5": 2965.5874, "encoder_q-layer.6": 3073.9646, "encoder_q-layer.7": 2829.1631, "encoder_q-layer.8": 2362.9385, "encoder_q-layer.9": 1912.2555, "epoch": 0.24, "inbatch_neg_score": 0.2911, "inbatch_pos_score": 0.96, "learning_rate": 4.194444444444445e-05, "loss": 2.9671, "norm_diff": 0.0503, "norm_loss": 0.0, "num_token_doc": 66.6937, "num_token_overlap": 17.7987, "num_token_query": 52.2898, "num_token_union": 73.6916, "num_word_context": 202.2428, "num_word_doc": 49.8093, "num_word_query": 39.8931, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3796.9445, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.405, "queue_k_norm": 1.4584, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2898, "sent_len_1": 66.6937, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8363, "stdk": 0.048, "stdq": 0.0456, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 24500 }, { "accuracy": 55.9082, "active_queue_size": 16384.0, "cl_loss": 2.9667, "doc_norm": 1.4555, "encoder_q-embeddings": 3465.0625, "encoder_q-layer.0": 2326.7654, "encoder_q-layer.1": 2551.0728, "encoder_q-layer.10": 1754.8059, "encoder_q-layer.11": 3798.7622, "encoder_q-layer.2": 3194.1311, "encoder_q-layer.3": 3486.0923, "encoder_q-layer.4": 3777.1897, "encoder_q-layer.5": 3638.1399, "encoder_q-layer.6": 3510.5134, "encoder_q-layer.7": 3007.772, "encoder_q-layer.8": 2177.137, "encoder_q-layer.9": 1783.1708, "epoch": 0.24, "inbatch_neg_score": 0.2898, "inbatch_pos_score": 0.9673, "learning_rate": 4.188888888888889e-05, "loss": 2.9667, "norm_diff": 0.0445, "norm_loss": 0.0, "num_token_doc": 66.8748, "num_token_overlap": 17.7933, "num_token_query": 52.2548, "num_token_union": 73.7748, "num_word_context": 202.3692, "num_word_doc": 49.9323, "num_word_query": 39.8639, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4516.776, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.4111, "queue_k_norm": 1.458, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2548, "sent_len_1": 66.8748, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8288, "stdk": 0.0481, "stdq": 0.0456, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 24600 }, { "accuracy": 56.5918, "active_queue_size": 16384.0, "cl_loss": 2.9507, "doc_norm": 1.4538, "encoder_q-embeddings": 6294.7324, "encoder_q-layer.0": 4366.2554, "encoder_q-layer.1": 5157.1333, "encoder_q-layer.10": 1995.4155, "encoder_q-layer.11": 3996.2971, "encoder_q-layer.2": 7479.0981, "encoder_q-layer.3": 7992.0049, "encoder_q-layer.4": 10342.6807, "encoder_q-layer.5": 13003.6582, "encoder_q-layer.6": 12376.5635, "encoder_q-layer.7": 9594.6855, "encoder_q-layer.8": 4461.9688, "encoder_q-layer.9": 2109.5042, "epoch": 0.24, "inbatch_neg_score": 0.284, "inbatch_pos_score": 0.9541, "learning_rate": 4.183333333333334e-05, "loss": 2.9507, "norm_diff": 0.0622, "norm_loss": 0.0, "num_token_doc": 66.639, "num_token_overlap": 17.7974, "num_token_query": 52.215, "num_token_union": 73.6453, "num_word_context": 202.2146, "num_word_doc": 49.7376, "num_word_query": 39.8103, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11186.3873, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2842, "query_norm": 1.3916, "queue_k_norm": 1.454, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.215, "sent_len_1": 66.639, "sent_len_max_0": 128.0, "sent_len_max_1": 208.115, "stdk": 0.0481, "stdq": 0.045, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 24700 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 2.9506, "doc_norm": 1.4462, "encoder_q-embeddings": 9054.4971, "encoder_q-layer.0": 6428.8423, "encoder_q-layer.1": 8234.001, "encoder_q-layer.10": 1797.4967, "encoder_q-layer.11": 3785.9182, "encoder_q-layer.2": 8943.2744, "encoder_q-layer.3": 10185.7871, "encoder_q-layer.4": 9594.1426, "encoder_q-layer.5": 9000.9365, "encoder_q-layer.6": 6435.9248, "encoder_q-layer.7": 4337.7832, "encoder_q-layer.8": 2486.2427, "encoder_q-layer.9": 1877.3201, "epoch": 0.24, "inbatch_neg_score": 0.2749, "inbatch_pos_score": 0.9512, "learning_rate": 4.177777777777778e-05, "loss": 2.9506, "norm_diff": 0.0274, "norm_loss": 0.0, "num_token_doc": 66.8828, "num_token_overlap": 17.8238, "num_token_query": 52.3139, "num_token_union": 73.7707, "num_word_context": 202.2463, "num_word_doc": 49.9065, "num_word_query": 39.9227, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10356.0823, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2742, "query_norm": 1.4188, "queue_k_norm": 1.4546, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3139, "sent_len_1": 66.8828, "sent_len_max_0": 128.0, "sent_len_max_1": 210.775, "stdk": 0.0479, "stdq": 0.0456, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 24800 }, { "accuracy": 57.5684, "active_queue_size": 16384.0, "cl_loss": 2.955, "doc_norm": 1.451, "encoder_q-embeddings": 1859.1826, "encoder_q-layer.0": 1262.3002, "encoder_q-layer.1": 1382.9807, "encoder_q-layer.10": 1886.6133, "encoder_q-layer.11": 3786.1328, "encoder_q-layer.2": 1557.6721, "encoder_q-layer.3": 1696.92, "encoder_q-layer.4": 1772.4413, "encoder_q-layer.5": 1840.7793, "encoder_q-layer.6": 1930.67, "encoder_q-layer.7": 1867.4543, "encoder_q-layer.8": 1973.9812, "encoder_q-layer.9": 1727.2153, "epoch": 0.24, "inbatch_neg_score": 0.2904, "inbatch_pos_score": 0.9834, "learning_rate": 4.172222222222222e-05, "loss": 2.955, "norm_diff": 0.0314, "norm_loss": 0.0, "num_token_doc": 66.7795, "num_token_overlap": 17.7674, "num_token_query": 52.2583, "num_token_union": 73.6981, "num_word_context": 202.3397, "num_word_doc": 49.8151, "num_word_query": 39.8562, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2869.9754, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2908, "query_norm": 1.4196, "queue_k_norm": 1.4504, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2583, "sent_len_1": 66.7795, "sent_len_max_0": 128.0, "sent_len_max_1": 209.67, "stdk": 0.0481, "stdq": 0.0454, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 24900 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 2.9513, "doc_norm": 1.4541, "encoder_q-embeddings": 5047.0815, "encoder_q-layer.0": 3718.6895, "encoder_q-layer.1": 4069.1189, "encoder_q-layer.10": 1973.6865, "encoder_q-layer.11": 4040.0012, "encoder_q-layer.2": 4398.1665, "encoder_q-layer.3": 4744.9111, "encoder_q-layer.4": 4862.3184, "encoder_q-layer.5": 5228.375, "encoder_q-layer.6": 4642.6733, "encoder_q-layer.7": 3540.2424, "encoder_q-layer.8": 3026.198, "encoder_q-layer.9": 2190.4683, "epoch": 0.24, "inbatch_neg_score": 0.2879, "inbatch_pos_score": 0.9927, "learning_rate": 4.166666666666667e-05, "loss": 2.9513, "norm_diff": 0.0115, "norm_loss": 0.0, "num_token_doc": 66.7289, "num_token_overlap": 17.7873, "num_token_query": 52.2169, "num_token_union": 73.6512, "num_word_context": 202.0083, "num_word_doc": 49.7757, "num_word_query": 39.8183, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6088.5123, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2876, "query_norm": 1.4452, "queue_k_norm": 1.4518, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2169, "sent_len_1": 66.7289, "sent_len_max_0": 128.0, "sent_len_max_1": 210.47, "stdk": 0.0483, "stdq": 0.0464, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 25000 }, { "accuracy": 56.9824, "active_queue_size": 16384.0, "cl_loss": 2.9555, "doc_norm": 1.4514, "encoder_q-embeddings": 6116.5127, "encoder_q-layer.0": 4300.8477, "encoder_q-layer.1": 5190.3975, "encoder_q-layer.10": 2029.0122, "encoder_q-layer.11": 3807.2654, "encoder_q-layer.2": 6579.4907, "encoder_q-layer.3": 7032.6157, "encoder_q-layer.4": 7354.915, "encoder_q-layer.5": 6883.917, "encoder_q-layer.6": 7114.896, "encoder_q-layer.7": 5766.6201, "encoder_q-layer.8": 4772.439, "encoder_q-layer.9": 3076.4751, "epoch": 0.25, "inbatch_neg_score": 0.2851, "inbatch_pos_score": 0.9795, "learning_rate": 4.1611111111111114e-05, "loss": 2.9555, "norm_diff": 0.0055, "norm_loss": 0.0, "num_token_doc": 66.5721, "num_token_overlap": 17.7324, "num_token_query": 52.1454, "num_token_union": 73.5666, "num_word_context": 201.8148, "num_word_doc": 49.6814, "num_word_query": 39.776, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8321.1519, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2834, "query_norm": 1.4498, "queue_k_norm": 1.4496, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1454, "sent_len_1": 66.5721, "sent_len_max_0": 128.0, "sent_len_max_1": 208.06, "stdk": 0.0482, "stdq": 0.0463, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 25100 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 2.9376, "doc_norm": 1.451, "encoder_q-embeddings": 2221.5439, "encoder_q-layer.0": 1563.4261, "encoder_q-layer.1": 1691.3875, "encoder_q-layer.10": 1807.5114, "encoder_q-layer.11": 3721.3521, "encoder_q-layer.2": 1851.3271, "encoder_q-layer.3": 2013.7108, "encoder_q-layer.4": 2382.0422, "encoder_q-layer.5": 2310.7065, "encoder_q-layer.6": 2408.0801, "encoder_q-layer.7": 2205.3787, "encoder_q-layer.8": 2264.5947, "encoder_q-layer.9": 1864.2117, "epoch": 0.25, "inbatch_neg_score": 0.2746, "inbatch_pos_score": 0.9614, "learning_rate": 4.155555555555556e-05, "loss": 2.9376, "norm_diff": 0.0373, "norm_loss": 0.0, "num_token_doc": 66.7621, "num_token_overlap": 17.815, "num_token_query": 52.2622, "num_token_union": 73.6955, "num_word_context": 202.3383, "num_word_doc": 49.839, "num_word_query": 39.859, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3289.0033, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2749, "query_norm": 1.4137, "queue_k_norm": 1.4479, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2622, "sent_len_1": 66.7621, "sent_len_max_0": 128.0, "sent_len_max_1": 206.4725, "stdk": 0.0483, "stdq": 0.0452, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 25200 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 2.954, "doc_norm": 1.4482, "encoder_q-embeddings": 10790.1904, "encoder_q-layer.0": 7410.1719, "encoder_q-layer.1": 8267.3516, "encoder_q-layer.10": 3748.4958, "encoder_q-layer.11": 7377.3218, "encoder_q-layer.2": 10117.1943, "encoder_q-layer.3": 10530.5811, "encoder_q-layer.4": 11447.0254, "encoder_q-layer.5": 11006.9678, "encoder_q-layer.6": 10251.5674, "encoder_q-layer.7": 6825.3477, "encoder_q-layer.8": 4477.0732, "encoder_q-layer.9": 3620.9958, "epoch": 0.25, "inbatch_neg_score": 0.2863, "inbatch_pos_score": 0.9878, "learning_rate": 4.15e-05, "loss": 2.954, "norm_diff": 0.0097, "norm_loss": 0.0, "num_token_doc": 66.6917, "num_token_overlap": 17.7917, "num_token_query": 52.3679, "num_token_union": 73.7406, "num_word_context": 202.4247, "num_word_doc": 49.7678, "num_word_query": 39.9416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12749.7272, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2866, "query_norm": 1.4431, "queue_k_norm": 1.4472, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3679, "sent_len_1": 66.6917, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1188, "stdk": 0.0482, "stdq": 0.0462, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 25300 }, { "accuracy": 56.6895, "active_queue_size": 16384.0, "cl_loss": 2.9536, "doc_norm": 1.4455, "encoder_q-embeddings": 9491.7881, "encoder_q-layer.0": 6680.626, "encoder_q-layer.1": 7302.5356, "encoder_q-layer.10": 3816.4863, "encoder_q-layer.11": 8120.7402, "encoder_q-layer.2": 8651.6797, "encoder_q-layer.3": 9565.5068, "encoder_q-layer.4": 10302.2012, "encoder_q-layer.5": 10600.3164, "encoder_q-layer.6": 11793.2334, "encoder_q-layer.7": 8780.251, "encoder_q-layer.8": 5401.7349, "encoder_q-layer.9": 3999.9709, "epoch": 0.25, "inbatch_neg_score": 0.3025, "inbatch_pos_score": 0.9897, "learning_rate": 4.144444444444445e-05, "loss": 2.9536, "norm_diff": 0.0139, "norm_loss": 0.0, "num_token_doc": 66.7686, "num_token_overlap": 17.7826, "num_token_query": 52.2537, "num_token_union": 73.719, "num_word_context": 202.2482, "num_word_doc": 49.803, "num_word_query": 39.8615, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12405.5767, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.301, "query_norm": 1.4343, "queue_k_norm": 1.446, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2537, "sent_len_1": 66.7686, "sent_len_max_0": 128.0, "sent_len_max_1": 207.0163, "stdk": 0.0481, "stdq": 0.0461, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 25400 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 2.9345, "doc_norm": 1.4442, "encoder_q-embeddings": 5669.0322, "encoder_q-layer.0": 4150.3057, "encoder_q-layer.1": 4531.8311, "encoder_q-layer.10": 3787.9504, "encoder_q-layer.11": 7738.4858, "encoder_q-layer.2": 5204.6733, "encoder_q-layer.3": 5150.1978, "encoder_q-layer.4": 5119.6963, "encoder_q-layer.5": 4922.6577, "encoder_q-layer.6": 4915.6172, "encoder_q-layer.7": 4633.8413, "encoder_q-layer.8": 4589.4492, "encoder_q-layer.9": 4042.7427, "epoch": 0.25, "inbatch_neg_score": 0.2899, "inbatch_pos_score": 0.9829, "learning_rate": 4.138888888888889e-05, "loss": 2.9345, "norm_diff": 0.0149, "norm_loss": 0.0, "num_token_doc": 66.7115, "num_token_overlap": 17.8036, "num_token_query": 52.2562, "num_token_union": 73.6646, "num_word_context": 202.3202, "num_word_doc": 49.7747, "num_word_query": 39.8498, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7377.6534, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2886, "query_norm": 1.4293, "queue_k_norm": 1.4456, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2562, "sent_len_1": 66.7115, "sent_len_max_0": 128.0, "sent_len_max_1": 208.065, "stdk": 0.0481, "stdq": 0.0464, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 25500 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.941, "doc_norm": 1.4448, "encoder_q-embeddings": 3469.2314, "encoder_q-layer.0": 2231.9631, "encoder_q-layer.1": 2503.8113, "encoder_q-layer.10": 3811.2637, "encoder_q-layer.11": 7684.2749, "encoder_q-layer.2": 2912.4924, "encoder_q-layer.3": 3158.5059, "encoder_q-layer.4": 3416.772, "encoder_q-layer.5": 3436.1733, "encoder_q-layer.6": 3652.9365, "encoder_q-layer.7": 3788.8125, "encoder_q-layer.8": 4079.6526, "encoder_q-layer.9": 3819.116, "epoch": 0.25, "inbatch_neg_score": 0.2986, "inbatch_pos_score": 0.9985, "learning_rate": 4.133333333333333e-05, "loss": 2.941, "norm_diff": 0.0211, "norm_loss": 0.0, "num_token_doc": 66.823, "num_token_overlap": 17.7951, "num_token_query": 52.1706, "num_token_union": 73.6607, "num_word_context": 202.1207, "num_word_doc": 49.8641, "num_word_query": 39.7896, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5617.2319, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2991, "query_norm": 1.4237, "queue_k_norm": 1.4443, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1706, "sent_len_1": 66.823, "sent_len_max_0": 128.0, "sent_len_max_1": 207.2287, "stdk": 0.0482, "stdq": 0.0461, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 25600 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.9346, "doc_norm": 1.4441, "encoder_q-embeddings": 3430.5874, "encoder_q-layer.0": 2289.915, "encoder_q-layer.1": 2481.5505, "encoder_q-layer.10": 3670.0051, "encoder_q-layer.11": 7376.3794, "encoder_q-layer.2": 2830.9927, "encoder_q-layer.3": 2933.2317, "encoder_q-layer.4": 3113.3682, "encoder_q-layer.5": 3346.3315, "encoder_q-layer.6": 3676.3401, "encoder_q-layer.7": 3775.1743, "encoder_q-layer.8": 4065.312, "encoder_q-layer.9": 3678.2241, "epoch": 0.25, "inbatch_neg_score": 0.2976, "inbatch_pos_score": 0.9995, "learning_rate": 4.127777777777778e-05, "loss": 2.9346, "norm_diff": 0.0305, "norm_loss": 0.0, "num_token_doc": 66.9901, "num_token_overlap": 17.8361, "num_token_query": 52.2644, "num_token_union": 73.7969, "num_word_context": 202.5628, "num_word_doc": 50.0044, "num_word_query": 39.8528, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5522.5832, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2974, "query_norm": 1.4136, "queue_k_norm": 1.4471, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2644, "sent_len_1": 66.9901, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5325, "stdk": 0.0481, "stdq": 0.0458, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 25700 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 2.9249, "doc_norm": 1.4443, "encoder_q-embeddings": 3591.405, "encoder_q-layer.0": 2377.0708, "encoder_q-layer.1": 2897.1387, "encoder_q-layer.10": 1755.8489, "encoder_q-layer.11": 4031.6748, "encoder_q-layer.2": 3460.3074, "encoder_q-layer.3": 3660.4409, "encoder_q-layer.4": 3932.3701, "encoder_q-layer.5": 4339.8784, "encoder_q-layer.6": 5009.7583, "encoder_q-layer.7": 3931.7087, "encoder_q-layer.8": 2680.3235, "encoder_q-layer.9": 1934.8689, "epoch": 0.25, "inbatch_neg_score": 0.2875, "inbatch_pos_score": 0.9854, "learning_rate": 4.1222222222222224e-05, "loss": 2.9249, "norm_diff": 0.0223, "norm_loss": 0.0, "num_token_doc": 66.9711, "num_token_overlap": 17.829, "num_token_query": 52.3503, "num_token_union": 73.8039, "num_word_context": 202.275, "num_word_doc": 49.9856, "num_word_query": 39.9411, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5051.6455, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2869, "query_norm": 1.422, "queue_k_norm": 1.4452, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3503, "sent_len_1": 66.9711, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2912, "stdk": 0.0482, "stdq": 0.0464, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 25800 }, { "accuracy": 57.7637, "active_queue_size": 16384.0, "cl_loss": 2.9317, "doc_norm": 1.4469, "encoder_q-embeddings": 4246.8896, "encoder_q-layer.0": 2918.1638, "encoder_q-layer.1": 3538.6416, "encoder_q-layer.10": 1881.8762, "encoder_q-layer.11": 3804.9473, "encoder_q-layer.2": 3870.7727, "encoder_q-layer.3": 4145.0991, "encoder_q-layer.4": 4242.6763, "encoder_q-layer.5": 4604.5889, "encoder_q-layer.6": 4784.5693, "encoder_q-layer.7": 3556.4487, "encoder_q-layer.8": 3249.0044, "encoder_q-layer.9": 2155.4844, "epoch": 0.25, "inbatch_neg_score": 0.2934, "inbatch_pos_score": 0.98, "learning_rate": 4.116666666666667e-05, "loss": 2.9317, "norm_diff": 0.0428, "norm_loss": 0.0, "num_token_doc": 66.8173, "num_token_overlap": 17.8401, "num_token_query": 52.2969, "num_token_union": 73.7073, "num_word_context": 202.0951, "num_word_doc": 49.8541, "num_word_query": 39.8999, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5464.0542, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2942, "query_norm": 1.4041, "queue_k_norm": 1.4467, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2969, "sent_len_1": 66.8173, "sent_len_max_0": 128.0, "sent_len_max_1": 208.625, "stdk": 0.0483, "stdq": 0.0457, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 25900 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 2.9498, "doc_norm": 1.442, "encoder_q-embeddings": 5359.4229, "encoder_q-layer.0": 3725.3972, "encoder_q-layer.1": 4375.7646, "encoder_q-layer.10": 1804.6088, "encoder_q-layer.11": 3781.949, "encoder_q-layer.2": 5087.2969, "encoder_q-layer.3": 5003.3633, "encoder_q-layer.4": 4614.813, "encoder_q-layer.5": 4153.439, "encoder_q-layer.6": 3285.158, "encoder_q-layer.7": 2682.7427, "encoder_q-layer.8": 2277.6746, "encoder_q-layer.9": 1815.6716, "epoch": 0.25, "inbatch_neg_score": 0.2998, "inbatch_pos_score": 0.9995, "learning_rate": 4.111111111111111e-05, "loss": 2.9498, "norm_diff": 0.0179, "norm_loss": 0.0, "num_token_doc": 66.6649, "num_token_overlap": 17.7745, "num_token_query": 52.2573, "num_token_union": 73.6373, "num_word_context": 202.2206, "num_word_doc": 49.74, "num_word_query": 39.8577, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5827.7855, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2986, "query_norm": 1.4241, "queue_k_norm": 1.4456, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2573, "sent_len_1": 66.6649, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2812, "stdk": 0.0481, "stdq": 0.0462, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 26000 }, { "accuracy": 58.7402, "active_queue_size": 16384.0, "cl_loss": 2.9264, "doc_norm": 1.441, "encoder_q-embeddings": 4928.7583, "encoder_q-layer.0": 3364.0195, "encoder_q-layer.1": 3762.4304, "encoder_q-layer.10": 1722.744, "encoder_q-layer.11": 3587.0598, "encoder_q-layer.2": 4116.29, "encoder_q-layer.3": 4483.3159, "encoder_q-layer.4": 4779.7861, "encoder_q-layer.5": 4946.1084, "encoder_q-layer.6": 4988.3159, "encoder_q-layer.7": 4488.2114, "encoder_q-layer.8": 3122.8401, "encoder_q-layer.9": 2068.1565, "epoch": 0.25, "inbatch_neg_score": 0.2944, "inbatch_pos_score": 1.0195, "learning_rate": 4.105555555555556e-05, "loss": 2.9264, "norm_diff": 0.0051, "norm_loss": 0.0, "num_token_doc": 66.6887, "num_token_overlap": 17.7786, "num_token_query": 52.3086, "num_token_union": 73.7189, "num_word_context": 202.4614, "num_word_doc": 49.7675, "num_word_query": 39.9028, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5957.5276, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2939, "query_norm": 1.4434, "queue_k_norm": 1.4441, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3086, "sent_len_1": 66.6887, "sent_len_max_0": 128.0, "sent_len_max_1": 209.29, "stdk": 0.0481, "stdq": 0.047, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 26100 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 2.9243, "doc_norm": 1.445, "encoder_q-embeddings": 2677.3743, "encoder_q-layer.0": 1882.0144, "encoder_q-layer.1": 2231.2773, "encoder_q-layer.10": 1920.0254, "encoder_q-layer.11": 3620.522, "encoder_q-layer.2": 2913.7158, "encoder_q-layer.3": 3363.179, "encoder_q-layer.4": 3365.9163, "encoder_q-layer.5": 3371.5037, "encoder_q-layer.6": 3494.552, "encoder_q-layer.7": 2853.7424, "encoder_q-layer.8": 2398.0845, "encoder_q-layer.9": 2101.0598, "epoch": 0.26, "inbatch_neg_score": 0.2901, "inbatch_pos_score": 1.0, "learning_rate": 4.1e-05, "loss": 2.9243, "norm_diff": 0.0119, "norm_loss": 0.0, "num_token_doc": 66.8334, "num_token_overlap": 17.8268, "num_token_query": 52.2372, "num_token_union": 73.6929, "num_word_context": 202.2295, "num_word_doc": 49.8564, "num_word_query": 39.8478, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4089.0707, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2896, "query_norm": 1.4371, "queue_k_norm": 1.4444, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2372, "sent_len_1": 66.8334, "sent_len_max_0": 128.0, "sent_len_max_1": 208.455, "stdk": 0.0482, "stdq": 0.0467, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 26200 }, { "accuracy": 58.4473, "active_queue_size": 16384.0, "cl_loss": 2.9274, "doc_norm": 1.4468, "encoder_q-embeddings": 1795.8486, "encoder_q-layer.0": 1122.4768, "encoder_q-layer.1": 1264.7664, "encoder_q-layer.10": 1815.3542, "encoder_q-layer.11": 3646.1631, "encoder_q-layer.2": 1469.8915, "encoder_q-layer.3": 1606.7322, "encoder_q-layer.4": 1732.5148, "encoder_q-layer.5": 1760.6709, "encoder_q-layer.6": 1845.1901, "encoder_q-layer.7": 1878.5763, "encoder_q-layer.8": 2087.5776, "encoder_q-layer.9": 1832.657, "epoch": 0.26, "inbatch_neg_score": 0.2902, "inbatch_pos_score": 1.002, "learning_rate": 4.094444444444445e-05, "loss": 2.9274, "norm_diff": 0.0153, "norm_loss": 0.0, "num_token_doc": 66.6429, "num_token_overlap": 17.8155, "num_token_query": 52.2065, "num_token_union": 73.5654, "num_word_context": 202.0002, "num_word_doc": 49.7369, "num_word_query": 39.7948, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2805.125, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2903, "query_norm": 1.4346, "queue_k_norm": 1.4433, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2065, "sent_len_1": 66.6429, "sent_len_max_0": 128.0, "sent_len_max_1": 209.19, "stdk": 0.0483, "stdq": 0.0464, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 26300 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 2.9233, "doc_norm": 1.4458, "encoder_q-embeddings": 2892.8677, "encoder_q-layer.0": 2043.7327, "encoder_q-layer.1": 2424.2949, "encoder_q-layer.10": 1852.5267, "encoder_q-layer.11": 3781.7188, "encoder_q-layer.2": 3115.2942, "encoder_q-layer.3": 3211.7188, "encoder_q-layer.4": 3536.4924, "encoder_q-layer.5": 3551.5413, "encoder_q-layer.6": 3452.0972, "encoder_q-layer.7": 3091.6157, "encoder_q-layer.8": 2635.3577, "encoder_q-layer.9": 1842.219, "epoch": 0.26, "inbatch_neg_score": 0.2937, "inbatch_pos_score": 0.9941, "learning_rate": 4.088888888888889e-05, "loss": 2.9233, "norm_diff": 0.009, "norm_loss": 0.0, "num_token_doc": 66.7324, "num_token_overlap": 17.8167, "num_token_query": 52.44, "num_token_union": 73.7591, "num_word_context": 202.2552, "num_word_doc": 49.7787, "num_word_query": 39.9991, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4353.155, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2942, "query_norm": 1.4474, "queue_k_norm": 1.4453, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.44, "sent_len_1": 66.7324, "sent_len_max_0": 128.0, "sent_len_max_1": 209.51, "stdk": 0.0483, "stdq": 0.0465, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 26400 }, { "accuracy": 56.2012, "active_queue_size": 16384.0, "cl_loss": 2.9186, "doc_norm": 1.4479, "encoder_q-embeddings": 3415.3, "encoder_q-layer.0": 2370.0876, "encoder_q-layer.1": 2601.0198, "encoder_q-layer.10": 1906.5669, "encoder_q-layer.11": 3917.6914, "encoder_q-layer.2": 3043.4824, "encoder_q-layer.3": 3274.623, "encoder_q-layer.4": 3228.8611, "encoder_q-layer.5": 3381.4236, "encoder_q-layer.6": 3542.594, "encoder_q-layer.7": 3367.7427, "encoder_q-layer.8": 2761.3606, "encoder_q-layer.9": 2086.6328, "epoch": 0.26, "inbatch_neg_score": 0.2943, "inbatch_pos_score": 0.9893, "learning_rate": 4.0833333333333334e-05, "loss": 2.9186, "norm_diff": 0.0144, "norm_loss": 0.0, "num_token_doc": 66.7919, "num_token_overlap": 17.8303, "num_token_query": 52.3761, "num_token_union": 73.772, "num_word_context": 202.3591, "num_word_doc": 49.8073, "num_word_query": 39.9485, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4506.206, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2939, "query_norm": 1.4348, "queue_k_norm": 1.4447, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3761, "sent_len_1": 66.7919, "sent_len_max_0": 128.0, "sent_len_max_1": 210.35, "stdk": 0.0483, "stdq": 0.0459, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 26500 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.9262, "doc_norm": 1.4477, "encoder_q-embeddings": 1726.4053, "encoder_q-layer.0": 1070.0094, "encoder_q-layer.1": 1162.4916, "encoder_q-layer.10": 1788.3546, "encoder_q-layer.11": 3641.2244, "encoder_q-layer.2": 1324.2551, "encoder_q-layer.3": 1415.7386, "encoder_q-layer.4": 1499.5673, "encoder_q-layer.5": 1505.2178, "encoder_q-layer.6": 1676.7332, "encoder_q-layer.7": 1775.6038, "encoder_q-layer.8": 2014.6669, "encoder_q-layer.9": 1784.611, "epoch": 0.26, "inbatch_neg_score": 0.306, "inbatch_pos_score": 1.0186, "learning_rate": 4.0777777777777783e-05, "loss": 2.9262, "norm_diff": 0.0116, "norm_loss": 0.0, "num_token_doc": 66.7467, "num_token_overlap": 17.7967, "num_token_query": 52.3322, "num_token_union": 73.7207, "num_word_context": 202.5957, "num_word_doc": 49.79, "num_word_query": 39.9011, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2678.6008, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3052, "query_norm": 1.4569, "queue_k_norm": 1.4447, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3322, "sent_len_1": 66.7467, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7188, "stdk": 0.0483, "stdq": 0.0461, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 26600 }, { "accuracy": 57.959, "active_queue_size": 16384.0, "cl_loss": 2.92, "doc_norm": 1.4506, "encoder_q-embeddings": 2047.5253, "encoder_q-layer.0": 1352.8724, "encoder_q-layer.1": 1512.2792, "encoder_q-layer.10": 1701.5146, "encoder_q-layer.11": 3407.6729, "encoder_q-layer.2": 1700.3792, "encoder_q-layer.3": 1825.2549, "encoder_q-layer.4": 1938.3156, "encoder_q-layer.5": 1894.0459, "encoder_q-layer.6": 1948.8821, "encoder_q-layer.7": 1987.8608, "encoder_q-layer.8": 1995.8942, "encoder_q-layer.9": 1698.4207, "epoch": 0.26, "inbatch_neg_score": 0.3193, "inbatch_pos_score": 1.0205, "learning_rate": 4.0722222222222226e-05, "loss": 2.92, "norm_diff": 0.0305, "norm_loss": 0.0, "num_token_doc": 66.7111, "num_token_overlap": 17.8181, "num_token_query": 52.2904, "num_token_union": 73.669, "num_word_context": 202.427, "num_word_doc": 49.7622, "num_word_query": 39.8735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2889.5818, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3184, "query_norm": 1.4811, "queue_k_norm": 1.4448, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2904, "sent_len_1": 66.7111, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4375, "stdk": 0.0484, "stdq": 0.0462, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 26700 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 2.9156, "doc_norm": 1.4446, "encoder_q-embeddings": 1642.6901, "encoder_q-layer.0": 1108.6106, "encoder_q-layer.1": 1172.4124, "encoder_q-layer.10": 1900.4346, "encoder_q-layer.11": 3891.0813, "encoder_q-layer.2": 1305.3975, "encoder_q-layer.3": 1364.272, "encoder_q-layer.4": 1506.7435, "encoder_q-layer.5": 1508.0421, "encoder_q-layer.6": 1703.7426, "encoder_q-layer.7": 1940.4833, "encoder_q-layer.8": 2044.2128, "encoder_q-layer.9": 1887.1173, "epoch": 0.26, "inbatch_neg_score": 0.3333, "inbatch_pos_score": 1.0117, "learning_rate": 4.066666666666667e-05, "loss": 2.9156, "norm_diff": 0.0365, "norm_loss": 0.0, "num_token_doc": 66.7394, "num_token_overlap": 17.8122, "num_token_query": 52.4424, "num_token_union": 73.7885, "num_word_context": 202.4181, "num_word_doc": 49.7897, "num_word_query": 40.0007, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2739.2893, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3333, "query_norm": 1.4811, "queue_k_norm": 1.4462, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4424, "sent_len_1": 66.7394, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0288, "stdk": 0.0482, "stdq": 0.0458, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 26800 }, { "accuracy": 57.7637, "active_queue_size": 16384.0, "cl_loss": 2.919, "doc_norm": 1.4452, "encoder_q-embeddings": 1761.4896, "encoder_q-layer.0": 1108.3333, "encoder_q-layer.1": 1248.9983, "encoder_q-layer.10": 2002.257, "encoder_q-layer.11": 3719.7659, "encoder_q-layer.2": 1428.2581, "encoder_q-layer.3": 1594.5461, "encoder_q-layer.4": 1640.4449, "encoder_q-layer.5": 1744.9512, "encoder_q-layer.6": 1870.5134, "encoder_q-layer.7": 1897.8732, "encoder_q-layer.8": 2184.8333, "encoder_q-layer.9": 1924.2506, "epoch": 0.26, "inbatch_neg_score": 0.3522, "inbatch_pos_score": 1.0508, "learning_rate": 4.061111111111111e-05, "loss": 2.919, "norm_diff": 0.0547, "norm_loss": 0.0, "num_token_doc": 66.7743, "num_token_overlap": 17.8011, "num_token_query": 52.388, "num_token_union": 73.8103, "num_word_context": 202.6996, "num_word_doc": 49.8395, "num_word_query": 39.9852, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2830.3072, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3521, "query_norm": 1.5, "queue_k_norm": 1.4482, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.388, "sent_len_1": 66.7743, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2, "stdk": 0.0482, "stdq": 0.0459, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 26900 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.8958, "doc_norm": 1.45, "encoder_q-embeddings": 1807.3359, "encoder_q-layer.0": 1153.4003, "encoder_q-layer.1": 1250.203, "encoder_q-layer.10": 1904.3054, "encoder_q-layer.11": 3820.3435, "encoder_q-layer.2": 1458.0116, "encoder_q-layer.3": 1549.5703, "encoder_q-layer.4": 1677.2507, "encoder_q-layer.5": 1714.7179, "encoder_q-layer.6": 1913.16, "encoder_q-layer.7": 1973.2997, "encoder_q-layer.8": 2309.7898, "encoder_q-layer.9": 1885.587, "epoch": 0.26, "inbatch_neg_score": 0.3613, "inbatch_pos_score": 1.0625, "learning_rate": 4.055555555555556e-05, "loss": 2.8958, "norm_diff": 0.0385, "norm_loss": 0.0, "num_token_doc": 66.7513, "num_token_overlap": 17.8084, "num_token_query": 52.2517, "num_token_union": 73.7001, "num_word_context": 202.1058, "num_word_doc": 49.7929, "num_word_query": 39.8354, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2897.7001, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3618, "query_norm": 1.4885, "queue_k_norm": 1.4496, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2517, "sent_len_1": 66.7513, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8425, "stdk": 0.0483, "stdq": 0.046, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 27000 }, { "accuracy": 56.6895, "active_queue_size": 16384.0, "cl_loss": 2.9061, "doc_norm": 1.4493, "encoder_q-embeddings": 3567.1641, "encoder_q-layer.0": 2448.313, "encoder_q-layer.1": 2861.4595, "encoder_q-layer.10": 1864.9822, "encoder_q-layer.11": 4024.085, "encoder_q-layer.2": 3539.2461, "encoder_q-layer.3": 4155.291, "encoder_q-layer.4": 5077.5801, "encoder_q-layer.5": 5326.2314, "encoder_q-layer.6": 5503.4531, "encoder_q-layer.7": 5619.3882, "encoder_q-layer.8": 3790.467, "encoder_q-layer.9": 2081.1582, "epoch": 0.26, "inbatch_neg_score": 0.3779, "inbatch_pos_score": 1.0703, "learning_rate": 4.05e-05, "loss": 2.9061, "norm_diff": 0.0338, "norm_loss": 0.0, "num_token_doc": 66.7372, "num_token_overlap": 17.7943, "num_token_query": 52.289, "num_token_union": 73.6989, "num_word_context": 202.1848, "num_word_doc": 49.8099, "num_word_query": 39.8711, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5886.4156, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3789, "query_norm": 1.4832, "queue_k_norm": 1.45, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.289, "sent_len_1": 66.7372, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0737, "stdk": 0.0482, "stdq": 0.0458, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 27100 }, { "accuracy": 57.7637, "active_queue_size": 16384.0, "cl_loss": 2.933, "doc_norm": 1.4557, "encoder_q-embeddings": 2479.1121, "encoder_q-layer.0": 1737.3931, "encoder_q-layer.1": 1850.5667, "encoder_q-layer.10": 2246.9111, "encoder_q-layer.11": 4195.2261, "encoder_q-layer.2": 2152.6572, "encoder_q-layer.3": 2268.0083, "encoder_q-layer.4": 2334.5476, "encoder_q-layer.5": 2216.595, "encoder_q-layer.6": 2520.4673, "encoder_q-layer.7": 2359.1267, "encoder_q-layer.8": 2375.647, "encoder_q-layer.9": 2149.2705, "epoch": 0.27, "inbatch_neg_score": 0.3833, "inbatch_pos_score": 1.0977, "learning_rate": 4.0444444444444444e-05, "loss": 2.933, "norm_diff": 0.0532, "norm_loss": 0.0, "num_token_doc": 66.6659, "num_token_overlap": 17.7339, "num_token_query": 52.1731, "num_token_union": 73.6393, "num_word_context": 202.1387, "num_word_doc": 49.7295, "num_word_query": 39.7768, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3522.753, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3833, "query_norm": 1.5089, "queue_k_norm": 1.4545, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1731, "sent_len_1": 66.6659, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2825, "stdk": 0.0483, "stdq": 0.047, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 27200 }, { "accuracy": 57.4707, "active_queue_size": 16384.0, "cl_loss": 2.914, "doc_norm": 1.4556, "encoder_q-embeddings": 2175.1372, "encoder_q-layer.0": 1473.7317, "encoder_q-layer.1": 1677.3021, "encoder_q-layer.10": 1835.7489, "encoder_q-layer.11": 3913.6304, "encoder_q-layer.2": 1994.8315, "encoder_q-layer.3": 2110.3184, "encoder_q-layer.4": 2240.1348, "encoder_q-layer.5": 2164.6489, "encoder_q-layer.6": 2233.1001, "encoder_q-layer.7": 2202.1748, "encoder_q-layer.8": 2374.7795, "encoder_q-layer.9": 1972.7075, "epoch": 0.27, "inbatch_neg_score": 0.3864, "inbatch_pos_score": 1.082, "learning_rate": 4.038888888888889e-05, "loss": 2.914, "norm_diff": 0.0292, "norm_loss": 0.0, "num_token_doc": 66.7402, "num_token_overlap": 17.7825, "num_token_query": 52.2568, "num_token_union": 73.6963, "num_word_context": 202.2117, "num_word_doc": 49.7951, "num_word_query": 39.8542, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3275.1642, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3872, "query_norm": 1.4849, "queue_k_norm": 1.4574, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2568, "sent_len_1": 66.7402, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3613, "stdk": 0.0482, "stdq": 0.0461, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 27300 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.9033, "doc_norm": 1.4552, "encoder_q-embeddings": 4043.5632, "encoder_q-layer.0": 2693.7539, "encoder_q-layer.1": 3323.1082, "encoder_q-layer.10": 1782.6454, "encoder_q-layer.11": 3827.4946, "encoder_q-layer.2": 3795.2273, "encoder_q-layer.3": 3598.1482, "encoder_q-layer.4": 3807.231, "encoder_q-layer.5": 3710.7996, "encoder_q-layer.6": 3406.5635, "encoder_q-layer.7": 2372.9453, "encoder_q-layer.8": 2147.7141, "encoder_q-layer.9": 1709.0731, "epoch": 0.27, "inbatch_neg_score": 0.3693, "inbatch_pos_score": 1.0723, "learning_rate": 4.0333333333333336e-05, "loss": 2.9033, "norm_diff": 0.0089, "norm_loss": 0.0, "num_token_doc": 66.8092, "num_token_overlap": 17.8412, "num_token_query": 52.4538, "num_token_union": 73.8305, "num_word_context": 202.4904, "num_word_doc": 49.8506, "num_word_query": 40.0312, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4782.7739, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3706, "query_norm": 1.4505, "queue_k_norm": 1.4584, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4538, "sent_len_1": 66.8092, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1575, "stdk": 0.0482, "stdq": 0.0457, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 27400 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 2.9112, "doc_norm": 1.4641, "encoder_q-embeddings": 2123.957, "encoder_q-layer.0": 1427.2714, "encoder_q-layer.1": 1594.7604, "encoder_q-layer.10": 1729.994, "encoder_q-layer.11": 3690.9631, "encoder_q-layer.2": 1789.0831, "encoder_q-layer.3": 1843.1484, "encoder_q-layer.4": 1966.2875, "encoder_q-layer.5": 1954.3302, "encoder_q-layer.6": 2099.0383, "encoder_q-layer.7": 2210.9836, "encoder_q-layer.8": 2331.3945, "encoder_q-layer.9": 1791.7277, "epoch": 0.27, "inbatch_neg_score": 0.3629, "inbatch_pos_score": 1.0703, "learning_rate": 4.027777777777778e-05, "loss": 2.9112, "norm_diff": 0.0103, "norm_loss": 0.0, "num_token_doc": 66.7456, "num_token_overlap": 17.8152, "num_token_query": 52.3737, "num_token_union": 73.723, "num_word_context": 202.1856, "num_word_doc": 49.8124, "num_word_query": 39.9433, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3134.0813, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3628, "query_norm": 1.4538, "queue_k_norm": 1.4599, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3737, "sent_len_1": 66.7456, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6213, "stdk": 0.0485, "stdq": 0.0462, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 27500 }, { "accuracy": 57.5684, "active_queue_size": 16384.0, "cl_loss": 2.9104, "doc_norm": 1.4587, "encoder_q-embeddings": 2064.9124, "encoder_q-layer.0": 1286.9901, "encoder_q-layer.1": 1441.231, "encoder_q-layer.10": 1983.9116, "encoder_q-layer.11": 4033.6475, "encoder_q-layer.2": 1688.308, "encoder_q-layer.3": 1825.7776, "encoder_q-layer.4": 1989.8247, "encoder_q-layer.5": 2088.8481, "encoder_q-layer.6": 2220.5156, "encoder_q-layer.7": 2222.0149, "encoder_q-layer.8": 2413.5303, "encoder_q-layer.9": 1919.2085, "epoch": 0.27, "inbatch_neg_score": 0.3618, "inbatch_pos_score": 1.0439, "learning_rate": 4.022222222222222e-05, "loss": 2.9104, "norm_diff": 0.0277, "norm_loss": 0.0, "num_token_doc": 66.7615, "num_token_overlap": 17.7837, "num_token_query": 52.2388, "num_token_union": 73.6856, "num_word_context": 202.0215, "num_word_doc": 49.804, "num_word_query": 39.84, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3192.9361, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3613, "query_norm": 1.431, "queue_k_norm": 1.4616, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2388, "sent_len_1": 66.7615, "sent_len_max_0": 128.0, "sent_len_max_1": 209.52, "stdk": 0.0482, "stdq": 0.0453, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 27600 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 2.8967, "doc_norm": 1.4607, "encoder_q-embeddings": 1887.0679, "encoder_q-layer.0": 1194.9979, "encoder_q-layer.1": 1346.9465, "encoder_q-layer.10": 1892.2181, "encoder_q-layer.11": 3943.9082, "encoder_q-layer.2": 1556.0615, "encoder_q-layer.3": 1637.7474, "encoder_q-layer.4": 1727.3387, "encoder_q-layer.5": 1708.7312, "encoder_q-layer.6": 1856.9597, "encoder_q-layer.7": 1912.8253, "encoder_q-layer.8": 2059.749, "encoder_q-layer.9": 1798.213, "epoch": 0.27, "inbatch_neg_score": 0.3546, "inbatch_pos_score": 1.0586, "learning_rate": 4.016666666666667e-05, "loss": 2.8967, "norm_diff": 0.0213, "norm_loss": 0.0, "num_token_doc": 66.758, "num_token_overlap": 17.8067, "num_token_query": 52.2134, "num_token_union": 73.6896, "num_word_context": 202.3925, "num_word_doc": 49.8384, "num_word_query": 39.8324, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2922.8744, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3552, "query_norm": 1.4394, "queue_k_norm": 1.4621, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2134, "sent_len_1": 66.758, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6587, "stdk": 0.0483, "stdq": 0.0458, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 27700 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 2.9113, "doc_norm": 1.4636, "encoder_q-embeddings": 20773.8105, "encoder_q-layer.0": 16696.3125, "encoder_q-layer.1": 18129.3652, "encoder_q-layer.10": 3738.1736, "encoder_q-layer.11": 7492.4204, "encoder_q-layer.2": 20677.3262, "encoder_q-layer.3": 23478.3926, "encoder_q-layer.4": 21357.3359, "encoder_q-layer.5": 18197.9629, "encoder_q-layer.6": 13213.5957, "encoder_q-layer.7": 7460.7983, "encoder_q-layer.8": 5447.1431, "encoder_q-layer.9": 3745.9326, "epoch": 0.27, "inbatch_neg_score": 0.3491, "inbatch_pos_score": 1.0547, "learning_rate": 4.011111111111111e-05, "loss": 2.9113, "norm_diff": 0.0128, "norm_loss": 0.0, "num_token_doc": 66.7403, "num_token_overlap": 17.7998, "num_token_query": 52.1252, "num_token_union": 73.5911, "num_word_context": 202.3552, "num_word_doc": 49.7972, "num_word_query": 39.7636, "postclip_grad_norm": 1.0, "preclip_grad_norm": 23501.7516, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3499, "query_norm": 1.4538, "queue_k_norm": 1.4604, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1252, "sent_len_1": 66.7403, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1838, "stdk": 0.0484, "stdq": 0.0465, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 27800 }, { "accuracy": 57.8613, "active_queue_size": 16384.0, "cl_loss": 2.8897, "doc_norm": 1.4619, "encoder_q-embeddings": 2408.999, "encoder_q-layer.0": 1543.7765, "encoder_q-layer.1": 1769.9084, "encoder_q-layer.10": 1840.1954, "encoder_q-layer.11": 3938.113, "encoder_q-layer.2": 2099.3176, "encoder_q-layer.3": 2361.4451, "encoder_q-layer.4": 2465.6245, "encoder_q-layer.5": 2604.9587, "encoder_q-layer.6": 2853.9116, "encoder_q-layer.7": 2456.6729, "encoder_q-layer.8": 2109.876, "encoder_q-layer.9": 1818.5303, "epoch": 0.27, "inbatch_neg_score": 0.3582, "inbatch_pos_score": 1.0635, "learning_rate": 4.0055555555555554e-05, "loss": 2.8897, "norm_diff": 0.0149, "norm_loss": 0.0, "num_token_doc": 66.8846, "num_token_overlap": 17.8371, "num_token_query": 52.2905, "num_token_union": 73.7528, "num_word_context": 202.3399, "num_word_doc": 49.9125, "num_word_query": 39.8652, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3542.9033, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3589, "query_norm": 1.447, "queue_k_norm": 1.4631, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2905, "sent_len_1": 66.8846, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8525, "stdk": 0.0483, "stdq": 0.0456, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 27900 }, { "accuracy": 57.666, "active_queue_size": 16384.0, "cl_loss": 2.9167, "doc_norm": 1.4611, "encoder_q-embeddings": 2200.3171, "encoder_q-layer.0": 1417.6553, "encoder_q-layer.1": 1605.0182, "encoder_q-layer.10": 1890.667, "encoder_q-layer.11": 3693.282, "encoder_q-layer.2": 1840.0713, "encoder_q-layer.3": 1953.9348, "encoder_q-layer.4": 1962.0768, "encoder_q-layer.5": 1883.7103, "encoder_q-layer.6": 2013.9307, "encoder_q-layer.7": 2063.6816, "encoder_q-layer.8": 2074.865, "encoder_q-layer.9": 1892.9801, "epoch": 0.27, "inbatch_neg_score": 0.3451, "inbatch_pos_score": 1.0557, "learning_rate": 4e-05, "loss": 2.9167, "norm_diff": 0.02, "norm_loss": 0.0, "num_token_doc": 66.7669, "num_token_overlap": 17.7817, "num_token_query": 52.3114, "num_token_union": 73.7503, "num_word_context": 202.2457, "num_word_doc": 49.8018, "num_word_query": 39.892, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3066.5839, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3452, "query_norm": 1.4411, "queue_k_norm": 1.4615, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3114, "sent_len_1": 66.7669, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6188, "stdk": 0.0483, "stdq": 0.0462, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 28000 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 2.8879, "doc_norm": 1.4585, "encoder_q-embeddings": 1793.882, "encoder_q-layer.0": 1179.693, "encoder_q-layer.1": 1360.2533, "encoder_q-layer.10": 1824.6587, "encoder_q-layer.11": 3765.7651, "encoder_q-layer.2": 1478.0616, "encoder_q-layer.3": 1587.1716, "encoder_q-layer.4": 1728.8951, "encoder_q-layer.5": 1779.444, "encoder_q-layer.6": 1921.1605, "encoder_q-layer.7": 2110.0222, "encoder_q-layer.8": 2226.3662, "encoder_q-layer.9": 1853.1838, "epoch": 0.27, "inbatch_neg_score": 0.3467, "inbatch_pos_score": 1.0449, "learning_rate": 3.9944444444444446e-05, "loss": 2.8879, "norm_diff": 0.0127, "norm_loss": 0.0, "num_token_doc": 66.8639, "num_token_overlap": 17.8682, "num_token_query": 52.2059, "num_token_union": 73.6361, "num_word_context": 202.0634, "num_word_doc": 49.8891, "num_word_query": 39.8166, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2920.5198, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3464, "query_norm": 1.4469, "queue_k_norm": 1.4641, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2059, "sent_len_1": 66.8639, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4375, "stdk": 0.0482, "stdq": 0.0463, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 28100 }, { "accuracy": 57.1777, "active_queue_size": 16384.0, "cl_loss": 2.9, "doc_norm": 1.4611, "encoder_q-embeddings": 2603.2239, "encoder_q-layer.0": 1811.0276, "encoder_q-layer.1": 2127.1147, "encoder_q-layer.10": 1811.6764, "encoder_q-layer.11": 3716.262, "encoder_q-layer.2": 2594.5461, "encoder_q-layer.3": 3074.1423, "encoder_q-layer.4": 3454.6091, "encoder_q-layer.5": 3373.7776, "encoder_q-layer.6": 4187.6855, "encoder_q-layer.7": 3261.5217, "encoder_q-layer.8": 2072.1472, "encoder_q-layer.9": 1777.4906, "epoch": 0.28, "inbatch_neg_score": 0.3413, "inbatch_pos_score": 1.0381, "learning_rate": 3.9888888888888895e-05, "loss": 2.9, "norm_diff": 0.0151, "norm_loss": 0.0, "num_token_doc": 66.8336, "num_token_overlap": 17.8261, "num_token_query": 52.2796, "num_token_union": 73.757, "num_word_context": 202.1617, "num_word_doc": 49.8891, "num_word_query": 39.8778, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4151.263, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3408, "query_norm": 1.4471, "queue_k_norm": 1.4645, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2796, "sent_len_1": 66.8336, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1262, "stdk": 0.0483, "stdq": 0.0463, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 28200 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 2.9007, "doc_norm": 1.46, "encoder_q-embeddings": 1671.2657, "encoder_q-layer.0": 1077.4852, "encoder_q-layer.1": 1190.1566, "encoder_q-layer.10": 1858.2942, "encoder_q-layer.11": 3577.1299, "encoder_q-layer.2": 1341.4236, "encoder_q-layer.3": 1420.8672, "encoder_q-layer.4": 1560.5128, "encoder_q-layer.5": 1571.1392, "encoder_q-layer.6": 1687.5192, "encoder_q-layer.7": 1825.3042, "encoder_q-layer.8": 2008.2683, "encoder_q-layer.9": 1865.317, "epoch": 0.28, "inbatch_neg_score": 0.338, "inbatch_pos_score": 1.0586, "learning_rate": 3.983333333333333e-05, "loss": 2.9007, "norm_diff": 0.0114, "norm_loss": 0.0, "num_token_doc": 66.7027, "num_token_overlap": 17.7794, "num_token_query": 52.2662, "num_token_union": 73.6776, "num_word_context": 202.2698, "num_word_doc": 49.7558, "num_word_query": 39.8518, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2663.4564, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3384, "query_norm": 1.4511, "queue_k_norm": 1.4617, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2662, "sent_len_1": 66.7027, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3363, "stdk": 0.0482, "stdq": 0.0466, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 28300 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 2.8951, "doc_norm": 1.4585, "encoder_q-embeddings": 3803.9097, "encoder_q-layer.0": 2772.0542, "encoder_q-layer.1": 3000.5061, "encoder_q-layer.10": 1822.2905, "encoder_q-layer.11": 3772.6416, "encoder_q-layer.2": 3353.0811, "encoder_q-layer.3": 3494.0547, "encoder_q-layer.4": 3874.8726, "encoder_q-layer.5": 3592.2351, "encoder_q-layer.6": 2981.2014, "encoder_q-layer.7": 2664.1194, "encoder_q-layer.8": 2714.3884, "encoder_q-layer.9": 1957.3859, "epoch": 0.28, "inbatch_neg_score": 0.3251, "inbatch_pos_score": 1.0215, "learning_rate": 3.977777777777778e-05, "loss": 2.8951, "norm_diff": 0.0234, "norm_loss": 0.0, "num_token_doc": 66.7229, "num_token_overlap": 17.7949, "num_token_query": 52.2728, "num_token_union": 73.6806, "num_word_context": 202.1668, "num_word_doc": 49.7582, "num_word_query": 39.882, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4624.8269, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3235, "query_norm": 1.4351, "queue_k_norm": 1.4607, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2728, "sent_len_1": 66.7229, "sent_len_max_0": 128.0, "sent_len_max_1": 212.035, "stdk": 0.0482, "stdq": 0.0462, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 28400 }, { "accuracy": 59.2285, "active_queue_size": 16384.0, "cl_loss": 2.8907, "doc_norm": 1.462, "encoder_q-embeddings": 2148.106, "encoder_q-layer.0": 1398.2719, "encoder_q-layer.1": 1576.9805, "encoder_q-layer.10": 1697.5104, "encoder_q-layer.11": 3519.0396, "encoder_q-layer.2": 1875.9109, "encoder_q-layer.3": 1973.2009, "encoder_q-layer.4": 2198.5879, "encoder_q-layer.5": 2150.3772, "encoder_q-layer.6": 2124.3943, "encoder_q-layer.7": 1954.3562, "encoder_q-layer.8": 1999.5399, "encoder_q-layer.9": 1715.9849, "epoch": 0.28, "inbatch_neg_score": 0.3288, "inbatch_pos_score": 1.0342, "learning_rate": 3.972222222222222e-05, "loss": 2.8907, "norm_diff": 0.0324, "norm_loss": 0.0, "num_token_doc": 66.8194, "num_token_overlap": 17.8452, "num_token_query": 52.4135, "num_token_union": 73.7751, "num_word_context": 202.3307, "num_word_doc": 49.8231, "num_word_query": 39.9684, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3057.5736, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3279, "query_norm": 1.4295, "queue_k_norm": 1.4619, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4135, "sent_len_1": 66.8194, "sent_len_max_0": 128.0, "sent_len_max_1": 209.88, "stdk": 0.0483, "stdq": 0.046, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 28500 }, { "accuracy": 57.959, "active_queue_size": 16384.0, "cl_loss": 2.8781, "doc_norm": 1.463, "encoder_q-embeddings": 1877.0806, "encoder_q-layer.0": 1222.7554, "encoder_q-layer.1": 1351.2556, "encoder_q-layer.10": 1703.5331, "encoder_q-layer.11": 3553.7949, "encoder_q-layer.2": 1497.5483, "encoder_q-layer.3": 1621.2344, "encoder_q-layer.4": 1783.1116, "encoder_q-layer.5": 1881.9635, "encoder_q-layer.6": 1915.2177, "encoder_q-layer.7": 1898.3375, "encoder_q-layer.8": 2057.9985, "encoder_q-layer.9": 1762.9991, "epoch": 0.28, "inbatch_neg_score": 0.325, "inbatch_pos_score": 1.0234, "learning_rate": 3.966666666666667e-05, "loss": 2.8781, "norm_diff": 0.0359, "norm_loss": 0.0, "num_token_doc": 66.7504, "num_token_overlap": 17.8752, "num_token_query": 52.4002, "num_token_union": 73.7137, "num_word_context": 202.3453, "num_word_doc": 49.7977, "num_word_query": 39.9615, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2842.0662, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3254, "query_norm": 1.4271, "queue_k_norm": 1.4594, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4002, "sent_len_1": 66.7504, "sent_len_max_0": 128.0, "sent_len_max_1": 209.7875, "stdk": 0.0484, "stdq": 0.0458, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 28600 }, { "accuracy": 58.3496, "active_queue_size": 16384.0, "cl_loss": 2.8939, "doc_norm": 1.4607, "encoder_q-embeddings": 2680.9561, "encoder_q-layer.0": 1786.8802, "encoder_q-layer.1": 2083.6902, "encoder_q-layer.10": 2101.3555, "encoder_q-layer.11": 3961.3677, "encoder_q-layer.2": 2407.2034, "encoder_q-layer.3": 2573.3684, "encoder_q-layer.4": 2877.0386, "encoder_q-layer.5": 2957.1335, "encoder_q-layer.6": 3397.3059, "encoder_q-layer.7": 2983.1611, "encoder_q-layer.8": 2308.78, "encoder_q-layer.9": 1973.934, "epoch": 0.28, "inbatch_neg_score": 0.3355, "inbatch_pos_score": 1.0303, "learning_rate": 3.961111111111111e-05, "loss": 2.8939, "norm_diff": 0.0355, "norm_loss": 0.0, "num_token_doc": 66.7897, "num_token_overlap": 17.8219, "num_token_query": 52.2287, "num_token_union": 73.6907, "num_word_context": 202.3189, "num_word_doc": 49.8369, "num_word_query": 39.8419, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3844.834, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3347, "query_norm": 1.4252, "queue_k_norm": 1.4613, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2287, "sent_len_1": 66.7897, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8288, "stdk": 0.0483, "stdq": 0.0455, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 28700 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 2.8962, "doc_norm": 1.4623, "encoder_q-embeddings": 2503.7061, "encoder_q-layer.0": 1722.9951, "encoder_q-layer.1": 1884.1388, "encoder_q-layer.10": 2020.2648, "encoder_q-layer.11": 3888.8516, "encoder_q-layer.2": 2301.0928, "encoder_q-layer.3": 2441.0735, "encoder_q-layer.4": 2593.5225, "encoder_q-layer.5": 2619.9929, "encoder_q-layer.6": 2610.0972, "encoder_q-layer.7": 2401.9563, "encoder_q-layer.8": 2246.844, "encoder_q-layer.9": 2018.9427, "epoch": 0.28, "inbatch_neg_score": 0.3194, "inbatch_pos_score": 1.0234, "learning_rate": 3.9555555555555556e-05, "loss": 2.8962, "norm_diff": 0.0143, "norm_loss": 0.0, "num_token_doc": 66.7053, "num_token_overlap": 17.7716, "num_token_query": 52.2856, "num_token_union": 73.6756, "num_word_context": 202.186, "num_word_doc": 49.7591, "num_word_query": 39.8506, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3611.2287, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3181, "query_norm": 1.452, "queue_k_norm": 1.4619, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2856, "sent_len_1": 66.7053, "sent_len_max_0": 128.0, "sent_len_max_1": 211.295, "stdk": 0.0484, "stdq": 0.0467, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 28800 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.8886, "doc_norm": 1.4588, "encoder_q-embeddings": 2085.4031, "encoder_q-layer.0": 1337.5934, "encoder_q-layer.1": 1473.5857, "encoder_q-layer.10": 1735.6396, "encoder_q-layer.11": 3497.0247, "encoder_q-layer.2": 1721.0654, "encoder_q-layer.3": 1785.0559, "encoder_q-layer.4": 1924.849, "encoder_q-layer.5": 1994.9619, "encoder_q-layer.6": 2099.0237, "encoder_q-layer.7": 2184.7554, "encoder_q-layer.8": 2064.2209, "encoder_q-layer.9": 1712.8533, "epoch": 0.28, "inbatch_neg_score": 0.3074, "inbatch_pos_score": 1.0186, "learning_rate": 3.9500000000000005e-05, "loss": 2.8886, "norm_diff": 0.0275, "norm_loss": 0.0, "num_token_doc": 66.7984, "num_token_overlap": 17.815, "num_token_query": 52.3316, "num_token_union": 73.7369, "num_word_context": 202.2665, "num_word_doc": 49.8461, "num_word_query": 39.9073, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2978.351, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3066, "query_norm": 1.4313, "queue_k_norm": 1.4612, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3316, "sent_len_1": 66.7984, "sent_len_max_0": 128.0, "sent_len_max_1": 210.8088, "stdk": 0.0483, "stdq": 0.046, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 28900 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 2.8946, "doc_norm": 1.4643, "encoder_q-embeddings": 2082.4072, "encoder_q-layer.0": 1330.1061, "encoder_q-layer.1": 1504.2471, "encoder_q-layer.10": 1812.375, "encoder_q-layer.11": 3507.7136, "encoder_q-layer.2": 1702.0323, "encoder_q-layer.3": 1922.927, "encoder_q-layer.4": 2136.0945, "encoder_q-layer.5": 2053.9214, "encoder_q-layer.6": 1979.1324, "encoder_q-layer.7": 1964.6704, "encoder_q-layer.8": 2061.7539, "encoder_q-layer.9": 1869.5723, "epoch": 0.28, "inbatch_neg_score": 0.3132, "inbatch_pos_score": 1.0244, "learning_rate": 3.944444444444445e-05, "loss": 2.8946, "norm_diff": 0.0099, "norm_loss": 0.0, "num_token_doc": 66.7003, "num_token_overlap": 17.8102, "num_token_query": 52.378, "num_token_union": 73.707, "num_word_context": 202.2475, "num_word_doc": 49.7487, "num_word_query": 39.9296, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3009.6426, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.313, "query_norm": 1.4583, "queue_k_norm": 1.4608, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.378, "sent_len_1": 66.7003, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0375, "stdk": 0.0485, "stdq": 0.0467, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 29000 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 2.8977, "doc_norm": 1.4604, "encoder_q-embeddings": 3326.7405, "encoder_q-layer.0": 2325.2041, "encoder_q-layer.1": 2695.7966, "encoder_q-layer.10": 1802.1348, "encoder_q-layer.11": 3549.3796, "encoder_q-layer.2": 3191.7134, "encoder_q-layer.3": 3232.5615, "encoder_q-layer.4": 3562.3213, "encoder_q-layer.5": 3553.6287, "encoder_q-layer.6": 3241.334, "encoder_q-layer.7": 2981.2307, "encoder_q-layer.8": 2498.9358, "encoder_q-layer.9": 2005.5083, "epoch": 0.28, "inbatch_neg_score": 0.3024, "inbatch_pos_score": 1.0156, "learning_rate": 3.938888888888889e-05, "loss": 2.8977, "norm_diff": 0.0066, "norm_loss": 0.0, "num_token_doc": 66.4996, "num_token_overlap": 17.7113, "num_token_query": 52.1169, "num_token_union": 73.4789, "num_word_context": 201.8918, "num_word_doc": 49.6068, "num_word_query": 39.7539, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4397.7607, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3018, "query_norm": 1.4572, "queue_k_norm": 1.4598, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1169, "sent_len_1": 66.4996, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4913, "stdk": 0.0484, "stdq": 0.0467, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 29100 }, { "accuracy": 59.9121, "active_queue_size": 16384.0, "cl_loss": 2.8844, "doc_norm": 1.4596, "encoder_q-embeddings": 1623.3425, "encoder_q-layer.0": 1051.5994, "encoder_q-layer.1": 1175.0938, "encoder_q-layer.10": 1822.1104, "encoder_q-layer.11": 3531.6665, "encoder_q-layer.2": 1401.4701, "encoder_q-layer.3": 1533.3619, "encoder_q-layer.4": 1640.7614, "encoder_q-layer.5": 1649.3182, "encoder_q-layer.6": 1868.7493, "encoder_q-layer.7": 1978.6122, "encoder_q-layer.8": 2063.7024, "encoder_q-layer.9": 1788.6123, "epoch": 0.29, "inbatch_neg_score": 0.2972, "inbatch_pos_score": 1.0068, "learning_rate": 3.933333333333333e-05, "loss": 2.8844, "norm_diff": 0.0193, "norm_loss": 0.0, "num_token_doc": 66.7341, "num_token_overlap": 17.7933, "num_token_query": 52.4185, "num_token_union": 73.7937, "num_word_context": 202.2732, "num_word_doc": 49.8074, "num_word_query": 39.9841, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2711.9819, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2974, "query_norm": 1.4414, "queue_k_norm": 1.46, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4185, "sent_len_1": 66.7341, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8137, "stdk": 0.0484, "stdq": 0.0458, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 29200 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 2.8807, "doc_norm": 1.4581, "encoder_q-embeddings": 1592.9623, "encoder_q-layer.0": 973.6573, "encoder_q-layer.1": 1098.64, "encoder_q-layer.10": 1956.0765, "encoder_q-layer.11": 3663.415, "encoder_q-layer.2": 1235.0657, "encoder_q-layer.3": 1347.8977, "encoder_q-layer.4": 1425.1802, "encoder_q-layer.5": 1430.891, "encoder_q-layer.6": 1537.5394, "encoder_q-layer.7": 1814.1184, "encoder_q-layer.8": 2089.4946, "encoder_q-layer.9": 1832.0243, "epoch": 0.29, "inbatch_neg_score": 0.3096, "inbatch_pos_score": 0.9912, "learning_rate": 3.927777777777778e-05, "loss": 2.8807, "norm_diff": 0.0276, "norm_loss": 0.0, "num_token_doc": 66.7199, "num_token_overlap": 17.7602, "num_token_query": 52.2361, "num_token_union": 73.7016, "num_word_context": 202.1888, "num_word_doc": 49.7871, "num_word_query": 39.835, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2637.919, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3088, "query_norm": 1.4305, "queue_k_norm": 1.4582, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2361, "sent_len_1": 66.7199, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1562, "stdk": 0.0484, "stdq": 0.0454, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 29300 }, { "accuracy": 57.7637, "active_queue_size": 16384.0, "cl_loss": 2.8862, "doc_norm": 1.4618, "encoder_q-embeddings": 3669.2671, "encoder_q-layer.0": 2490.2417, "encoder_q-layer.1": 2998.6223, "encoder_q-layer.10": 1753.9338, "encoder_q-layer.11": 3597.8955, "encoder_q-layer.2": 3582.4536, "encoder_q-layer.3": 3931.6394, "encoder_q-layer.4": 3892.7805, "encoder_q-layer.5": 3887.386, "encoder_q-layer.6": 3535.8091, "encoder_q-layer.7": 2825.9521, "encoder_q-layer.8": 2220.1226, "encoder_q-layer.9": 1841.1492, "epoch": 0.29, "inbatch_neg_score": 0.3216, "inbatch_pos_score": 1.0234, "learning_rate": 3.922222222222223e-05, "loss": 2.8862, "norm_diff": 0.0202, "norm_loss": 0.0, "num_token_doc": 66.7785, "num_token_overlap": 17.8053, "num_token_query": 52.2711, "num_token_union": 73.723, "num_word_context": 202.2073, "num_word_doc": 49.8328, "num_word_query": 39.8758, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4706.2121, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.321, "query_norm": 1.4415, "queue_k_norm": 1.4576, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2711, "sent_len_1": 66.7785, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1975, "stdk": 0.0486, "stdq": 0.0461, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 29400 }, { "accuracy": 58.9355, "active_queue_size": 16384.0, "cl_loss": 2.8702, "doc_norm": 1.4572, "encoder_q-embeddings": 1496.0232, "encoder_q-layer.0": 961.1851, "encoder_q-layer.1": 1072.4272, "encoder_q-layer.10": 1977.797, "encoder_q-layer.11": 3947.3196, "encoder_q-layer.2": 1224.2778, "encoder_q-layer.3": 1306.7845, "encoder_q-layer.4": 1485.6875, "encoder_q-layer.5": 1518.9965, "encoder_q-layer.6": 1705.0941, "encoder_q-layer.7": 1792.2808, "encoder_q-layer.8": 2230.9053, "encoder_q-layer.9": 1923.7997, "epoch": 0.29, "inbatch_neg_score": 0.3286, "inbatch_pos_score": 1.0264, "learning_rate": 3.9166666666666665e-05, "loss": 2.8702, "norm_diff": 0.0407, "norm_loss": 0.0, "num_token_doc": 66.7599, "num_token_overlap": 17.7803, "num_token_query": 52.2464, "num_token_union": 73.7004, "num_word_context": 202.2847, "num_word_doc": 49.8453, "num_word_query": 39.864, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2715.1987, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3286, "query_norm": 1.4165, "queue_k_norm": 1.4584, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2464, "sent_len_1": 66.7599, "sent_len_max_0": 128.0, "sent_len_max_1": 207.3625, "stdk": 0.0484, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 29500 }, { "accuracy": 56.5918, "active_queue_size": 16384.0, "cl_loss": 2.8628, "doc_norm": 1.4561, "encoder_q-embeddings": 3286.5193, "encoder_q-layer.0": 2229.7822, "encoder_q-layer.1": 3130.6609, "encoder_q-layer.10": 1970.5804, "encoder_q-layer.11": 3919.304, "encoder_q-layer.2": 3752.0244, "encoder_q-layer.3": 4177.6572, "encoder_q-layer.4": 4365.7524, "encoder_q-layer.5": 3906.8193, "encoder_q-layer.6": 3475.74, "encoder_q-layer.7": 2806.0149, "encoder_q-layer.8": 2641.5715, "encoder_q-layer.9": 2058.6753, "epoch": 0.29, "inbatch_neg_score": 0.3363, "inbatch_pos_score": 1.0352, "learning_rate": 3.9111111111111115e-05, "loss": 2.8628, "norm_diff": 0.0127, "norm_loss": 0.0, "num_token_doc": 66.7317, "num_token_overlap": 17.8212, "num_token_query": 52.244, "num_token_union": 73.6183, "num_word_context": 202.1441, "num_word_doc": 49.7513, "num_word_query": 39.8369, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4825.2754, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3359, "query_norm": 1.4441, "queue_k_norm": 1.4567, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.244, "sent_len_1": 66.7317, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3075, "stdk": 0.0483, "stdq": 0.0465, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 29600 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.8705, "doc_norm": 1.4612, "encoder_q-embeddings": 3774.6418, "encoder_q-layer.0": 2437.5803, "encoder_q-layer.1": 2845.7241, "encoder_q-layer.10": 1833.0815, "encoder_q-layer.11": 3658.8523, "encoder_q-layer.2": 3073.168, "encoder_q-layer.3": 2961.0371, "encoder_q-layer.4": 2926.7236, "encoder_q-layer.5": 2457.0027, "encoder_q-layer.6": 2242.2747, "encoder_q-layer.7": 2050.2529, "encoder_q-layer.8": 2124.6489, "encoder_q-layer.9": 1800.808, "epoch": 0.29, "inbatch_neg_score": 0.3386, "inbatch_pos_score": 1.0664, "learning_rate": 3.905555555555556e-05, "loss": 2.8705, "norm_diff": 0.0083, "norm_loss": 0.0, "num_token_doc": 66.8415, "num_token_overlap": 17.8498, "num_token_query": 52.356, "num_token_union": 73.7843, "num_word_context": 202.3598, "num_word_doc": 49.8639, "num_word_query": 39.9182, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4068.6689, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3403, "query_norm": 1.467, "queue_k_norm": 1.4603, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.356, "sent_len_1": 66.8415, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9325, "stdk": 0.0485, "stdq": 0.0472, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 29700 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.8721, "doc_norm": 1.4628, "encoder_q-embeddings": 5739.75, "encoder_q-layer.0": 4184.5615, "encoder_q-layer.1": 4735.6304, "encoder_q-layer.10": 1856.7708, "encoder_q-layer.11": 3617.8601, "encoder_q-layer.2": 6628.4219, "encoder_q-layer.3": 6594.0024, "encoder_q-layer.4": 6819.0303, "encoder_q-layer.5": 8334.5488, "encoder_q-layer.6": 7811.499, "encoder_q-layer.7": 5812.835, "encoder_q-layer.8": 4761.7764, "encoder_q-layer.9": 2456.3396, "epoch": 0.29, "inbatch_neg_score": 0.3398, "inbatch_pos_score": 1.0625, "learning_rate": 3.9000000000000006e-05, "loss": 2.8721, "norm_diff": 0.014, "norm_loss": 0.0, "num_token_doc": 66.8392, "num_token_overlap": 17.8482, "num_token_query": 52.3354, "num_token_union": 73.7606, "num_word_context": 202.4151, "num_word_doc": 49.8911, "num_word_query": 39.9213, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8427.0964, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3408, "query_norm": 1.4489, "queue_k_norm": 1.4612, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3354, "sent_len_1": 66.8392, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6962, "stdk": 0.0486, "stdq": 0.0466, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 29800 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 2.8759, "doc_norm": 1.4564, "encoder_q-embeddings": 4147.002, "encoder_q-layer.0": 2754.1174, "encoder_q-layer.1": 3180.1084, "encoder_q-layer.10": 4016.0073, "encoder_q-layer.11": 7634.6479, "encoder_q-layer.2": 3953.9067, "encoder_q-layer.3": 4408.4067, "encoder_q-layer.4": 4430.7915, "encoder_q-layer.5": 4207.251, "encoder_q-layer.6": 4039.885, "encoder_q-layer.7": 4066.1494, "encoder_q-layer.8": 4329.7891, "encoder_q-layer.9": 3868.2896, "epoch": 0.29, "inbatch_neg_score": 0.3419, "inbatch_pos_score": 1.0273, "learning_rate": 3.894444444444444e-05, "loss": 2.8759, "norm_diff": 0.0497, "norm_loss": 0.0, "num_token_doc": 66.7805, "num_token_overlap": 17.7981, "num_token_query": 52.29, "num_token_union": 73.7442, "num_word_context": 202.4232, "num_word_doc": 49.7956, "num_word_query": 39.8902, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6340.4863, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3413, "query_norm": 1.4068, "queue_k_norm": 1.4599, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.29, "sent_len_1": 66.7805, "sent_len_max_0": 128.0, "sent_len_max_1": 212.0563, "stdk": 0.0483, "stdq": 0.0451, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 29900 }, { "accuracy": 58.4473, "active_queue_size": 16384.0, "cl_loss": 2.8916, "doc_norm": 1.4647, "encoder_q-embeddings": 3578.0957, "encoder_q-layer.0": 2264.1921, "encoder_q-layer.1": 2459.9402, "encoder_q-layer.10": 3713.3538, "encoder_q-layer.11": 6992.1284, "encoder_q-layer.2": 2861.3188, "encoder_q-layer.3": 3110.5576, "encoder_q-layer.4": 3602.8792, "encoder_q-layer.5": 3523.0632, "encoder_q-layer.6": 3675.3914, "encoder_q-layer.7": 3788.0581, "encoder_q-layer.8": 4217.5122, "encoder_q-layer.9": 3646.9219, "epoch": 0.29, "inbatch_neg_score": 0.3452, "inbatch_pos_score": 1.0742, "learning_rate": 3.888888888888889e-05, "loss": 2.8916, "norm_diff": 0.0175, "norm_loss": 0.0, "num_token_doc": 66.5594, "num_token_overlap": 17.7892, "num_token_query": 52.3243, "num_token_union": 73.6235, "num_word_context": 202.1606, "num_word_doc": 49.6727, "num_word_query": 39.9021, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5553.579, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3452, "query_norm": 1.4472, "queue_k_norm": 1.4622, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3243, "sent_len_1": 66.5594, "sent_len_max_0": 128.0, "sent_len_max_1": 206.9263, "stdk": 0.0486, "stdq": 0.0466, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 30000 }, { "dev_runtime": 26.831, "dev_samples_per_second": 1.193, "dev_steps_per_second": 0.037, "epoch": 0.29, "step": 30000, "test_accuracy": 93.2861328125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3886674642562866, "test_doc_norm": 1.413996696472168, "test_inbatch_neg_score": 0.6109520196914673, "test_inbatch_pos_score": 1.5767266750335693, "test_loss": 0.3886674642562866, "test_loss_align": 1.035063624382019, "test_loss_unif": 3.7569239139556885, "test_loss_unif_q@queue": 3.7569239139556885, "test_norm_diff": 0.056777771562337875, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.33945387601852417, "test_query_norm": 1.4707744121551514, "test_queue_k_norm": 1.4623589515686035, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04257344454526901, "test_stdq": 0.0433054082095623, "test_stdqueue_k": 0.04855499789118767, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.831, "dev_samples_per_second": 1.193, "dev_steps_per_second": 0.037, "epoch": 0.29, "eval_beir-arguana_ndcg@10": 0.35514, "eval_beir-arguana_recall@10": 0.60526, "eval_beir-arguana_recall@100": 0.90896, "eval_beir-arguana_recall@20": 0.74253, "eval_beir-avg_ndcg@10": 0.36885450000000003, "eval_beir-avg_recall@10": 0.436673, "eval_beir-avg_recall@100": 0.6177871666666667, "eval_beir-avg_recall@20": 0.49967941666666665, "eval_beir-cqadupstack_ndcg@10": 0.257925, "eval_beir-cqadupstack_recall@10": 0.34979, "eval_beir-cqadupstack_recall@100": 0.5744716666666666, "eval_beir-cqadupstack_recall@20": 0.41347416666666675, "eval_beir-fiqa_ndcg@10": 0.23625, "eval_beir-fiqa_recall@10": 0.29355, "eval_beir-fiqa_recall@100": 0.55965, "eval_beir-fiqa_recall@20": 0.3715, "eval_beir-nfcorpus_ndcg@10": 0.27928, "eval_beir-nfcorpus_recall@10": 0.1352, "eval_beir-nfcorpus_recall@100": 0.26497, "eval_beir-nfcorpus_recall@20": 0.16606, "eval_beir-nq_ndcg@10": 0.27017, "eval_beir-nq_recall@10": 0.44322, "eval_beir-nq_recall@100": 0.77378, "eval_beir-nq_recall@20": 0.55352, "eval_beir-quora_ndcg@10": 0.74688, "eval_beir-quora_recall@10": 0.86494, "eval_beir-quora_recall@100": 0.96918, "eval_beir-quora_recall@20": 0.91048, "eval_beir-scidocs_ndcg@10": 0.15037, "eval_beir-scidocs_recall@10": 0.15562, "eval_beir-scidocs_recall@100": 0.35118, "eval_beir-scidocs_recall@20": 0.21112, "eval_beir-scifact_ndcg@10": 0.62683, "eval_beir-scifact_recall@10": 0.77356, "eval_beir-scifact_recall@100": 0.91989, "eval_beir-scifact_recall@20": 0.82467, "eval_beir-trec-covid_ndcg@10": 0.56727, "eval_beir-trec-covid_recall@10": 0.61, "eval_beir-trec-covid_recall@100": 0.441, "eval_beir-trec-covid_recall@20": 0.594, "eval_beir-webis-touche2020_ndcg@10": 0.19843, "eval_beir-webis-touche2020_recall@10": 0.13559, "eval_beir-webis-touche2020_recall@100": 0.41479, "eval_beir-webis-touche2020_recall@20": 0.20944, "eval_senteval-avg_sts": 0.7520761097083069, "eval_senteval-sickr_spearman": 0.724596908108759, "eval_senteval-stsb_spearman": 0.7795553113078548, "step": 30000, "test_accuracy": 93.2861328125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3886674642562866, "test_doc_norm": 1.413996696472168, "test_inbatch_neg_score": 0.6109520196914673, "test_inbatch_pos_score": 1.5767266750335693, "test_loss": 0.3886674642562866, "test_loss_align": 1.035063624382019, "test_loss_unif": 3.7569239139556885, "test_loss_unif_q@queue": 3.7569239139556885, "test_norm_diff": 0.056777771562337875, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.33945387601852417, "test_query_norm": 1.4707744121551514, "test_queue_k_norm": 1.4623589515686035, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04257344454526901, "test_stdq": 0.0433054082095623, "test_stdqueue_k": 0.04855499789118767, "test_stdqueue_q": 0.0 }, { "accuracy": 59.0332, "active_queue_size": 16384.0, "cl_loss": 2.869, "doc_norm": 1.4646, "encoder_q-embeddings": 3056.844, "encoder_q-layer.0": 1929.7225, "encoder_q-layer.1": 2112.8086, "encoder_q-layer.10": 3472.4314, "encoder_q-layer.11": 7062.9556, "encoder_q-layer.2": 2322.437, "encoder_q-layer.3": 2479.9561, "encoder_q-layer.4": 2625.5989, "encoder_q-layer.5": 2753.106, "encoder_q-layer.6": 3029.4697, "encoder_q-layer.7": 3324.6274, "encoder_q-layer.8": 4111.8418, "encoder_q-layer.9": 3645.5791, "epoch": 0.29, "inbatch_neg_score": 0.3493, "inbatch_pos_score": 1.0742, "learning_rate": 3.883333333333333e-05, "loss": 2.869, "norm_diff": 0.0132, "norm_loss": 0.0, "num_token_doc": 66.7056, "num_token_overlap": 17.8211, "num_token_query": 52.2922, "num_token_union": 73.6839, "num_word_context": 202.227, "num_word_doc": 49.7913, "num_word_query": 39.9035, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5090.0093, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3503, "query_norm": 1.4597, "queue_k_norm": 1.4602, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2922, "sent_len_1": 66.7056, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9688, "stdk": 0.0486, "stdq": 0.0469, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 30100 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.8664, "doc_norm": 1.4632, "encoder_q-embeddings": 4219.3711, "encoder_q-layer.0": 2817.4971, "encoder_q-layer.1": 3272.064, "encoder_q-layer.10": 3709.7288, "encoder_q-layer.11": 7476.1782, "encoder_q-layer.2": 3644.1304, "encoder_q-layer.3": 3747.4246, "encoder_q-layer.4": 3711.4119, "encoder_q-layer.5": 3844.6387, "encoder_q-layer.6": 4430.2383, "encoder_q-layer.7": 4278.5474, "encoder_q-layer.8": 4461.7271, "encoder_q-layer.9": 3636.8625, "epoch": 0.29, "inbatch_neg_score": 0.3566, "inbatch_pos_score": 1.0781, "learning_rate": 3.877777777777778e-05, "loss": 2.8664, "norm_diff": 0.0126, "norm_loss": 0.0, "num_token_doc": 66.7309, "num_token_overlap": 17.7846, "num_token_query": 52.2261, "num_token_union": 73.6831, "num_word_context": 202.159, "num_word_doc": 49.7953, "num_word_query": 39.8392, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6136.8386, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3567, "query_norm": 1.4527, "queue_k_norm": 1.4615, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2261, "sent_len_1": 66.7309, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4238, "stdk": 0.0485, "stdq": 0.0464, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 30200 }, { "accuracy": 58.8379, "active_queue_size": 16384.0, "cl_loss": 2.852, "doc_norm": 1.4626, "encoder_q-embeddings": 3582.1655, "encoder_q-layer.0": 2290.9956, "encoder_q-layer.1": 2521.0259, "encoder_q-layer.10": 3520.356, "encoder_q-layer.11": 6974.1548, "encoder_q-layer.2": 2946.5439, "encoder_q-layer.3": 3167.3042, "encoder_q-layer.4": 3596.7322, "encoder_q-layer.5": 3728.6177, "encoder_q-layer.6": 4099.3247, "encoder_q-layer.7": 3746.8696, "encoder_q-layer.8": 4008.7351, "encoder_q-layer.9": 3520.8242, "epoch": 0.3, "inbatch_neg_score": 0.3581, "inbatch_pos_score": 1.0723, "learning_rate": 3.8722222222222225e-05, "loss": 2.852, "norm_diff": 0.0208, "norm_loss": 0.0, "num_token_doc": 66.864, "num_token_overlap": 17.8451, "num_token_query": 52.3743, "num_token_union": 73.8249, "num_word_context": 202.2484, "num_word_doc": 49.8908, "num_word_query": 39.9562, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5580.5691, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3586, "query_norm": 1.4425, "queue_k_norm": 1.4628, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3743, "sent_len_1": 66.864, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9437, "stdk": 0.0484, "stdq": 0.0461, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 30300 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.87, "doc_norm": 1.4699, "encoder_q-embeddings": 3465.9536, "encoder_q-layer.0": 2374.8616, "encoder_q-layer.1": 2573.6921, "encoder_q-layer.10": 3611.6653, "encoder_q-layer.11": 7270.5479, "encoder_q-layer.2": 3020.5959, "encoder_q-layer.3": 3229.5903, "encoder_q-layer.4": 3597.0684, "encoder_q-layer.5": 3408.4604, "encoder_q-layer.6": 3977.2983, "encoder_q-layer.7": 3885.0857, "encoder_q-layer.8": 4074.7068, "encoder_q-layer.9": 3521.2397, "epoch": 0.3, "inbatch_neg_score": 0.3709, "inbatch_pos_score": 1.1055, "learning_rate": 3.866666666666667e-05, "loss": 2.87, "norm_diff": 0.0104, "norm_loss": 0.0, "num_token_doc": 66.6855, "num_token_overlap": 17.8263, "num_token_query": 52.2154, "num_token_union": 73.5637, "num_word_context": 201.9743, "num_word_doc": 49.7537, "num_word_query": 39.8092, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5679.2479, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3708, "query_norm": 1.4747, "queue_k_norm": 1.4639, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2154, "sent_len_1": 66.6855, "sent_len_max_0": 128.0, "sent_len_max_1": 209.955, "stdk": 0.0487, "stdq": 0.0468, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 30400 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 2.8694, "doc_norm": 1.4658, "encoder_q-embeddings": 3154.5569, "encoder_q-layer.0": 1994.5785, "encoder_q-layer.1": 2200.3728, "encoder_q-layer.10": 3340.3435, "encoder_q-layer.11": 6934.1108, "encoder_q-layer.2": 2537.8562, "encoder_q-layer.3": 2597.8865, "encoder_q-layer.4": 2987.5159, "encoder_q-layer.5": 2907.9727, "encoder_q-layer.6": 3238.5571, "encoder_q-layer.7": 3409.6575, "encoder_q-layer.8": 3871.2336, "encoder_q-layer.9": 3554.2319, "epoch": 0.3, "inbatch_neg_score": 0.3646, "inbatch_pos_score": 1.083, "learning_rate": 3.8611111111111116e-05, "loss": 2.8694, "norm_diff": 0.0089, "norm_loss": 0.0, "num_token_doc": 66.7943, "num_token_overlap": 17.8137, "num_token_query": 52.36, "num_token_union": 73.7623, "num_word_context": 202.4964, "num_word_doc": 49.8196, "num_word_query": 39.9371, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5128.129, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3633, "query_norm": 1.4588, "queue_k_norm": 1.4644, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.36, "sent_len_1": 66.7943, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0325, "stdk": 0.0485, "stdq": 0.0464, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 30500 }, { "accuracy": 56.8848, "active_queue_size": 16384.0, "cl_loss": 2.8531, "doc_norm": 1.4587, "encoder_q-embeddings": 4092.5679, "encoder_q-layer.0": 2772.1985, "encoder_q-layer.1": 3132.0588, "encoder_q-layer.10": 3685.4912, "encoder_q-layer.11": 7032.5532, "encoder_q-layer.2": 3798.9995, "encoder_q-layer.3": 4104.7754, "encoder_q-layer.4": 4440.6597, "encoder_q-layer.5": 4402.7173, "encoder_q-layer.6": 4738.4995, "encoder_q-layer.7": 4528.9146, "encoder_q-layer.8": 4777.3467, "encoder_q-layer.9": 3989.9543, "epoch": 0.3, "inbatch_neg_score": 0.3722, "inbatch_pos_score": 1.0762, "learning_rate": 3.855555555555556e-05, "loss": 2.8531, "norm_diff": 0.0205, "norm_loss": 0.0, "num_token_doc": 66.6617, "num_token_overlap": 17.7866, "num_token_query": 52.2607, "num_token_union": 73.6568, "num_word_context": 202.2207, "num_word_doc": 49.7263, "num_word_query": 39.8541, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6367.6466, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.373, "query_norm": 1.4792, "queue_k_norm": 1.4658, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2607, "sent_len_1": 66.6617, "sent_len_max_0": 128.0, "sent_len_max_1": 207.2763, "stdk": 0.0482, "stdq": 0.047, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 30600 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 2.8479, "doc_norm": 1.4684, "encoder_q-embeddings": 4143.2026, "encoder_q-layer.0": 2712.3179, "encoder_q-layer.1": 3166.2888, "encoder_q-layer.10": 4004.9707, "encoder_q-layer.11": 7505.0205, "encoder_q-layer.2": 3722.3916, "encoder_q-layer.3": 4174.8994, "encoder_q-layer.4": 4332.9995, "encoder_q-layer.5": 4046.5591, "encoder_q-layer.6": 3794.4824, "encoder_q-layer.7": 4012.1428, "encoder_q-layer.8": 4268.0107, "encoder_q-layer.9": 3670.9824, "epoch": 0.3, "inbatch_neg_score": 0.3737, "inbatch_pos_score": 1.0723, "learning_rate": 3.85e-05, "loss": 2.8479, "norm_diff": 0.0126, "norm_loss": 0.0, "num_token_doc": 66.8984, "num_token_overlap": 17.8602, "num_token_query": 52.3034, "num_token_union": 73.7477, "num_word_context": 202.2055, "num_word_doc": 49.9392, "num_word_query": 39.8884, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6250.2689, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3743, "query_norm": 1.459, "queue_k_norm": 1.4669, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3034, "sent_len_1": 66.8984, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7237, "stdk": 0.0485, "stdq": 0.046, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 30700 }, { "accuracy": 59.9121, "active_queue_size": 16384.0, "cl_loss": 2.843, "doc_norm": 1.4655, "encoder_q-embeddings": 9301.7881, "encoder_q-layer.0": 6532.4351, "encoder_q-layer.1": 7406.1113, "encoder_q-layer.10": 4090.6138, "encoder_q-layer.11": 7227.5645, "encoder_q-layer.2": 8448.3301, "encoder_q-layer.3": 8597.7061, "encoder_q-layer.4": 9785.1133, "encoder_q-layer.5": 9474.165, "encoder_q-layer.6": 8353.583, "encoder_q-layer.7": 6713.9624, "encoder_q-layer.8": 5081.2896, "encoder_q-layer.9": 3949.0295, "epoch": 0.3, "inbatch_neg_score": 0.3711, "inbatch_pos_score": 1.0859, "learning_rate": 3.844444444444444e-05, "loss": 2.843, "norm_diff": 0.0127, "norm_loss": 0.0, "num_token_doc": 66.9152, "num_token_overlap": 17.8431, "num_token_query": 52.3396, "num_token_union": 73.7546, "num_word_context": 202.3072, "num_word_doc": 49.9207, "num_word_query": 39.9324, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11224.6432, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3721, "query_norm": 1.4737, "queue_k_norm": 1.4688, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3396, "sent_len_1": 66.9152, "sent_len_max_0": 128.0, "sent_len_max_1": 210.835, "stdk": 0.0484, "stdq": 0.0465, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 30800 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 2.8351, "doc_norm": 1.4715, "encoder_q-embeddings": 5262.0146, "encoder_q-layer.0": 3524.3696, "encoder_q-layer.1": 4043.4697, "encoder_q-layer.10": 4181.4839, "encoder_q-layer.11": 7680.9277, "encoder_q-layer.2": 4580.0146, "encoder_q-layer.3": 4469.793, "encoder_q-layer.4": 4727.3721, "encoder_q-layer.5": 4264.4907, "encoder_q-layer.6": 4732.3096, "encoder_q-layer.7": 4693.8774, "encoder_q-layer.8": 4661.5645, "encoder_q-layer.9": 3800.4998, "epoch": 0.3, "inbatch_neg_score": 0.3838, "inbatch_pos_score": 1.1055, "learning_rate": 3.838888888888889e-05, "loss": 2.8351, "norm_diff": 0.0172, "norm_loss": 0.0, "num_token_doc": 66.8584, "num_token_overlap": 17.854, "num_token_query": 52.3388, "num_token_union": 73.7651, "num_word_context": 202.3018, "num_word_doc": 49.9, "num_word_query": 39.9248, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6934.964, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3828, "query_norm": 1.4888, "queue_k_norm": 1.4696, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3388, "sent_len_1": 66.8584, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0975, "stdk": 0.0485, "stdq": 0.0467, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 30900 }, { "accuracy": 58.3496, "active_queue_size": 16384.0, "cl_loss": 2.8583, "doc_norm": 1.469, "encoder_q-embeddings": 3669.6812, "encoder_q-layer.0": 2201.1023, "encoder_q-layer.1": 2389.7251, "encoder_q-layer.10": 3656.6702, "encoder_q-layer.11": 7312.854, "encoder_q-layer.2": 2822.4146, "encoder_q-layer.3": 2971.3403, "encoder_q-layer.4": 3382.6304, "encoder_q-layer.5": 3447.4104, "encoder_q-layer.6": 3783.6421, "encoder_q-layer.7": 4009.3479, "encoder_q-layer.8": 4725.9561, "encoder_q-layer.9": 3889.3169, "epoch": 0.3, "inbatch_neg_score": 0.3716, "inbatch_pos_score": 1.0889, "learning_rate": 3.8333333333333334e-05, "loss": 2.8583, "norm_diff": 0.0137, "norm_loss": 0.0, "num_token_doc": 66.8571, "num_token_overlap": 17.8153, "num_token_query": 52.2569, "num_token_union": 73.7437, "num_word_context": 202.3314, "num_word_doc": 49.9116, "num_word_query": 39.8481, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5684.2359, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3711, "query_norm": 1.477, "queue_k_norm": 1.4708, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2569, "sent_len_1": 66.8571, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4038, "stdk": 0.0484, "stdq": 0.0462, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 31000 }, { "accuracy": 59.2285, "active_queue_size": 16384.0, "cl_loss": 2.8432, "doc_norm": 1.4698, "encoder_q-embeddings": 3881.2722, "encoder_q-layer.0": 2617.594, "encoder_q-layer.1": 3040.4431, "encoder_q-layer.10": 4114.0498, "encoder_q-layer.11": 7621.5508, "encoder_q-layer.2": 3598.5203, "encoder_q-layer.3": 3789.3025, "encoder_q-layer.4": 4088.2568, "encoder_q-layer.5": 3919.2219, "encoder_q-layer.6": 4223.0703, "encoder_q-layer.7": 4375.6699, "encoder_q-layer.8": 4719.9854, "encoder_q-layer.9": 3913.9336, "epoch": 0.3, "inbatch_neg_score": 0.3933, "inbatch_pos_score": 1.1221, "learning_rate": 3.827777777777778e-05, "loss": 2.8432, "norm_diff": 0.0491, "norm_loss": 0.0, "num_token_doc": 66.7867, "num_token_overlap": 17.8109, "num_token_query": 52.2651, "num_token_union": 73.7218, "num_word_context": 202.1999, "num_word_doc": 49.8257, "num_word_query": 39.8492, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6163.6611, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3936, "query_norm": 1.5189, "queue_k_norm": 1.4713, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2651, "sent_len_1": 66.7867, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4762, "stdk": 0.0484, "stdq": 0.0471, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 31100 }, { "accuracy": 59.0332, "active_queue_size": 16384.0, "cl_loss": 2.8545, "doc_norm": 1.4726, "encoder_q-embeddings": 4178.2397, "encoder_q-layer.0": 2915.7637, "encoder_q-layer.1": 3342.7678, "encoder_q-layer.10": 3855.4597, "encoder_q-layer.11": 7606.7485, "encoder_q-layer.2": 3866.8994, "encoder_q-layer.3": 4119.5854, "encoder_q-layer.4": 4336.4663, "encoder_q-layer.5": 4505.7432, "encoder_q-layer.6": 4976.9868, "encoder_q-layer.7": 4512.4927, "encoder_q-layer.8": 4438.3364, "encoder_q-layer.9": 3845.8298, "epoch": 0.3, "inbatch_neg_score": 0.4011, "inbatch_pos_score": 1.1279, "learning_rate": 3.8222222222222226e-05, "loss": 2.8545, "norm_diff": 0.0269, "norm_loss": 0.0, "num_token_doc": 66.8399, "num_token_overlap": 17.8341, "num_token_query": 52.2837, "num_token_union": 73.7505, "num_word_context": 202.3306, "num_word_doc": 49.8341, "num_word_query": 39.8514, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6523.6678, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4014, "query_norm": 1.4995, "queue_k_norm": 1.473, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2837, "sent_len_1": 66.8399, "sent_len_max_0": 128.0, "sent_len_max_1": 210.41, "stdk": 0.0485, "stdq": 0.0461, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 31200 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.8435, "doc_norm": 1.4785, "encoder_q-embeddings": 3206.1992, "encoder_q-layer.0": 2019.6949, "encoder_q-layer.1": 2318.093, "encoder_q-layer.10": 4105.2871, "encoder_q-layer.11": 7562.749, "encoder_q-layer.2": 2630.0127, "encoder_q-layer.3": 2763.554, "encoder_q-layer.4": 3184.0525, "encoder_q-layer.5": 3266.2522, "encoder_q-layer.6": 3462.9871, "encoder_q-layer.7": 3795.0972, "encoder_q-layer.8": 4309.8804, "encoder_q-layer.9": 3798.2383, "epoch": 0.31, "inbatch_neg_score": 0.4166, "inbatch_pos_score": 1.1201, "learning_rate": 3.816666666666667e-05, "loss": 2.8435, "norm_diff": 0.0248, "norm_loss": 0.0, "num_token_doc": 66.8877, "num_token_overlap": 17.836, "num_token_query": 52.2999, "num_token_union": 73.7563, "num_word_context": 202.7236, "num_word_doc": 49.9213, "num_word_query": 39.9015, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5432.5352, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4163, "query_norm": 1.5033, "queue_k_norm": 1.4751, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2999, "sent_len_1": 66.8877, "sent_len_max_0": 128.0, "sent_len_max_1": 207.64, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 31300 }, { "accuracy": 55.9082, "active_queue_size": 16384.0, "cl_loss": 2.8438, "doc_norm": 1.4747, "encoder_q-embeddings": 3029.0203, "encoder_q-layer.0": 1877.6669, "encoder_q-layer.1": 2150.4465, "encoder_q-layer.10": 3520.8481, "encoder_q-layer.11": 7115.7832, "encoder_q-layer.2": 2445.6184, "encoder_q-layer.3": 2546.1392, "encoder_q-layer.4": 2705.0186, "encoder_q-layer.5": 2806.7417, "encoder_q-layer.6": 3014.4587, "encoder_q-layer.7": 3473.6636, "encoder_q-layer.8": 4084.3792, "encoder_q-layer.9": 3659.7661, "epoch": 0.31, "inbatch_neg_score": 0.4285, "inbatch_pos_score": 1.1211, "learning_rate": 3.811111111111112e-05, "loss": 2.8438, "norm_diff": 0.0471, "norm_loss": 0.0, "num_token_doc": 66.821, "num_token_overlap": 17.835, "num_token_query": 52.3034, "num_token_union": 73.7326, "num_word_context": 202.4014, "num_word_doc": 49.8724, "num_word_query": 39.9051, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5134.2059, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4277, "query_norm": 1.5218, "queue_k_norm": 1.4784, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3034, "sent_len_1": 66.821, "sent_len_max_0": 128.0, "sent_len_max_1": 209.845, "stdk": 0.0484, "stdq": 0.0464, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 31400 }, { "accuracy": 58.8379, "active_queue_size": 16384.0, "cl_loss": 2.8615, "doc_norm": 1.4825, "encoder_q-embeddings": 3443.8499, "encoder_q-layer.0": 2140.3684, "encoder_q-layer.1": 2367.55, "encoder_q-layer.10": 3444.4954, "encoder_q-layer.11": 6725.25, "encoder_q-layer.2": 2791.0359, "encoder_q-layer.3": 2992.7317, "encoder_q-layer.4": 3179.3479, "encoder_q-layer.5": 3304.4062, "encoder_q-layer.6": 3638.8594, "encoder_q-layer.7": 3730.2488, "encoder_q-layer.8": 3961.7778, "encoder_q-layer.9": 3467.0002, "epoch": 0.31, "inbatch_neg_score": 0.4281, "inbatch_pos_score": 1.1514, "learning_rate": 3.805555555555555e-05, "loss": 2.8615, "norm_diff": 0.0357, "norm_loss": 0.0, "num_token_doc": 66.6668, "num_token_overlap": 17.7855, "num_token_query": 52.2739, "num_token_union": 73.6556, "num_word_context": 202.1637, "num_word_doc": 49.7488, "num_word_query": 39.8794, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5292.5673, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4285, "query_norm": 1.5182, "queue_k_norm": 1.4786, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2739, "sent_len_1": 66.6668, "sent_len_max_0": 128.0, "sent_len_max_1": 210.5975, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 31500 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 2.8522, "doc_norm": 1.4826, "encoder_q-embeddings": 5101.356, "encoder_q-layer.0": 3279.8223, "encoder_q-layer.1": 3495.6521, "encoder_q-layer.10": 3868.2649, "encoder_q-layer.11": 7369.0244, "encoder_q-layer.2": 4118.2969, "encoder_q-layer.3": 4172.0415, "encoder_q-layer.4": 4274.1836, "encoder_q-layer.5": 3905.4819, "encoder_q-layer.6": 3959.4509, "encoder_q-layer.7": 3783.9302, "encoder_q-layer.8": 4161.729, "encoder_q-layer.9": 3745.9475, "epoch": 0.31, "inbatch_neg_score": 0.4333, "inbatch_pos_score": 1.1357, "learning_rate": 3.8e-05, "loss": 2.8522, "norm_diff": 0.0128, "norm_loss": 0.0, "num_token_doc": 66.7588, "num_token_overlap": 17.7625, "num_token_query": 52.2513, "num_token_union": 73.7059, "num_word_context": 202.2062, "num_word_doc": 49.8072, "num_word_query": 39.8383, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6446.291, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4336, "query_norm": 1.4947, "queue_k_norm": 1.4802, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2513, "sent_len_1": 66.7588, "sent_len_max_0": 128.0, "sent_len_max_1": 210.2812, "stdk": 0.0486, "stdq": 0.046, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 31600 }, { "accuracy": 58.0566, "active_queue_size": 16384.0, "cl_loss": 2.857, "doc_norm": 1.481, "encoder_q-embeddings": 4159.4502, "encoder_q-layer.0": 2574.7351, "encoder_q-layer.1": 3006.7322, "encoder_q-layer.10": 3764.6216, "encoder_q-layer.11": 7303.6343, "encoder_q-layer.2": 3456.1509, "encoder_q-layer.3": 4025.8694, "encoder_q-layer.4": 4260.8623, "encoder_q-layer.5": 4086.9673, "encoder_q-layer.6": 4673.9102, "encoder_q-layer.7": 5056.8081, "encoder_q-layer.8": 4799.7812, "encoder_q-layer.9": 3833.7205, "epoch": 0.31, "inbatch_neg_score": 0.4351, "inbatch_pos_score": 1.1387, "learning_rate": 3.7944444444444444e-05, "loss": 2.857, "norm_diff": 0.0137, "norm_loss": 0.0, "num_token_doc": 66.7536, "num_token_overlap": 17.7879, "num_token_query": 52.1997, "num_token_union": 73.6954, "num_word_context": 202.2281, "num_word_doc": 49.8439, "num_word_query": 39.802, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6313.8095, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4358, "query_norm": 1.4944, "queue_k_norm": 1.4819, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1997, "sent_len_1": 66.7536, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6325, "stdk": 0.0485, "stdq": 0.0464, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 31700 }, { "accuracy": 56.8848, "active_queue_size": 16384.0, "cl_loss": 2.8457, "doc_norm": 1.4818, "encoder_q-embeddings": 3339.2661, "encoder_q-layer.0": 2204.1975, "encoder_q-layer.1": 2485.5862, "encoder_q-layer.10": 3475.6084, "encoder_q-layer.11": 7533.5225, "encoder_q-layer.2": 2891.0566, "encoder_q-layer.3": 3045.0852, "encoder_q-layer.4": 3315.3423, "encoder_q-layer.5": 3334.5139, "encoder_q-layer.6": 3591.1252, "encoder_q-layer.7": 3698.8127, "encoder_q-layer.8": 4308.4561, "encoder_q-layer.9": 3746.2029, "epoch": 0.31, "inbatch_neg_score": 0.4279, "inbatch_pos_score": 1.1191, "learning_rate": 3.7888888888888894e-05, "loss": 2.8457, "norm_diff": 0.0228, "norm_loss": 0.0, "num_token_doc": 66.8564, "num_token_overlap": 17.8331, "num_token_query": 52.2505, "num_token_union": 73.7628, "num_word_context": 202.4103, "num_word_doc": 49.8775, "num_word_query": 39.832, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5554.0836, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4263, "query_norm": 1.459, "queue_k_norm": 1.4822, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2505, "sent_len_1": 66.8564, "sent_len_max_0": 128.0, "sent_len_max_1": 209.26, "stdk": 0.0484, "stdq": 0.0454, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 31800 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.8615, "doc_norm": 1.4875, "encoder_q-embeddings": 10419.6328, "encoder_q-layer.0": 6795.9438, "encoder_q-layer.1": 8020.875, "encoder_q-layer.10": 7826.3018, "encoder_q-layer.11": 14451.7314, "encoder_q-layer.2": 9380.9922, "encoder_q-layer.3": 10577.3262, "encoder_q-layer.4": 11513.584, "encoder_q-layer.5": 11618.6396, "encoder_q-layer.6": 12388.0195, "encoder_q-layer.7": 11968.1602, "encoder_q-layer.8": 11821.0234, "encoder_q-layer.9": 9261.3311, "epoch": 0.31, "inbatch_neg_score": 0.4255, "inbatch_pos_score": 1.1318, "learning_rate": 3.7833333333333336e-05, "loss": 2.8615, "norm_diff": 0.0124, "norm_loss": 0.0, "num_token_doc": 66.9427, "num_token_overlap": 17.8077, "num_token_query": 52.2121, "num_token_union": 73.7499, "num_word_context": 202.491, "num_word_doc": 49.9227, "num_word_query": 39.8119, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15566.0748, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.426, "query_norm": 1.478, "queue_k_norm": 1.4842, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2121, "sent_len_1": 66.9427, "sent_len_max_0": 128.0, "sent_len_max_1": 210.8512, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 31900 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.8357, "doc_norm": 1.4858, "encoder_q-embeddings": 11664.0664, "encoder_q-layer.0": 7638.0088, "encoder_q-layer.1": 9814.6943, "encoder_q-layer.10": 7150.5312, "encoder_q-layer.11": 14239.5459, "encoder_q-layer.2": 10622.2441, "encoder_q-layer.3": 11309.4717, "encoder_q-layer.4": 11949.7168, "encoder_q-layer.5": 12392.4609, "encoder_q-layer.6": 10653.834, "encoder_q-layer.7": 9046.5215, "encoder_q-layer.8": 8929.2256, "encoder_q-layer.9": 7341.9619, "epoch": 0.31, "inbatch_neg_score": 0.4137, "inbatch_pos_score": 1.1465, "learning_rate": 3.777777777777778e-05, "loss": 2.8357, "norm_diff": 0.0136, "norm_loss": 0.0, "num_token_doc": 67.0178, "num_token_overlap": 17.8865, "num_token_query": 52.3304, "num_token_union": 73.8494, "num_word_context": 202.5663, "num_word_doc": 50.0234, "num_word_query": 39.8994, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15250.2588, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4136, "query_norm": 1.474, "queue_k_norm": 1.4864, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3304, "sent_len_1": 67.0178, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1612, "stdk": 0.0485, "stdq": 0.0467, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 32000 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.8473, "doc_norm": 1.4853, "encoder_q-embeddings": 7088.3501, "encoder_q-layer.0": 4180.8447, "encoder_q-layer.1": 4545.0342, "encoder_q-layer.10": 8166.6733, "encoder_q-layer.11": 15353.5059, "encoder_q-layer.2": 5149.6313, "encoder_q-layer.3": 5689.4458, "encoder_q-layer.4": 6256.9692, "encoder_q-layer.5": 6450.5649, "encoder_q-layer.6": 7161.376, "encoder_q-layer.7": 7863.6069, "encoder_q-layer.8": 8950.9316, "encoder_q-layer.9": 7933.8276, "epoch": 0.31, "inbatch_neg_score": 0.4082, "inbatch_pos_score": 1.124, "learning_rate": 3.772222222222223e-05, "loss": 2.8473, "norm_diff": 0.0133, "norm_loss": 0.0, "num_token_doc": 66.8625, "num_token_overlap": 17.8365, "num_token_query": 52.3019, "num_token_union": 73.7317, "num_word_context": 202.1963, "num_word_doc": 49.8687, "num_word_query": 39.8728, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11311.6581, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4084, "query_norm": 1.4727, "queue_k_norm": 1.489, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3019, "sent_len_1": 66.8625, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5625, "stdk": 0.0485, "stdq": 0.0467, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 32100 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.8412, "doc_norm": 1.4871, "encoder_q-embeddings": 7482.3345, "encoder_q-layer.0": 5070.0615, "encoder_q-layer.1": 6520.6289, "encoder_q-layer.10": 3502.2666, "encoder_q-layer.11": 7030.7695, "encoder_q-layer.2": 7100.5435, "encoder_q-layer.3": 6985.4585, "encoder_q-layer.4": 7394.623, "encoder_q-layer.5": 7767.4238, "encoder_q-layer.6": 8272.5977, "encoder_q-layer.7": 7016.0869, "encoder_q-layer.8": 4844.2388, "encoder_q-layer.9": 3494.3445, "epoch": 0.31, "inbatch_neg_score": 0.3942, "inbatch_pos_score": 1.1104, "learning_rate": 3.766666666666667e-05, "loss": 2.8412, "norm_diff": 0.0451, "norm_loss": 0.0, "num_token_doc": 66.8035, "num_token_overlap": 17.7882, "num_token_query": 52.2767, "num_token_union": 73.7168, "num_word_context": 202.2415, "num_word_doc": 49.8525, "num_word_query": 39.886, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9612.7026, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3953, "query_norm": 1.4419, "queue_k_norm": 1.4871, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2767, "sent_len_1": 66.8035, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8587, "stdk": 0.0486, "stdq": 0.0459, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 32200 }, { "accuracy": 57.7637, "active_queue_size": 16384.0, "cl_loss": 2.8449, "doc_norm": 1.4852, "encoder_q-embeddings": 3093.2148, "encoder_q-layer.0": 1974.3116, "encoder_q-layer.1": 2193.1497, "encoder_q-layer.10": 3475.6531, "encoder_q-layer.11": 6692.9961, "encoder_q-layer.2": 2507.5747, "encoder_q-layer.3": 2710.7883, "encoder_q-layer.4": 2839.1243, "encoder_q-layer.5": 2941.0247, "encoder_q-layer.6": 3296.9536, "encoder_q-layer.7": 3524.0471, "encoder_q-layer.8": 3939.5471, "encoder_q-layer.9": 3488.5181, "epoch": 0.32, "inbatch_neg_score": 0.3983, "inbatch_pos_score": 1.124, "learning_rate": 3.761111111111111e-05, "loss": 2.8449, "norm_diff": 0.0164, "norm_loss": 0.0, "num_token_doc": 66.7795, "num_token_overlap": 17.7942, "num_token_query": 52.1987, "num_token_union": 73.684, "num_word_context": 202.396, "num_word_doc": 49.8206, "num_word_query": 39.8191, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5053.0746, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3982, "query_norm": 1.4721, "queue_k_norm": 1.4886, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1987, "sent_len_1": 66.7795, "sent_len_max_0": 128.0, "sent_len_max_1": 209.305, "stdk": 0.0485, "stdq": 0.0469, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 32300 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 2.8343, "doc_norm": 1.4876, "encoder_q-embeddings": 4330.5132, "encoder_q-layer.0": 2787.2131, "encoder_q-layer.1": 3329.375, "encoder_q-layer.10": 3562.7585, "encoder_q-layer.11": 7209.04, "encoder_q-layer.2": 3812.5679, "encoder_q-layer.3": 4137.9224, "encoder_q-layer.4": 4246.1641, "encoder_q-layer.5": 4654.9458, "encoder_q-layer.6": 4856.3838, "encoder_q-layer.7": 4968.0596, "encoder_q-layer.8": 4944.9375, "encoder_q-layer.9": 3693.2561, "epoch": 0.32, "inbatch_neg_score": 0.3988, "inbatch_pos_score": 1.126, "learning_rate": 3.7555555555555554e-05, "loss": 2.8343, "norm_diff": 0.0156, "norm_loss": 0.0, "num_token_doc": 66.8099, "num_token_overlap": 17.8323, "num_token_query": 52.4209, "num_token_union": 73.7737, "num_word_context": 202.1457, "num_word_doc": 49.8278, "num_word_query": 39.9782, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6465.0307, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3982, "query_norm": 1.472, "queue_k_norm": 1.4865, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4209, "sent_len_1": 66.8099, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0462, "stdk": 0.0486, "stdq": 0.0468, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 32400 }, { "accuracy": 58.9355, "active_queue_size": 16384.0, "cl_loss": 2.842, "doc_norm": 1.4895, "encoder_q-embeddings": 9447.8184, "encoder_q-layer.0": 6874.1875, "encoder_q-layer.1": 7156.6025, "encoder_q-layer.10": 3417.5681, "encoder_q-layer.11": 6958.186, "encoder_q-layer.2": 8307.3105, "encoder_q-layer.3": 8734.6182, "encoder_q-layer.4": 8772.8174, "encoder_q-layer.5": 9307.6309, "encoder_q-layer.6": 9006.0957, "encoder_q-layer.7": 8376.583, "encoder_q-layer.8": 5345.9492, "encoder_q-layer.9": 3635.1143, "epoch": 0.32, "inbatch_neg_score": 0.3985, "inbatch_pos_score": 1.0986, "learning_rate": 3.7500000000000003e-05, "loss": 2.842, "norm_diff": 0.0459, "norm_loss": 0.0, "num_token_doc": 66.6552, "num_token_overlap": 17.7846, "num_token_query": 52.2558, "num_token_union": 73.6537, "num_word_context": 202.1682, "num_word_doc": 49.7321, "num_word_query": 39.8691, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11539.9498, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3994, "query_norm": 1.4436, "queue_k_norm": 1.4848, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2558, "sent_len_1": 66.6552, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5863, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 32500 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.8403, "doc_norm": 1.4875, "encoder_q-embeddings": 23659.709, "encoder_q-layer.0": 18145.4434, "encoder_q-layer.1": 21366.7793, "encoder_q-layer.10": 3714.2231, "encoder_q-layer.11": 6932.8872, "encoder_q-layer.2": 23890.8047, "encoder_q-layer.3": 26207.2305, "encoder_q-layer.4": 24837.4883, "encoder_q-layer.5": 24973.1406, "encoder_q-layer.6": 22911.5176, "encoder_q-layer.7": 13710.0469, "encoder_q-layer.8": 6546.0708, "encoder_q-layer.9": 3969.9822, "epoch": 0.32, "inbatch_neg_score": 0.3978, "inbatch_pos_score": 1.1211, "learning_rate": 3.7444444444444446e-05, "loss": 2.8403, "norm_diff": 0.0353, "norm_loss": 0.0, "num_token_doc": 66.7299, "num_token_overlap": 17.7885, "num_token_query": 52.3209, "num_token_union": 73.718, "num_word_context": 202.3519, "num_word_doc": 49.8042, "num_word_query": 39.9006, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28108.3404, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.3979, "query_norm": 1.4523, "queue_k_norm": 1.4851, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3209, "sent_len_1": 66.7299, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1937, "stdk": 0.0486, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 32600 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 2.834, "doc_norm": 1.4857, "encoder_q-embeddings": 11239.7236, "encoder_q-layer.0": 7917.583, "encoder_q-layer.1": 9161.4189, "encoder_q-layer.10": 3863.1003, "encoder_q-layer.11": 7019.6431, "encoder_q-layer.2": 10823.0068, "encoder_q-layer.3": 12685.5264, "encoder_q-layer.4": 14023.5938, "encoder_q-layer.5": 13491.5811, "encoder_q-layer.6": 12021.376, "encoder_q-layer.7": 8578.875, "encoder_q-layer.8": 6158.4839, "encoder_q-layer.9": 4494.0674, "epoch": 0.32, "inbatch_neg_score": 0.3918, "inbatch_pos_score": 1.0908, "learning_rate": 3.738888888888889e-05, "loss": 2.834, "norm_diff": 0.0351, "norm_loss": 0.0, "num_token_doc": 66.8026, "num_token_overlap": 17.8231, "num_token_query": 52.3783, "num_token_union": 73.7692, "num_word_context": 202.5568, "num_word_doc": 49.8331, "num_word_query": 39.9672, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14608.6673, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3921, "query_norm": 1.4507, "queue_k_norm": 1.4854, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3783, "sent_len_1": 66.8026, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4313, "stdk": 0.0485, "stdq": 0.0461, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 32700 }, { "accuracy": 58.252, "active_queue_size": 16384.0, "cl_loss": 2.8358, "doc_norm": 1.4865, "encoder_q-embeddings": 1657.2654, "encoder_q-layer.0": 1092.0767, "encoder_q-layer.1": 1287.4946, "encoder_q-layer.10": 2140.261, "encoder_q-layer.11": 4002.4141, "encoder_q-layer.2": 1542.9457, "encoder_q-layer.3": 1624.856, "encoder_q-layer.4": 1756.2346, "encoder_q-layer.5": 1760.6218, "encoder_q-layer.6": 1878.0745, "encoder_q-layer.7": 2010.4492, "encoder_q-layer.8": 2113.3728, "encoder_q-layer.9": 1945.2875, "epoch": 0.32, "inbatch_neg_score": 0.3717, "inbatch_pos_score": 1.0947, "learning_rate": 3.733333333333334e-05, "loss": 2.8358, "norm_diff": 0.0191, "norm_loss": 0.0, "num_token_doc": 66.7884, "num_token_overlap": 17.8216, "num_token_query": 52.3303, "num_token_union": 73.697, "num_word_context": 202.197, "num_word_doc": 49.8366, "num_word_query": 39.923, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2836.3569, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3711, "query_norm": 1.4674, "queue_k_norm": 1.4872, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3303, "sent_len_1": 66.7884, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3837, "stdk": 0.0485, "stdq": 0.0471, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 32800 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 2.8419, "doc_norm": 1.4884, "encoder_q-embeddings": 1912.5317, "encoder_q-layer.0": 1254.4258, "encoder_q-layer.1": 1418.4783, "encoder_q-layer.10": 1797.5347, "encoder_q-layer.11": 3552.3481, "encoder_q-layer.2": 1651.8887, "encoder_q-layer.3": 1830.9352, "encoder_q-layer.4": 1967.0599, "encoder_q-layer.5": 1976.1493, "encoder_q-layer.6": 2037.7545, "encoder_q-layer.7": 2328.2874, "encoder_q-layer.8": 2295.8657, "encoder_q-layer.9": 1813.9778, "epoch": 0.32, "inbatch_neg_score": 0.3702, "inbatch_pos_score": 1.0781, "learning_rate": 3.727777777777778e-05, "loss": 2.8419, "norm_diff": 0.0411, "norm_loss": 0.0, "num_token_doc": 66.8365, "num_token_overlap": 17.8333, "num_token_query": 52.3647, "num_token_union": 73.7931, "num_word_context": 202.4141, "num_word_doc": 49.9135, "num_word_query": 39.9542, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2971.3583, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3696, "query_norm": 1.4473, "queue_k_norm": 1.4862, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3647, "sent_len_1": 66.8365, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1312, "stdk": 0.0486, "stdq": 0.0461, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 32900 }, { "accuracy": 57.5684, "active_queue_size": 16384.0, "cl_loss": 2.849, "doc_norm": 1.4862, "encoder_q-embeddings": 1798.4684, "encoder_q-layer.0": 1142.7563, "encoder_q-layer.1": 1229.5072, "encoder_q-layer.10": 1767.233, "encoder_q-layer.11": 3526.5312, "encoder_q-layer.2": 1448.9011, "encoder_q-layer.3": 1515.6788, "encoder_q-layer.4": 1563.6678, "encoder_q-layer.5": 1620.705, "encoder_q-layer.6": 1802.3002, "encoder_q-layer.7": 1762.4194, "encoder_q-layer.8": 1985.1895, "encoder_q-layer.9": 1850.3668, "epoch": 0.32, "inbatch_neg_score": 0.3681, "inbatch_pos_score": 1.0771, "learning_rate": 3.722222222222222e-05, "loss": 2.849, "norm_diff": 0.0303, "norm_loss": 0.0, "num_token_doc": 66.8166, "num_token_overlap": 17.7818, "num_token_query": 52.2344, "num_token_union": 73.7348, "num_word_context": 202.4221, "num_word_doc": 49.8533, "num_word_query": 39.8412, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2725.9007, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3677, "query_norm": 1.4559, "queue_k_norm": 1.4841, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2344, "sent_len_1": 66.8166, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3212, "stdk": 0.0486, "stdq": 0.0464, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 33000 }, { "accuracy": 59.7168, "active_queue_size": 16384.0, "cl_loss": 2.8419, "doc_norm": 1.4858, "encoder_q-embeddings": 2026.8455, "encoder_q-layer.0": 1231.2377, "encoder_q-layer.1": 1266.9229, "encoder_q-layer.10": 1824.3912, "encoder_q-layer.11": 3622.4124, "encoder_q-layer.2": 1521.3632, "encoder_q-layer.3": 1576.911, "encoder_q-layer.4": 1667.7164, "encoder_q-layer.5": 1664.3622, "encoder_q-layer.6": 1724.5961, "encoder_q-layer.7": 1746.326, "encoder_q-layer.8": 2057.7107, "encoder_q-layer.9": 1804.2393, "epoch": 0.32, "inbatch_neg_score": 0.3537, "inbatch_pos_score": 1.0869, "learning_rate": 3.7166666666666664e-05, "loss": 2.8419, "norm_diff": 0.015, "norm_loss": 0.0, "num_token_doc": 66.7727, "num_token_overlap": 17.836, "num_token_query": 52.3876, "num_token_union": 73.7497, "num_word_context": 202.1937, "num_word_doc": 49.8045, "num_word_query": 39.9683, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2785.9868, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3533, "query_norm": 1.4708, "queue_k_norm": 1.4836, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3876, "sent_len_1": 66.7727, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2262, "stdk": 0.0486, "stdq": 0.0467, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 33100 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.8424, "doc_norm": 1.4848, "encoder_q-embeddings": 1469.8687, "encoder_q-layer.0": 895.4934, "encoder_q-layer.1": 1024.6814, "encoder_q-layer.10": 1843.0723, "encoder_q-layer.11": 3505.6787, "encoder_q-layer.2": 1128.2312, "encoder_q-layer.3": 1202.7428, "encoder_q-layer.4": 1294.9271, "encoder_q-layer.5": 1366.1904, "encoder_q-layer.6": 1543.4946, "encoder_q-layer.7": 1699.2852, "encoder_q-layer.8": 2047.843, "encoder_q-layer.9": 1851.2107, "epoch": 0.32, "inbatch_neg_score": 0.3429, "inbatch_pos_score": 1.0645, "learning_rate": 3.7111111111111113e-05, "loss": 2.8424, "norm_diff": 0.0151, "norm_loss": 0.0, "num_token_doc": 66.6396, "num_token_overlap": 17.8096, "num_token_query": 52.312, "num_token_union": 73.6736, "num_word_context": 202.1499, "num_word_doc": 49.7227, "num_word_query": 39.9141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2488.9993, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3435, "query_norm": 1.4713, "queue_k_norm": 1.4816, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.312, "sent_len_1": 66.6396, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0225, "stdk": 0.0486, "stdq": 0.0466, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 33200 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.8222, "doc_norm": 1.4788, "encoder_q-embeddings": 3295.6797, "encoder_q-layer.0": 2295.3926, "encoder_q-layer.1": 2532.707, "encoder_q-layer.10": 1808.1689, "encoder_q-layer.11": 3367.2805, "encoder_q-layer.2": 3118.3689, "encoder_q-layer.3": 3523.0005, "encoder_q-layer.4": 3760.4026, "encoder_q-layer.5": 3887.3003, "encoder_q-layer.6": 3677.2026, "encoder_q-layer.7": 3169.5552, "encoder_q-layer.8": 2572.1909, "encoder_q-layer.9": 1787.1649, "epoch": 0.33, "inbatch_neg_score": 0.3486, "inbatch_pos_score": 1.0625, "learning_rate": 3.705555555555556e-05, "loss": 2.8222, "norm_diff": 0.0158, "norm_loss": 0.0, "num_token_doc": 66.8986, "num_token_overlap": 17.8757, "num_token_query": 52.3206, "num_token_union": 73.7603, "num_word_context": 202.0866, "num_word_doc": 49.9082, "num_word_query": 39.9108, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4495.9933, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3484, "query_norm": 1.4635, "queue_k_norm": 1.4837, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3206, "sent_len_1": 66.8986, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3413, "stdk": 0.0484, "stdq": 0.0458, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 33300 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 2.8289, "doc_norm": 1.48, "encoder_q-embeddings": 1565.8735, "encoder_q-layer.0": 1024.8357, "encoder_q-layer.1": 1120.8668, "encoder_q-layer.10": 1704.9258, "encoder_q-layer.11": 3415.6125, "encoder_q-layer.2": 1310.5695, "encoder_q-layer.3": 1385.0155, "encoder_q-layer.4": 1532.5707, "encoder_q-layer.5": 1468.6417, "encoder_q-layer.6": 1608.9023, "encoder_q-layer.7": 1750.9628, "encoder_q-layer.8": 1951.9352, "encoder_q-layer.9": 1710.1731, "epoch": 0.33, "inbatch_neg_score": 0.3455, "inbatch_pos_score": 1.0498, "learning_rate": 3.7e-05, "loss": 2.8289, "norm_diff": 0.0131, "norm_loss": 0.0, "num_token_doc": 66.6479, "num_token_overlap": 17.819, "num_token_query": 52.3664, "num_token_union": 73.7284, "num_word_context": 202.1923, "num_word_doc": 49.7438, "num_word_query": 39.9214, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2535.3511, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.345, "query_norm": 1.4924, "queue_k_norm": 1.4819, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3664, "sent_len_1": 66.6479, "sent_len_max_0": 128.0, "sent_len_max_1": 206.7163, "stdk": 0.0485, "stdq": 0.0464, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 33400 }, { "accuracy": 58.6426, "active_queue_size": 16384.0, "cl_loss": 2.8324, "doc_norm": 1.4796, "encoder_q-embeddings": 4797.7676, "encoder_q-layer.0": 3147.4961, "encoder_q-layer.1": 3874.1584, "encoder_q-layer.10": 1758.8248, "encoder_q-layer.11": 3262.6943, "encoder_q-layer.2": 4567.231, "encoder_q-layer.3": 5200.647, "encoder_q-layer.4": 5829.3262, "encoder_q-layer.5": 5504.6348, "encoder_q-layer.6": 6574.6123, "encoder_q-layer.7": 4981.7861, "encoder_q-layer.8": 2868.3281, "encoder_q-layer.9": 1876.7338, "epoch": 0.33, "inbatch_neg_score": 0.355, "inbatch_pos_score": 1.0537, "learning_rate": 3.694444444444445e-05, "loss": 2.8324, "norm_diff": 0.0121, "norm_loss": 0.0, "num_token_doc": 66.7737, "num_token_overlap": 17.7737, "num_token_query": 52.1865, "num_token_union": 73.6889, "num_word_context": 202.263, "num_word_doc": 49.8396, "num_word_query": 39.8126, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6490.4608, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3535, "query_norm": 1.4823, "queue_k_norm": 1.4804, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1865, "sent_len_1": 66.7737, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4525, "stdk": 0.0485, "stdq": 0.0457, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 33500 }, { "accuracy": 59.2285, "active_queue_size": 16384.0, "cl_loss": 2.8179, "doc_norm": 1.4814, "encoder_q-embeddings": 1552.9427, "encoder_q-layer.0": 967.3697, "encoder_q-layer.1": 1113.9167, "encoder_q-layer.10": 1987.9316, "encoder_q-layer.11": 3650.1948, "encoder_q-layer.2": 1295.3342, "encoder_q-layer.3": 1379.0972, "encoder_q-layer.4": 1459.6277, "encoder_q-layer.5": 1511.146, "encoder_q-layer.6": 1730.2137, "encoder_q-layer.7": 1826.3427, "encoder_q-layer.8": 2154.4319, "encoder_q-layer.9": 1960.4928, "epoch": 0.33, "inbatch_neg_score": 0.3691, "inbatch_pos_score": 1.0986, "learning_rate": 3.688888888888889e-05, "loss": 2.8179, "norm_diff": 0.0326, "norm_loss": 0.0, "num_token_doc": 66.7277, "num_token_overlap": 17.8271, "num_token_query": 52.3193, "num_token_union": 73.6681, "num_word_context": 202.1742, "num_word_doc": 49.8146, "num_word_query": 39.901, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2626.0127, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3691, "query_norm": 1.514, "queue_k_norm": 1.48, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3193, "sent_len_1": 66.7277, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2837, "stdk": 0.0486, "stdq": 0.047, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 33600 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.8277, "doc_norm": 1.4798, "encoder_q-embeddings": 2195.9976, "encoder_q-layer.0": 1568.4501, "encoder_q-layer.1": 1827.7717, "encoder_q-layer.10": 1778.4739, "encoder_q-layer.11": 3541.4297, "encoder_q-layer.2": 2016.7048, "encoder_q-layer.3": 2135.7603, "encoder_q-layer.4": 2058.7727, "encoder_q-layer.5": 1784.1736, "encoder_q-layer.6": 1912.3003, "encoder_q-layer.7": 1918.4993, "encoder_q-layer.8": 2131.0217, "encoder_q-layer.9": 1864.2356, "epoch": 0.33, "inbatch_neg_score": 0.3795, "inbatch_pos_score": 1.0918, "learning_rate": 3.683333333333334e-05, "loss": 2.8277, "norm_diff": 0.0047, "norm_loss": 0.0, "num_token_doc": 66.724, "num_token_overlap": 17.7635, "num_token_query": 52.1297, "num_token_union": 73.6304, "num_word_context": 202.1137, "num_word_doc": 49.7851, "num_word_query": 39.7552, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3075.4697, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3804, "query_norm": 1.4838, "queue_k_norm": 1.4819, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1297, "sent_len_1": 66.724, "sent_len_max_0": 128.0, "sent_len_max_1": 211.365, "stdk": 0.0486, "stdq": 0.0461, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 33700 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.8171, "doc_norm": 1.4794, "encoder_q-embeddings": 1473.2677, "encoder_q-layer.0": 931.7945, "encoder_q-layer.1": 1029.2745, "encoder_q-layer.10": 1704.5518, "encoder_q-layer.11": 3303.5659, "encoder_q-layer.2": 1181.0042, "encoder_q-layer.3": 1257.3695, "encoder_q-layer.4": 1333.788, "encoder_q-layer.5": 1330.7322, "encoder_q-layer.6": 1474.7181, "encoder_q-layer.7": 1601.2872, "encoder_q-layer.8": 1892.1482, "encoder_q-layer.9": 1724.6699, "epoch": 0.33, "inbatch_neg_score": 0.3888, "inbatch_pos_score": 1.1064, "learning_rate": 3.677777777777778e-05, "loss": 2.8171, "norm_diff": 0.0098, "norm_loss": 0.0, "num_token_doc": 66.8107, "num_token_overlap": 17.8385, "num_token_query": 52.4001, "num_token_union": 73.7796, "num_word_context": 202.106, "num_word_doc": 49.8447, "num_word_query": 39.977, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2408.3121, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3887, "query_norm": 1.4718, "queue_k_norm": 1.4819, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4001, "sent_len_1": 66.8107, "sent_len_max_0": 128.0, "sent_len_max_1": 210.9162, "stdk": 0.0485, "stdq": 0.0458, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 33800 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 2.8167, "doc_norm": 1.4847, "encoder_q-embeddings": 810.2944, "encoder_q-layer.0": 525.3579, "encoder_q-layer.1": 613.3833, "encoder_q-layer.10": 977.4849, "encoder_q-layer.11": 1919.3024, "encoder_q-layer.2": 660.6718, "encoder_q-layer.3": 690.8142, "encoder_q-layer.4": 747.8838, "encoder_q-layer.5": 751.7992, "encoder_q-layer.6": 850.6945, "encoder_q-layer.7": 897.4157, "encoder_q-layer.8": 1076.7582, "encoder_q-layer.9": 965.3149, "epoch": 0.33, "inbatch_neg_score": 0.3966, "inbatch_pos_score": 1.126, "learning_rate": 3.672222222222222e-05, "loss": 2.8167, "norm_diff": 0.0325, "norm_loss": 0.0, "num_token_doc": 66.8594, "num_token_overlap": 17.831, "num_token_query": 52.2947, "num_token_union": 73.7402, "num_word_context": 202.3622, "num_word_doc": 49.8657, "num_word_query": 39.8535, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1341.9922, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.396, "query_norm": 1.5171, "queue_k_norm": 1.4819, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2947, "sent_len_1": 66.8594, "sent_len_max_0": 128.0, "sent_len_max_1": 211.0825, "stdk": 0.0487, "stdq": 0.0478, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 33900 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.8243, "doc_norm": 1.4836, "encoder_q-embeddings": 803.5397, "encoder_q-layer.0": 495.0714, "encoder_q-layer.1": 553.5211, "encoder_q-layer.10": 873.7569, "encoder_q-layer.11": 1876.7797, "encoder_q-layer.2": 611.8161, "encoder_q-layer.3": 655.3851, "encoder_q-layer.4": 708.2707, "encoder_q-layer.5": 748.0023, "encoder_q-layer.6": 809.5258, "encoder_q-layer.7": 879.0142, "encoder_q-layer.8": 1065.7119, "encoder_q-layer.9": 915.1797, "epoch": 0.33, "inbatch_neg_score": 0.4021, "inbatch_pos_score": 1.1309, "learning_rate": 3.6666666666666666e-05, "loss": 2.8243, "norm_diff": 0.023, "norm_loss": 0.0, "num_token_doc": 66.7344, "num_token_overlap": 17.8349, "num_token_query": 52.3682, "num_token_union": 73.7918, "num_word_context": 202.4559, "num_word_doc": 49.8675, "num_word_query": 39.9656, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1332.0127, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4028, "query_norm": 1.4607, "queue_k_norm": 1.4843, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3682, "sent_len_1": 66.7344, "sent_len_max_0": 128.0, "sent_len_max_1": 204.87, "stdk": 0.0487, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 34000 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 2.8138, "doc_norm": 1.4818, "encoder_q-embeddings": 942.7685, "encoder_q-layer.0": 620.8196, "encoder_q-layer.1": 713.8363, "encoder_q-layer.10": 793.5629, "encoder_q-layer.11": 1595.561, "encoder_q-layer.2": 832.4936, "encoder_q-layer.3": 911.6412, "encoder_q-layer.4": 933.9724, "encoder_q-layer.5": 843.9988, "encoder_q-layer.6": 825.4326, "encoder_q-layer.7": 834.7298, "encoder_q-layer.8": 915.1001, "encoder_q-layer.9": 781.3391, "epoch": 0.33, "inbatch_neg_score": 0.4048, "inbatch_pos_score": 1.1123, "learning_rate": 3.6611111111111115e-05, "loss": 2.8138, "norm_diff": 0.0217, "norm_loss": 0.0, "num_token_doc": 66.7993, "num_token_overlap": 17.827, "num_token_query": 52.282, "num_token_union": 73.733, "num_word_context": 202.2694, "num_word_doc": 49.8073, "num_word_query": 39.868, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1350.5725, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4045, "query_norm": 1.4601, "queue_k_norm": 1.486, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.282, "sent_len_1": 66.7993, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8075, "stdk": 0.0486, "stdq": 0.0459, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 34100 }, { "accuracy": 58.5449, "active_queue_size": 16384.0, "cl_loss": 2.8211, "doc_norm": 1.4886, "encoder_q-embeddings": 1680.7751, "encoder_q-layer.0": 1153.9323, "encoder_q-layer.1": 1479.7542, "encoder_q-layer.10": 876.2133, "encoder_q-layer.11": 1885.8174, "encoder_q-layer.2": 1707.705, "encoder_q-layer.3": 1449.6862, "encoder_q-layer.4": 1350.0725, "encoder_q-layer.5": 1212.2561, "encoder_q-layer.6": 1223.7817, "encoder_q-layer.7": 1226.7728, "encoder_q-layer.8": 1221.2631, "encoder_q-layer.9": 913.0144, "epoch": 0.33, "inbatch_neg_score": 0.407, "inbatch_pos_score": 1.1211, "learning_rate": 3.655555555555556e-05, "loss": 2.8211, "norm_diff": 0.0296, "norm_loss": 0.0, "num_token_doc": 66.6051, "num_token_overlap": 17.7935, "num_token_query": 52.1825, "num_token_union": 73.556, "num_word_context": 201.9992, "num_word_doc": 49.681, "num_word_query": 39.7936, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2067.6463, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4075, "query_norm": 1.459, "queue_k_norm": 1.4855, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1825, "sent_len_1": 66.6051, "sent_len_max_0": 128.0, "sent_len_max_1": 209.62, "stdk": 0.0488, "stdq": 0.046, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 34200 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.8082, "doc_norm": 1.4851, "encoder_q-embeddings": 763.9655, "encoder_q-layer.0": 463.2006, "encoder_q-layer.1": 505.9775, "encoder_q-layer.10": 908.5631, "encoder_q-layer.11": 1816.0138, "encoder_q-layer.2": 570.4002, "encoder_q-layer.3": 609.2318, "encoder_q-layer.4": 635.6266, "encoder_q-layer.5": 662.4077, "encoder_q-layer.6": 761.3195, "encoder_q-layer.7": 829.1436, "encoder_q-layer.8": 950.8197, "encoder_q-layer.9": 856.8046, "epoch": 0.33, "inbatch_neg_score": 0.4129, "inbatch_pos_score": 1.1318, "learning_rate": 3.65e-05, "loss": 2.8082, "norm_diff": 0.0189, "norm_loss": 0.0, "num_token_doc": 66.6163, "num_token_overlap": 17.7776, "num_token_query": 52.2655, "num_token_union": 73.6574, "num_word_context": 202.1209, "num_word_doc": 49.7166, "num_word_query": 39.8607, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1247.6987, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4138, "query_norm": 1.4662, "queue_k_norm": 1.4842, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2655, "sent_len_1": 66.6163, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2012, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 34300 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 2.8177, "doc_norm": 1.4833, "encoder_q-embeddings": 708.7049, "encoder_q-layer.0": 467.8923, "encoder_q-layer.1": 512.4478, "encoder_q-layer.10": 865.3213, "encoder_q-layer.11": 1631.5183, "encoder_q-layer.2": 606.6369, "encoder_q-layer.3": 632.7695, "encoder_q-layer.4": 680.8358, "encoder_q-layer.5": 703.1031, "encoder_q-layer.6": 724.1786, "encoder_q-layer.7": 758.0837, "encoder_q-layer.8": 941.5762, "encoder_q-layer.9": 819.5224, "epoch": 0.34, "inbatch_neg_score": 0.4143, "inbatch_pos_score": 1.1289, "learning_rate": 3.644444444444445e-05, "loss": 2.8177, "norm_diff": 0.0272, "norm_loss": 0.0, "num_token_doc": 66.8109, "num_token_overlap": 17.7929, "num_token_query": 52.2508, "num_token_union": 73.7333, "num_word_context": 202.3777, "num_word_doc": 49.8552, "num_word_query": 39.8562, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1189.0234, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4141, "query_norm": 1.4561, "queue_k_norm": 1.4864, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2508, "sent_len_1": 66.8109, "sent_len_max_0": 128.0, "sent_len_max_1": 210.99, "stdk": 0.0485, "stdq": 0.0459, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 34400 }, { "accuracy": 57.4707, "active_queue_size": 16384.0, "cl_loss": 2.8114, "doc_norm": 1.4839, "encoder_q-embeddings": 980.6092, "encoder_q-layer.0": 639.8453, "encoder_q-layer.1": 751.5092, "encoder_q-layer.10": 894.8854, "encoder_q-layer.11": 1833.7537, "encoder_q-layer.2": 888.9375, "encoder_q-layer.3": 948.7358, "encoder_q-layer.4": 977.4807, "encoder_q-layer.5": 951.0173, "encoder_q-layer.6": 906.4739, "encoder_q-layer.7": 927.9969, "encoder_q-layer.8": 1006.6469, "encoder_q-layer.9": 907.0427, "epoch": 0.34, "inbatch_neg_score": 0.4162, "inbatch_pos_score": 1.126, "learning_rate": 3.638888888888889e-05, "loss": 2.8114, "norm_diff": 0.0146, "norm_loss": 0.0, "num_token_doc": 66.8211, "num_token_overlap": 17.8106, "num_token_query": 52.339, "num_token_union": 73.7649, "num_word_context": 202.5576, "num_word_doc": 49.8783, "num_word_query": 39.9254, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1476.8046, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4158, "query_norm": 1.4693, "queue_k_norm": 1.4868, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.339, "sent_len_1": 66.8211, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9125, "stdk": 0.0485, "stdq": 0.0464, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 34500 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 2.8217, "doc_norm": 1.4872, "encoder_q-embeddings": 928.4881, "encoder_q-layer.0": 649.3706, "encoder_q-layer.1": 688.662, "encoder_q-layer.10": 863.5083, "encoder_q-layer.11": 1714.4496, "encoder_q-layer.2": 759.1375, "encoder_q-layer.3": 850.2433, "encoder_q-layer.4": 924.7729, "encoder_q-layer.5": 909.9977, "encoder_q-layer.6": 994.3328, "encoder_q-layer.7": 1033.9052, "encoder_q-layer.8": 1094.02, "encoder_q-layer.9": 875.9731, "epoch": 0.34, "inbatch_neg_score": 0.4132, "inbatch_pos_score": 1.127, "learning_rate": 3.633333333333333e-05, "loss": 2.8217, "norm_diff": 0.0264, "norm_loss": 0.0, "num_token_doc": 66.7802, "num_token_overlap": 17.8273, "num_token_query": 52.3412, "num_token_union": 73.7141, "num_word_context": 202.2832, "num_word_doc": 49.834, "num_word_query": 39.9022, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1430.2269, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4136, "query_norm": 1.4608, "queue_k_norm": 1.488, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3412, "sent_len_1": 66.7802, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3175, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 34600 }, { "accuracy": 58.1543, "active_queue_size": 16384.0, "cl_loss": 2.8114, "doc_norm": 1.4873, "encoder_q-embeddings": 1084.0316, "encoder_q-layer.0": 752.9775, "encoder_q-layer.1": 900.3278, "encoder_q-layer.10": 827.187, "encoder_q-layer.11": 1786.9908, "encoder_q-layer.2": 985.4272, "encoder_q-layer.3": 1053.1736, "encoder_q-layer.4": 1003.0803, "encoder_q-layer.5": 1054.0352, "encoder_q-layer.6": 1033.2377, "encoder_q-layer.7": 1095.7043, "encoder_q-layer.8": 1055.4056, "encoder_q-layer.9": 858.8457, "epoch": 0.34, "inbatch_neg_score": 0.4151, "inbatch_pos_score": 1.127, "learning_rate": 3.6277777777777776e-05, "loss": 2.8114, "norm_diff": 0.0292, "norm_loss": 0.0, "num_token_doc": 66.7538, "num_token_overlap": 17.8234, "num_token_query": 52.3613, "num_token_union": 73.7774, "num_word_context": 202.4336, "num_word_doc": 49.8051, "num_word_query": 39.9414, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1542.2006, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4146, "query_norm": 1.4581, "queue_k_norm": 1.4896, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3613, "sent_len_1": 66.7538, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1287, "stdk": 0.0486, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 34700 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.8251, "doc_norm": 1.4918, "encoder_q-embeddings": 762.6266, "encoder_q-layer.0": 479.2294, "encoder_q-layer.1": 518.8124, "encoder_q-layer.10": 858.098, "encoder_q-layer.11": 1701.0385, "encoder_q-layer.2": 581.6666, "encoder_q-layer.3": 634.8189, "encoder_q-layer.4": 670.4933, "encoder_q-layer.5": 665.1399, "encoder_q-layer.6": 731.9651, "encoder_q-layer.7": 795.3368, "encoder_q-layer.8": 959.2419, "encoder_q-layer.9": 846.3698, "epoch": 0.34, "inbatch_neg_score": 0.4115, "inbatch_pos_score": 1.1309, "learning_rate": 3.6222222222222225e-05, "loss": 2.8251, "norm_diff": 0.0342, "norm_loss": 0.0, "num_token_doc": 66.6585, "num_token_overlap": 17.7787, "num_token_query": 52.2444, "num_token_union": 73.6336, "num_word_context": 202.2316, "num_word_doc": 49.7507, "num_word_query": 39.8516, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1222.2964, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4111, "query_norm": 1.4576, "queue_k_norm": 1.4896, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2444, "sent_len_1": 66.6585, "sent_len_max_0": 128.0, "sent_len_max_1": 205.5563, "stdk": 0.0488, "stdq": 0.046, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 34800 }, { "accuracy": 60.0098, "active_queue_size": 16384.0, "cl_loss": 2.813, "doc_norm": 1.4862, "encoder_q-embeddings": 11227.2217, "encoder_q-layer.0": 9111.4785, "encoder_q-layer.1": 10553.3779, "encoder_q-layer.10": 1635.1978, "encoder_q-layer.11": 1953.2948, "encoder_q-layer.2": 14190.7363, "encoder_q-layer.3": 16643.2695, "encoder_q-layer.4": 20577.4531, "encoder_q-layer.5": 21489.541, "encoder_q-layer.6": 21462.9492, "encoder_q-layer.7": 18858.1055, "encoder_q-layer.8": 21091.1055, "encoder_q-layer.9": 10993.2637, "epoch": 0.34, "inbatch_neg_score": 0.4063, "inbatch_pos_score": 1.126, "learning_rate": 3.6166666666666674e-05, "loss": 2.813, "norm_diff": 0.0182, "norm_loss": 0.0, "num_token_doc": 66.8367, "num_token_overlap": 17.8226, "num_token_query": 52.3581, "num_token_union": 73.7792, "num_word_context": 202.4893, "num_word_doc": 49.8365, "num_word_query": 39.9202, "postclip_grad_norm": 1.0, "preclip_grad_norm": 22046.0973, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4062, "query_norm": 1.468, "queue_k_norm": 1.4893, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3581, "sent_len_1": 66.8367, "sent_len_max_0": 128.0, "sent_len_max_1": 211.1087, "stdk": 0.0485, "stdq": 0.0465, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 34900 }, { "accuracy": 58.5449, "active_queue_size": 16384.0, "cl_loss": 2.8051, "doc_norm": 1.4939, "encoder_q-embeddings": 823.9682, "encoder_q-layer.0": 546.6815, "encoder_q-layer.1": 636.1199, "encoder_q-layer.10": 931.3442, "encoder_q-layer.11": 1869.5007, "encoder_q-layer.2": 754.4954, "encoder_q-layer.3": 803.7574, "encoder_q-layer.4": 861.3179, "encoder_q-layer.5": 938.8303, "encoder_q-layer.6": 959.926, "encoder_q-layer.7": 977.9004, "encoder_q-layer.8": 1064.5415, "encoder_q-layer.9": 923.0611, "epoch": 0.34, "inbatch_neg_score": 0.4008, "inbatch_pos_score": 1.1338, "learning_rate": 3.611111111111111e-05, "loss": 2.8051, "norm_diff": 0.0207, "norm_loss": 0.0, "num_token_doc": 66.9415, "num_token_overlap": 17.8178, "num_token_query": 52.2435, "num_token_union": 73.7663, "num_word_context": 202.302, "num_word_doc": 49.9291, "num_word_query": 39.8318, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1384.474, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4009, "query_norm": 1.4731, "queue_k_norm": 1.49, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2435, "sent_len_1": 66.9415, "sent_len_max_0": 128.0, "sent_len_max_1": 209.455, "stdk": 0.0488, "stdq": 0.0468, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 35000 }, { "accuracy": 59.5215, "active_queue_size": 16384.0, "cl_loss": 2.7997, "doc_norm": 1.4923, "encoder_q-embeddings": 748.8043, "encoder_q-layer.0": 472.9598, "encoder_q-layer.1": 534.9331, "encoder_q-layer.10": 971.7222, "encoder_q-layer.11": 1723.6555, "encoder_q-layer.2": 590.9932, "encoder_q-layer.3": 601.2816, "encoder_q-layer.4": 643.3317, "encoder_q-layer.5": 637.5272, "encoder_q-layer.6": 737.324, "encoder_q-layer.7": 829.1421, "encoder_q-layer.8": 1009.386, "encoder_q-layer.9": 900.0418, "epoch": 0.34, "inbatch_neg_score": 0.3992, "inbatch_pos_score": 1.1475, "learning_rate": 3.605555555555556e-05, "loss": 2.7997, "norm_diff": 0.012, "norm_loss": 0.0, "num_token_doc": 66.7717, "num_token_overlap": 17.8409, "num_token_query": 52.2549, "num_token_union": 73.6419, "num_word_context": 202.0827, "num_word_doc": 49.8024, "num_word_query": 39.8476, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1228.7516, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4004, "query_norm": 1.482, "queue_k_norm": 1.4888, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2549, "sent_len_1": 66.7717, "sent_len_max_0": 128.0, "sent_len_max_1": 210.21, "stdk": 0.0488, "stdq": 0.0472, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 35100 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.8052, "doc_norm": 1.4942, "encoder_q-embeddings": 824.7737, "encoder_q-layer.0": 523.2275, "encoder_q-layer.1": 601.5069, "encoder_q-layer.10": 964.8721, "encoder_q-layer.11": 2053.0752, "encoder_q-layer.2": 683.5308, "encoder_q-layer.3": 707.2386, "encoder_q-layer.4": 767.3549, "encoder_q-layer.5": 818.275, "encoder_q-layer.6": 929.3425, "encoder_q-layer.7": 972.3777, "encoder_q-layer.8": 1132.0251, "encoder_q-layer.9": 1002.3186, "epoch": 0.34, "inbatch_neg_score": 0.4034, "inbatch_pos_score": 1.126, "learning_rate": 3.6e-05, "loss": 2.8052, "norm_diff": 0.0234, "norm_loss": 0.0, "num_token_doc": 66.6772, "num_token_overlap": 17.8739, "num_token_query": 52.4293, "num_token_union": 73.6976, "num_word_context": 202.336, "num_word_doc": 49.787, "num_word_query": 39.996, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1424.5893, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4038, "query_norm": 1.4708, "queue_k_norm": 1.4893, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4293, "sent_len_1": 66.6772, "sent_len_max_0": 128.0, "sent_len_max_1": 205.5163, "stdk": 0.0488, "stdq": 0.0465, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 35200 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.8184, "doc_norm": 1.4944, "encoder_q-embeddings": 765.8632, "encoder_q-layer.0": 466.6865, "encoder_q-layer.1": 538.5306, "encoder_q-layer.10": 957.2332, "encoder_q-layer.11": 1833.9375, "encoder_q-layer.2": 595.2047, "encoder_q-layer.3": 640.4219, "encoder_q-layer.4": 707.2582, "encoder_q-layer.5": 717.8546, "encoder_q-layer.6": 808.9105, "encoder_q-layer.7": 906.554, "encoder_q-layer.8": 1059.8933, "encoder_q-layer.9": 924.0444, "epoch": 0.34, "inbatch_neg_score": 0.4051, "inbatch_pos_score": 1.123, "learning_rate": 3.594444444444445e-05, "loss": 2.8184, "norm_diff": 0.0152, "norm_loss": 0.0, "num_token_doc": 66.5716, "num_token_overlap": 17.7719, "num_token_query": 52.2457, "num_token_union": 73.5722, "num_word_context": 201.9132, "num_word_doc": 49.6577, "num_word_query": 39.8335, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1314.7334, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4048, "query_norm": 1.4792, "queue_k_norm": 1.49, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2457, "sent_len_1": 66.5716, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0037, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 35300 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.7993, "doc_norm": 1.4908, "encoder_q-embeddings": 790.123, "encoder_q-layer.0": 509.4449, "encoder_q-layer.1": 590.5676, "encoder_q-layer.10": 805.7063, "encoder_q-layer.11": 1613.3173, "encoder_q-layer.2": 683.4886, "encoder_q-layer.3": 769.5569, "encoder_q-layer.4": 793.4429, "encoder_q-layer.5": 863.3784, "encoder_q-layer.6": 848.6686, "encoder_q-layer.7": 845.718, "encoder_q-layer.8": 916.6931, "encoder_q-layer.9": 789.5577, "epoch": 0.35, "inbatch_neg_score": 0.4093, "inbatch_pos_score": 1.1289, "learning_rate": 3.5888888888888886e-05, "loss": 2.7993, "norm_diff": 0.0261, "norm_loss": 0.0, "num_token_doc": 66.7106, "num_token_overlap": 17.8063, "num_token_query": 52.3158, "num_token_union": 73.6868, "num_word_context": 202.2398, "num_word_doc": 49.7843, "num_word_query": 39.9089, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1261.1575, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4097, "query_norm": 1.4647, "queue_k_norm": 1.4917, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3158, "sent_len_1": 66.7106, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8187, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 35400 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.7942, "doc_norm": 1.4923, "encoder_q-embeddings": 771.0839, "encoder_q-layer.0": 465.1958, "encoder_q-layer.1": 538.2873, "encoder_q-layer.10": 873.9781, "encoder_q-layer.11": 1809.1515, "encoder_q-layer.2": 615.2052, "encoder_q-layer.3": 652.3622, "encoder_q-layer.4": 691.0548, "encoder_q-layer.5": 710.6652, "encoder_q-layer.6": 759.6577, "encoder_q-layer.7": 814.1088, "encoder_q-layer.8": 974.2258, "encoder_q-layer.9": 861.9606, "epoch": 0.35, "inbatch_neg_score": 0.4093, "inbatch_pos_score": 1.124, "learning_rate": 3.5833333333333335e-05, "loss": 2.7942, "norm_diff": 0.0318, "norm_loss": 0.0, "num_token_doc": 66.832, "num_token_overlap": 17.8237, "num_token_query": 52.2714, "num_token_union": 73.7233, "num_word_context": 202.3464, "num_word_doc": 49.8607, "num_word_query": 39.8666, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1265.6714, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4082, "query_norm": 1.4604, "queue_k_norm": 1.4922, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2714, "sent_len_1": 66.832, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6937, "stdk": 0.0487, "stdq": 0.0459, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 35500 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 2.7995, "doc_norm": 1.4919, "encoder_q-embeddings": 1165.0411, "encoder_q-layer.0": 791.829, "encoder_q-layer.1": 866.5057, "encoder_q-layer.10": 935.9802, "encoder_q-layer.11": 1880.7695, "encoder_q-layer.2": 1030.5033, "encoder_q-layer.3": 1099.7277, "encoder_q-layer.4": 1111.1562, "encoder_q-layer.5": 1167.7745, "encoder_q-layer.6": 1180.5524, "encoder_q-layer.7": 1128.931, "encoder_q-layer.8": 1236.9323, "encoder_q-layer.9": 962.1774, "epoch": 0.35, "inbatch_neg_score": 0.4074, "inbatch_pos_score": 1.1191, "learning_rate": 3.577777777777778e-05, "loss": 2.7995, "norm_diff": 0.0248, "norm_loss": 0.0, "num_token_doc": 66.7288, "num_token_overlap": 17.8776, "num_token_query": 52.3313, "num_token_union": 73.6816, "num_word_context": 202.3533, "num_word_doc": 49.8051, "num_word_query": 39.9173, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1683.9431, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4072, "query_norm": 1.4671, "queue_k_norm": 1.4934, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3313, "sent_len_1": 66.7288, "sent_len_max_0": 128.0, "sent_len_max_1": 207.365, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 35600 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.8004, "doc_norm": 1.4891, "encoder_q-embeddings": 779.9437, "encoder_q-layer.0": 476.8156, "encoder_q-layer.1": 531.1447, "encoder_q-layer.10": 865.5435, "encoder_q-layer.11": 1685.3623, "encoder_q-layer.2": 605.1832, "encoder_q-layer.3": 638.3746, "encoder_q-layer.4": 669.2597, "encoder_q-layer.5": 743.0499, "encoder_q-layer.6": 802.3493, "encoder_q-layer.7": 858.7151, "encoder_q-layer.8": 956.0332, "encoder_q-layer.9": 848.1132, "epoch": 0.35, "inbatch_neg_score": 0.4174, "inbatch_pos_score": 1.1543, "learning_rate": 3.5722222222222226e-05, "loss": 2.8004, "norm_diff": 0.0058, "norm_loss": 0.0, "num_token_doc": 66.7023, "num_token_overlap": 17.7886, "num_token_query": 52.2242, "num_token_union": 73.6426, "num_word_context": 202.2774, "num_word_doc": 49.7675, "num_word_query": 39.8224, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1239.263, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4172, "query_norm": 1.4875, "queue_k_norm": 1.4923, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2242, "sent_len_1": 66.7023, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5025, "stdk": 0.0486, "stdq": 0.0467, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 35700 }, { "accuracy": 58.0566, "active_queue_size": 16384.0, "cl_loss": 2.8075, "doc_norm": 1.4916, "encoder_q-embeddings": 798.389, "encoder_q-layer.0": 542.7604, "encoder_q-layer.1": 612.2356, "encoder_q-layer.10": 918.3953, "encoder_q-layer.11": 1775.7834, "encoder_q-layer.2": 678.63, "encoder_q-layer.3": 736.903, "encoder_q-layer.4": 802.3903, "encoder_q-layer.5": 791.1697, "encoder_q-layer.6": 832.5941, "encoder_q-layer.7": 845.895, "encoder_q-layer.8": 989.9059, "encoder_q-layer.9": 859.4991, "epoch": 0.35, "inbatch_neg_score": 0.4239, "inbatch_pos_score": 1.125, "learning_rate": 3.566666666666667e-05, "loss": 2.8075, "norm_diff": 0.0201, "norm_loss": 0.0, "num_token_doc": 66.8518, "num_token_overlap": 17.8029, "num_token_query": 52.2873, "num_token_union": 73.7817, "num_word_context": 202.4827, "num_word_doc": 49.8829, "num_word_query": 39.9111, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1328.8219, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4236, "query_norm": 1.4714, "queue_k_norm": 1.4939, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2873, "sent_len_1": 66.8518, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8663, "stdk": 0.0486, "stdq": 0.0459, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 35800 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.7987, "doc_norm": 1.495, "encoder_q-embeddings": 1490.3883, "encoder_q-layer.0": 945.1326, "encoder_q-layer.1": 1006.9231, "encoder_q-layer.10": 1677.5677, "encoder_q-layer.11": 3205.5518, "encoder_q-layer.2": 1161.301, "encoder_q-layer.3": 1245.1179, "encoder_q-layer.4": 1363.946, "encoder_q-layer.5": 1396.3284, "encoder_q-layer.6": 1563.0125, "encoder_q-layer.7": 1683.6782, "encoder_q-layer.8": 1965.9491, "encoder_q-layer.9": 1779.403, "epoch": 0.35, "inbatch_neg_score": 0.4203, "inbatch_pos_score": 1.1377, "learning_rate": 3.561111111111111e-05, "loss": 2.7987, "norm_diff": 0.0155, "norm_loss": 0.0, "num_token_doc": 66.92, "num_token_overlap": 17.8717, "num_token_query": 52.3893, "num_token_union": 73.8307, "num_word_context": 202.4611, "num_word_doc": 49.9442, "num_word_query": 39.9508, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2414.7712, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4197, "query_norm": 1.4795, "queue_k_norm": 1.4936, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3893, "sent_len_1": 66.92, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0238, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 35900 }, { "accuracy": 58.252, "active_queue_size": 16384.0, "cl_loss": 2.7912, "doc_norm": 1.4959, "encoder_q-embeddings": 1674.7621, "encoder_q-layer.0": 1084.3162, "encoder_q-layer.1": 1280.2156, "encoder_q-layer.10": 1749.0909, "encoder_q-layer.11": 3670.9922, "encoder_q-layer.2": 1525.5396, "encoder_q-layer.3": 1613.5089, "encoder_q-layer.4": 1844.1752, "encoder_q-layer.5": 1719.3478, "encoder_q-layer.6": 1843.3076, "encoder_q-layer.7": 1858.5909, "encoder_q-layer.8": 1993.9539, "encoder_q-layer.9": 1703.0226, "epoch": 0.35, "inbatch_neg_score": 0.4304, "inbatch_pos_score": 1.1387, "learning_rate": 3.555555555555556e-05, "loss": 2.7912, "norm_diff": 0.0237, "norm_loss": 0.0, "num_token_doc": 66.7234, "num_token_overlap": 17.7771, "num_token_query": 52.3061, "num_token_union": 73.7138, "num_word_context": 202.2851, "num_word_doc": 49.7893, "num_word_query": 39.9004, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2838.641, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4292, "query_norm": 1.4723, "queue_k_norm": 1.4966, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3061, "sent_len_1": 66.7234, "sent_len_max_0": 128.0, "sent_len_max_1": 209.09, "stdk": 0.0487, "stdq": 0.0457, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 36000 }, { "accuracy": 58.3496, "active_queue_size": 16384.0, "cl_loss": 2.7973, "doc_norm": 1.4942, "encoder_q-embeddings": 1520.028, "encoder_q-layer.0": 952.5979, "encoder_q-layer.1": 1045.3448, "encoder_q-layer.10": 1774.6559, "encoder_q-layer.11": 3328.3682, "encoder_q-layer.2": 1198.5813, "encoder_q-layer.3": 1304.6365, "encoder_q-layer.4": 1465.8088, "encoder_q-layer.5": 1441.4723, "encoder_q-layer.6": 1622.1604, "encoder_q-layer.7": 1709.3248, "encoder_q-layer.8": 1894.3989, "encoder_q-layer.9": 1642.6621, "epoch": 0.35, "inbatch_neg_score": 0.4337, "inbatch_pos_score": 1.1348, "learning_rate": 3.55e-05, "loss": 2.7973, "norm_diff": 0.0143, "norm_loss": 0.0, "num_token_doc": 66.6634, "num_token_overlap": 17.7766, "num_token_query": 52.2418, "num_token_union": 73.6375, "num_word_context": 202.2096, "num_word_doc": 49.7137, "num_word_query": 39.8605, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2470.9999, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4336, "query_norm": 1.4825, "queue_k_norm": 1.4959, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2418, "sent_len_1": 66.6634, "sent_len_max_0": 128.0, "sent_len_max_1": 210.1163, "stdk": 0.0487, "stdq": 0.0457, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 36100 }, { "accuracy": 57.5684, "active_queue_size": 16384.0, "cl_loss": 2.7904, "doc_norm": 1.4949, "encoder_q-embeddings": 1793.3314, "encoder_q-layer.0": 1128.2277, "encoder_q-layer.1": 1306.7534, "encoder_q-layer.10": 2093.4331, "encoder_q-layer.11": 3710.1602, "encoder_q-layer.2": 1501.4589, "encoder_q-layer.3": 1583.9868, "encoder_q-layer.4": 1801.8713, "encoder_q-layer.5": 1792.6592, "encoder_q-layer.6": 2008.9047, "encoder_q-layer.7": 2035.3154, "encoder_q-layer.8": 2256.8689, "encoder_q-layer.9": 1973.8859, "epoch": 0.35, "inbatch_neg_score": 0.4508, "inbatch_pos_score": 1.1602, "learning_rate": 3.5444444444444445e-05, "loss": 2.7904, "norm_diff": 0.0171, "norm_loss": 0.0, "num_token_doc": 66.6637, "num_token_overlap": 17.784, "num_token_query": 52.2591, "num_token_union": 73.6276, "num_word_context": 202.1696, "num_word_doc": 49.6841, "num_word_query": 39.8565, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2902.1159, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4497, "query_norm": 1.512, "queue_k_norm": 1.4986, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2591, "sent_len_1": 66.6637, "sent_len_max_0": 128.0, "sent_len_max_1": 211.6912, "stdk": 0.0486, "stdq": 0.0465, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 36200 }, { "accuracy": 58.5449, "active_queue_size": 16384.0, "cl_loss": 2.7845, "doc_norm": 1.498, "encoder_q-embeddings": 1509.3986, "encoder_q-layer.0": 930.1189, "encoder_q-layer.1": 1036.3799, "encoder_q-layer.10": 1908.212, "encoder_q-layer.11": 3649.0369, "encoder_q-layer.2": 1186.4207, "encoder_q-layer.3": 1366.5892, "encoder_q-layer.4": 1452.7039, "encoder_q-layer.5": 1529.1125, "encoder_q-layer.6": 1720.5624, "encoder_q-layer.7": 1901.2659, "encoder_q-layer.8": 2001.1782, "encoder_q-layer.9": 1781.1539, "epoch": 0.35, "inbatch_neg_score": 0.4548, "inbatch_pos_score": 1.1719, "learning_rate": 3.538888888888889e-05, "loss": 2.7845, "norm_diff": 0.011, "norm_loss": 0.0, "num_token_doc": 66.9532, "num_token_overlap": 17.8424, "num_token_query": 52.3292, "num_token_union": 73.7982, "num_word_context": 202.5855, "num_word_doc": 49.9313, "num_word_query": 39.9215, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2614.474, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4558, "query_norm": 1.5071, "queue_k_norm": 1.498, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3292, "sent_len_1": 66.9532, "sent_len_max_0": 128.0, "sent_len_max_1": 211.2912, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 36300 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 2.799, "doc_norm": 1.5, "encoder_q-embeddings": 1540.2126, "encoder_q-layer.0": 957.9278, "encoder_q-layer.1": 1024.1425, "encoder_q-layer.10": 1918.8843, "encoder_q-layer.11": 3713.4717, "encoder_q-layer.2": 1196.0696, "encoder_q-layer.3": 1287.1176, "encoder_q-layer.4": 1399.1918, "encoder_q-layer.5": 1424.3489, "encoder_q-layer.6": 1613.2876, "encoder_q-layer.7": 1651.2528, "encoder_q-layer.8": 1974.9286, "encoder_q-layer.9": 1841.9962, "epoch": 0.36, "inbatch_neg_score": 0.4673, "inbatch_pos_score": 1.1797, "learning_rate": 3.5333333333333336e-05, "loss": 2.799, "norm_diff": 0.0107, "norm_loss": 0.0, "num_token_doc": 66.7561, "num_token_overlap": 17.8129, "num_token_query": 52.3676, "num_token_union": 73.7813, "num_word_context": 202.4595, "num_word_doc": 49.8146, "num_word_query": 39.9374, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2623.8571, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4673, "query_norm": 1.5064, "queue_k_norm": 1.4981, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3676, "sent_len_1": 66.7561, "sent_len_max_0": 128.0, "sent_len_max_1": 209.05, "stdk": 0.0488, "stdq": 0.0461, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 36400 }, { "accuracy": 58.8379, "active_queue_size": 16384.0, "cl_loss": 2.7862, "doc_norm": 1.4971, "encoder_q-embeddings": 1804.6145, "encoder_q-layer.0": 1235.3097, "encoder_q-layer.1": 1380.2621, "encoder_q-layer.10": 1916.0515, "encoder_q-layer.11": 3631.7805, "encoder_q-layer.2": 1569.7072, "encoder_q-layer.3": 1718.7839, "encoder_q-layer.4": 1829.0837, "encoder_q-layer.5": 1866.7028, "encoder_q-layer.6": 2135.8186, "encoder_q-layer.7": 2120.8865, "encoder_q-layer.8": 2076.4468, "encoder_q-layer.9": 1763.8025, "epoch": 0.36, "inbatch_neg_score": 0.4759, "inbatch_pos_score": 1.2012, "learning_rate": 3.527777777777778e-05, "loss": 2.7862, "norm_diff": 0.0206, "norm_loss": 0.0, "num_token_doc": 66.707, "num_token_overlap": 17.8645, "num_token_query": 52.3978, "num_token_union": 73.7172, "num_word_context": 202.1215, "num_word_doc": 49.7653, "num_word_query": 39.968, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2891.4555, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4756, "query_norm": 1.5177, "queue_k_norm": 1.5012, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3978, "sent_len_1": 66.707, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0112, "stdk": 0.0486, "stdq": 0.0464, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 36500 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 2.7885, "doc_norm": 1.502, "encoder_q-embeddings": 2065.6335, "encoder_q-layer.0": 1394.2451, "encoder_q-layer.1": 1534.3453, "encoder_q-layer.10": 2368.0688, "encoder_q-layer.11": 4252.1987, "encoder_q-layer.2": 1783.8428, "encoder_q-layer.3": 2016.2947, "encoder_q-layer.4": 2233.729, "encoder_q-layer.5": 2224.1182, "encoder_q-layer.6": 2555.2444, "encoder_q-layer.7": 2402.7217, "encoder_q-layer.8": 2603.0823, "encoder_q-layer.9": 2259.3035, "epoch": 0.36, "inbatch_neg_score": 0.482, "inbatch_pos_score": 1.208, "learning_rate": 3.522222222222222e-05, "loss": 2.7885, "norm_diff": 0.0318, "norm_loss": 0.0, "num_token_doc": 66.8847, "num_token_overlap": 17.8006, "num_token_query": 52.2438, "num_token_union": 73.7697, "num_word_context": 202.2572, "num_word_doc": 49.9254, "num_word_query": 39.8429, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3349.6103, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4817, "query_norm": 1.5338, "queue_k_norm": 1.5011, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2438, "sent_len_1": 66.8847, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6675, "stdk": 0.0487, "stdq": 0.0473, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 36600 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.8086, "doc_norm": 1.5066, "encoder_q-embeddings": 2287.0354, "encoder_q-layer.0": 1594.1746, "encoder_q-layer.1": 1915.173, "encoder_q-layer.10": 1866.1455, "encoder_q-layer.11": 3541.5896, "encoder_q-layer.2": 2372.6343, "encoder_q-layer.3": 2647.9165, "encoder_q-layer.4": 2931.25, "encoder_q-layer.5": 3127.178, "encoder_q-layer.6": 2981.7856, "encoder_q-layer.7": 2964.5105, "encoder_q-layer.8": 2719.8428, "encoder_q-layer.9": 1887.2767, "epoch": 0.36, "inbatch_neg_score": 0.4899, "inbatch_pos_score": 1.207, "learning_rate": 3.516666666666667e-05, "loss": 2.8086, "norm_diff": 0.0095, "norm_loss": 0.0, "num_token_doc": 66.6759, "num_token_overlap": 17.7963, "num_token_query": 52.2598, "num_token_union": 73.6542, "num_word_context": 202.2552, "num_word_doc": 49.7547, "num_word_query": 39.8726, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3753.5417, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4897, "query_norm": 1.5007, "queue_k_norm": 1.5037, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2598, "sent_len_1": 66.6759, "sent_len_max_0": 128.0, "sent_len_max_1": 206.3275, "stdk": 0.0488, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 36700 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.779, "doc_norm": 1.5096, "encoder_q-embeddings": 1510.7173, "encoder_q-layer.0": 993.035, "encoder_q-layer.1": 1118.2712, "encoder_q-layer.10": 2039.1863, "encoder_q-layer.11": 3544.9307, "encoder_q-layer.2": 1274.7776, "encoder_q-layer.3": 1400.3699, "encoder_q-layer.4": 1431.4054, "encoder_q-layer.5": 1493.1915, "encoder_q-layer.6": 1710.2045, "encoder_q-layer.7": 1902.9105, "encoder_q-layer.8": 2117.0671, "encoder_q-layer.9": 1901.4493, "epoch": 0.36, "inbatch_neg_score": 0.4897, "inbatch_pos_score": 1.208, "learning_rate": 3.511111111111111e-05, "loss": 2.779, "norm_diff": 0.032, "norm_loss": 0.0, "num_token_doc": 66.762, "num_token_overlap": 17.8265, "num_token_query": 52.3357, "num_token_union": 73.7308, "num_word_context": 202.1704, "num_word_doc": 49.8372, "num_word_query": 39.9081, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2628.3206, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4888, "query_norm": 1.4776, "queue_k_norm": 1.5051, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3357, "sent_len_1": 66.762, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5025, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 36800 }, { "accuracy": 59.9121, "active_queue_size": 16384.0, "cl_loss": 2.7867, "doc_norm": 1.5076, "encoder_q-embeddings": 1976.5885, "encoder_q-layer.0": 1348.4653, "encoder_q-layer.1": 1566.9829, "encoder_q-layer.10": 1729.3572, "encoder_q-layer.11": 3428.8525, "encoder_q-layer.2": 1760.0841, "encoder_q-layer.3": 1947.9091, "encoder_q-layer.4": 1948.7045, "encoder_q-layer.5": 1932.7904, "encoder_q-layer.6": 2062.6873, "encoder_q-layer.7": 1932.0114, "encoder_q-layer.8": 1974.9667, "encoder_q-layer.9": 1758.0366, "epoch": 0.36, "inbatch_neg_score": 0.4821, "inbatch_pos_score": 1.2139, "learning_rate": 3.505555555555556e-05, "loss": 2.7867, "norm_diff": 0.0154, "norm_loss": 0.0, "num_token_doc": 66.703, "num_token_overlap": 17.8529, "num_token_query": 52.2405, "num_token_union": 73.6023, "num_word_context": 202.0395, "num_word_doc": 49.798, "num_word_query": 39.8494, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2938.0336, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4829, "query_norm": 1.4956, "queue_k_norm": 1.5081, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2405, "sent_len_1": 66.703, "sent_len_max_0": 128.0, "sent_len_max_1": 207.2438, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 36900 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.7865, "doc_norm": 1.5126, "encoder_q-embeddings": 1529.9064, "encoder_q-layer.0": 960.0928, "encoder_q-layer.1": 1102.3174, "encoder_q-layer.10": 1931.5896, "encoder_q-layer.11": 3583.6699, "encoder_q-layer.2": 1257.7013, "encoder_q-layer.3": 1357.33, "encoder_q-layer.4": 1438.3842, "encoder_q-layer.5": 1570.4667, "encoder_q-layer.6": 1638.0486, "encoder_q-layer.7": 1814.2786, "encoder_q-layer.8": 2078.637, "encoder_q-layer.9": 1766.1021, "epoch": 0.36, "inbatch_neg_score": 0.4855, "inbatch_pos_score": 1.2002, "learning_rate": 3.5e-05, "loss": 2.7865, "norm_diff": 0.0176, "norm_loss": 0.0, "num_token_doc": 66.8248, "num_token_overlap": 17.8383, "num_token_query": 52.24, "num_token_union": 73.67, "num_word_context": 202.3027, "num_word_doc": 49.8534, "num_word_query": 39.8461, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2602.3316, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4858, "query_norm": 1.495, "queue_k_norm": 1.5097, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.24, "sent_len_1": 66.8248, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2363, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 37000 }, { "accuracy": 60.0098, "active_queue_size": 16384.0, "cl_loss": 2.7875, "doc_norm": 1.511, "encoder_q-embeddings": 2157.0369, "encoder_q-layer.0": 1389.6482, "encoder_q-layer.1": 1567.8262, "encoder_q-layer.10": 1808.906, "encoder_q-layer.11": 3621.0842, "encoder_q-layer.2": 1813.8466, "encoder_q-layer.3": 2001.5764, "encoder_q-layer.4": 2250.2939, "encoder_q-layer.5": 2376.6594, "encoder_q-layer.6": 2410.1865, "encoder_q-layer.7": 2321.2573, "encoder_q-layer.8": 2308.5012, "encoder_q-layer.9": 1779.5978, "epoch": 0.36, "inbatch_neg_score": 0.4887, "inbatch_pos_score": 1.2207, "learning_rate": 3.4944444444444446e-05, "loss": 2.7875, "norm_diff": 0.006, "norm_loss": 0.0, "num_token_doc": 66.7474, "num_token_overlap": 17.7673, "num_token_query": 52.2287, "num_token_union": 73.6636, "num_word_context": 201.9722, "num_word_doc": 49.7776, "num_word_query": 39.8181, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3268.4095, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4893, "query_norm": 1.5138, "queue_k_norm": 1.5097, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2287, "sent_len_1": 66.7474, "sent_len_max_0": 128.0, "sent_len_max_1": 212.365, "stdk": 0.0488, "stdq": 0.0469, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 37100 }, { "accuracy": 58.8379, "active_queue_size": 16384.0, "cl_loss": 2.7874, "doc_norm": 1.513, "encoder_q-embeddings": 2903.4927, "encoder_q-layer.0": 1914.9816, "encoder_q-layer.1": 2201.5764, "encoder_q-layer.10": 1777.381, "encoder_q-layer.11": 3606.9739, "encoder_q-layer.2": 2662.1372, "encoder_q-layer.3": 3216.8208, "encoder_q-layer.4": 3981.1226, "encoder_q-layer.5": 3959.4285, "encoder_q-layer.6": 3533.2014, "encoder_q-layer.7": 2768.3992, "encoder_q-layer.8": 2430.1733, "encoder_q-layer.9": 1856.0527, "epoch": 0.36, "inbatch_neg_score": 0.491, "inbatch_pos_score": 1.2246, "learning_rate": 3.4888888888888895e-05, "loss": 2.7874, "norm_diff": 0.0126, "norm_loss": 0.0, "num_token_doc": 66.7661, "num_token_overlap": 17.8109, "num_token_query": 52.2504, "num_token_union": 73.6678, "num_word_context": 202.3809, "num_word_doc": 49.7958, "num_word_query": 39.848, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4253.2924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.491, "query_norm": 1.5042, "queue_k_norm": 1.5104, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2504, "sent_len_1": 66.7661, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8512, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 37200 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.7812, "doc_norm": 1.5131, "encoder_q-embeddings": 1424.4595, "encoder_q-layer.0": 905.3908, "encoder_q-layer.1": 989.1511, "encoder_q-layer.10": 1713.5352, "encoder_q-layer.11": 3460.594, "encoder_q-layer.2": 1147.0585, "encoder_q-layer.3": 1298.2161, "encoder_q-layer.4": 1317.7898, "encoder_q-layer.5": 1345.2095, "encoder_q-layer.6": 1510.9617, "encoder_q-layer.7": 1645.2469, "encoder_q-layer.8": 1933.911, "encoder_q-layer.9": 1734.3286, "epoch": 0.36, "inbatch_neg_score": 0.4915, "inbatch_pos_score": 1.2227, "learning_rate": 3.483333333333334e-05, "loss": 2.7812, "norm_diff": 0.0211, "norm_loss": 0.0, "num_token_doc": 66.7395, "num_token_overlap": 17.8005, "num_token_query": 52.309, "num_token_union": 73.7423, "num_word_context": 202.2803, "num_word_doc": 49.8262, "num_word_query": 39.8964, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2476.3612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4915, "query_norm": 1.492, "queue_k_norm": 1.5113, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.309, "sent_len_1": 66.7395, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4375, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 37300 }, { "accuracy": 59.9121, "active_queue_size": 16384.0, "cl_loss": 2.7725, "doc_norm": 1.5155, "encoder_q-embeddings": 1413.7596, "encoder_q-layer.0": 912.1025, "encoder_q-layer.1": 993.1025, "encoder_q-layer.10": 1633.0797, "encoder_q-layer.11": 3430.2856, "encoder_q-layer.2": 1137.6223, "encoder_q-layer.3": 1236.0746, "encoder_q-layer.4": 1367.7106, "encoder_q-layer.5": 1393.4053, "encoder_q-layer.6": 1534.1051, "encoder_q-layer.7": 1711.3593, "encoder_q-layer.8": 1880.131, "encoder_q-layer.9": 1646.8816, "epoch": 0.37, "inbatch_neg_score": 0.4956, "inbatch_pos_score": 1.2256, "learning_rate": 3.477777777777778e-05, "loss": 2.7725, "norm_diff": 0.0229, "norm_loss": 0.0, "num_token_doc": 66.9413, "num_token_overlap": 17.8777, "num_token_query": 52.4571, "num_token_union": 73.8692, "num_word_context": 202.4272, "num_word_doc": 49.9679, "num_word_query": 40.0066, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2481.9997, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4968, "query_norm": 1.4926, "queue_k_norm": 1.5133, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4571, "sent_len_1": 66.9413, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7875, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 37400 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.7861, "doc_norm": 1.5213, "encoder_q-embeddings": 1760.511, "encoder_q-layer.0": 1140.1176, "encoder_q-layer.1": 1287.9542, "encoder_q-layer.10": 1965.6208, "encoder_q-layer.11": 3536.1458, "encoder_q-layer.2": 1533.0911, "encoder_q-layer.3": 1677.8986, "encoder_q-layer.4": 1740.571, "encoder_q-layer.5": 1830.9811, "encoder_q-layer.6": 1866.2638, "encoder_q-layer.7": 1900.5627, "encoder_q-layer.8": 2110.0503, "encoder_q-layer.9": 1814.5753, "epoch": 0.37, "inbatch_neg_score": 0.4958, "inbatch_pos_score": 1.2354, "learning_rate": 3.472222222222222e-05, "loss": 2.7861, "norm_diff": 0.0189, "norm_loss": 0.0, "num_token_doc": 66.8341, "num_token_overlap": 17.7806, "num_token_query": 52.2366, "num_token_union": 73.7698, "num_word_context": 202.2923, "num_word_doc": 49.8567, "num_word_query": 39.8336, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2808.8574, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4958, "query_norm": 1.5024, "queue_k_norm": 1.5147, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2366, "sent_len_1": 66.8341, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3988, "stdk": 0.049, "stdq": 0.0467, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 37500 }, { "accuracy": 59.3262, "active_queue_size": 16384.0, "cl_loss": 2.7831, "doc_norm": 1.5143, "encoder_q-embeddings": 3325.3374, "encoder_q-layer.0": 2254.9788, "encoder_q-layer.1": 2694.9998, "encoder_q-layer.10": 1861.6674, "encoder_q-layer.11": 3606.3943, "encoder_q-layer.2": 3114.0894, "encoder_q-layer.3": 3672.1621, "encoder_q-layer.4": 4439.8311, "encoder_q-layer.5": 4725.8213, "encoder_q-layer.6": 3919.749, "encoder_q-layer.7": 2938.9888, "encoder_q-layer.8": 2293.2581, "encoder_q-layer.9": 1770.895, "epoch": 0.37, "inbatch_neg_score": 0.4864, "inbatch_pos_score": 1.2051, "learning_rate": 3.466666666666667e-05, "loss": 2.7831, "norm_diff": 0.0411, "norm_loss": 0.0, "num_token_doc": 66.8279, "num_token_overlap": 17.8248, "num_token_query": 52.2428, "num_token_union": 73.6754, "num_word_context": 202.163, "num_word_doc": 49.8659, "num_word_query": 39.8425, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4673.4331, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4866, "query_norm": 1.4731, "queue_k_norm": 1.5156, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2428, "sent_len_1": 66.8279, "sent_len_max_0": 128.0, "sent_len_max_1": 206.3825, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 37600 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.7756, "doc_norm": 1.518, "encoder_q-embeddings": 2800.4434, "encoder_q-layer.0": 1864.3702, "encoder_q-layer.1": 2299.8, "encoder_q-layer.10": 1680.4995, "encoder_q-layer.11": 3353.4363, "encoder_q-layer.2": 2774.2, "encoder_q-layer.3": 3266.7644, "encoder_q-layer.4": 4043.1938, "encoder_q-layer.5": 3950.333, "encoder_q-layer.6": 3218.0754, "encoder_q-layer.7": 2925.7405, "encoder_q-layer.8": 2345.4329, "encoder_q-layer.9": 1858.0416, "epoch": 0.37, "inbatch_neg_score": 0.472, "inbatch_pos_score": 1.2031, "learning_rate": 3.4611111111111114e-05, "loss": 2.7756, "norm_diff": 0.0475, "norm_loss": 0.0, "num_token_doc": 66.7171, "num_token_overlap": 17.7924, "num_token_query": 52.2471, "num_token_union": 73.6799, "num_word_context": 202.1613, "num_word_doc": 49.7887, "num_word_query": 39.8428, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4156.4854, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4712, "query_norm": 1.4705, "queue_k_norm": 1.5168, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2471, "sent_len_1": 66.7171, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6662, "stdk": 0.0488, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 37700 }, { "accuracy": 59.4238, "active_queue_size": 16384.0, "cl_loss": 2.7871, "doc_norm": 1.5141, "encoder_q-embeddings": 704.1536, "encoder_q-layer.0": 457.3564, "encoder_q-layer.1": 500.0315, "encoder_q-layer.10": 853.962, "encoder_q-layer.11": 1663.4602, "encoder_q-layer.2": 592.4049, "encoder_q-layer.3": 635.1364, "encoder_q-layer.4": 649.3397, "encoder_q-layer.5": 657.168, "encoder_q-layer.6": 737.3234, "encoder_q-layer.7": 835.6818, "encoder_q-layer.8": 946.1464, "encoder_q-layer.9": 822.7903, "epoch": 0.37, "inbatch_neg_score": 0.4834, "inbatch_pos_score": 1.208, "learning_rate": 3.4555555555555556e-05, "loss": 2.7871, "norm_diff": 0.0291, "norm_loss": 0.0, "num_token_doc": 66.6672, "num_token_overlap": 17.7936, "num_token_query": 52.2616, "num_token_union": 73.6265, "num_word_context": 202.0884, "num_word_doc": 49.729, "num_word_query": 39.8618, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1215.8178, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4819, "query_norm": 1.485, "queue_k_norm": 1.5168, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2616, "sent_len_1": 66.6672, "sent_len_max_0": 128.0, "sent_len_max_1": 211.24, "stdk": 0.0487, "stdq": 0.0463, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 37800 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 2.7742, "doc_norm": 1.5178, "encoder_q-embeddings": 715.4501, "encoder_q-layer.0": 459.123, "encoder_q-layer.1": 516.4046, "encoder_q-layer.10": 854.3423, "encoder_q-layer.11": 1754.3245, "encoder_q-layer.2": 584.2518, "encoder_q-layer.3": 616.9882, "encoder_q-layer.4": 657.373, "encoder_q-layer.5": 682.2473, "encoder_q-layer.6": 744.916, "encoder_q-layer.7": 857.5536, "encoder_q-layer.8": 973.8214, "encoder_q-layer.9": 857.4933, "epoch": 0.37, "inbatch_neg_score": 0.4635, "inbatch_pos_score": 1.1895, "learning_rate": 3.45e-05, "loss": 2.7742, "norm_diff": 0.0475, "norm_loss": 0.0, "num_token_doc": 66.8227, "num_token_overlap": 17.8015, "num_token_query": 52.2909, "num_token_union": 73.7326, "num_word_context": 202.3584, "num_word_doc": 49.871, "num_word_query": 39.8747, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1239.4878, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4651, "query_norm": 1.4703, "queue_k_norm": 1.5164, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2909, "sent_len_1": 66.8227, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9075, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 37900 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.7903, "doc_norm": 1.5177, "encoder_q-embeddings": 737.1533, "encoder_q-layer.0": 468.9995, "encoder_q-layer.1": 502.2184, "encoder_q-layer.10": 924.3749, "encoder_q-layer.11": 1815.7142, "encoder_q-layer.2": 578.5483, "encoder_q-layer.3": 627.5989, "encoder_q-layer.4": 681.3181, "encoder_q-layer.5": 697.8524, "encoder_q-layer.6": 795.4872, "encoder_q-layer.7": 880.626, "encoder_q-layer.8": 991.2458, "encoder_q-layer.9": 860.0487, "epoch": 0.37, "inbatch_neg_score": 0.4654, "inbatch_pos_score": 1.2021, "learning_rate": 3.444444444444445e-05, "loss": 2.7903, "norm_diff": 0.0376, "norm_loss": 0.0, "num_token_doc": 66.6504, "num_token_overlap": 17.7974, "num_token_query": 52.2833, "num_token_union": 73.6527, "num_word_context": 202.1288, "num_word_doc": 49.7436, "num_word_query": 39.8757, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1284.344, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4683, "query_norm": 1.4801, "queue_k_norm": 1.5168, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2833, "sent_len_1": 66.6504, "sent_len_max_0": 128.0, "sent_len_max_1": 207.2812, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 38000 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.7797, "doc_norm": 1.5164, "encoder_q-embeddings": 818.1771, "encoder_q-layer.0": 506.5197, "encoder_q-layer.1": 565.0356, "encoder_q-layer.10": 900.2966, "encoder_q-layer.11": 1791.0981, "encoder_q-layer.2": 647.5748, "encoder_q-layer.3": 700.4942, "encoder_q-layer.4": 815.8332, "encoder_q-layer.5": 840.5032, "encoder_q-layer.6": 875.942, "encoder_q-layer.7": 1038.7594, "encoder_q-layer.8": 1098.2308, "encoder_q-layer.9": 906.919, "epoch": 0.37, "inbatch_neg_score": 0.4633, "inbatch_pos_score": 1.1855, "learning_rate": 3.438888888888889e-05, "loss": 2.7797, "norm_diff": 0.041, "norm_loss": 0.0, "num_token_doc": 66.9486, "num_token_overlap": 17.7933, "num_token_query": 52.2984, "num_token_union": 73.8438, "num_word_context": 202.6151, "num_word_doc": 49.9476, "num_word_query": 39.8957, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1346.8972, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4644, "query_norm": 1.4754, "queue_k_norm": 1.5177, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2984, "sent_len_1": 66.9486, "sent_len_max_0": 128.0, "sent_len_max_1": 208.845, "stdk": 0.0488, "stdq": 0.0465, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 38100 }, { "accuracy": 60.1074, "active_queue_size": 16384.0, "cl_loss": 2.7765, "doc_norm": 1.5203, "encoder_q-embeddings": 773.9361, "encoder_q-layer.0": 456.438, "encoder_q-layer.1": 511.4708, "encoder_q-layer.10": 871.9656, "encoder_q-layer.11": 1781.1857, "encoder_q-layer.2": 560.6708, "encoder_q-layer.3": 601.2351, "encoder_q-layer.4": 664.4338, "encoder_q-layer.5": 657.6439, "encoder_q-layer.6": 716.6677, "encoder_q-layer.7": 847.8203, "encoder_q-layer.8": 1042.8285, "encoder_q-layer.9": 884.5768, "epoch": 0.37, "inbatch_neg_score": 0.4532, "inbatch_pos_score": 1.1758, "learning_rate": 3.433333333333333e-05, "loss": 2.7765, "norm_diff": 0.0595, "norm_loss": 0.0, "num_token_doc": 66.802, "num_token_overlap": 17.8345, "num_token_query": 52.2661, "num_token_union": 73.6955, "num_word_context": 202.1971, "num_word_doc": 49.8696, "num_word_query": 39.8606, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1262.3941, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4543, "query_norm": 1.4609, "queue_k_norm": 1.5165, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2661, "sent_len_1": 66.802, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6925, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 38200 }, { "accuracy": 57.5684, "active_queue_size": 16384.0, "cl_loss": 2.7706, "doc_norm": 1.5148, "encoder_q-embeddings": 807.9049, "encoder_q-layer.0": 541.8699, "encoder_q-layer.1": 594.7387, "encoder_q-layer.10": 842.0933, "encoder_q-layer.11": 1706.8052, "encoder_q-layer.2": 641.0423, "encoder_q-layer.3": 659.3702, "encoder_q-layer.4": 688.4375, "encoder_q-layer.5": 681.2171, "encoder_q-layer.6": 754.3826, "encoder_q-layer.7": 841.3557, "encoder_q-layer.8": 969.5592, "encoder_q-layer.9": 825.3474, "epoch": 0.37, "inbatch_neg_score": 0.4507, "inbatch_pos_score": 1.1572, "learning_rate": 3.427777777777778e-05, "loss": 2.7706, "norm_diff": 0.0598, "norm_loss": 0.0, "num_token_doc": 66.8743, "num_token_overlap": 17.8045, "num_token_query": 52.2544, "num_token_union": 73.7351, "num_word_context": 202.111, "num_word_doc": 49.8845, "num_word_query": 39.8583, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1252.9422, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4507, "query_norm": 1.455, "queue_k_norm": 1.5169, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2544, "sent_len_1": 66.8743, "sent_len_max_0": 128.0, "sent_len_max_1": 210.5, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 38300 }, { "accuracy": 60.1074, "active_queue_size": 16384.0, "cl_loss": 2.7738, "doc_norm": 1.5159, "encoder_q-embeddings": 1256.0845, "encoder_q-layer.0": 833.6862, "encoder_q-layer.1": 933.8397, "encoder_q-layer.10": 877.3842, "encoder_q-layer.11": 1709.0394, "encoder_q-layer.2": 1049.7711, "encoder_q-layer.3": 1210.5084, "encoder_q-layer.4": 1409.0643, "encoder_q-layer.5": 1506.4941, "encoder_q-layer.6": 1508.311, "encoder_q-layer.7": 1458.1489, "encoder_q-layer.8": 1407.1453, "encoder_q-layer.9": 963.9069, "epoch": 0.37, "inbatch_neg_score": 0.4458, "inbatch_pos_score": 1.1787, "learning_rate": 3.4222222222222224e-05, "loss": 2.7738, "norm_diff": 0.0394, "norm_loss": 0.0, "num_token_doc": 66.8244, "num_token_overlap": 17.8324, "num_token_query": 52.359, "num_token_union": 73.7576, "num_word_context": 202.3976, "num_word_doc": 49.8632, "num_word_query": 39.9295, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1849.3508, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.446, "query_norm": 1.4765, "queue_k_norm": 1.5168, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.359, "sent_len_1": 66.8244, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2025, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 38400 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 2.7778, "doc_norm": 1.5121, "encoder_q-embeddings": 752.6768, "encoder_q-layer.0": 478.3233, "encoder_q-layer.1": 531.5424, "encoder_q-layer.10": 906.8317, "encoder_q-layer.11": 1891.5598, "encoder_q-layer.2": 603.894, "encoder_q-layer.3": 621.201, "encoder_q-layer.4": 694.769, "encoder_q-layer.5": 723.6716, "encoder_q-layer.6": 832.9442, "encoder_q-layer.7": 899.3281, "encoder_q-layer.8": 1016.8947, "encoder_q-layer.9": 933.9103, "epoch": 0.38, "inbatch_neg_score": 0.4459, "inbatch_pos_score": 1.1445, "learning_rate": 3.4166666666666666e-05, "loss": 2.7778, "norm_diff": 0.0427, "norm_loss": 0.0, "num_token_doc": 66.7099, "num_token_overlap": 17.8241, "num_token_query": 52.4, "num_token_union": 73.7606, "num_word_context": 202.5006, "num_word_doc": 49.7939, "num_word_query": 39.9807, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1314.6889, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4451, "query_norm": 1.4694, "queue_k_norm": 1.5176, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4, "sent_len_1": 66.7099, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0213, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 38500 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.77, "doc_norm": 1.5144, "encoder_q-embeddings": 2397.5742, "encoder_q-layer.0": 1624.6595, "encoder_q-layer.1": 2342.9575, "encoder_q-layer.10": 874.2534, "encoder_q-layer.11": 1703.2195, "encoder_q-layer.2": 2990.4265, "encoder_q-layer.3": 2974.8105, "encoder_q-layer.4": 2840.8806, "encoder_q-layer.5": 2639.9297, "encoder_q-layer.6": 2044.3232, "encoder_q-layer.7": 1424.9795, "encoder_q-layer.8": 1088.8033, "encoder_q-layer.9": 890.3834, "epoch": 0.38, "inbatch_neg_score": 0.4436, "inbatch_pos_score": 1.1826, "learning_rate": 3.411111111111111e-05, "loss": 2.77, "norm_diff": 0.0272, "norm_loss": 0.0, "num_token_doc": 66.8874, "num_token_overlap": 17.8509, "num_token_query": 52.2504, "num_token_union": 73.7388, "num_word_context": 202.2475, "num_word_doc": 49.9485, "num_word_query": 39.8709, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3106.9371, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4426, "query_norm": 1.4872, "queue_k_norm": 1.5152, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2504, "sent_len_1": 66.8874, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3537, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 38600 }, { "accuracy": 59.5215, "active_queue_size": 16384.0, "cl_loss": 2.7671, "doc_norm": 1.5175, "encoder_q-embeddings": 744.1347, "encoder_q-layer.0": 478.4884, "encoder_q-layer.1": 526.8792, "encoder_q-layer.10": 865.7369, "encoder_q-layer.11": 1669.5265, "encoder_q-layer.2": 597.9651, "encoder_q-layer.3": 650.3354, "encoder_q-layer.4": 710.5443, "encoder_q-layer.5": 736.8063, "encoder_q-layer.6": 809.5872, "encoder_q-layer.7": 906.8026, "encoder_q-layer.8": 954.8195, "encoder_q-layer.9": 848.9268, "epoch": 0.38, "inbatch_neg_score": 0.4446, "inbatch_pos_score": 1.1797, "learning_rate": 3.405555555555556e-05, "loss": 2.7671, "norm_diff": 0.0269, "norm_loss": 0.0, "num_token_doc": 66.7727, "num_token_overlap": 17.8615, "num_token_query": 52.3356, "num_token_union": 73.7276, "num_word_context": 202.1703, "num_word_doc": 49.8076, "num_word_query": 39.9309, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1248.2598, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.446, "query_norm": 1.4906, "queue_k_norm": 1.5153, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3356, "sent_len_1": 66.7727, "sent_len_max_0": 128.0, "sent_len_max_1": 208.85, "stdk": 0.049, "stdq": 0.0465, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 38700 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.7751, "doc_norm": 1.5153, "encoder_q-embeddings": 731.8977, "encoder_q-layer.0": 467.3802, "encoder_q-layer.1": 520.3447, "encoder_q-layer.10": 842.1754, "encoder_q-layer.11": 1660.5272, "encoder_q-layer.2": 617.9548, "encoder_q-layer.3": 655.828, "encoder_q-layer.4": 677.6625, "encoder_q-layer.5": 712.5768, "encoder_q-layer.6": 775.7417, "encoder_q-layer.7": 863.6375, "encoder_q-layer.8": 959.2613, "encoder_q-layer.9": 823.456, "epoch": 0.38, "inbatch_neg_score": 0.4433, "inbatch_pos_score": 1.1738, "learning_rate": 3.4000000000000007e-05, "loss": 2.7751, "norm_diff": 0.0241, "norm_loss": 0.0, "num_token_doc": 66.6835, "num_token_overlap": 17.7916, "num_token_query": 52.2058, "num_token_union": 73.6457, "num_word_context": 202.1347, "num_word_doc": 49.7908, "num_word_query": 39.8045, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1219.913, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4443, "query_norm": 1.4912, "queue_k_norm": 1.5131, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2058, "sent_len_1": 66.6835, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8613, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 38800 }, { "accuracy": 59.4238, "active_queue_size": 16384.0, "cl_loss": 2.7587, "doc_norm": 1.5129, "encoder_q-embeddings": 683.5405, "encoder_q-layer.0": 433.668, "encoder_q-layer.1": 464.5451, "encoder_q-layer.10": 849.2268, "encoder_q-layer.11": 1692.4672, "encoder_q-layer.2": 532.6113, "encoder_q-layer.3": 586.6762, "encoder_q-layer.4": 642.4884, "encoder_q-layer.5": 644.3027, "encoder_q-layer.6": 721.3796, "encoder_q-layer.7": 826.2115, "encoder_q-layer.8": 968.8169, "encoder_q-layer.9": 862.1475, "epoch": 0.38, "inbatch_neg_score": 0.442, "inbatch_pos_score": 1.1816, "learning_rate": 3.394444444444444e-05, "loss": 2.7587, "norm_diff": 0.0088, "norm_loss": 0.0, "num_token_doc": 66.7262, "num_token_overlap": 17.8135, "num_token_query": 52.1984, "num_token_union": 73.662, "num_word_context": 202.1241, "num_word_doc": 49.8086, "num_word_query": 39.7821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1211.0917, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4429, "query_norm": 1.512, "queue_k_norm": 1.5149, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1984, "sent_len_1": 66.7262, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6912, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 38900 }, { "accuracy": 59.7168, "active_queue_size": 16384.0, "cl_loss": 2.7671, "doc_norm": 1.5148, "encoder_q-embeddings": 761.7266, "encoder_q-layer.0": 454.5829, "encoder_q-layer.1": 487.8683, "encoder_q-layer.10": 833.4528, "encoder_q-layer.11": 1648.1265, "encoder_q-layer.2": 545.8185, "encoder_q-layer.3": 601.7484, "encoder_q-layer.4": 630.262, "encoder_q-layer.5": 632.6024, "encoder_q-layer.6": 730.9082, "encoder_q-layer.7": 837.5264, "encoder_q-layer.8": 936.7208, "encoder_q-layer.9": 870.2997, "epoch": 0.38, "inbatch_neg_score": 0.4428, "inbatch_pos_score": 1.166, "learning_rate": 3.388888888888889e-05, "loss": 2.7671, "norm_diff": 0.0123, "norm_loss": 0.0, "num_token_doc": 66.8041, "num_token_overlap": 17.8921, "num_token_query": 52.3219, "num_token_union": 73.6958, "num_word_context": 202.3258, "num_word_doc": 49.8621, "num_word_query": 39.9254, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1198.7283, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4429, "query_norm": 1.5025, "queue_k_norm": 1.514, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3219, "sent_len_1": 66.8041, "sent_len_max_0": 128.0, "sent_len_max_1": 205.9162, "stdk": 0.0489, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 39000 }, { "accuracy": 59.3262, "active_queue_size": 16384.0, "cl_loss": 2.7931, "doc_norm": 1.515, "encoder_q-embeddings": 1012.9426, "encoder_q-layer.0": 657.4707, "encoder_q-layer.1": 791.3436, "encoder_q-layer.10": 862.36, "encoder_q-layer.11": 1759.7729, "encoder_q-layer.2": 913.5221, "encoder_q-layer.3": 970.9822, "encoder_q-layer.4": 1057.5935, "encoder_q-layer.5": 1007.9315, "encoder_q-layer.6": 880.7989, "encoder_q-layer.7": 847.1313, "encoder_q-layer.8": 943.5993, "encoder_q-layer.9": 848.4861, "epoch": 0.38, "inbatch_neg_score": 0.4471, "inbatch_pos_score": 1.1816, "learning_rate": 3.3833333333333334e-05, "loss": 2.7931, "norm_diff": 0.008, "norm_loss": 0.0, "num_token_doc": 66.8607, "num_token_overlap": 17.7957, "num_token_query": 52.2086, "num_token_union": 73.7461, "num_word_context": 202.4549, "num_word_doc": 49.9211, "num_word_query": 39.8079, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1473.2578, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4478, "query_norm": 1.5138, "queue_k_norm": 1.5145, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2086, "sent_len_1": 66.8607, "sent_len_max_0": 128.0, "sent_len_max_1": 206.9487, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 39100 }, { "accuracy": 59.0332, "active_queue_size": 16384.0, "cl_loss": 2.781, "doc_norm": 1.5135, "encoder_q-embeddings": 2261.3784, "encoder_q-layer.0": 1582.0854, "encoder_q-layer.1": 1843.4022, "encoder_q-layer.10": 955.2397, "encoder_q-layer.11": 1832.4028, "encoder_q-layer.2": 2217.4883, "encoder_q-layer.3": 2490.2588, "encoder_q-layer.4": 2594.0376, "encoder_q-layer.5": 2543.0269, "encoder_q-layer.6": 2644.1294, "encoder_q-layer.7": 2103.0659, "encoder_q-layer.8": 1665.0305, "encoder_q-layer.9": 1040.0254, "epoch": 0.38, "inbatch_neg_score": 0.4585, "inbatch_pos_score": 1.1797, "learning_rate": 3.377777777777778e-05, "loss": 2.781, "norm_diff": 0.0105, "norm_loss": 0.0, "num_token_doc": 66.5575, "num_token_overlap": 17.7278, "num_token_query": 52.1196, "num_token_union": 73.5734, "num_word_context": 202.0624, "num_word_doc": 49.674, "num_word_query": 39.7382, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3015.0173, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4587, "query_norm": 1.513, "queue_k_norm": 1.5134, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1196, "sent_len_1": 66.5575, "sent_len_max_0": 128.0, "sent_len_max_1": 207.725, "stdk": 0.0489, "stdq": 0.0467, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 39200 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.7666, "doc_norm": 1.5222, "encoder_q-embeddings": 700.2682, "encoder_q-layer.0": 440.8958, "encoder_q-layer.1": 479.0746, "encoder_q-layer.10": 880.7131, "encoder_q-layer.11": 1790.2395, "encoder_q-layer.2": 557.0383, "encoder_q-layer.3": 595.6198, "encoder_q-layer.4": 626.0046, "encoder_q-layer.5": 648.4644, "encoder_q-layer.6": 722.053, "encoder_q-layer.7": 844.7126, "encoder_q-layer.8": 946.5356, "encoder_q-layer.9": 911.4745, "epoch": 0.38, "inbatch_neg_score": 0.4663, "inbatch_pos_score": 1.1885, "learning_rate": 3.3722222222222225e-05, "loss": 2.7666, "norm_diff": 0.025, "norm_loss": 0.0, "num_token_doc": 66.8151, "num_token_overlap": 17.7716, "num_token_query": 52.1962, "num_token_union": 73.7139, "num_word_context": 202.0705, "num_word_doc": 49.8471, "num_word_query": 39.8132, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1217.1531, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4656, "query_norm": 1.4972, "queue_k_norm": 1.5154, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1962, "sent_len_1": 66.8151, "sent_len_max_0": 128.0, "sent_len_max_1": 210.9663, "stdk": 0.0492, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 39300 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.7805, "doc_norm": 1.5162, "encoder_q-embeddings": 650.2269, "encoder_q-layer.0": 417.6877, "encoder_q-layer.1": 474.7706, "encoder_q-layer.10": 1021.8618, "encoder_q-layer.11": 1892.0396, "encoder_q-layer.2": 527.4586, "encoder_q-layer.3": 546.152, "encoder_q-layer.4": 570.3044, "encoder_q-layer.5": 602.1277, "encoder_q-layer.6": 694.1542, "encoder_q-layer.7": 859.6134, "encoder_q-layer.8": 1084.168, "encoder_q-layer.9": 945.1539, "epoch": 0.38, "inbatch_neg_score": 0.4667, "inbatch_pos_score": 1.1865, "learning_rate": 3.366666666666667e-05, "loss": 2.7805, "norm_diff": 0.0352, "norm_loss": 0.0, "num_token_doc": 66.681, "num_token_overlap": 17.7621, "num_token_query": 52.2354, "num_token_union": 73.6585, "num_word_context": 202.2905, "num_word_doc": 49.7429, "num_word_query": 39.8303, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1228.2424, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4673, "query_norm": 1.481, "queue_k_norm": 1.5151, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2354, "sent_len_1": 66.681, "sent_len_max_0": 128.0, "sent_len_max_1": 207.7575, "stdk": 0.049, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 39400 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.7734, "doc_norm": 1.5125, "encoder_q-embeddings": 802.5237, "encoder_q-layer.0": 497.7748, "encoder_q-layer.1": 531.1121, "encoder_q-layer.10": 816.4672, "encoder_q-layer.11": 1762.752, "encoder_q-layer.2": 582.0889, "encoder_q-layer.3": 627.2834, "encoder_q-layer.4": 671.232, "encoder_q-layer.5": 639.391, "encoder_q-layer.6": 722.6101, "encoder_q-layer.7": 814.5369, "encoder_q-layer.8": 894.364, "encoder_q-layer.9": 829.2538, "epoch": 0.39, "inbatch_neg_score": 0.473, "inbatch_pos_score": 1.2051, "learning_rate": 3.3611111111111116e-05, "loss": 2.7734, "norm_diff": 0.0132, "norm_loss": 0.0, "num_token_doc": 66.7404, "num_token_overlap": 17.8398, "num_token_query": 52.24, "num_token_union": 73.6437, "num_word_context": 202.1695, "num_word_doc": 49.7989, "num_word_query": 39.821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1244.6554, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4734, "query_norm": 1.4993, "queue_k_norm": 1.5151, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.24, "sent_len_1": 66.7404, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7163, "stdk": 0.0488, "stdq": 0.0469, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 39500 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.776, "doc_norm": 1.5174, "encoder_q-embeddings": 739.3759, "encoder_q-layer.0": 473.2685, "encoder_q-layer.1": 516.6495, "encoder_q-layer.10": 846.726, "encoder_q-layer.11": 1830.7466, "encoder_q-layer.2": 625.4587, "encoder_q-layer.3": 649.4143, "encoder_q-layer.4": 688.2988, "encoder_q-layer.5": 720.7905, "encoder_q-layer.6": 803.9045, "encoder_q-layer.7": 836.9554, "encoder_q-layer.8": 976.2854, "encoder_q-layer.9": 880.9634, "epoch": 0.39, "inbatch_neg_score": 0.4698, "inbatch_pos_score": 1.208, "learning_rate": 3.355555555555556e-05, "loss": 2.776, "norm_diff": 0.0359, "norm_loss": 0.0, "num_token_doc": 66.5838, "num_token_overlap": 17.7786, "num_token_query": 52.0926, "num_token_union": 73.4788, "num_word_context": 201.7275, "num_word_doc": 49.6671, "num_word_query": 39.7233, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1290.8919, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.47, "query_norm": 1.4814, "queue_k_norm": 1.5143, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0926, "sent_len_1": 66.5838, "sent_len_max_0": 128.0, "sent_len_max_1": 209.49, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 39600 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.7751, "doc_norm": 1.5175, "encoder_q-embeddings": 694.6929, "encoder_q-layer.0": 442.1148, "encoder_q-layer.1": 474.6865, "encoder_q-layer.10": 870.5443, "encoder_q-layer.11": 1810.0135, "encoder_q-layer.2": 535.701, "encoder_q-layer.3": 574.872, "encoder_q-layer.4": 627.9393, "encoder_q-layer.5": 618.3464, "encoder_q-layer.6": 741.6048, "encoder_q-layer.7": 821.8391, "encoder_q-layer.8": 954.7904, "encoder_q-layer.9": 842.2806, "epoch": 0.39, "inbatch_neg_score": 0.4781, "inbatch_pos_score": 1.1934, "learning_rate": 3.35e-05, "loss": 2.7751, "norm_diff": 0.0493, "norm_loss": 0.0, "num_token_doc": 66.8062, "num_token_overlap": 17.8334, "num_token_query": 52.3854, "num_token_union": 73.7892, "num_word_context": 202.1739, "num_word_doc": 49.8337, "num_word_query": 39.9574, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1222.1148, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4771, "query_norm": 1.4682, "queue_k_norm": 1.5161, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3854, "sent_len_1": 66.8062, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1425, "stdk": 0.0489, "stdq": 0.0456, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 39700 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 2.7689, "doc_norm": 1.5165, "encoder_q-embeddings": 2082.8645, "encoder_q-layer.0": 1326.1284, "encoder_q-layer.1": 1477.0488, "encoder_q-layer.10": 1986.5762, "encoder_q-layer.11": 3792.8298, "encoder_q-layer.2": 1837.9409, "encoder_q-layer.3": 2070.2314, "encoder_q-layer.4": 2362.1565, "encoder_q-layer.5": 2271.9644, "encoder_q-layer.6": 2517.8916, "encoder_q-layer.7": 2340.1382, "encoder_q-layer.8": 2432.0051, "encoder_q-layer.9": 1983.5442, "epoch": 0.39, "inbatch_neg_score": 0.4721, "inbatch_pos_score": 1.1875, "learning_rate": 3.3444444444444443e-05, "loss": 2.7689, "norm_diff": 0.0384, "norm_loss": 0.0, "num_token_doc": 66.7126, "num_token_overlap": 17.8103, "num_token_query": 52.2689, "num_token_union": 73.6586, "num_word_context": 202.1422, "num_word_doc": 49.7823, "num_word_query": 39.8546, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3290.4185, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4724, "query_norm": 1.4781, "queue_k_norm": 1.5149, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2689, "sent_len_1": 66.7126, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3313, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 39800 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.7706, "doc_norm": 1.5192, "encoder_q-embeddings": 1928.3866, "encoder_q-layer.0": 1284.5458, "encoder_q-layer.1": 1481.0649, "encoder_q-layer.10": 2425.9316, "encoder_q-layer.11": 4036.084, "encoder_q-layer.2": 1854.1309, "encoder_q-layer.3": 2162.6355, "encoder_q-layer.4": 2379.6475, "encoder_q-layer.5": 2364.873, "encoder_q-layer.6": 2504.9722, "encoder_q-layer.7": 2531.4155, "encoder_q-layer.8": 2866.8401, "encoder_q-layer.9": 2333.8359, "epoch": 0.39, "inbatch_neg_score": 0.481, "inbatch_pos_score": 1.1963, "learning_rate": 3.338888888888889e-05, "loss": 2.7706, "norm_diff": 0.0607, "norm_loss": 0.0, "num_token_doc": 66.7719, "num_token_overlap": 17.8052, "num_token_query": 52.2516, "num_token_union": 73.7015, "num_word_context": 202.2237, "num_word_doc": 49.822, "num_word_query": 39.8532, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3405.3034, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4827, "query_norm": 1.4585, "queue_k_norm": 1.5166, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2516, "sent_len_1": 66.7719, "sent_len_max_0": 128.0, "sent_len_max_1": 207.76, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 39900 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 2.7684, "doc_norm": 1.5152, "encoder_q-embeddings": 2178.8962, "encoder_q-layer.0": 1435.9437, "encoder_q-layer.1": 1667.7112, "encoder_q-layer.10": 1833.4373, "encoder_q-layer.11": 3596.5513, "encoder_q-layer.2": 1930.2096, "encoder_q-layer.3": 2117.0544, "encoder_q-layer.4": 2306.9229, "encoder_q-layer.5": 2400.5469, "encoder_q-layer.6": 2336.0012, "encoder_q-layer.7": 2361.1819, "encoder_q-layer.8": 2215.5591, "encoder_q-layer.9": 1870.4149, "epoch": 0.39, "inbatch_neg_score": 0.4871, "inbatch_pos_score": 1.1992, "learning_rate": 3.3333333333333335e-05, "loss": 2.7684, "norm_diff": 0.027, "norm_loss": 0.0, "num_token_doc": 66.7258, "num_token_overlap": 17.8424, "num_token_query": 52.3671, "num_token_union": 73.6988, "num_word_context": 202.3417, "num_word_doc": 49.7554, "num_word_query": 39.957, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3247.9148, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4871, "query_norm": 1.4882, "queue_k_norm": 1.5167, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3671, "sent_len_1": 66.7258, "sent_len_max_0": 128.0, "sent_len_max_1": 210.1225, "stdk": 0.0488, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 40000 }, { "dev_runtime": 26.0284, "dev_samples_per_second": 1.229, "dev_steps_per_second": 0.038, "epoch": 0.39, "step": 40000, "test_accuracy": 93.6767578125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3606007397174835, "test_doc_norm": 1.4756016731262207, "test_inbatch_neg_score": 0.7679526805877686, "test_inbatch_pos_score": 1.7616137266159058, "test_loss": 0.3606007397174835, "test_loss_align": 1.040766716003418, "test_loss_unif": 3.533524990081787, "test_loss_unif_q@queue": 3.533524513244629, "test_norm_diff": 0.06080520153045654, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.48246291279792786, "test_query_norm": 1.5364067554473877, "test_queue_k_norm": 1.5162572860717773, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04314829409122467, "test_stdq": 0.043900907039642334, "test_stdqueue_k": 0.0488920621573925, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.0284, "dev_samples_per_second": 1.229, "dev_steps_per_second": 0.038, "epoch": 0.39, "eval_beir-arguana_ndcg@10": 0.35667, "eval_beir-arguana_recall@10": 0.60526, "eval_beir-arguana_recall@100": 0.90185, "eval_beir-arguana_recall@20": 0.73826, "eval_beir-avg_ndcg@10": 0.370676, "eval_beir-avg_recall@10": 0.44005974999999997, "eval_beir-avg_recall@100": 0.6220187500000001, "eval_beir-avg_recall@20": 0.5015974166666667, "eval_beir-cqadupstack_ndcg@10": 0.27117, "eval_beir-cqadupstack_recall@10": 0.3612875, "eval_beir-cqadupstack_recall@100": 0.5885275, "eval_beir-cqadupstack_recall@20": 0.4279041666666667, "eval_beir-fiqa_ndcg@10": 0.24443, "eval_beir-fiqa_recall@10": 0.30143, "eval_beir-fiqa_recall@100": 0.57744, "eval_beir-fiqa_recall@20": 0.37608, "eval_beir-nfcorpus_ndcg@10": 0.27725, "eval_beir-nfcorpus_recall@10": 0.13282, "eval_beir-nfcorpus_recall@100": 0.25888, "eval_beir-nfcorpus_recall@20": 0.16679, "eval_beir-nq_ndcg@10": 0.26497, "eval_beir-nq_recall@10": 0.43427, "eval_beir-nq_recall@100": 0.78165, "eval_beir-nq_recall@20": 0.55299, "eval_beir-quora_ndcg@10": 0.76474, "eval_beir-quora_recall@10": 0.87497, "eval_beir-quora_recall@100": 0.97465, "eval_beir-quora_recall@20": 0.92292, "eval_beir-scidocs_ndcg@10": 0.15207, "eval_beir-scidocs_recall@10": 0.15492, "eval_beir-scidocs_recall@100": 0.35635, "eval_beir-scidocs_recall@20": 0.21102, "eval_beir-scifact_ndcg@10": 0.63115, "eval_beir-scifact_recall@10": 0.79383, "eval_beir-scifact_recall@100": 0.91156, "eval_beir-scifact_recall@20": 0.84078, "eval_beir-trec-covid_ndcg@10": 0.5463, "eval_beir-trec-covid_recall@10": 0.604, "eval_beir-trec-covid_recall@100": 0.4316, "eval_beir-trec-covid_recall@20": 0.562, "eval_beir-webis-touche2020_ndcg@10": 0.19801, "eval_beir-webis-touche2020_recall@10": 0.13781, "eval_beir-webis-touche2020_recall@100": 0.43768, "eval_beir-webis-touche2020_recall@20": 0.21723, "eval_senteval-avg_sts": 0.7565677053940356, "eval_senteval-sickr_spearman": 0.7370823047409877, "eval_senteval-stsb_spearman": 0.7760531060470836, "step": 40000, "test_accuracy": 93.6767578125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3606007397174835, "test_doc_norm": 1.4756016731262207, "test_inbatch_neg_score": 0.7679526805877686, "test_inbatch_pos_score": 1.7616137266159058, "test_loss": 0.3606007397174835, "test_loss_align": 1.040766716003418, "test_loss_unif": 3.533524990081787, "test_loss_unif_q@queue": 3.533524513244629, "test_norm_diff": 0.06080520153045654, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.48246291279792786, "test_query_norm": 1.5364067554473877, "test_queue_k_norm": 1.5162572860717773, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04314829409122467, "test_stdq": 0.043900907039642334, "test_stdqueue_k": 0.0488920621573925, "test_stdqueue_q": 0.0 }, { "accuracy": 59.6191, "active_queue_size": 16384.0, "cl_loss": 2.7699, "doc_norm": 1.5203, "encoder_q-embeddings": 1829.4222, "encoder_q-layer.0": 1177.3367, "encoder_q-layer.1": 1406.3943, "encoder_q-layer.10": 1729.2769, "encoder_q-layer.11": 3631.2937, "encoder_q-layer.2": 1626.4913, "encoder_q-layer.3": 1849.502, "encoder_q-layer.4": 1932.4717, "encoder_q-layer.5": 2052.5977, "encoder_q-layer.6": 2187.7939, "encoder_q-layer.7": 2277.1982, "encoder_q-layer.8": 2274.7139, "encoder_q-layer.9": 1910.8933, "epoch": 0.39, "inbatch_neg_score": 0.4888, "inbatch_pos_score": 1.2158, "learning_rate": 3.327777777777778e-05, "loss": 2.7699, "norm_diff": 0.0207, "norm_loss": 0.0, "num_token_doc": 66.721, "num_token_overlap": 17.8376, "num_token_query": 52.3299, "num_token_union": 73.6932, "num_word_context": 202.2106, "num_word_doc": 49.8079, "num_word_query": 39.9314, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3036.1181, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4885, "query_norm": 1.4996, "queue_k_norm": 1.5164, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3299, "sent_len_1": 66.721, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0263, "stdk": 0.049, "stdq": 0.0468, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 40100 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.7699, "doc_norm": 1.5146, "encoder_q-embeddings": 2047.8246, "encoder_q-layer.0": 1385.395, "encoder_q-layer.1": 1464.5687, "encoder_q-layer.10": 1757.4504, "encoder_q-layer.11": 3477.2341, "encoder_q-layer.2": 1852.8998, "encoder_q-layer.3": 1948.9829, "encoder_q-layer.4": 2083.3311, "encoder_q-layer.5": 2380.0488, "encoder_q-layer.6": 2303.7007, "encoder_q-layer.7": 2343.7737, "encoder_q-layer.8": 2167.8218, "encoder_q-layer.9": 1763.7537, "epoch": 0.39, "inbatch_neg_score": 0.4838, "inbatch_pos_score": 1.2031, "learning_rate": 3.322222222222222e-05, "loss": 2.7699, "norm_diff": 0.0351, "norm_loss": 0.0, "num_token_doc": 66.7742, "num_token_overlap": 17.8274, "num_token_query": 52.2981, "num_token_union": 73.7124, "num_word_context": 202.424, "num_word_doc": 49.8332, "num_word_query": 39.895, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3117.191, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4844, "query_norm": 1.4796, "queue_k_norm": 1.5175, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2981, "sent_len_1": 66.7742, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2225, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 40200 }, { "accuracy": 59.7168, "active_queue_size": 16384.0, "cl_loss": 2.7716, "doc_norm": 1.5172, "encoder_q-embeddings": 1316.1082, "encoder_q-layer.0": 826.0229, "encoder_q-layer.1": 895.4426, "encoder_q-layer.10": 1842.0488, "encoder_q-layer.11": 3652.8281, "encoder_q-layer.2": 1037.7555, "encoder_q-layer.3": 1083.5978, "encoder_q-layer.4": 1188.6808, "encoder_q-layer.5": 1209.4561, "encoder_q-layer.6": 1377.0765, "encoder_q-layer.7": 1521.9004, "encoder_q-layer.8": 1939.8652, "encoder_q-layer.9": 1811.0681, "epoch": 0.39, "inbatch_neg_score": 0.4828, "inbatch_pos_score": 1.2178, "learning_rate": 3.316666666666667e-05, "loss": 2.7716, "norm_diff": 0.0237, "norm_loss": 0.0, "num_token_doc": 66.6255, "num_token_overlap": 17.7823, "num_token_query": 52.3115, "num_token_union": 73.6317, "num_word_context": 202.0676, "num_word_doc": 49.6968, "num_word_query": 39.9104, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2371.531, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4839, "query_norm": 1.4935, "queue_k_norm": 1.5191, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3115, "sent_len_1": 66.6255, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3363, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 40300 }, { "accuracy": 60.3027, "active_queue_size": 16384.0, "cl_loss": 2.7581, "doc_norm": 1.5162, "encoder_q-embeddings": 1674.7433, "encoder_q-layer.0": 1067.1581, "encoder_q-layer.1": 1167.866, "encoder_q-layer.10": 1735.6876, "encoder_q-layer.11": 3459.9585, "encoder_q-layer.2": 1276.6471, "encoder_q-layer.3": 1334.6194, "encoder_q-layer.4": 1472.8422, "encoder_q-layer.5": 1552.9745, "encoder_q-layer.6": 1600.9922, "encoder_q-layer.7": 1692.188, "encoder_q-layer.8": 1904.1262, "encoder_q-layer.9": 1754.1263, "epoch": 0.39, "inbatch_neg_score": 0.4833, "inbatch_pos_score": 1.2148, "learning_rate": 3.311111111111112e-05, "loss": 2.7581, "norm_diff": 0.027, "norm_loss": 0.0, "num_token_doc": 66.7372, "num_token_overlap": 17.801, "num_token_query": 52.3328, "num_token_union": 73.7323, "num_word_context": 202.2586, "num_word_doc": 49.7905, "num_word_query": 39.9181, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2573.4728, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4839, "query_norm": 1.4893, "queue_k_norm": 1.5209, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3328, "sent_len_1": 66.7372, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2837, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 40400 }, { "accuracy": 58.252, "active_queue_size": 16384.0, "cl_loss": 2.7624, "doc_norm": 1.5216, "encoder_q-embeddings": 1436.6058, "encoder_q-layer.0": 945.7429, "encoder_q-layer.1": 1040.9393, "encoder_q-layer.10": 2000.2917, "encoder_q-layer.11": 3626.448, "encoder_q-layer.2": 1214.6982, "encoder_q-layer.3": 1329.6636, "encoder_q-layer.4": 1433.7699, "encoder_q-layer.5": 1540.4919, "encoder_q-layer.6": 1765.4712, "encoder_q-layer.7": 1920.7084, "encoder_q-layer.8": 2088.4084, "encoder_q-layer.9": 1785.0045, "epoch": 0.4, "inbatch_neg_score": 0.4803, "inbatch_pos_score": 1.2129, "learning_rate": 3.3055555555555553e-05, "loss": 2.7624, "norm_diff": 0.0244, "norm_loss": 0.0, "num_token_doc": 66.8029, "num_token_overlap": 17.7679, "num_token_query": 52.2149, "num_token_union": 73.7161, "num_word_context": 202.2131, "num_word_doc": 49.8404, "num_word_query": 39.7879, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2557.2405, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4797, "query_norm": 1.4972, "queue_k_norm": 1.5201, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2149, "sent_len_1": 66.8029, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1325, "stdk": 0.049, "stdq": 0.0469, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 40500 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.7603, "doc_norm": 1.5221, "encoder_q-embeddings": 1405.548, "encoder_q-layer.0": 904.3998, "encoder_q-layer.1": 974.3825, "encoder_q-layer.10": 1641.6133, "encoder_q-layer.11": 3272.5876, "encoder_q-layer.2": 1165.743, "encoder_q-layer.3": 1195.4733, "encoder_q-layer.4": 1313.9277, "encoder_q-layer.5": 1352.0887, "encoder_q-layer.6": 1464.9113, "encoder_q-layer.7": 1503.4125, "encoder_q-layer.8": 1823.3494, "encoder_q-layer.9": 1673.7517, "epoch": 0.4, "inbatch_neg_score": 0.477, "inbatch_pos_score": 1.2051, "learning_rate": 3.3e-05, "loss": 2.7603, "norm_diff": 0.0446, "norm_loss": 0.0, "num_token_doc": 66.8322, "num_token_overlap": 17.8486, "num_token_query": 52.38, "num_token_union": 73.771, "num_word_context": 202.5127, "num_word_doc": 49.8705, "num_word_query": 39.9281, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2336.0851, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4783, "query_norm": 1.4776, "queue_k_norm": 1.5189, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.38, "sent_len_1": 66.8322, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4263, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 40600 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.7531, "doc_norm": 1.5227, "encoder_q-embeddings": 1703.7738, "encoder_q-layer.0": 1115.2445, "encoder_q-layer.1": 1266.8779, "encoder_q-layer.10": 1664.8938, "encoder_q-layer.11": 3338.9084, "encoder_q-layer.2": 1476.9972, "encoder_q-layer.3": 1561.968, "encoder_q-layer.4": 1674.1443, "encoder_q-layer.5": 1848.7738, "encoder_q-layer.6": 2133.7639, "encoder_q-layer.7": 1923.9628, "encoder_q-layer.8": 1890.4471, "encoder_q-layer.9": 1712.1017, "epoch": 0.4, "inbatch_neg_score": 0.4825, "inbatch_pos_score": 1.2002, "learning_rate": 3.2944444444444445e-05, "loss": 2.7531, "norm_diff": 0.0335, "norm_loss": 0.0, "num_token_doc": 66.8814, "num_token_overlap": 17.8198, "num_token_query": 52.3831, "num_token_union": 73.8411, "num_word_context": 202.2743, "num_word_doc": 49.8928, "num_word_query": 39.9474, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2685.2097, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4822, "query_norm": 1.4893, "queue_k_norm": 1.521, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3831, "sent_len_1": 66.8814, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1312, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 40700 }, { "accuracy": 59.8145, "active_queue_size": 16384.0, "cl_loss": 2.7666, "doc_norm": 1.5239, "encoder_q-embeddings": 1688.1621, "encoder_q-layer.0": 1130.6248, "encoder_q-layer.1": 1227.506, "encoder_q-layer.10": 1630.4991, "encoder_q-layer.11": 3178.7383, "encoder_q-layer.2": 1482.4779, "encoder_q-layer.3": 1543.1993, "encoder_q-layer.4": 1731.6801, "encoder_q-layer.5": 1818.3939, "encoder_q-layer.6": 1810.8613, "encoder_q-layer.7": 1828.0098, "encoder_q-layer.8": 1871.9263, "encoder_q-layer.9": 1629.6893, "epoch": 0.4, "inbatch_neg_score": 0.4831, "inbatch_pos_score": 1.209, "learning_rate": 3.2888888888888894e-05, "loss": 2.7666, "norm_diff": 0.043, "norm_loss": 0.0, "num_token_doc": 66.8034, "num_token_overlap": 17.7986, "num_token_query": 52.3042, "num_token_union": 73.7821, "num_word_context": 202.3324, "num_word_doc": 49.8645, "num_word_query": 39.9142, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2645.9181, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4836, "query_norm": 1.481, "queue_k_norm": 1.5201, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3042, "sent_len_1": 66.8034, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4437, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 40800 }, { "accuracy": 58.7402, "active_queue_size": 16384.0, "cl_loss": 2.7508, "doc_norm": 1.5195, "encoder_q-embeddings": 1592.0941, "encoder_q-layer.0": 993.4401, "encoder_q-layer.1": 1120.752, "encoder_q-layer.10": 1750.6748, "encoder_q-layer.11": 3676.2993, "encoder_q-layer.2": 1299.3363, "encoder_q-layer.3": 1417.3702, "encoder_q-layer.4": 1506.6301, "encoder_q-layer.5": 1592.1978, "encoder_q-layer.6": 1720.8013, "encoder_q-layer.7": 1897.9187, "encoder_q-layer.8": 2171.8989, "encoder_q-layer.9": 1804.8677, "epoch": 0.4, "inbatch_neg_score": 0.4786, "inbatch_pos_score": 1.1895, "learning_rate": 3.283333333333333e-05, "loss": 2.7508, "norm_diff": 0.0464, "norm_loss": 0.0, "num_token_doc": 66.9458, "num_token_overlap": 17.8434, "num_token_query": 52.3867, "num_token_union": 73.862, "num_word_context": 202.6284, "num_word_doc": 49.9656, "num_word_query": 39.961, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2701.584, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4778, "query_norm": 1.4731, "queue_k_norm": 1.521, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3867, "sent_len_1": 66.9458, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1937, "stdk": 0.0488, "stdq": 0.0457, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 40900 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 2.7467, "doc_norm": 1.5224, "encoder_q-embeddings": 1618.1306, "encoder_q-layer.0": 1009.498, "encoder_q-layer.1": 1143.692, "encoder_q-layer.10": 1840.5281, "encoder_q-layer.11": 3670.9539, "encoder_q-layer.2": 1301.2504, "encoder_q-layer.3": 1385.6431, "encoder_q-layer.4": 1464.6558, "encoder_q-layer.5": 1423.532, "encoder_q-layer.6": 1619.5558, "encoder_q-layer.7": 1747.9607, "encoder_q-layer.8": 1947.7629, "encoder_q-layer.9": 1783.1552, "epoch": 0.4, "inbatch_neg_score": 0.4855, "inbatch_pos_score": 1.2031, "learning_rate": 3.277777777777778e-05, "loss": 2.7467, "norm_diff": 0.0246, "norm_loss": 0.0, "num_token_doc": 66.8535, "num_token_overlap": 17.8567, "num_token_query": 52.3524, "num_token_union": 73.783, "num_word_context": 202.3124, "num_word_doc": 49.8425, "num_word_query": 39.8873, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2616.4881, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4851, "query_norm": 1.4978, "queue_k_norm": 1.5224, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3524, "sent_len_1": 66.8535, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3487, "stdk": 0.0489, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 41000 }, { "accuracy": 60.8398, "active_queue_size": 16384.0, "cl_loss": 2.751, "doc_norm": 1.5241, "encoder_q-embeddings": 1615.7599, "encoder_q-layer.0": 1066.2256, "encoder_q-layer.1": 1181.8707, "encoder_q-layer.10": 1676.2754, "encoder_q-layer.11": 3446.2656, "encoder_q-layer.2": 1328.4958, "encoder_q-layer.3": 1383.5337, "encoder_q-layer.4": 1491.2611, "encoder_q-layer.5": 1508.8777, "encoder_q-layer.6": 1633.6285, "encoder_q-layer.7": 1646.8196, "encoder_q-layer.8": 1865.9442, "encoder_q-layer.9": 1694.7241, "epoch": 0.4, "inbatch_neg_score": 0.4812, "inbatch_pos_score": 1.2207, "learning_rate": 3.272222222222223e-05, "loss": 2.751, "norm_diff": 0.0276, "norm_loss": 0.0, "num_token_doc": 66.7651, "num_token_overlap": 17.8056, "num_token_query": 52.2754, "num_token_union": 73.7122, "num_word_context": 202.4095, "num_word_doc": 49.8525, "num_word_query": 39.8907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2559.5255, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.481, "query_norm": 1.4965, "queue_k_norm": 1.5213, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2754, "sent_len_1": 66.7651, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1525, "stdk": 0.049, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 41100 }, { "accuracy": 60.0098, "active_queue_size": 16384.0, "cl_loss": 2.7535, "doc_norm": 1.5228, "encoder_q-embeddings": 1663.3784, "encoder_q-layer.0": 1164.8895, "encoder_q-layer.1": 1272.3149, "encoder_q-layer.10": 1808.1334, "encoder_q-layer.11": 3489.2932, "encoder_q-layer.2": 1446.9395, "encoder_q-layer.3": 1642.9468, "encoder_q-layer.4": 1714.2623, "encoder_q-layer.5": 1722.0619, "encoder_q-layer.6": 1767.9366, "encoder_q-layer.7": 1650.3273, "encoder_q-layer.8": 1952.054, "encoder_q-layer.9": 1770.334, "epoch": 0.4, "inbatch_neg_score": 0.4884, "inbatch_pos_score": 1.2246, "learning_rate": 3.266666666666667e-05, "loss": 2.7535, "norm_diff": 0.0154, "norm_loss": 0.0, "num_token_doc": 66.7451, "num_token_overlap": 17.8322, "num_token_query": 52.4403, "num_token_union": 73.7774, "num_word_context": 202.1459, "num_word_doc": 49.8072, "num_word_query": 39.9923, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2675.3431, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4883, "query_norm": 1.5074, "queue_k_norm": 1.5218, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4403, "sent_len_1": 66.7451, "sent_len_max_0": 128.0, "sent_len_max_1": 210.6387, "stdk": 0.0489, "stdq": 0.047, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 41200 }, { "accuracy": 60.0098, "active_queue_size": 16384.0, "cl_loss": 2.7614, "doc_norm": 1.5215, "encoder_q-embeddings": 1475.4926, "encoder_q-layer.0": 945.5283, "encoder_q-layer.1": 1065.9955, "encoder_q-layer.10": 1655.9349, "encoder_q-layer.11": 3294.6621, "encoder_q-layer.2": 1202.1891, "encoder_q-layer.3": 1274.3589, "encoder_q-layer.4": 1367.3094, "encoder_q-layer.5": 1404.9041, "encoder_q-layer.6": 1509.0845, "encoder_q-layer.7": 1611.7675, "encoder_q-layer.8": 1919.0286, "encoder_q-layer.9": 1686.8142, "epoch": 0.4, "inbatch_neg_score": 0.4892, "inbatch_pos_score": 1.2217, "learning_rate": 3.261111111111111e-05, "loss": 2.7614, "norm_diff": 0.0264, "norm_loss": 0.0, "num_token_doc": 66.6868, "num_token_overlap": 17.7507, "num_token_query": 52.2029, "num_token_union": 73.6855, "num_word_context": 202.3695, "num_word_doc": 49.7749, "num_word_query": 39.8382, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2420.4652, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.489, "query_norm": 1.4951, "queue_k_norm": 1.5224, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2029, "sent_len_1": 66.6868, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6937, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 41300 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.7553, "doc_norm": 1.5293, "encoder_q-embeddings": 1512.9895, "encoder_q-layer.0": 1024.8989, "encoder_q-layer.1": 1142.3568, "encoder_q-layer.10": 1708.4655, "encoder_q-layer.11": 3480.1089, "encoder_q-layer.2": 1291.0055, "encoder_q-layer.3": 1320.6658, "encoder_q-layer.4": 1386.9653, "encoder_q-layer.5": 1380.3915, "encoder_q-layer.6": 1533.3971, "encoder_q-layer.7": 1667.068, "encoder_q-layer.8": 1800.7499, "encoder_q-layer.9": 1676.5594, "epoch": 0.4, "inbatch_neg_score": 0.4855, "inbatch_pos_score": 1.2295, "learning_rate": 3.2555555555555555e-05, "loss": 2.7553, "norm_diff": 0.0361, "norm_loss": 0.0, "num_token_doc": 66.8672, "num_token_overlap": 17.8311, "num_token_query": 52.2703, "num_token_union": 73.7447, "num_word_context": 202.1962, "num_word_doc": 49.9225, "num_word_query": 39.8554, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2508.7319, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4863, "query_norm": 1.4932, "queue_k_norm": 1.5232, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2703, "sent_len_1": 66.8672, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7475, "stdk": 0.0492, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 41400 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.738, "doc_norm": 1.5224, "encoder_q-embeddings": 1576.9508, "encoder_q-layer.0": 1004.5413, "encoder_q-layer.1": 1098.0973, "encoder_q-layer.10": 1734.545, "encoder_q-layer.11": 3665.8826, "encoder_q-layer.2": 1252.5927, "encoder_q-layer.3": 1343.5909, "encoder_q-layer.4": 1421.4257, "encoder_q-layer.5": 1475.3037, "encoder_q-layer.6": 1678.1638, "encoder_q-layer.7": 1751.8151, "encoder_q-layer.8": 1983.6982, "encoder_q-layer.9": 1777.4429, "epoch": 0.41, "inbatch_neg_score": 0.4966, "inbatch_pos_score": 1.2373, "learning_rate": 3.2500000000000004e-05, "loss": 2.738, "norm_diff": 0.017, "norm_loss": 0.0, "num_token_doc": 66.8187, "num_token_overlap": 17.8989, "num_token_query": 52.5221, "num_token_union": 73.7666, "num_word_context": 202.2238, "num_word_doc": 49.829, "num_word_query": 40.0494, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2624.7241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4963, "query_norm": 1.5055, "queue_k_norm": 1.5242, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.5221, "sent_len_1": 66.8187, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8288, "stdk": 0.0489, "stdq": 0.0468, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 41500 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.7439, "doc_norm": 1.5257, "encoder_q-embeddings": 1408.6429, "encoder_q-layer.0": 893.0115, "encoder_q-layer.1": 970.0831, "encoder_q-layer.10": 1633.958, "encoder_q-layer.11": 3402.8357, "encoder_q-layer.2": 1055.7327, "encoder_q-layer.3": 1163.4438, "encoder_q-layer.4": 1291.6356, "encoder_q-layer.5": 1331.8179, "encoder_q-layer.6": 1470.6304, "encoder_q-layer.7": 1535.2206, "encoder_q-layer.8": 1739.3784, "encoder_q-layer.9": 1609.2694, "epoch": 0.41, "inbatch_neg_score": 0.4984, "inbatch_pos_score": 1.2178, "learning_rate": 3.2444444444444446e-05, "loss": 2.7439, "norm_diff": 0.0297, "norm_loss": 0.0, "num_token_doc": 66.9511, "num_token_overlap": 17.8253, "num_token_query": 52.2644, "num_token_union": 73.7669, "num_word_context": 202.6405, "num_word_doc": 49.9478, "num_word_query": 39.8627, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2383.1462, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.498, "query_norm": 1.4959, "queue_k_norm": 1.5243, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2644, "sent_len_1": 66.9511, "sent_len_max_0": 128.0, "sent_len_max_1": 210.51, "stdk": 0.049, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 41600 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.7534, "doc_norm": 1.5247, "encoder_q-embeddings": 1510.0065, "encoder_q-layer.0": 1048.8064, "encoder_q-layer.1": 1152.131, "encoder_q-layer.10": 1784.734, "encoder_q-layer.11": 3487.1177, "encoder_q-layer.2": 1284.1538, "encoder_q-layer.3": 1302.0416, "encoder_q-layer.4": 1384.8954, "encoder_q-layer.5": 1424.8615, "encoder_q-layer.6": 1637.8676, "encoder_q-layer.7": 1701.9972, "encoder_q-layer.8": 1963.001, "encoder_q-layer.9": 1768.9794, "epoch": 0.41, "inbatch_neg_score": 0.4978, "inbatch_pos_score": 1.2471, "learning_rate": 3.238888888888889e-05, "loss": 2.7534, "norm_diff": 0.0125, "norm_loss": 0.0, "num_token_doc": 66.9534, "num_token_overlap": 17.8367, "num_token_query": 52.3266, "num_token_union": 73.7718, "num_word_context": 202.6045, "num_word_doc": 49.927, "num_word_query": 39.9323, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2546.6066, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4978, "query_norm": 1.5135, "queue_k_norm": 1.5243, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3266, "sent_len_1": 66.9534, "sent_len_max_0": 128.0, "sent_len_max_1": 211.8288, "stdk": 0.0489, "stdq": 0.0468, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 41700 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.7516, "doc_norm": 1.529, "encoder_q-embeddings": 2730.4941, "encoder_q-layer.0": 1648.1527, "encoder_q-layer.1": 1759.2349, "encoder_q-layer.10": 3314.5872, "encoder_q-layer.11": 6549.2017, "encoder_q-layer.2": 1951.4865, "encoder_q-layer.3": 2088.2996, "encoder_q-layer.4": 2202.9248, "encoder_q-layer.5": 2228.9868, "encoder_q-layer.6": 2619.1189, "encoder_q-layer.7": 3010.4375, "encoder_q-layer.8": 3682.4399, "encoder_q-layer.9": 3360.3845, "epoch": 0.41, "inbatch_neg_score": 0.4959, "inbatch_pos_score": 1.2285, "learning_rate": 3.233333333333333e-05, "loss": 2.7516, "norm_diff": 0.0229, "norm_loss": 0.0, "num_token_doc": 66.8568, "num_token_overlap": 17.8181, "num_token_query": 52.4464, "num_token_union": 73.8633, "num_word_context": 202.3304, "num_word_doc": 49.8935, "num_word_query": 39.9965, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4568.9702, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4954, "query_norm": 1.5061, "queue_k_norm": 1.5264, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4464, "sent_len_1": 66.8568, "sent_len_max_0": 128.0, "sent_len_max_1": 206.9675, "stdk": 0.0491, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 41800 }, { "accuracy": 59.1309, "active_queue_size": 16384.0, "cl_loss": 2.7522, "doc_norm": 1.5256, "encoder_q-embeddings": 3878.219, "encoder_q-layer.0": 2525.0925, "encoder_q-layer.1": 2737.5349, "encoder_q-layer.10": 3180.408, "encoder_q-layer.11": 6484.0386, "encoder_q-layer.2": 3306.1262, "encoder_q-layer.3": 3440.6245, "encoder_q-layer.4": 3747.042, "encoder_q-layer.5": 3893.4114, "encoder_q-layer.6": 3877.4983, "encoder_q-layer.7": 3884.0054, "encoder_q-layer.8": 3937.53, "encoder_q-layer.9": 3309.8208, "epoch": 0.41, "inbatch_neg_score": 0.5088, "inbatch_pos_score": 1.2344, "learning_rate": 3.227777777777778e-05, "loss": 2.7522, "norm_diff": 0.0168, "norm_loss": 0.0, "num_token_doc": 66.6792, "num_token_overlap": 17.7698, "num_token_query": 52.307, "num_token_union": 73.6939, "num_word_context": 202.3056, "num_word_doc": 49.7487, "num_word_query": 39.9011, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5648.4552, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5083, "query_norm": 1.5105, "queue_k_norm": 1.5253, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.307, "sent_len_1": 66.6792, "sent_len_max_0": 128.0, "sent_len_max_1": 206.0, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 41900 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 2.7456, "doc_norm": 1.528, "encoder_q-embeddings": 5051.6411, "encoder_q-layer.0": 3532.3562, "encoder_q-layer.1": 3951.2817, "encoder_q-layer.10": 3496.2898, "encoder_q-layer.11": 7171.2109, "encoder_q-layer.2": 5069.8882, "encoder_q-layer.3": 5249.9858, "encoder_q-layer.4": 6099.1074, "encoder_q-layer.5": 6140.2065, "encoder_q-layer.6": 6032.5962, "encoder_q-layer.7": 4806.8296, "encoder_q-layer.8": 4531.0146, "encoder_q-layer.9": 3594.3501, "epoch": 0.41, "inbatch_neg_score": 0.5141, "inbatch_pos_score": 1.2383, "learning_rate": 3.222222222222223e-05, "loss": 2.7456, "norm_diff": 0.0122, "norm_loss": 0.0, "num_token_doc": 66.644, "num_token_overlap": 17.7978, "num_token_query": 52.2787, "num_token_union": 73.6602, "num_word_context": 202.1095, "num_word_doc": 49.7267, "num_word_query": 39.8578, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7447.2846, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5142, "query_norm": 1.5159, "queue_k_norm": 1.5259, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2787, "sent_len_1": 66.644, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4112, "stdk": 0.049, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 42000 }, { "accuracy": 59.6191, "active_queue_size": 16384.0, "cl_loss": 2.743, "doc_norm": 1.5275, "encoder_q-embeddings": 3423.0562, "encoder_q-layer.0": 2284.1519, "encoder_q-layer.1": 2437.6689, "encoder_q-layer.10": 3403.9031, "encoder_q-layer.11": 6732.5181, "encoder_q-layer.2": 2676.4707, "encoder_q-layer.3": 2757.5339, "encoder_q-layer.4": 2855.0649, "encoder_q-layer.5": 2986.6091, "encoder_q-layer.6": 3382.6782, "encoder_q-layer.7": 3498.2185, "encoder_q-layer.8": 3669.8953, "encoder_q-layer.9": 3389.1301, "epoch": 0.41, "inbatch_neg_score": 0.5257, "inbatch_pos_score": 1.2539, "learning_rate": 3.2166666666666665e-05, "loss": 2.743, "norm_diff": 0.0072, "norm_loss": 0.0, "num_token_doc": 66.7536, "num_token_overlap": 17.8193, "num_token_query": 52.243, "num_token_union": 73.6635, "num_word_context": 201.9879, "num_word_doc": 49.7916, "num_word_query": 39.8279, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5138.1465, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5249, "query_norm": 1.5328, "queue_k_norm": 1.5263, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.243, "sent_len_1": 66.7536, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0563, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 42100 }, { "accuracy": 58.6426, "active_queue_size": 16384.0, "cl_loss": 2.7518, "doc_norm": 1.5314, "encoder_q-embeddings": 3537.2866, "encoder_q-layer.0": 2279.9788, "encoder_q-layer.1": 2557.2971, "encoder_q-layer.10": 3399.3083, "encoder_q-layer.11": 6975.2666, "encoder_q-layer.2": 2883.5515, "encoder_q-layer.3": 3180.0115, "encoder_q-layer.4": 3561.0713, "encoder_q-layer.5": 3609.0542, "encoder_q-layer.6": 3800.6631, "encoder_q-layer.7": 4032.0005, "encoder_q-layer.8": 4090.6318, "encoder_q-layer.9": 3504.7571, "epoch": 0.41, "inbatch_neg_score": 0.5331, "inbatch_pos_score": 1.2705, "learning_rate": 3.2111111111111114e-05, "loss": 2.7518, "norm_diff": 0.0086, "norm_loss": 0.0, "num_token_doc": 66.9683, "num_token_overlap": 17.7977, "num_token_query": 52.224, "num_token_union": 73.7962, "num_word_context": 202.4018, "num_word_doc": 49.9497, "num_word_query": 39.8356, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5616.2538, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5342, "query_norm": 1.5361, "queue_k_norm": 1.5292, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.224, "sent_len_1": 66.9683, "sent_len_max_0": 128.0, "sent_len_max_1": 209.9062, "stdk": 0.0491, "stdq": 0.0468, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 42200 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.7385, "doc_norm": 1.5286, "encoder_q-embeddings": 5472.0581, "encoder_q-layer.0": 3883.6082, "encoder_q-layer.1": 4521.124, "encoder_q-layer.10": 3324.3679, "encoder_q-layer.11": 6870.1372, "encoder_q-layer.2": 5408.9492, "encoder_q-layer.3": 5255.5942, "encoder_q-layer.4": 4922.2095, "encoder_q-layer.5": 4451.9204, "encoder_q-layer.6": 3957.8645, "encoder_q-layer.7": 3237.5017, "encoder_q-layer.8": 3499.8052, "encoder_q-layer.9": 3301.1409, "epoch": 0.41, "inbatch_neg_score": 0.5389, "inbatch_pos_score": 1.2656, "learning_rate": 3.2055555555555556e-05, "loss": 2.7385, "norm_diff": 0.0073, "norm_loss": 0.0, "num_token_doc": 66.701, "num_token_overlap": 17.8221, "num_token_query": 52.3128, "num_token_union": 73.6943, "num_word_context": 202.1314, "num_word_doc": 49.7583, "num_word_query": 39.8775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6850.1465, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5391, "query_norm": 1.5227, "queue_k_norm": 1.528, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3128, "sent_len_1": 66.701, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9638, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 42300 }, { "accuracy": 59.9121, "active_queue_size": 16384.0, "cl_loss": 2.7367, "doc_norm": 1.5305, "encoder_q-embeddings": 3074.7048, "encoder_q-layer.0": 2035.8949, "encoder_q-layer.1": 2276.1169, "encoder_q-layer.10": 3569.2727, "encoder_q-layer.11": 6944.0869, "encoder_q-layer.2": 2644.1255, "encoder_q-layer.3": 2814.2886, "encoder_q-layer.4": 3029.5791, "encoder_q-layer.5": 3480.5298, "encoder_q-layer.6": 3610.689, "encoder_q-layer.7": 3943.7593, "encoder_q-layer.8": 4054.7729, "encoder_q-layer.9": 3580.1941, "epoch": 0.41, "inbatch_neg_score": 0.5462, "inbatch_pos_score": 1.2725, "learning_rate": 3.2000000000000005e-05, "loss": 2.7367, "norm_diff": 0.0046, "norm_loss": 0.0, "num_token_doc": 67.034, "num_token_overlap": 17.8667, "num_token_query": 52.3877, "num_token_union": 73.8772, "num_word_context": 202.7962, "num_word_doc": 50.0184, "num_word_query": 39.9465, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5279.6175, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5464, "query_norm": 1.5325, "queue_k_norm": 1.532, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3877, "sent_len_1": 67.034, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5513, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 42400 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.7443, "doc_norm": 1.53, "encoder_q-embeddings": 3608.3013, "encoder_q-layer.0": 2363.2109, "encoder_q-layer.1": 2788.4365, "encoder_q-layer.10": 3716.6521, "encoder_q-layer.11": 7617.1167, "encoder_q-layer.2": 3082.7051, "encoder_q-layer.3": 3369.1052, "encoder_q-layer.4": 3527.6677, "encoder_q-layer.5": 3644.4412, "encoder_q-layer.6": 3616.1301, "encoder_q-layer.7": 3669.6973, "encoder_q-layer.8": 3971.5015, "encoder_q-layer.9": 3543.5828, "epoch": 0.41, "inbatch_neg_score": 0.5431, "inbatch_pos_score": 1.2734, "learning_rate": 3.194444444444444e-05, "loss": 2.7443, "norm_diff": 0.0062, "norm_loss": 0.0, "num_token_doc": 66.7346, "num_token_overlap": 17.7996, "num_token_query": 52.282, "num_token_union": 73.7291, "num_word_context": 202.3792, "num_word_doc": 49.7712, "num_word_query": 39.8563, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5618.761, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.543, "query_norm": 1.5246, "queue_k_norm": 1.5331, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.282, "sent_len_1": 66.7346, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0563, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 42500 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.7372, "doc_norm": 1.5339, "encoder_q-embeddings": 4900.583, "encoder_q-layer.0": 3271.6003, "encoder_q-layer.1": 3724.1436, "encoder_q-layer.10": 3225.6985, "encoder_q-layer.11": 6679.0229, "encoder_q-layer.2": 4420.3159, "encoder_q-layer.3": 4514.4731, "encoder_q-layer.4": 4757.667, "encoder_q-layer.5": 4833.4766, "encoder_q-layer.6": 5402.6191, "encoder_q-layer.7": 4708.2051, "encoder_q-layer.8": 3758.4927, "encoder_q-layer.9": 3244.7542, "epoch": 0.42, "inbatch_neg_score": 0.5423, "inbatch_pos_score": 1.2666, "learning_rate": 3.188888888888889e-05, "loss": 2.7372, "norm_diff": 0.0166, "norm_loss": 0.0, "num_token_doc": 66.5441, "num_token_overlap": 17.8254, "num_token_query": 52.3246, "num_token_union": 73.5876, "num_word_context": 202.0582, "num_word_doc": 49.6755, "num_word_query": 39.9208, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6765.3447, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5425, "query_norm": 1.5173, "queue_k_norm": 1.5342, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3246, "sent_len_1": 66.5441, "sent_len_max_0": 128.0, "sent_len_max_1": 209.175, "stdk": 0.049, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 42600 }, { "accuracy": 60.0098, "active_queue_size": 16384.0, "cl_loss": 2.7529, "doc_norm": 1.5392, "encoder_q-embeddings": 1427.3448, "encoder_q-layer.0": 944.1354, "encoder_q-layer.1": 1066.9083, "encoder_q-layer.10": 1865.4028, "encoder_q-layer.11": 3611.9397, "encoder_q-layer.2": 1194.6823, "encoder_q-layer.3": 1273.5531, "encoder_q-layer.4": 1313.323, "encoder_q-layer.5": 1395.8967, "encoder_q-layer.6": 1514.5509, "encoder_q-layer.7": 1587.4528, "encoder_q-layer.8": 1939.7869, "encoder_q-layer.9": 1768.3119, "epoch": 0.42, "inbatch_neg_score": 0.5394, "inbatch_pos_score": 1.2646, "learning_rate": 3.183333333333334e-05, "loss": 2.7529, "norm_diff": 0.0361, "norm_loss": 0.0, "num_token_doc": 66.7838, "num_token_overlap": 17.7914, "num_token_query": 52.2815, "num_token_union": 73.7335, "num_word_context": 202.464, "num_word_doc": 49.8493, "num_word_query": 39.8701, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2502.2204, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.54, "query_norm": 1.5031, "queue_k_norm": 1.5354, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2815, "sent_len_1": 66.7838, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0263, "stdk": 0.0491, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 42700 }, { "accuracy": 60.9863, "active_queue_size": 16384.0, "cl_loss": 2.7543, "doc_norm": 1.5369, "encoder_q-embeddings": 1262.4731, "encoder_q-layer.0": 785.5094, "encoder_q-layer.1": 868.9186, "encoder_q-layer.10": 1648.9016, "encoder_q-layer.11": 3472.2358, "encoder_q-layer.2": 972.6072, "encoder_q-layer.3": 998.1169, "encoder_q-layer.4": 1100.8276, "encoder_q-layer.5": 1130.1093, "encoder_q-layer.6": 1311.0482, "encoder_q-layer.7": 1501.5156, "encoder_q-layer.8": 1815.65, "encoder_q-layer.9": 1685.7747, "epoch": 0.42, "inbatch_neg_score": 0.5434, "inbatch_pos_score": 1.2744, "learning_rate": 3.177777777777778e-05, "loss": 2.7543, "norm_diff": 0.0283, "norm_loss": 0.0, "num_token_doc": 66.7117, "num_token_overlap": 17.7746, "num_token_query": 52.2265, "num_token_union": 73.6747, "num_word_context": 202.2125, "num_word_doc": 49.7835, "num_word_query": 39.8245, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2319.3919, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5439, "query_norm": 1.5086, "queue_k_norm": 1.5371, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2265, "sent_len_1": 66.7117, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6562, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 42800 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.748, "doc_norm": 1.539, "encoder_q-embeddings": 1690.5055, "encoder_q-layer.0": 1103.947, "encoder_q-layer.1": 1222.7355, "encoder_q-layer.10": 1787.5125, "encoder_q-layer.11": 3467.3894, "encoder_q-layer.2": 1411.5811, "encoder_q-layer.3": 1481.9592, "encoder_q-layer.4": 1652.4847, "encoder_q-layer.5": 1651.3257, "encoder_q-layer.6": 1807.5161, "encoder_q-layer.7": 1883.5924, "encoder_q-layer.8": 2121.1726, "encoder_q-layer.9": 1776.0571, "epoch": 0.42, "inbatch_neg_score": 0.5352, "inbatch_pos_score": 1.2764, "learning_rate": 3.1722222222222224e-05, "loss": 2.748, "norm_diff": 0.0182, "norm_loss": 0.0, "num_token_doc": 66.8641, "num_token_overlap": 17.8212, "num_token_query": 52.29, "num_token_union": 73.7565, "num_word_context": 202.2557, "num_word_doc": 49.8745, "num_word_query": 39.8983, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2695.074, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5347, "query_norm": 1.5208, "queue_k_norm": 1.5351, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.29, "sent_len_1": 66.8641, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3925, "stdk": 0.049, "stdq": 0.0473, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 42900 }, { "accuracy": 60.6934, "active_queue_size": 16384.0, "cl_loss": 2.7513, "doc_norm": 1.5385, "encoder_q-embeddings": 1399.7295, "encoder_q-layer.0": 835.5419, "encoder_q-layer.1": 931.1974, "encoder_q-layer.10": 1729.6599, "encoder_q-layer.11": 3522.7256, "encoder_q-layer.2": 1066.6055, "encoder_q-layer.3": 1153.9746, "encoder_q-layer.4": 1245.2561, "encoder_q-layer.5": 1303.014, "encoder_q-layer.6": 1477.7325, "encoder_q-layer.7": 1602.3788, "encoder_q-layer.8": 1956.5339, "encoder_q-layer.9": 1773.6555, "epoch": 0.42, "inbatch_neg_score": 0.5366, "inbatch_pos_score": 1.2822, "learning_rate": 3.1666666666666666e-05, "loss": 2.7513, "norm_diff": 0.0323, "norm_loss": 0.0, "num_token_doc": 66.6805, "num_token_overlap": 17.7421, "num_token_query": 52.245, "num_token_union": 73.6704, "num_word_context": 202.2448, "num_word_doc": 49.7407, "num_word_query": 39.8071, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2432.7759, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5361, "query_norm": 1.5062, "queue_k_norm": 1.538, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.245, "sent_len_1": 66.6805, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8887, "stdk": 0.049, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 43000 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.7307, "doc_norm": 1.5376, "encoder_q-embeddings": 1402.675, "encoder_q-layer.0": 838.371, "encoder_q-layer.1": 921.6211, "encoder_q-layer.10": 1704.6495, "encoder_q-layer.11": 3425.98, "encoder_q-layer.2": 1045.8467, "encoder_q-layer.3": 1125.8368, "encoder_q-layer.4": 1174.3668, "encoder_q-layer.5": 1229.4297, "encoder_q-layer.6": 1414.3589, "encoder_q-layer.7": 1642.8658, "encoder_q-layer.8": 1900.1001, "encoder_q-layer.9": 1761.5267, "epoch": 0.42, "inbatch_neg_score": 0.5298, "inbatch_pos_score": 1.2646, "learning_rate": 3.1611111111111115e-05, "loss": 2.7307, "norm_diff": 0.0322, "norm_loss": 0.0, "num_token_doc": 66.8269, "num_token_overlap": 17.802, "num_token_query": 52.221, "num_token_union": 73.6967, "num_word_context": 202.2087, "num_word_doc": 49.8853, "num_word_query": 39.8344, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2415.3549, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5293, "query_norm": 1.5054, "queue_k_norm": 1.5374, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.221, "sent_len_1": 66.8269, "sent_len_max_0": 128.0, "sent_len_max_1": 206.8587, "stdk": 0.0489, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 43100 }, { "accuracy": 57.959, "active_queue_size": 16384.0, "cl_loss": 2.7378, "doc_norm": 1.5356, "encoder_q-embeddings": 1481.2668, "encoder_q-layer.0": 918.2972, "encoder_q-layer.1": 1002.9461, "encoder_q-layer.10": 1788.3907, "encoder_q-layer.11": 3529.926, "encoder_q-layer.2": 1133.1187, "encoder_q-layer.3": 1260.6622, "encoder_q-layer.4": 1308.0305, "encoder_q-layer.5": 1291.3053, "encoder_q-layer.6": 1424.2162, "encoder_q-layer.7": 1634.4564, "encoder_q-layer.8": 1981.0466, "encoder_q-layer.9": 1776.9449, "epoch": 0.42, "inbatch_neg_score": 0.5358, "inbatch_pos_score": 1.2471, "learning_rate": 3.155555555555556e-05, "loss": 2.7378, "norm_diff": 0.0383, "norm_loss": 0.0, "num_token_doc": 66.8705, "num_token_overlap": 17.7792, "num_token_query": 52.2918, "num_token_union": 73.8208, "num_word_context": 202.4889, "num_word_doc": 49.8942, "num_word_query": 39.8778, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2507.6301, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5356, "query_norm": 1.4972, "queue_k_norm": 1.5383, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2918, "sent_len_1": 66.8705, "sent_len_max_0": 128.0, "sent_len_max_1": 211.13, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 43200 }, { "accuracy": 58.5449, "active_queue_size": 16384.0, "cl_loss": 2.7375, "doc_norm": 1.537, "encoder_q-embeddings": 2266.905, "encoder_q-layer.0": 1462.8715, "encoder_q-layer.1": 1601.9989, "encoder_q-layer.10": 1864.1, "encoder_q-layer.11": 3550.1934, "encoder_q-layer.2": 1907.4972, "encoder_q-layer.3": 2085.4019, "encoder_q-layer.4": 2318.7493, "encoder_q-layer.5": 2250.5579, "encoder_q-layer.6": 2181.7122, "encoder_q-layer.7": 2128.1953, "encoder_q-layer.8": 2223.2729, "encoder_q-layer.9": 1813.3602, "epoch": 0.42, "inbatch_neg_score": 0.5291, "inbatch_pos_score": 1.2549, "learning_rate": 3.15e-05, "loss": 2.7375, "norm_diff": 0.0304, "norm_loss": 0.0, "num_token_doc": 66.8116, "num_token_overlap": 17.769, "num_token_query": 52.3697, "num_token_union": 73.8142, "num_word_context": 202.5014, "num_word_doc": 49.84, "num_word_query": 39.9343, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3209.9047, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5283, "query_norm": 1.5066, "queue_k_norm": 1.5384, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3697, "sent_len_1": 66.8116, "sent_len_max_0": 128.0, "sent_len_max_1": 210.5513, "stdk": 0.0489, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 43300 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.7451, "doc_norm": 1.5401, "encoder_q-embeddings": 2943.2786, "encoder_q-layer.0": 2150.9382, "encoder_q-layer.1": 2529.1511, "encoder_q-layer.10": 1658.7566, "encoder_q-layer.11": 3367.4016, "encoder_q-layer.2": 2959.8511, "encoder_q-layer.3": 3229.7246, "encoder_q-layer.4": 3652.9485, "encoder_q-layer.5": 3475.6978, "encoder_q-layer.6": 3284.158, "encoder_q-layer.7": 2706.1562, "encoder_q-layer.8": 1998.8353, "encoder_q-layer.9": 1696.9025, "epoch": 0.42, "inbatch_neg_score": 0.5326, "inbatch_pos_score": 1.2461, "learning_rate": 3.144444444444445e-05, "loss": 2.7451, "norm_diff": 0.0644, "norm_loss": 0.0, "num_token_doc": 66.7584, "num_token_overlap": 17.8034, "num_token_query": 52.228, "num_token_union": 73.6928, "num_word_context": 202.2724, "num_word_doc": 49.8346, "num_word_query": 39.8349, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4135.1689, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5327, "query_norm": 1.4757, "queue_k_norm": 1.5388, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.228, "sent_len_1": 66.7584, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6937, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 43400 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.7275, "doc_norm": 1.5374, "encoder_q-embeddings": 1498.1337, "encoder_q-layer.0": 974.2648, "encoder_q-layer.1": 1068.1727, "encoder_q-layer.10": 1756.014, "encoder_q-layer.11": 3417.2402, "encoder_q-layer.2": 1163.6169, "encoder_q-layer.3": 1253.7051, "encoder_q-layer.4": 1304.0922, "encoder_q-layer.5": 1338.011, "encoder_q-layer.6": 1539.8329, "encoder_q-layer.7": 1619.2759, "encoder_q-layer.8": 1828.5486, "encoder_q-layer.9": 1795.9601, "epoch": 0.42, "inbatch_neg_score": 0.5241, "inbatch_pos_score": 1.2529, "learning_rate": 3.138888888888889e-05, "loss": 2.7275, "norm_diff": 0.0396, "norm_loss": 0.0, "num_token_doc": 66.8087, "num_token_overlap": 17.836, "num_token_query": 52.26, "num_token_union": 73.6843, "num_word_context": 202.467, "num_word_doc": 49.8748, "num_word_query": 39.8377, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2443.729, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5225, "query_norm": 1.4979, "queue_k_norm": 1.5376, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.26, "sent_len_1": 66.8087, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5337, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 43500 }, { "accuracy": 58.3496, "active_queue_size": 16384.0, "cl_loss": 2.7475, "doc_norm": 1.5359, "encoder_q-embeddings": 1434.1848, "encoder_q-layer.0": 867.0598, "encoder_q-layer.1": 975.1294, "encoder_q-layer.10": 1727.5171, "encoder_q-layer.11": 3441.6943, "encoder_q-layer.2": 1088.9865, "encoder_q-layer.3": 1180.7717, "encoder_q-layer.4": 1289.8737, "encoder_q-layer.5": 1298.5527, "encoder_q-layer.6": 1457.9274, "encoder_q-layer.7": 1706.8534, "encoder_q-layer.8": 2025.9233, "encoder_q-layer.9": 1800.3483, "epoch": 0.43, "inbatch_neg_score": 0.5271, "inbatch_pos_score": 1.248, "learning_rate": 3.1333333333333334e-05, "loss": 2.7475, "norm_diff": 0.027, "norm_loss": 0.0, "num_token_doc": 66.7677, "num_token_overlap": 17.8283, "num_token_query": 52.445, "num_token_union": 73.79, "num_word_context": 202.3349, "num_word_doc": 49.8263, "num_word_query": 40.0063, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2503.7856, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5283, "query_norm": 1.5088, "queue_k_norm": 1.5387, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.445, "sent_len_1": 66.7677, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4313, "stdk": 0.0489, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 43600 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.7317, "doc_norm": 1.5393, "encoder_q-embeddings": 1417.1466, "encoder_q-layer.0": 867.8813, "encoder_q-layer.1": 963.5767, "encoder_q-layer.10": 1801.5491, "encoder_q-layer.11": 3511.4023, "encoder_q-layer.2": 1094.8448, "encoder_q-layer.3": 1186.2653, "encoder_q-layer.4": 1319.2437, "encoder_q-layer.5": 1396.6395, "encoder_q-layer.6": 1517.3876, "encoder_q-layer.7": 1650.493, "encoder_q-layer.8": 1931.1216, "encoder_q-layer.9": 1791.4502, "epoch": 0.43, "inbatch_neg_score": 0.5323, "inbatch_pos_score": 1.251, "learning_rate": 3.1277777777777776e-05, "loss": 2.7317, "norm_diff": 0.0452, "norm_loss": 0.0, "num_token_doc": 66.9597, "num_token_overlap": 17.8677, "num_token_query": 52.3821, "num_token_union": 73.8346, "num_word_context": 202.5474, "num_word_doc": 49.9402, "num_word_query": 39.9524, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2497.1976, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5312, "query_norm": 1.4941, "queue_k_norm": 1.5397, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3821, "sent_len_1": 66.9597, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8388, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 43700 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.7331, "doc_norm": 1.5457, "encoder_q-embeddings": 1817.9352, "encoder_q-layer.0": 1242.0554, "encoder_q-layer.1": 1467.5078, "encoder_q-layer.10": 1673.2085, "encoder_q-layer.11": 3360.9062, "encoder_q-layer.2": 1685.1194, "encoder_q-layer.3": 1759.5273, "encoder_q-layer.4": 1921.7177, "encoder_q-layer.5": 1896.7382, "encoder_q-layer.6": 1946.4546, "encoder_q-layer.7": 1870.5364, "encoder_q-layer.8": 1980.84, "encoder_q-layer.9": 1652.3616, "epoch": 0.43, "inbatch_neg_score": 0.525, "inbatch_pos_score": 1.2598, "learning_rate": 3.1222222222222225e-05, "loss": 2.7331, "norm_diff": 0.0475, "norm_loss": 0.0, "num_token_doc": 66.9288, "num_token_overlap": 17.873, "num_token_query": 52.4145, "num_token_union": 73.8221, "num_word_context": 202.509, "num_word_doc": 49.9132, "num_word_query": 39.9892, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2805.0161, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5244, "query_norm": 1.4982, "queue_k_norm": 1.539, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4145, "sent_len_1": 66.9288, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3212, "stdk": 0.0492, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 43800 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.7311, "doc_norm": 1.5392, "encoder_q-embeddings": 1891.0277, "encoder_q-layer.0": 1279.6528, "encoder_q-layer.1": 1444.4135, "encoder_q-layer.10": 1712.6124, "encoder_q-layer.11": 3269.7292, "encoder_q-layer.2": 1621.1261, "encoder_q-layer.3": 1727.0564, "encoder_q-layer.4": 1789.0005, "encoder_q-layer.5": 1905.379, "encoder_q-layer.6": 1942.0189, "encoder_q-layer.7": 1915.6534, "encoder_q-layer.8": 1988.7106, "encoder_q-layer.9": 1688.8755, "epoch": 0.43, "inbatch_neg_score": 0.5212, "inbatch_pos_score": 1.2607, "learning_rate": 3.116666666666667e-05, "loss": 2.7311, "norm_diff": 0.0314, "norm_loss": 0.0, "num_token_doc": 66.8608, "num_token_overlap": 17.8644, "num_token_query": 52.4455, "num_token_union": 73.8111, "num_word_context": 202.2804, "num_word_doc": 49.8747, "num_word_query": 39.996, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2783.0203, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.52, "query_norm": 1.5078, "queue_k_norm": 1.5377, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4455, "sent_len_1": 66.8608, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6962, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 43900 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 2.737, "doc_norm": 1.5372, "encoder_q-embeddings": 1612.0237, "encoder_q-layer.0": 1051.1088, "encoder_q-layer.1": 1138.3148, "encoder_q-layer.10": 1774.4758, "encoder_q-layer.11": 3532.2976, "encoder_q-layer.2": 1297.0239, "encoder_q-layer.3": 1348.687, "encoder_q-layer.4": 1436.0718, "encoder_q-layer.5": 1424.6562, "encoder_q-layer.6": 1544.5261, "encoder_q-layer.7": 1605.1475, "encoder_q-layer.8": 1845.1766, "encoder_q-layer.9": 1718.6334, "epoch": 0.43, "inbatch_neg_score": 0.524, "inbatch_pos_score": 1.2344, "learning_rate": 3.111111111111111e-05, "loss": 2.737, "norm_diff": 0.0334, "norm_loss": 0.0, "num_token_doc": 66.6785, "num_token_overlap": 17.8054, "num_token_query": 52.3557, "num_token_union": 73.7166, "num_word_context": 202.4915, "num_word_doc": 49.7493, "num_word_query": 39.9426, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2515.8775, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5234, "query_norm": 1.5038, "queue_k_norm": 1.5393, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3557, "sent_len_1": 66.6785, "sent_len_max_0": 128.0, "sent_len_max_1": 210.2175, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 44000 }, { "accuracy": 59.3262, "active_queue_size": 16384.0, "cl_loss": 2.7248, "doc_norm": 1.5401, "encoder_q-embeddings": 1623.4038, "encoder_q-layer.0": 1128.2441, "encoder_q-layer.1": 1278.0533, "encoder_q-layer.10": 1917.0131, "encoder_q-layer.11": 3541.6921, "encoder_q-layer.2": 1427.2709, "encoder_q-layer.3": 1579.7275, "encoder_q-layer.4": 1749.9498, "encoder_q-layer.5": 1884.6685, "encoder_q-layer.6": 1938.2693, "encoder_q-layer.7": 1917.6183, "encoder_q-layer.8": 1969.0824, "encoder_q-layer.9": 1781.9775, "epoch": 0.43, "inbatch_neg_score": 0.5225, "inbatch_pos_score": 1.252, "learning_rate": 3.105555555555555e-05, "loss": 2.7248, "norm_diff": 0.0226, "norm_loss": 0.0, "num_token_doc": 66.5936, "num_token_overlap": 17.7959, "num_token_query": 52.2064, "num_token_union": 73.562, "num_word_context": 202.0423, "num_word_doc": 49.7035, "num_word_query": 39.7795, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2731.1773, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5225, "query_norm": 1.5175, "queue_k_norm": 1.5398, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2064, "sent_len_1": 66.5936, "sent_len_max_0": 128.0, "sent_len_max_1": 207.0075, "stdk": 0.049, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 44100 }, { "accuracy": 58.8379, "active_queue_size": 16384.0, "cl_loss": 2.7324, "doc_norm": 1.5416, "encoder_q-embeddings": 2626.9985, "encoder_q-layer.0": 1941.1554, "encoder_q-layer.1": 2306.7549, "encoder_q-layer.10": 1631.9648, "encoder_q-layer.11": 3225.5198, "encoder_q-layer.2": 2468.5496, "encoder_q-layer.3": 2820.7725, "encoder_q-layer.4": 2880.4294, "encoder_q-layer.5": 2661.855, "encoder_q-layer.6": 2565.0571, "encoder_q-layer.7": 2359.7529, "encoder_q-layer.8": 2313.6038, "encoder_q-layer.9": 1821.5052, "epoch": 0.43, "inbatch_neg_score": 0.5273, "inbatch_pos_score": 1.2705, "learning_rate": 3.1e-05, "loss": 2.7324, "norm_diff": 0.012, "norm_loss": 0.0, "num_token_doc": 66.7965, "num_token_overlap": 17.8086, "num_token_query": 52.2695, "num_token_union": 73.7515, "num_word_context": 202.1895, "num_word_doc": 49.8627, "num_word_query": 39.8682, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3647.0151, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5254, "query_norm": 1.5338, "queue_k_norm": 1.5405, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2695, "sent_len_1": 66.7965, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2025, "stdk": 0.0491, "stdq": 0.0469, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 44200 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.7331, "doc_norm": 1.5378, "encoder_q-embeddings": 1441.2775, "encoder_q-layer.0": 961.2744, "encoder_q-layer.1": 1081.6506, "encoder_q-layer.10": 1834.7367, "encoder_q-layer.11": 3510.0513, "encoder_q-layer.2": 1249.8109, "encoder_q-layer.3": 1203.5043, "encoder_q-layer.4": 1295.1266, "encoder_q-layer.5": 1303.9574, "encoder_q-layer.6": 1499.111, "encoder_q-layer.7": 1642.614, "encoder_q-layer.8": 1995.7411, "encoder_q-layer.9": 1792.676, "epoch": 0.43, "inbatch_neg_score": 0.5287, "inbatch_pos_score": 1.2422, "learning_rate": 3.094444444444445e-05, "loss": 2.7331, "norm_diff": 0.0158, "norm_loss": 0.0, "num_token_doc": 66.8394, "num_token_overlap": 17.8189, "num_token_query": 52.2715, "num_token_union": 73.7243, "num_word_context": 202.5433, "num_word_doc": 49.8634, "num_word_query": 39.8757, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2506.348, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5278, "query_norm": 1.5231, "queue_k_norm": 1.5384, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2715, "sent_len_1": 66.8394, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3225, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 44300 }, { "accuracy": 60.2051, "active_queue_size": 16384.0, "cl_loss": 2.7276, "doc_norm": 1.5354, "encoder_q-embeddings": 1359.1196, "encoder_q-layer.0": 883.2052, "encoder_q-layer.1": 939.57, "encoder_q-layer.10": 1787.2505, "encoder_q-layer.11": 3415.2402, "encoder_q-layer.2": 1112.7638, "encoder_q-layer.3": 1157.0438, "encoder_q-layer.4": 1271.043, "encoder_q-layer.5": 1322.7629, "encoder_q-layer.6": 1430.1445, "encoder_q-layer.7": 1552.3433, "encoder_q-layer.8": 1962.8793, "encoder_q-layer.9": 1711.092, "epoch": 0.43, "inbatch_neg_score": 0.5299, "inbatch_pos_score": 1.2656, "learning_rate": 3.088888888888889e-05, "loss": 2.7276, "norm_diff": 0.0131, "norm_loss": 0.0, "num_token_doc": 66.6166, "num_token_overlap": 17.7806, "num_token_query": 52.3199, "num_token_union": 73.6551, "num_word_context": 202.2838, "num_word_doc": 49.693, "num_word_query": 39.9233, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2385.6144, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5298, "query_norm": 1.5457, "queue_k_norm": 1.5384, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3199, "sent_len_1": 66.6166, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2775, "stdk": 0.0488, "stdq": 0.0471, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 44400 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.7408, "doc_norm": 1.5413, "encoder_q-embeddings": 1455.2661, "encoder_q-layer.0": 945.9741, "encoder_q-layer.1": 1011.6031, "encoder_q-layer.10": 1776.5168, "encoder_q-layer.11": 3291.4419, "encoder_q-layer.2": 1186.1353, "encoder_q-layer.3": 1303.1754, "encoder_q-layer.4": 1445.566, "encoder_q-layer.5": 1471.674, "encoder_q-layer.6": 1572.6935, "encoder_q-layer.7": 1653.1125, "encoder_q-layer.8": 1907.3497, "encoder_q-layer.9": 1722.6523, "epoch": 0.43, "inbatch_neg_score": 0.5391, "inbatch_pos_score": 1.2637, "learning_rate": 3.0833333333333335e-05, "loss": 2.7408, "norm_diff": 0.0242, "norm_loss": 0.0, "num_token_doc": 66.7302, "num_token_overlap": 17.7902, "num_token_query": 52.2783, "num_token_union": 73.6921, "num_word_context": 202.1956, "num_word_doc": 49.7854, "num_word_query": 39.874, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2442.1061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5396, "query_norm": 1.5171, "queue_k_norm": 1.5404, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2783, "sent_len_1": 66.7302, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5337, "stdk": 0.0491, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 44500 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.7167, "doc_norm": 1.5408, "encoder_q-embeddings": 1497.9955, "encoder_q-layer.0": 994.1962, "encoder_q-layer.1": 1114.4216, "encoder_q-layer.10": 1708.3912, "encoder_q-layer.11": 3581.52, "encoder_q-layer.2": 1286.0142, "encoder_q-layer.3": 1359.2037, "encoder_q-layer.4": 1439.3854, "encoder_q-layer.5": 1514.7267, "encoder_q-layer.6": 1650.7753, "encoder_q-layer.7": 1774.2806, "encoder_q-layer.8": 2045.9607, "encoder_q-layer.9": 1791.0361, "epoch": 0.44, "inbatch_neg_score": 0.5462, "inbatch_pos_score": 1.2861, "learning_rate": 3.077777777777778e-05, "loss": 2.7167, "norm_diff": 0.0149, "norm_loss": 0.0, "num_token_doc": 67.0173, "num_token_overlap": 17.8929, "num_token_query": 52.4992, "num_token_union": 73.8855, "num_word_context": 202.3824, "num_word_doc": 50.0057, "num_word_query": 40.0617, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2585.2055, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5459, "query_norm": 1.5263, "queue_k_norm": 1.5413, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4992, "sent_len_1": 67.0173, "sent_len_max_0": 128.0, "sent_len_max_1": 210.47, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 44600 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.7209, "doc_norm": 1.5374, "encoder_q-embeddings": 5616.9814, "encoder_q-layer.0": 4360.1494, "encoder_q-layer.1": 4444.7739, "encoder_q-layer.10": 3361.5742, "encoder_q-layer.11": 6829.7705, "encoder_q-layer.2": 5022.1016, "encoder_q-layer.3": 5726.9824, "encoder_q-layer.4": 6768.0952, "encoder_q-layer.5": 7207.7578, "encoder_q-layer.6": 8198.9766, "encoder_q-layer.7": 7556.623, "encoder_q-layer.8": 7289.1401, "encoder_q-layer.9": 4644.8999, "epoch": 0.44, "inbatch_neg_score": 0.5544, "inbatch_pos_score": 1.2754, "learning_rate": 3.0722222222222227e-05, "loss": 2.7209, "norm_diff": 0.0117, "norm_loss": 0.0, "num_token_doc": 66.8136, "num_token_overlap": 17.8092, "num_token_query": 52.2751, "num_token_union": 73.724, "num_word_context": 202.2223, "num_word_doc": 49.8517, "num_word_query": 39.8708, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9072.0509, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5537, "query_norm": 1.5277, "queue_k_norm": 1.5422, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2751, "sent_len_1": 66.8136, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6238, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 44700 }, { "accuracy": 58.8379, "active_queue_size": 16384.0, "cl_loss": 2.7371, "doc_norm": 1.5405, "encoder_q-embeddings": 3446.7896, "encoder_q-layer.0": 2281.6128, "encoder_q-layer.1": 2472.3457, "encoder_q-layer.10": 3383.1177, "encoder_q-layer.11": 6818.2583, "encoder_q-layer.2": 2791.9004, "encoder_q-layer.3": 3044.6345, "encoder_q-layer.4": 3168.251, "encoder_q-layer.5": 3242.3818, "encoder_q-layer.6": 3498.731, "encoder_q-layer.7": 3493.0754, "encoder_q-layer.8": 4014.9058, "encoder_q-layer.9": 3390.5549, "epoch": 0.44, "inbatch_neg_score": 0.5645, "inbatch_pos_score": 1.2871, "learning_rate": 3.066666666666667e-05, "loss": 2.7371, "norm_diff": 0.0294, "norm_loss": 0.0, "num_token_doc": 66.836, "num_token_overlap": 17.7652, "num_token_query": 52.2708, "num_token_union": 73.7926, "num_word_context": 202.4877, "num_word_doc": 49.8711, "num_word_query": 39.8708, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5349.2037, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5649, "query_norm": 1.5111, "queue_k_norm": 1.5429, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2708, "sent_len_1": 66.836, "sent_len_max_0": 128.0, "sent_len_max_1": 207.62, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 44800 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.7143, "doc_norm": 1.5454, "encoder_q-embeddings": 3216.8875, "encoder_q-layer.0": 1966.3474, "encoder_q-layer.1": 2127.4639, "encoder_q-layer.10": 3578.0818, "encoder_q-layer.11": 7626.5503, "encoder_q-layer.2": 2412.2139, "encoder_q-layer.3": 2600.1919, "encoder_q-layer.4": 2781.5427, "encoder_q-layer.5": 2980.4109, "encoder_q-layer.6": 3287.8828, "encoder_q-layer.7": 3715.4531, "encoder_q-layer.8": 4343.6499, "encoder_q-layer.9": 3826.5596, "epoch": 0.44, "inbatch_neg_score": 0.5665, "inbatch_pos_score": 1.3018, "learning_rate": 3.061111111111111e-05, "loss": 2.7143, "norm_diff": 0.0101, "norm_loss": 0.0, "num_token_doc": 66.7301, "num_token_overlap": 17.7928, "num_token_query": 52.1824, "num_token_union": 73.6268, "num_word_context": 202.1997, "num_word_doc": 49.763, "num_word_query": 39.7941, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5458.3658, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5645, "query_norm": 1.5353, "queue_k_norm": 1.5443, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1824, "sent_len_1": 66.7301, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3487, "stdk": 0.0491, "stdq": 0.0472, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 44900 }, { "accuracy": 60.6934, "active_queue_size": 16384.0, "cl_loss": 2.7202, "doc_norm": 1.5461, "encoder_q-embeddings": 2751.6436, "encoder_q-layer.0": 1736.6443, "encoder_q-layer.1": 1875.4301, "encoder_q-layer.10": 3248.9033, "encoder_q-layer.11": 6907.3477, "encoder_q-layer.2": 2122.6765, "encoder_q-layer.3": 2256.1794, "encoder_q-layer.4": 2424.6035, "encoder_q-layer.5": 2547.1035, "encoder_q-layer.6": 2959.8213, "encoder_q-layer.7": 3344.1638, "encoder_q-layer.8": 3580.123, "encoder_q-layer.9": 3364.6541, "epoch": 0.44, "inbatch_neg_score": 0.5727, "inbatch_pos_score": 1.3057, "learning_rate": 3.055555555555556e-05, "loss": 2.7202, "norm_diff": 0.0286, "norm_loss": 0.0, "num_token_doc": 66.888, "num_token_overlap": 17.8719, "num_token_query": 52.3635, "num_token_union": 73.7654, "num_word_context": 202.2168, "num_word_doc": 49.932, "num_word_query": 39.9417, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4839.4714, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5732, "query_norm": 1.5175, "queue_k_norm": 1.546, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3635, "sent_len_1": 66.888, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2962, "stdk": 0.0491, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 45000 }, { "accuracy": 59.5215, "active_queue_size": 16384.0, "cl_loss": 2.7243, "doc_norm": 1.5456, "encoder_q-embeddings": 3619.4417, "encoder_q-layer.0": 2316.6436, "encoder_q-layer.1": 2541.717, "encoder_q-layer.10": 3544.7209, "encoder_q-layer.11": 7309.1919, "encoder_q-layer.2": 2881.2715, "encoder_q-layer.3": 3120.5339, "encoder_q-layer.4": 3297.4026, "encoder_q-layer.5": 3214.4612, "encoder_q-layer.6": 3355.2417, "encoder_q-layer.7": 3555.2278, "encoder_q-layer.8": 4056.5688, "encoder_q-layer.9": 3427.3, "epoch": 0.44, "inbatch_neg_score": 0.5706, "inbatch_pos_score": 1.3086, "learning_rate": 3.05e-05, "loss": 2.7243, "norm_diff": 0.0247, "norm_loss": 0.0, "num_token_doc": 66.7956, "num_token_overlap": 17.7883, "num_token_query": 52.2749, "num_token_union": 73.7394, "num_word_context": 202.2849, "num_word_doc": 49.8459, "num_word_query": 39.8664, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5547.7204, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5713, "query_norm": 1.5209, "queue_k_norm": 1.5462, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2749, "sent_len_1": 66.7956, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7312, "stdk": 0.049, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 45100 }, { "accuracy": 60.3027, "active_queue_size": 16384.0, "cl_loss": 2.7372, "doc_norm": 1.5449, "encoder_q-embeddings": 3150.2883, "encoder_q-layer.0": 2072.0679, "encoder_q-layer.1": 2373.5469, "encoder_q-layer.10": 3988.2021, "encoder_q-layer.11": 7999.1104, "encoder_q-layer.2": 2805.7725, "encoder_q-layer.3": 3065.8301, "encoder_q-layer.4": 3361.9133, "encoder_q-layer.5": 3658.6072, "encoder_q-layer.6": 4128.4268, "encoder_q-layer.7": 4195.0708, "encoder_q-layer.8": 4837.3145, "encoder_q-layer.9": 3968.9592, "epoch": 0.44, "inbatch_neg_score": 0.5733, "inbatch_pos_score": 1.3135, "learning_rate": 3.044444444444445e-05, "loss": 2.7372, "norm_diff": 0.0201, "norm_loss": 0.0, "num_token_doc": 66.6843, "num_token_overlap": 17.78, "num_token_query": 52.2154, "num_token_union": 73.6384, "num_word_context": 202.0814, "num_word_doc": 49.7645, "num_word_query": 39.81, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5747.6338, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5737, "query_norm": 1.5265, "queue_k_norm": 1.5463, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2154, "sent_len_1": 66.6843, "sent_len_max_0": 128.0, "sent_len_max_1": 207.7688, "stdk": 0.049, "stdq": 0.0469, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 45200 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.7161, "doc_norm": 1.5432, "encoder_q-embeddings": 2717.365, "encoder_q-layer.0": 1778.4446, "encoder_q-layer.1": 1859.9902, "encoder_q-layer.10": 3081.8162, "encoder_q-layer.11": 6714.1001, "encoder_q-layer.2": 2155.0164, "encoder_q-layer.3": 2280.5891, "encoder_q-layer.4": 2448.4729, "encoder_q-layer.5": 2534.4282, "encoder_q-layer.6": 2812.2996, "encoder_q-layer.7": 3095.6802, "encoder_q-layer.8": 3570.0969, "encoder_q-layer.9": 3136.7002, "epoch": 0.44, "inbatch_neg_score": 0.566, "inbatch_pos_score": 1.3027, "learning_rate": 3.0388888888888887e-05, "loss": 2.7161, "norm_diff": 0.043, "norm_loss": 0.0, "num_token_doc": 66.8029, "num_token_overlap": 17.8601, "num_token_query": 52.4013, "num_token_union": 73.746, "num_word_context": 202.1712, "num_word_doc": 49.8677, "num_word_query": 39.9825, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4622.0844, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5664, "query_norm": 1.5002, "queue_k_norm": 1.5464, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4013, "sent_len_1": 66.8029, "sent_len_max_0": 128.0, "sent_len_max_1": 206.9125, "stdk": 0.0489, "stdq": 0.046, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 45300 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.731, "doc_norm": 1.5489, "encoder_q-embeddings": 4103.3989, "encoder_q-layer.0": 2814.6616, "encoder_q-layer.1": 3008.1077, "encoder_q-layer.10": 3198.418, "encoder_q-layer.11": 6465.3677, "encoder_q-layer.2": 3927.0591, "encoder_q-layer.3": 4415.1108, "encoder_q-layer.4": 4549.7217, "encoder_q-layer.5": 4477.2954, "encoder_q-layer.6": 5033.1816, "encoder_q-layer.7": 5303.2568, "encoder_q-layer.8": 5321.689, "encoder_q-layer.9": 3584.9585, "epoch": 0.44, "inbatch_neg_score": 0.5705, "inbatch_pos_score": 1.3184, "learning_rate": 3.0333333333333337e-05, "loss": 2.731, "norm_diff": 0.0315, "norm_loss": 0.0, "num_token_doc": 66.8681, "num_token_overlap": 17.8237, "num_token_query": 52.3567, "num_token_union": 73.7945, "num_word_context": 202.4836, "num_word_doc": 49.875, "num_word_query": 39.9335, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6460.3679, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5698, "query_norm": 1.5174, "queue_k_norm": 1.5486, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3567, "sent_len_1": 66.8681, "sent_len_max_0": 128.0, "sent_len_max_1": 209.18, "stdk": 0.0491, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 45400 }, { "accuracy": 60.498, "active_queue_size": 16384.0, "cl_loss": 2.7258, "doc_norm": 1.5492, "encoder_q-embeddings": 2797.27, "encoder_q-layer.0": 1828.8123, "encoder_q-layer.1": 2000.2988, "encoder_q-layer.10": 3376.2527, "encoder_q-layer.11": 6943.3853, "encoder_q-layer.2": 2425.1682, "encoder_q-layer.3": 2529.1982, "encoder_q-layer.4": 2753.5693, "encoder_q-layer.5": 2876.7729, "encoder_q-layer.6": 3255.6455, "encoder_q-layer.7": 3502.637, "encoder_q-layer.8": 3992.0393, "encoder_q-layer.9": 3514.4792, "epoch": 0.44, "inbatch_neg_score": 0.5698, "inbatch_pos_score": 1.2998, "learning_rate": 3.0277777777777776e-05, "loss": 2.7258, "norm_diff": 0.0391, "norm_loss": 0.0, "num_token_doc": 66.8414, "num_token_overlap": 17.788, "num_token_query": 52.1868, "num_token_union": 73.7057, "num_word_context": 202.3957, "num_word_doc": 49.9025, "num_word_query": 39.7904, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4962.1226, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5693, "query_norm": 1.5101, "queue_k_norm": 1.55, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1868, "sent_len_1": 66.8414, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6562, "stdk": 0.0491, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 45500 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.7311, "doc_norm": 1.5502, "encoder_q-embeddings": 3536.2444, "encoder_q-layer.0": 2425.792, "encoder_q-layer.1": 2525.6528, "encoder_q-layer.10": 3594.5723, "encoder_q-layer.11": 7340.7065, "encoder_q-layer.2": 2936.4434, "encoder_q-layer.3": 3282.7217, "encoder_q-layer.4": 3595.7104, "encoder_q-layer.5": 3780.8333, "encoder_q-layer.6": 3501.4956, "encoder_q-layer.7": 3596.9739, "encoder_q-layer.8": 3998.4766, "encoder_q-layer.9": 3494.1597, "epoch": 0.45, "inbatch_neg_score": 0.5733, "inbatch_pos_score": 1.3115, "learning_rate": 3.0222222222222225e-05, "loss": 2.7311, "norm_diff": 0.0253, "norm_loss": 0.0, "num_token_doc": 66.8252, "num_token_overlap": 17.8046, "num_token_query": 52.2824, "num_token_union": 73.7473, "num_word_context": 202.1152, "num_word_doc": 49.8443, "num_word_query": 39.8659, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5543.7016, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5737, "query_norm": 1.5248, "queue_k_norm": 1.551, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2824, "sent_len_1": 66.8252, "sent_len_max_0": 128.0, "sent_len_max_1": 210.9625, "stdk": 0.0491, "stdq": 0.0468, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 45600 }, { "accuracy": 57.4707, "active_queue_size": 16384.0, "cl_loss": 2.7236, "doc_norm": 1.547, "encoder_q-embeddings": 3236.9319, "encoder_q-layer.0": 2000.9106, "encoder_q-layer.1": 2274.3518, "encoder_q-layer.10": 3675.2537, "encoder_q-layer.11": 7381.041, "encoder_q-layer.2": 2682.8721, "encoder_q-layer.3": 3019.5127, "encoder_q-layer.4": 3213.0461, "encoder_q-layer.5": 3303.8889, "encoder_q-layer.6": 3537.689, "encoder_q-layer.7": 3768.1526, "encoder_q-layer.8": 4597.0312, "encoder_q-layer.9": 3858.0198, "epoch": 0.45, "inbatch_neg_score": 0.577, "inbatch_pos_score": 1.2949, "learning_rate": 3.016666666666667e-05, "loss": 2.7236, "norm_diff": 0.032, "norm_loss": 0.0, "num_token_doc": 66.7403, "num_token_overlap": 17.795, "num_token_query": 52.2751, "num_token_union": 73.7019, "num_word_context": 202.4212, "num_word_doc": 49.7963, "num_word_query": 39.8985, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5527.3073, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5771, "query_norm": 1.515, "queue_k_norm": 1.5511, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2751, "sent_len_1": 66.7403, "sent_len_max_0": 128.0, "sent_len_max_1": 210.6525, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 45700 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.7045, "doc_norm": 1.5537, "encoder_q-embeddings": 1362.8563, "encoder_q-layer.0": 844.1936, "encoder_q-layer.1": 951.2071, "encoder_q-layer.10": 1757.5204, "encoder_q-layer.11": 3338.624, "encoder_q-layer.2": 1127.3121, "encoder_q-layer.3": 1187.6947, "encoder_q-layer.4": 1310.5394, "encoder_q-layer.5": 1388.5367, "encoder_q-layer.6": 1507.2812, "encoder_q-layer.7": 1546.8557, "encoder_q-layer.8": 1812.8062, "encoder_q-layer.9": 1613.2181, "epoch": 0.45, "inbatch_neg_score": 0.574, "inbatch_pos_score": 1.3105, "learning_rate": 3.0111111111111113e-05, "loss": 2.7045, "norm_diff": 0.0394, "norm_loss": 0.0, "num_token_doc": 66.8961, "num_token_overlap": 17.8828, "num_token_query": 52.3369, "num_token_union": 73.7364, "num_word_context": 202.3669, "num_word_doc": 49.8821, "num_word_query": 39.9056, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2370.645, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5752, "query_norm": 1.5143, "queue_k_norm": 1.5506, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3369, "sent_len_1": 66.8961, "sent_len_max_0": 128.0, "sent_len_max_1": 209.9375, "stdk": 0.0492, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 45800 }, { "accuracy": 58.9355, "active_queue_size": 16384.0, "cl_loss": 2.7208, "doc_norm": 1.5526, "encoder_q-embeddings": 1603.5135, "encoder_q-layer.0": 1089.3367, "encoder_q-layer.1": 1187.6479, "encoder_q-layer.10": 1868.7162, "encoder_q-layer.11": 3593.0813, "encoder_q-layer.2": 1363.2592, "encoder_q-layer.3": 1487.8839, "encoder_q-layer.4": 1686.3704, "encoder_q-layer.5": 1598.5964, "encoder_q-layer.6": 1770.6836, "encoder_q-layer.7": 1762.4387, "encoder_q-layer.8": 1996.9934, "encoder_q-layer.9": 1790.0684, "epoch": 0.45, "inbatch_neg_score": 0.5845, "inbatch_pos_score": 1.2939, "learning_rate": 3.005555555555556e-05, "loss": 2.7208, "norm_diff": 0.0585, "norm_loss": 0.0, "num_token_doc": 66.8035, "num_token_overlap": 17.7639, "num_token_query": 52.1755, "num_token_union": 73.7558, "num_word_context": 202.1915, "num_word_doc": 49.8625, "num_word_query": 39.8012, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2671.4538, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5845, "query_norm": 1.4941, "queue_k_norm": 1.5514, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1755, "sent_len_1": 66.8035, "sent_len_max_0": 128.0, "sent_len_max_1": 206.9812, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 45900 }, { "accuracy": 59.7168, "active_queue_size": 16384.0, "cl_loss": 2.7277, "doc_norm": 1.5494, "encoder_q-embeddings": 1840.2198, "encoder_q-layer.0": 1182.0481, "encoder_q-layer.1": 1263.787, "encoder_q-layer.10": 1715.0411, "encoder_q-layer.11": 3469.9216, "encoder_q-layer.2": 1483.6799, "encoder_q-layer.3": 1582.4321, "encoder_q-layer.4": 1721.8776, "encoder_q-layer.5": 1727.964, "encoder_q-layer.6": 1776.0499, "encoder_q-layer.7": 1920.0073, "encoder_q-layer.8": 1988.9602, "encoder_q-layer.9": 1685.2286, "epoch": 0.45, "inbatch_neg_score": 0.5837, "inbatch_pos_score": 1.3145, "learning_rate": 3e-05, "loss": 2.7277, "norm_diff": 0.0185, "norm_loss": 0.0, "num_token_doc": 66.81, "num_token_overlap": 17.8293, "num_token_query": 52.4164, "num_token_union": 73.7798, "num_word_context": 202.421, "num_word_doc": 49.8095, "num_word_query": 39.9887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2750.4803, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5835, "query_norm": 1.5309, "queue_k_norm": 1.5523, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4164, "sent_len_1": 66.81, "sent_len_max_0": 128.0, "sent_len_max_1": 209.645, "stdk": 0.0489, "stdq": 0.0468, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 46000 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.7216, "doc_norm": 1.5556, "encoder_q-embeddings": 1906.826, "encoder_q-layer.0": 1202.4746, "encoder_q-layer.1": 1468.2643, "encoder_q-layer.10": 1669.598, "encoder_q-layer.11": 3349.3772, "encoder_q-layer.2": 1564.8429, "encoder_q-layer.3": 1773.0311, "encoder_q-layer.4": 1978.8707, "encoder_q-layer.5": 2200.5728, "encoder_q-layer.6": 2333.542, "encoder_q-layer.7": 2309.5369, "encoder_q-layer.8": 1979.6415, "encoder_q-layer.9": 1668.5787, "epoch": 0.45, "inbatch_neg_score": 0.5814, "inbatch_pos_score": 1.332, "learning_rate": 2.9944444444444446e-05, "loss": 2.7216, "norm_diff": 0.0289, "norm_loss": 0.0, "num_token_doc": 66.5578, "num_token_overlap": 17.764, "num_token_query": 52.2332, "num_token_union": 73.548, "num_word_context": 201.9879, "num_word_doc": 49.6571, "num_word_query": 39.8433, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2965.9747, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5825, "query_norm": 1.5267, "queue_k_norm": 1.5533, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2332, "sent_len_1": 66.5578, "sent_len_max_0": 128.0, "sent_len_max_1": 206.65, "stdk": 0.0492, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 46100 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.7315, "doc_norm": 1.5589, "encoder_q-embeddings": 1694.6831, "encoder_q-layer.0": 1164.0309, "encoder_q-layer.1": 1306.0125, "encoder_q-layer.10": 1831.3505, "encoder_q-layer.11": 3514.8613, "encoder_q-layer.2": 1548.8958, "encoder_q-layer.3": 1651.6165, "encoder_q-layer.4": 1836.9036, "encoder_q-layer.5": 1965.8767, "encoder_q-layer.6": 2055.7385, "encoder_q-layer.7": 2149.3203, "encoder_q-layer.8": 2182.3271, "encoder_q-layer.9": 1810.8287, "epoch": 0.45, "inbatch_neg_score": 0.5881, "inbatch_pos_score": 1.3379, "learning_rate": 2.988888888888889e-05, "loss": 2.7315, "norm_diff": 0.0135, "norm_loss": 0.0, "num_token_doc": 66.764, "num_token_overlap": 17.8178, "num_token_query": 52.223, "num_token_union": 73.6581, "num_word_context": 201.9603, "num_word_doc": 49.7998, "num_word_query": 39.8314, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2862.7924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5889, "query_norm": 1.5454, "queue_k_norm": 1.5549, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.223, "sent_len_1": 66.764, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0462, "stdk": 0.0493, "stdq": 0.0474, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 46200 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.7147, "doc_norm": 1.5527, "encoder_q-embeddings": 1807.6027, "encoder_q-layer.0": 1204.2004, "encoder_q-layer.1": 1352.5465, "encoder_q-layer.10": 1573.2485, "encoder_q-layer.11": 3160.0659, "encoder_q-layer.2": 1632.561, "encoder_q-layer.3": 1767.0875, "encoder_q-layer.4": 1885.4331, "encoder_q-layer.5": 1887.177, "encoder_q-layer.6": 1997.3419, "encoder_q-layer.7": 1818.0457, "encoder_q-layer.8": 1950.0236, "encoder_q-layer.9": 1610.5048, "epoch": 0.45, "inbatch_neg_score": 0.584, "inbatch_pos_score": 1.334, "learning_rate": 2.9833333333333335e-05, "loss": 2.7147, "norm_diff": 0.0288, "norm_loss": 0.0, "num_token_doc": 66.9392, "num_token_overlap": 17.8526, "num_token_query": 52.364, "num_token_union": 73.8065, "num_word_context": 202.5288, "num_word_doc": 49.9522, "num_word_query": 39.9424, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2739.609, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.584, "query_norm": 1.524, "queue_k_norm": 1.555, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.364, "sent_len_1": 66.9392, "sent_len_max_0": 128.0, "sent_len_max_1": 209.9288, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 46300 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.7129, "doc_norm": 1.5545, "encoder_q-embeddings": 1294.4519, "encoder_q-layer.0": 830.7311, "encoder_q-layer.1": 892.497, "encoder_q-layer.10": 1748.8403, "encoder_q-layer.11": 3372.3831, "encoder_q-layer.2": 989.7045, "encoder_q-layer.3": 1028.6664, "encoder_q-layer.4": 1103.4873, "encoder_q-layer.5": 1147.3208, "encoder_q-layer.6": 1370.0787, "encoder_q-layer.7": 1507.6362, "encoder_q-layer.8": 1790.1152, "encoder_q-layer.9": 1745.832, "epoch": 0.45, "inbatch_neg_score": 0.5919, "inbatch_pos_score": 1.3311, "learning_rate": 2.9777777777777777e-05, "loss": 2.7129, "norm_diff": 0.0136, "norm_loss": 0.0, "num_token_doc": 66.5858, "num_token_overlap": 17.7978, "num_token_query": 52.2906, "num_token_union": 73.6081, "num_word_context": 202.2198, "num_word_doc": 49.6827, "num_word_query": 39.867, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2291.1504, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5913, "query_norm": 1.5409, "queue_k_norm": 1.5536, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2906, "sent_len_1": 66.5858, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6587, "stdk": 0.049, "stdq": 0.047, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 46400 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.7109, "doc_norm": 1.5527, "encoder_q-embeddings": 1767.9529, "encoder_q-layer.0": 1254.4718, "encoder_q-layer.1": 1466.3839, "encoder_q-layer.10": 1952.3273, "encoder_q-layer.11": 3861.4812, "encoder_q-layer.2": 1812.95, "encoder_q-layer.3": 1949.8879, "encoder_q-layer.4": 1992.0186, "encoder_q-layer.5": 2014.1127, "encoder_q-layer.6": 1989.8857, "encoder_q-layer.7": 1972.2581, "encoder_q-layer.8": 2154.4592, "encoder_q-layer.9": 1884.9351, "epoch": 0.45, "inbatch_neg_score": 0.5946, "inbatch_pos_score": 1.3145, "learning_rate": 2.9722222222222223e-05, "loss": 2.7109, "norm_diff": 0.0179, "norm_loss": 0.0, "num_token_doc": 66.8126, "num_token_overlap": 17.849, "num_token_query": 52.3596, "num_token_union": 73.7262, "num_word_context": 202.458, "num_word_doc": 49.8438, "num_word_query": 39.9308, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2996.7592, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5938, "query_norm": 1.5357, "queue_k_norm": 1.5556, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3596, "sent_len_1": 66.8126, "sent_len_max_0": 128.0, "sent_len_max_1": 209.79, "stdk": 0.0489, "stdq": 0.0467, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 46500 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.7146, "doc_norm": 1.552, "encoder_q-embeddings": 4818.9966, "encoder_q-layer.0": 3285.644, "encoder_q-layer.1": 3860.0515, "encoder_q-layer.10": 1824.202, "encoder_q-layer.11": 3532.1711, "encoder_q-layer.2": 4590.4126, "encoder_q-layer.3": 5101.125, "encoder_q-layer.4": 5751.5, "encoder_q-layer.5": 5477.6699, "encoder_q-layer.6": 4577.6372, "encoder_q-layer.7": 3577.5703, "encoder_q-layer.8": 3088.2021, "encoder_q-layer.9": 1853.6556, "epoch": 0.45, "inbatch_neg_score": 0.6005, "inbatch_pos_score": 1.3105, "learning_rate": 2.9666666666666672e-05, "loss": 2.7146, "norm_diff": 0.0317, "norm_loss": 0.0, "num_token_doc": 66.7125, "num_token_overlap": 17.8437, "num_token_query": 52.314, "num_token_union": 73.6675, "num_word_context": 202.052, "num_word_doc": 49.765, "num_word_query": 39.8792, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6158.7653, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6001, "query_norm": 1.5203, "queue_k_norm": 1.5574, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.314, "sent_len_1": 66.7125, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4737, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 46600 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.6982, "doc_norm": 1.5562, "encoder_q-embeddings": 1625.3247, "encoder_q-layer.0": 1053.5131, "encoder_q-layer.1": 1187.8612, "encoder_q-layer.10": 1900.0654, "encoder_q-layer.11": 3777.2136, "encoder_q-layer.2": 1390.3994, "encoder_q-layer.3": 1447.5525, "encoder_q-layer.4": 1639.3131, "encoder_q-layer.5": 1748.7291, "encoder_q-layer.6": 1931.8134, "encoder_q-layer.7": 1890.4364, "encoder_q-layer.8": 2188.7422, "encoder_q-layer.9": 1887.8497, "epoch": 0.46, "inbatch_neg_score": 0.6114, "inbatch_pos_score": 1.3477, "learning_rate": 2.961111111111111e-05, "loss": 2.6982, "norm_diff": 0.0093, "norm_loss": 0.0, "num_token_doc": 66.8435, "num_token_overlap": 17.7984, "num_token_query": 52.4052, "num_token_union": 73.8479, "num_word_context": 202.3002, "num_word_doc": 49.8674, "num_word_query": 39.9674, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2806.5156, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6113, "query_norm": 1.5507, "queue_k_norm": 1.5594, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4052, "sent_len_1": 66.8435, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2688, "stdk": 0.049, "stdq": 0.0469, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 46700 }, { "accuracy": 59.6191, "active_queue_size": 16384.0, "cl_loss": 2.6998, "doc_norm": 1.56, "encoder_q-embeddings": 1907.839, "encoder_q-layer.0": 1300.5234, "encoder_q-layer.1": 1365.684, "encoder_q-layer.10": 1724.1316, "encoder_q-layer.11": 3435.4854, "encoder_q-layer.2": 1599.3191, "encoder_q-layer.3": 1734.2351, "encoder_q-layer.4": 1731.672, "encoder_q-layer.5": 1730.0789, "encoder_q-layer.6": 1831.4001, "encoder_q-layer.7": 1832.8275, "encoder_q-layer.8": 1900.2908, "encoder_q-layer.9": 1697.525, "epoch": 0.46, "inbatch_neg_score": 0.6123, "inbatch_pos_score": 1.3467, "learning_rate": 2.955555555555556e-05, "loss": 2.6998, "norm_diff": 0.0201, "norm_loss": 0.0, "num_token_doc": 66.9592, "num_token_overlap": 17.8875, "num_token_query": 52.408, "num_token_union": 73.8571, "num_word_context": 202.5709, "num_word_doc": 49.9863, "num_word_query": 39.9813, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2779.1303, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6128, "query_norm": 1.5399, "queue_k_norm": 1.5594, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.408, "sent_len_1": 66.9592, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6887, "stdk": 0.0491, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 46800 }, { "accuracy": 61.9629, "active_queue_size": 16384.0, "cl_loss": 2.7164, "doc_norm": 1.5606, "encoder_q-embeddings": 2063.4243, "encoder_q-layer.0": 1303.3665, "encoder_q-layer.1": 1473.1074, "encoder_q-layer.10": 1735.9097, "encoder_q-layer.11": 3329.3389, "encoder_q-layer.2": 1645.5867, "encoder_q-layer.3": 1799.376, "encoder_q-layer.4": 1889.6124, "encoder_q-layer.5": 1939.9325, "encoder_q-layer.6": 2100.1467, "encoder_q-layer.7": 2122.1304, "encoder_q-layer.8": 2266.946, "encoder_q-layer.9": 1700.2629, "epoch": 0.46, "inbatch_neg_score": 0.6191, "inbatch_pos_score": 1.375, "learning_rate": 2.95e-05, "loss": 2.7164, "norm_diff": 0.0091, "norm_loss": 0.0, "num_token_doc": 66.8577, "num_token_overlap": 17.8129, "num_token_query": 52.231, "num_token_union": 73.7231, "num_word_context": 202.2783, "num_word_doc": 49.8821, "num_word_query": 39.8326, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2943.8822, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6177, "query_norm": 1.5518, "queue_k_norm": 1.5607, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.231, "sent_len_1": 66.8577, "sent_len_max_0": 128.0, "sent_len_max_1": 210.1325, "stdk": 0.049, "stdq": 0.0467, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 46900 }, { "accuracy": 60.3027, "active_queue_size": 16384.0, "cl_loss": 2.7136, "doc_norm": 1.5614, "encoder_q-embeddings": 2459.074, "encoder_q-layer.0": 1724.9563, "encoder_q-layer.1": 2047.4413, "encoder_q-layer.10": 1603.1573, "encoder_q-layer.11": 3259.8438, "encoder_q-layer.2": 2281.2776, "encoder_q-layer.3": 2306.1558, "encoder_q-layer.4": 2802.0703, "encoder_q-layer.5": 2612.9587, "encoder_q-layer.6": 2729.241, "encoder_q-layer.7": 2441.4602, "encoder_q-layer.8": 2189.2751, "encoder_q-layer.9": 1661.8529, "epoch": 0.46, "inbatch_neg_score": 0.624, "inbatch_pos_score": 1.3613, "learning_rate": 2.9444444444444448e-05, "loss": 2.7136, "norm_diff": 0.0073, "norm_loss": 0.0, "num_token_doc": 66.7366, "num_token_overlap": 17.7849, "num_token_query": 52.1952, "num_token_union": 73.6274, "num_word_context": 202.1701, "num_word_doc": 49.774, "num_word_query": 39.822, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3444.9657, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.623, "query_norm": 1.5585, "queue_k_norm": 1.5604, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1952, "sent_len_1": 66.7366, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7463, "stdk": 0.049, "stdq": 0.0467, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 47000 }, { "accuracy": 59.5215, "active_queue_size": 16384.0, "cl_loss": 2.7077, "doc_norm": 1.564, "encoder_q-embeddings": 1557.0624, "encoder_q-layer.0": 992.9995, "encoder_q-layer.1": 1101.8616, "encoder_q-layer.10": 1738.5503, "encoder_q-layer.11": 3521.5598, "encoder_q-layer.2": 1251.1633, "encoder_q-layer.3": 1339.5219, "encoder_q-layer.4": 1493.6957, "encoder_q-layer.5": 1569.4812, "encoder_q-layer.6": 1808.3169, "encoder_q-layer.7": 1825.3069, "encoder_q-layer.8": 2024.5105, "encoder_q-layer.9": 1729.9188, "epoch": 0.46, "inbatch_neg_score": 0.6329, "inbatch_pos_score": 1.3662, "learning_rate": 2.9388888888888887e-05, "loss": 2.7077, "norm_diff": 0.0072, "norm_loss": 0.0, "num_token_doc": 66.9376, "num_token_overlap": 17.802, "num_token_query": 52.1731, "num_token_union": 73.7165, "num_word_context": 202.1819, "num_word_doc": 49.925, "num_word_query": 39.7822, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2598.7242, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6333, "query_norm": 1.5595, "queue_k_norm": 1.565, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1731, "sent_len_1": 66.9376, "sent_len_max_0": 128.0, "sent_len_max_1": 210.5538, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 47100 }, { "accuracy": 60.9863, "active_queue_size": 16384.0, "cl_loss": 2.7099, "doc_norm": 1.5706, "encoder_q-embeddings": 2495.1777, "encoder_q-layer.0": 1625.2908, "encoder_q-layer.1": 1858.8955, "encoder_q-layer.10": 1685.5557, "encoder_q-layer.11": 3399.5703, "encoder_q-layer.2": 2282.0776, "encoder_q-layer.3": 2403.7319, "encoder_q-layer.4": 2432.9546, "encoder_q-layer.5": 2590.0522, "encoder_q-layer.6": 2479.9417, "encoder_q-layer.7": 2021.5946, "encoder_q-layer.8": 2064.6301, "encoder_q-layer.9": 1745.5065, "epoch": 0.46, "inbatch_neg_score": 0.6336, "inbatch_pos_score": 1.3828, "learning_rate": 2.9333333333333336e-05, "loss": 2.7099, "norm_diff": 0.0117, "norm_loss": 0.0, "num_token_doc": 66.8195, "num_token_overlap": 17.8801, "num_token_query": 52.4493, "num_token_union": 73.8212, "num_word_context": 202.3664, "num_word_doc": 49.8759, "num_word_query": 40.011, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3362.5755, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6343, "query_norm": 1.5643, "queue_k_norm": 1.5655, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4493, "sent_len_1": 66.8195, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5737, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 47200 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.7092, "doc_norm": 1.5656, "encoder_q-embeddings": 1740.983, "encoder_q-layer.0": 1162.4089, "encoder_q-layer.1": 1380.8597, "encoder_q-layer.10": 1668.973, "encoder_q-layer.11": 3330.0288, "encoder_q-layer.2": 1611.9259, "encoder_q-layer.3": 1742.7791, "encoder_q-layer.4": 1891.204, "encoder_q-layer.5": 2018.9518, "encoder_q-layer.6": 1883.3455, "encoder_q-layer.7": 1810.9749, "encoder_q-layer.8": 1878.0057, "encoder_q-layer.9": 1692.3795, "epoch": 0.46, "inbatch_neg_score": 0.6432, "inbatch_pos_score": 1.3789, "learning_rate": 2.927777777777778e-05, "loss": 2.7092, "norm_diff": 0.0055, "norm_loss": 0.0, "num_token_doc": 66.9969, "num_token_overlap": 17.8251, "num_token_query": 52.1511, "num_token_union": 73.7402, "num_word_context": 202.5367, "num_word_doc": 49.9998, "num_word_query": 39.7829, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2741.3703, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.644, "query_norm": 1.5712, "queue_k_norm": 1.5665, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1511, "sent_len_1": 66.9969, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0863, "stdk": 0.0491, "stdq": 0.0467, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 47300 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.7207, "doc_norm": 1.5648, "encoder_q-embeddings": 1459.3007, "encoder_q-layer.0": 926.7253, "encoder_q-layer.1": 1020.725, "encoder_q-layer.10": 1684.4113, "encoder_q-layer.11": 3381.4897, "encoder_q-layer.2": 1165.0566, "encoder_q-layer.3": 1231.3438, "encoder_q-layer.4": 1350.8899, "encoder_q-layer.5": 1377.0386, "encoder_q-layer.6": 1545.6191, "encoder_q-layer.7": 1733.0605, "encoder_q-layer.8": 1948.3813, "encoder_q-layer.9": 1688.6335, "epoch": 0.46, "inbatch_neg_score": 0.6551, "inbatch_pos_score": 1.375, "learning_rate": 2.9222222222222224e-05, "loss": 2.7207, "norm_diff": 0.0086, "norm_loss": 0.0, "num_token_doc": 66.7759, "num_token_overlap": 17.8196, "num_token_query": 52.362, "num_token_union": 73.7351, "num_word_context": 202.5118, "num_word_doc": 49.8321, "num_word_query": 39.9432, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2461.3696, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6538, "query_norm": 1.5562, "queue_k_norm": 1.5681, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.362, "sent_len_1": 66.7759, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6675, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 47400 }, { "accuracy": 60.5957, "active_queue_size": 16384.0, "cl_loss": 2.7043, "doc_norm": 1.5677, "encoder_q-embeddings": 1686.3195, "encoder_q-layer.0": 1072.8663, "encoder_q-layer.1": 1238.8726, "encoder_q-layer.10": 1603.6328, "encoder_q-layer.11": 3217.6106, "encoder_q-layer.2": 1444.4137, "encoder_q-layer.3": 1535.4086, "encoder_q-layer.4": 1640.4373, "encoder_q-layer.5": 1679.0396, "encoder_q-layer.6": 1878.7957, "encoder_q-layer.7": 1833.1848, "encoder_q-layer.8": 1946.8765, "encoder_q-layer.9": 1655.0774, "epoch": 0.46, "inbatch_neg_score": 0.6558, "inbatch_pos_score": 1.3984, "learning_rate": 2.916666666666667e-05, "loss": 2.7043, "norm_diff": 0.008, "norm_loss": 0.0, "num_token_doc": 66.9707, "num_token_overlap": 17.8538, "num_token_query": 52.3934, "num_token_union": 73.8317, "num_word_context": 202.2971, "num_word_doc": 49.9244, "num_word_query": 39.9636, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2617.0039, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6553, "query_norm": 1.5688, "queue_k_norm": 1.5698, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3934, "sent_len_1": 66.9707, "sent_len_max_0": 128.0, "sent_len_max_1": 210.14, "stdk": 0.049, "stdq": 0.0467, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 47500 }, { "accuracy": 58.9355, "active_queue_size": 16384.0, "cl_loss": 2.7199, "doc_norm": 1.5702, "encoder_q-embeddings": 2827.8887, "encoder_q-layer.0": 1802.2581, "encoder_q-layer.1": 1974.6963, "encoder_q-layer.10": 1795.7983, "encoder_q-layer.11": 3759.3555, "encoder_q-layer.2": 2225.1692, "encoder_q-layer.3": 2387.4785, "encoder_q-layer.4": 2544.4202, "encoder_q-layer.5": 2471.5549, "encoder_q-layer.6": 2457.0413, "encoder_q-layer.7": 2309.0042, "encoder_q-layer.8": 2162.0474, "encoder_q-layer.9": 1775.0923, "epoch": 0.46, "inbatch_neg_score": 0.6548, "inbatch_pos_score": 1.375, "learning_rate": 2.9111111111111112e-05, "loss": 2.7199, "norm_diff": 0.0272, "norm_loss": 0.0, "num_token_doc": 66.6088, "num_token_overlap": 17.782, "num_token_query": 52.2991, "num_token_union": 73.6259, "num_word_context": 202.0068, "num_word_doc": 49.6951, "num_word_query": 39.8799, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3521.9604, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6538, "query_norm": 1.543, "queue_k_norm": 1.5688, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2991, "sent_len_1": 66.6088, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1087, "stdk": 0.0491, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 47600 }, { "accuracy": 61.2305, "active_queue_size": 16384.0, "cl_loss": 2.7133, "doc_norm": 1.5699, "encoder_q-embeddings": 3533.51, "encoder_q-layer.0": 2448.7834, "encoder_q-layer.1": 2982.6172, "encoder_q-layer.10": 1783.1224, "encoder_q-layer.11": 3519.2483, "encoder_q-layer.2": 3633.8267, "encoder_q-layer.3": 4018.6704, "encoder_q-layer.4": 3988.1655, "encoder_q-layer.5": 4169.728, "encoder_q-layer.6": 3166.5154, "encoder_q-layer.7": 2343.4094, "encoder_q-layer.8": 2144.8643, "encoder_q-layer.9": 1796.9352, "epoch": 0.47, "inbatch_neg_score": 0.6597, "inbatch_pos_score": 1.4219, "learning_rate": 2.9055555555555558e-05, "loss": 2.7133, "norm_diff": 0.0094, "norm_loss": 0.0, "num_token_doc": 66.6214, "num_token_overlap": 17.8241, "num_token_query": 52.2916, "num_token_union": 73.6469, "num_word_context": 202.1412, "num_word_doc": 49.7212, "num_word_query": 39.8795, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4624.6276, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6602, "query_norm": 1.5728, "queue_k_norm": 1.5711, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2916, "sent_len_1": 66.6214, "sent_len_max_0": 128.0, "sent_len_max_1": 206.875, "stdk": 0.049, "stdq": 0.0473, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 47700 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.7137, "doc_norm": 1.5765, "encoder_q-embeddings": 3163.075, "encoder_q-layer.0": 2011.7915, "encoder_q-layer.1": 2228.2627, "encoder_q-layer.10": 3116.5359, "encoder_q-layer.11": 6354.2622, "encoder_q-layer.2": 2454.9453, "encoder_q-layer.3": 2759.7271, "encoder_q-layer.4": 2778.4365, "encoder_q-layer.5": 2959.4458, "encoder_q-layer.6": 3229.2087, "encoder_q-layer.7": 3348.7922, "encoder_q-layer.8": 3671.4409, "encoder_q-layer.9": 3364.5618, "epoch": 0.47, "inbatch_neg_score": 0.6507, "inbatch_pos_score": 1.4023, "learning_rate": 2.9e-05, "loss": 2.7137, "norm_diff": 0.0169, "norm_loss": 0.0, "num_token_doc": 66.794, "num_token_overlap": 17.7936, "num_token_query": 52.2561, "num_token_union": 73.6919, "num_word_context": 202.404, "num_word_doc": 49.8359, "num_word_query": 39.857, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4899.556, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6514, "query_norm": 1.5596, "queue_k_norm": 1.5727, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2561, "sent_len_1": 66.794, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3225, "stdk": 0.0493, "stdq": 0.0471, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 47800 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.7032, "doc_norm": 1.5763, "encoder_q-embeddings": 3266.2283, "encoder_q-layer.0": 2139.6396, "encoder_q-layer.1": 2371.6479, "encoder_q-layer.10": 3640.4351, "encoder_q-layer.11": 6463.4214, "encoder_q-layer.2": 2766.7529, "encoder_q-layer.3": 2850.1172, "encoder_q-layer.4": 3130.1987, "encoder_q-layer.5": 3120.6375, "encoder_q-layer.6": 3323.5259, "encoder_q-layer.7": 3238.4683, "encoder_q-layer.8": 3683.8811, "encoder_q-layer.9": 3296.51, "epoch": 0.47, "inbatch_neg_score": 0.6498, "inbatch_pos_score": 1.4053, "learning_rate": 2.8944444444444446e-05, "loss": 2.7032, "norm_diff": 0.0162, "norm_loss": 0.0, "num_token_doc": 66.9311, "num_token_overlap": 17.8704, "num_token_query": 52.3565, "num_token_union": 73.7932, "num_word_context": 202.6001, "num_word_doc": 49.939, "num_word_query": 39.9281, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5047.5985, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6494, "query_norm": 1.5601, "queue_k_norm": 1.5734, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3565, "sent_len_1": 66.9311, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0925, "stdk": 0.0492, "stdq": 0.0473, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 47900 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 2.7156, "doc_norm": 1.5795, "encoder_q-embeddings": 3803.2651, "encoder_q-layer.0": 2635.2954, "encoder_q-layer.1": 2913.4556, "encoder_q-layer.10": 3218.2532, "encoder_q-layer.11": 6514.5249, "encoder_q-layer.2": 3507.3579, "encoder_q-layer.3": 4077.4497, "encoder_q-layer.4": 4514.4897, "encoder_q-layer.5": 4886.3135, "encoder_q-layer.6": 4886.3242, "encoder_q-layer.7": 4643.5151, "encoder_q-layer.8": 4095.9854, "encoder_q-layer.9": 3504.0989, "epoch": 0.47, "inbatch_neg_score": 0.646, "inbatch_pos_score": 1.3564, "learning_rate": 2.8888888888888888e-05, "loss": 2.7156, "norm_diff": 0.0612, "norm_loss": 0.0, "num_token_doc": 66.7983, "num_token_overlap": 17.8303, "num_token_query": 52.2372, "num_token_union": 73.7026, "num_word_context": 202.0691, "num_word_doc": 49.8334, "num_word_query": 39.8393, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6088.1196, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.646, "query_norm": 1.5184, "queue_k_norm": 1.5759, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2372, "sent_len_1": 66.7983, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1325, "stdk": 0.0493, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 48000 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.7198, "doc_norm": 1.5778, "encoder_q-embeddings": 2761.1855, "encoder_q-layer.0": 1760.0095, "encoder_q-layer.1": 1983.6211, "encoder_q-layer.10": 3257.5601, "encoder_q-layer.11": 6503.9395, "encoder_q-layer.2": 2296.0381, "encoder_q-layer.3": 2465.6597, "encoder_q-layer.4": 2671.9287, "encoder_q-layer.5": 2851.3962, "encoder_q-layer.6": 3070.3848, "encoder_q-layer.7": 3310.6145, "encoder_q-layer.8": 3710.0151, "encoder_q-layer.9": 3308.813, "epoch": 0.47, "inbatch_neg_score": 0.645, "inbatch_pos_score": 1.4043, "learning_rate": 2.8833333333333334e-05, "loss": 2.7198, "norm_diff": 0.0188, "norm_loss": 0.0, "num_token_doc": 66.7991, "num_token_overlap": 17.7862, "num_token_query": 52.1281, "num_token_union": 73.6417, "num_word_context": 202.2171, "num_word_doc": 49.8492, "num_word_query": 39.7433, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4763.1262, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6455, "query_norm": 1.559, "queue_k_norm": 1.5759, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1281, "sent_len_1": 66.7991, "sent_len_max_0": 128.0, "sent_len_max_1": 209.7887, "stdk": 0.0492, "stdq": 0.0474, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 48100 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.6964, "doc_norm": 1.5782, "encoder_q-embeddings": 3360.8069, "encoder_q-layer.0": 2140.7974, "encoder_q-layer.1": 2498.1208, "encoder_q-layer.10": 3230.2292, "encoder_q-layer.11": 6431.4927, "encoder_q-layer.2": 2897.6753, "encoder_q-layer.3": 3142.6177, "encoder_q-layer.4": 3460.1841, "encoder_q-layer.5": 3649.4407, "encoder_q-layer.6": 3696.9998, "encoder_q-layer.7": 3721.0698, "encoder_q-layer.8": 4090.7432, "encoder_q-layer.9": 3454.9614, "epoch": 0.47, "inbatch_neg_score": 0.6398, "inbatch_pos_score": 1.3994, "learning_rate": 2.877777777777778e-05, "loss": 2.6964, "norm_diff": 0.037, "norm_loss": 0.0, "num_token_doc": 66.8518, "num_token_overlap": 17.8574, "num_token_query": 52.2463, "num_token_union": 73.7042, "num_word_context": 202.4069, "num_word_doc": 49.8638, "num_word_query": 39.8124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5351.2164, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6406, "query_norm": 1.5413, "queue_k_norm": 1.577, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2463, "sent_len_1": 66.8518, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8525, "stdk": 0.0492, "stdq": 0.0468, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 48200 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.7035, "doc_norm": 1.574, "encoder_q-embeddings": 3127.8604, "encoder_q-layer.0": 1970.7172, "encoder_q-layer.1": 2247.5376, "encoder_q-layer.10": 3460.3457, "encoder_q-layer.11": 7213.7749, "encoder_q-layer.2": 2591.3501, "encoder_q-layer.3": 2776.1155, "encoder_q-layer.4": 3033.7322, "encoder_q-layer.5": 3355.0361, "encoder_q-layer.6": 3463.8455, "encoder_q-layer.7": 3580.9844, "encoder_q-layer.8": 3803.1892, "encoder_q-layer.9": 3407.1235, "epoch": 0.47, "inbatch_neg_score": 0.6487, "inbatch_pos_score": 1.4004, "learning_rate": 2.8722222222222222e-05, "loss": 2.7035, "norm_diff": 0.0175, "norm_loss": 0.0, "num_token_doc": 66.7692, "num_token_overlap": 17.7895, "num_token_query": 52.2203, "num_token_union": 73.7044, "num_word_context": 202.1526, "num_word_doc": 49.8406, "num_word_query": 39.836, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5270.2582, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6475, "query_norm": 1.5565, "queue_k_norm": 1.5769, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2203, "sent_len_1": 66.7692, "sent_len_max_0": 128.0, "sent_len_max_1": 208.23, "stdk": 0.049, "stdq": 0.0473, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 48300 }, { "accuracy": 59.7168, "active_queue_size": 16384.0, "cl_loss": 2.7105, "doc_norm": 1.5755, "encoder_q-embeddings": 3358.4158, "encoder_q-layer.0": 2327.5298, "encoder_q-layer.1": 3011.2095, "encoder_q-layer.10": 1737.3037, "encoder_q-layer.11": 3351.2661, "encoder_q-layer.2": 3314.0154, "encoder_q-layer.3": 3611.9736, "encoder_q-layer.4": 3997.7356, "encoder_q-layer.5": 3971.8154, "encoder_q-layer.6": 4116.5874, "encoder_q-layer.7": 3718.4414, "encoder_q-layer.8": 3104.0828, "encoder_q-layer.9": 2002.9425, "epoch": 0.47, "inbatch_neg_score": 0.6389, "inbatch_pos_score": 1.3604, "learning_rate": 2.8666666666666668e-05, "loss": 2.7105, "norm_diff": 0.0604, "norm_loss": 0.0, "num_token_doc": 67.0062, "num_token_overlap": 17.8681, "num_token_query": 52.292, "num_token_union": 73.804, "num_word_context": 202.4463, "num_word_doc": 50.0048, "num_word_query": 39.8759, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4790.3571, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6382, "query_norm": 1.5151, "queue_k_norm": 1.5774, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.292, "sent_len_1": 67.0062, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3512, "stdk": 0.0491, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 48400 }, { "accuracy": 58.9355, "active_queue_size": 16384.0, "cl_loss": 2.7143, "doc_norm": 1.5768, "encoder_q-embeddings": 1448.9661, "encoder_q-layer.0": 949.683, "encoder_q-layer.1": 1060.7161, "encoder_q-layer.10": 1754.5573, "encoder_q-layer.11": 3560.3223, "encoder_q-layer.2": 1213.6688, "encoder_q-layer.3": 1284.8506, "encoder_q-layer.4": 1427.5719, "encoder_q-layer.5": 1417.631, "encoder_q-layer.6": 1552.551, "encoder_q-layer.7": 1833.6779, "encoder_q-layer.8": 2031.0916, "encoder_q-layer.9": 1781.4843, "epoch": 0.47, "inbatch_neg_score": 0.6349, "inbatch_pos_score": 1.3672, "learning_rate": 2.861111111111111e-05, "loss": 2.7143, "norm_diff": 0.0432, "norm_loss": 0.0, "num_token_doc": 66.6606, "num_token_overlap": 17.7811, "num_token_query": 52.3597, "num_token_union": 73.6995, "num_word_context": 202.4169, "num_word_doc": 49.7503, "num_word_query": 39.9511, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2544.1524, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6362, "query_norm": 1.5337, "queue_k_norm": 1.5774, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3597, "sent_len_1": 66.6606, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2912, "stdk": 0.0491, "stdq": 0.0466, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 48500 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.699, "doc_norm": 1.5754, "encoder_q-embeddings": 1511.1326, "encoder_q-layer.0": 975.8655, "encoder_q-layer.1": 1045.1481, "encoder_q-layer.10": 1988.7904, "encoder_q-layer.11": 3828.1736, "encoder_q-layer.2": 1181.1107, "encoder_q-layer.3": 1308.043, "encoder_q-layer.4": 1415.678, "encoder_q-layer.5": 1448.7576, "encoder_q-layer.6": 1674.4216, "encoder_q-layer.7": 1826.8159, "encoder_q-layer.8": 2048.2688, "encoder_q-layer.9": 1813.2471, "epoch": 0.47, "inbatch_neg_score": 0.6443, "inbatch_pos_score": 1.3516, "learning_rate": 2.855555555555556e-05, "loss": 2.699, "norm_diff": 0.0608, "norm_loss": 0.0, "num_token_doc": 66.7263, "num_token_overlap": 17.8285, "num_token_query": 52.3394, "num_token_union": 73.6983, "num_word_context": 202.2332, "num_word_doc": 49.7965, "num_word_query": 39.9249, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2653.3244, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6445, "query_norm": 1.5145, "queue_k_norm": 1.5756, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3394, "sent_len_1": 66.7263, "sent_len_max_0": 128.0, "sent_len_max_1": 208.985, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 48600 }, { "accuracy": 58.7402, "active_queue_size": 16384.0, "cl_loss": 2.707, "doc_norm": 1.5787, "encoder_q-embeddings": 1403.6884, "encoder_q-layer.0": 871.7024, "encoder_q-layer.1": 935.0629, "encoder_q-layer.10": 1776.6338, "encoder_q-layer.11": 3642.1509, "encoder_q-layer.2": 1059.9369, "encoder_q-layer.3": 1170.0768, "encoder_q-layer.4": 1254.6251, "encoder_q-layer.5": 1327.2258, "encoder_q-layer.6": 1476.3527, "encoder_q-layer.7": 1663.4829, "encoder_q-layer.8": 1999.6107, "encoder_q-layer.9": 1846.0378, "epoch": 0.48, "inbatch_neg_score": 0.6377, "inbatch_pos_score": 1.3701, "learning_rate": 2.8499999999999998e-05, "loss": 2.707, "norm_diff": 0.0496, "norm_loss": 0.0, "num_token_doc": 66.7938, "num_token_overlap": 17.8508, "num_token_query": 52.2877, "num_token_union": 73.6811, "num_word_context": 202.2742, "num_word_doc": 49.8281, "num_word_query": 39.8918, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2474.6169, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6387, "query_norm": 1.5291, "queue_k_norm": 1.5757, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2877, "sent_len_1": 66.7938, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2475, "stdk": 0.0492, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 48700 }, { "accuracy": 61.4746, "active_queue_size": 16384.0, "cl_loss": 2.7111, "doc_norm": 1.5787, "encoder_q-embeddings": 1539.8511, "encoder_q-layer.0": 1024.2438, "encoder_q-layer.1": 1093.4014, "encoder_q-layer.10": 1742.9093, "encoder_q-layer.11": 3291.6565, "encoder_q-layer.2": 1297.8945, "encoder_q-layer.3": 1408.7722, "encoder_q-layer.4": 1510.4675, "encoder_q-layer.5": 1641.1798, "encoder_q-layer.6": 1726.1523, "encoder_q-layer.7": 1815.3271, "encoder_q-layer.8": 1971.1909, "encoder_q-layer.9": 1813.0114, "epoch": 0.48, "inbatch_neg_score": 0.6383, "inbatch_pos_score": 1.3721, "learning_rate": 2.8444444444444447e-05, "loss": 2.7111, "norm_diff": 0.0535, "norm_loss": 0.0, "num_token_doc": 66.7468, "num_token_overlap": 17.8487, "num_token_query": 52.4656, "num_token_union": 73.7725, "num_word_context": 202.1752, "num_word_doc": 49.79, "num_word_query": 40.0055, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2576.5142, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6377, "query_norm": 1.5252, "queue_k_norm": 1.5761, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4656, "sent_len_1": 66.7468, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2725, "stdk": 0.0492, "stdq": 0.0462, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 48800 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.7098, "doc_norm": 1.5793, "encoder_q-embeddings": 1343.1794, "encoder_q-layer.0": 855.4877, "encoder_q-layer.1": 922.7265, "encoder_q-layer.10": 1624.5538, "encoder_q-layer.11": 3418.783, "encoder_q-layer.2": 1041.0604, "encoder_q-layer.3": 1115.3091, "encoder_q-layer.4": 1206.2531, "encoder_q-layer.5": 1257.569, "encoder_q-layer.6": 1456.7278, "encoder_q-layer.7": 1643.6305, "encoder_q-layer.8": 1861.0032, "encoder_q-layer.9": 1648.9346, "epoch": 0.48, "inbatch_neg_score": 0.6383, "inbatch_pos_score": 1.374, "learning_rate": 2.8388888888888893e-05, "loss": 2.7098, "norm_diff": 0.0512, "norm_loss": 0.0, "num_token_doc": 66.9134, "num_token_overlap": 17.8391, "num_token_query": 52.3799, "num_token_union": 73.8321, "num_word_context": 202.7265, "num_word_doc": 49.9291, "num_word_query": 39.9376, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2378.5588, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6387, "query_norm": 1.5281, "queue_k_norm": 1.5778, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3799, "sent_len_1": 66.9134, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6738, "stdk": 0.0492, "stdq": 0.0463, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 48900 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.7092, "doc_norm": 1.577, "encoder_q-embeddings": 1998.7993, "encoder_q-layer.0": 1351.5398, "encoder_q-layer.1": 1459.0194, "encoder_q-layer.10": 1631.835, "encoder_q-layer.11": 3190.9019, "encoder_q-layer.2": 1737.2805, "encoder_q-layer.3": 1816.8815, "encoder_q-layer.4": 1987.9506, "encoder_q-layer.5": 1998.1213, "encoder_q-layer.6": 1888.7288, "encoder_q-layer.7": 1941.0444, "encoder_q-layer.8": 2046.3047, "encoder_q-layer.9": 1668.0746, "epoch": 0.48, "inbatch_neg_score": 0.6318, "inbatch_pos_score": 1.3682, "learning_rate": 2.8333333333333335e-05, "loss": 2.7092, "norm_diff": 0.0573, "norm_loss": 0.0, "num_token_doc": 66.7149, "num_token_overlap": 17.8093, "num_token_query": 52.3662, "num_token_union": 73.7241, "num_word_context": 202.5416, "num_word_doc": 49.7792, "num_word_query": 39.921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2875.1281, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6328, "query_norm": 1.5197, "queue_k_norm": 1.5787, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3662, "sent_len_1": 66.7149, "sent_len_max_0": 128.0, "sent_len_max_1": 205.92, "stdk": 0.0491, "stdq": 0.046, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 49000 }, { "accuracy": 59.3262, "active_queue_size": 16384.0, "cl_loss": 2.7094, "doc_norm": 1.575, "encoder_q-embeddings": 1378.361, "encoder_q-layer.0": 846.7332, "encoder_q-layer.1": 948.9483, "encoder_q-layer.10": 1741.501, "encoder_q-layer.11": 3578.2651, "encoder_q-layer.2": 1083.2858, "encoder_q-layer.3": 1157.5607, "encoder_q-layer.4": 1310.6943, "encoder_q-layer.5": 1366.2725, "encoder_q-layer.6": 1601.1677, "encoder_q-layer.7": 1767.5734, "encoder_q-layer.8": 2144.4688, "encoder_q-layer.9": 1782.376, "epoch": 0.48, "inbatch_neg_score": 0.6256, "inbatch_pos_score": 1.3594, "learning_rate": 2.827777777777778e-05, "loss": 2.7094, "norm_diff": 0.0557, "norm_loss": 0.0, "num_token_doc": 66.7517, "num_token_overlap": 17.8169, "num_token_query": 52.2619, "num_token_union": 73.6975, "num_word_context": 202.1679, "num_word_doc": 49.7743, "num_word_query": 39.8303, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2516.5566, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6255, "query_norm": 1.5193, "queue_k_norm": 1.5793, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2619, "sent_len_1": 66.7517, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2363, "stdk": 0.049, "stdq": 0.046, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 49100 }, { "accuracy": 59.4238, "active_queue_size": 16384.0, "cl_loss": 2.7059, "doc_norm": 1.5814, "encoder_q-embeddings": 1431.5398, "encoder_q-layer.0": 921.018, "encoder_q-layer.1": 1011.1078, "encoder_q-layer.10": 1753.7083, "encoder_q-layer.11": 3408.1143, "encoder_q-layer.2": 1166.84, "encoder_q-layer.3": 1287.1908, "encoder_q-layer.4": 1414.2542, "encoder_q-layer.5": 1447.3535, "encoder_q-layer.6": 1628.4897, "encoder_q-layer.7": 1780.163, "encoder_q-layer.8": 1932.3516, "encoder_q-layer.9": 1738.0709, "epoch": 0.48, "inbatch_neg_score": 0.6242, "inbatch_pos_score": 1.3496, "learning_rate": 2.8222222222222223e-05, "loss": 2.7059, "norm_diff": 0.0526, "norm_loss": 0.0, "num_token_doc": 66.8111, "num_token_overlap": 17.8263, "num_token_query": 52.2955, "num_token_union": 73.7182, "num_word_context": 202.2167, "num_word_doc": 49.8509, "num_word_query": 39.9029, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2482.0066, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6235, "query_norm": 1.5288, "queue_k_norm": 1.578, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2955, "sent_len_1": 66.8111, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4563, "stdk": 0.0493, "stdq": 0.0463, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 49200 }, { "accuracy": 62.4023, "active_queue_size": 16384.0, "cl_loss": 2.6949, "doc_norm": 1.5765, "encoder_q-embeddings": 1372.4156, "encoder_q-layer.0": 901.1424, "encoder_q-layer.1": 1014.5087, "encoder_q-layer.10": 1745.0293, "encoder_q-layer.11": 3447.2361, "encoder_q-layer.2": 1214.4072, "encoder_q-layer.3": 1289.1852, "encoder_q-layer.4": 1443.7897, "encoder_q-layer.5": 1497.8826, "encoder_q-layer.6": 1617.6676, "encoder_q-layer.7": 1811.2955, "encoder_q-layer.8": 2089.5386, "encoder_q-layer.9": 1857.8312, "epoch": 0.48, "inbatch_neg_score": 0.6283, "inbatch_pos_score": 1.3984, "learning_rate": 2.816666666666667e-05, "loss": 2.6949, "norm_diff": 0.0153, "norm_loss": 0.0, "num_token_doc": 66.7484, "num_token_overlap": 17.8, "num_token_query": 52.1978, "num_token_union": 73.6506, "num_word_context": 202.1661, "num_word_doc": 49.824, "num_word_query": 39.8237, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2552.0288, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6289, "query_norm": 1.5613, "queue_k_norm": 1.5768, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1978, "sent_len_1": 66.7484, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2163, "stdk": 0.0491, "stdq": 0.0475, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 49300 }, { "accuracy": 60.5957, "active_queue_size": 16384.0, "cl_loss": 2.7086, "doc_norm": 1.5783, "encoder_q-embeddings": 1897.4517, "encoder_q-layer.0": 1264.3309, "encoder_q-layer.1": 1381.5303, "encoder_q-layer.10": 1725.6731, "encoder_q-layer.11": 3551.1133, "encoder_q-layer.2": 1628.1425, "encoder_q-layer.3": 1764.7416, "encoder_q-layer.4": 2007.8876, "encoder_q-layer.5": 2101.6621, "encoder_q-layer.6": 2361.6045, "encoder_q-layer.7": 2141.8525, "encoder_q-layer.8": 2338.4866, "encoder_q-layer.9": 1777.7307, "epoch": 0.48, "inbatch_neg_score": 0.6287, "inbatch_pos_score": 1.373, "learning_rate": 2.811111111111111e-05, "loss": 2.7086, "norm_diff": 0.0289, "norm_loss": 0.0, "num_token_doc": 66.7552, "num_token_overlap": 17.7934, "num_token_query": 52.2983, "num_token_union": 73.7336, "num_word_context": 202.2274, "num_word_doc": 49.791, "num_word_query": 39.8861, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3017.0458, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6289, "query_norm": 1.5494, "queue_k_norm": 1.5768, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2983, "sent_len_1": 66.7552, "sent_len_max_0": 128.0, "sent_len_max_1": 210.8762, "stdk": 0.0492, "stdq": 0.0468, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 49400 }, { "accuracy": 61.2305, "active_queue_size": 16384.0, "cl_loss": 2.7109, "doc_norm": 1.5793, "encoder_q-embeddings": 1607.5839, "encoder_q-layer.0": 1066.313, "encoder_q-layer.1": 1257.2383, "encoder_q-layer.10": 1864.2397, "encoder_q-layer.11": 3660.7961, "encoder_q-layer.2": 1513.5834, "encoder_q-layer.3": 1579.5294, "encoder_q-layer.4": 1725.7054, "encoder_q-layer.5": 1803.5009, "encoder_q-layer.6": 1807.3553, "encoder_q-layer.7": 1714.9019, "encoder_q-layer.8": 1967.1354, "encoder_q-layer.9": 1752.2719, "epoch": 0.48, "inbatch_neg_score": 0.6386, "inbatch_pos_score": 1.3838, "learning_rate": 2.8055555555555557e-05, "loss": 2.7109, "norm_diff": 0.0275, "norm_loss": 0.0, "num_token_doc": 66.8094, "num_token_overlap": 17.7601, "num_token_query": 52.2693, "num_token_union": 73.7755, "num_word_context": 202.1723, "num_word_doc": 49.8533, "num_word_query": 39.8703, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2728.1666, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6387, "query_norm": 1.5518, "queue_k_norm": 1.5802, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2693, "sent_len_1": 66.8094, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4613, "stdk": 0.0492, "stdq": 0.0466, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 49500 }, { "accuracy": 61.9629, "active_queue_size": 16384.0, "cl_loss": 2.7029, "doc_norm": 1.5762, "encoder_q-embeddings": 1465.3594, "encoder_q-layer.0": 978.8939, "encoder_q-layer.1": 1126.7037, "encoder_q-layer.10": 1852.7206, "encoder_q-layer.11": 3582.9946, "encoder_q-layer.2": 1220.4213, "encoder_q-layer.3": 1285.2448, "encoder_q-layer.4": 1413.4404, "encoder_q-layer.5": 1498.2229, "encoder_q-layer.6": 1704.4839, "encoder_q-layer.7": 1819.833, "encoder_q-layer.8": 2020.1265, "encoder_q-layer.9": 1722.1708, "epoch": 0.48, "inbatch_neg_score": 0.6399, "inbatch_pos_score": 1.3799, "learning_rate": 2.8000000000000003e-05, "loss": 2.7029, "norm_diff": 0.0326, "norm_loss": 0.0, "num_token_doc": 66.662, "num_token_overlap": 17.7916, "num_token_query": 52.2366, "num_token_union": 73.6349, "num_word_context": 202.1136, "num_word_doc": 49.7476, "num_word_query": 39.8424, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2589.0939, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6406, "query_norm": 1.5436, "queue_k_norm": 1.5782, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2366, "sent_len_1": 66.662, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6962, "stdk": 0.0491, "stdq": 0.0461, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 49600 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.7057, "doc_norm": 1.5741, "encoder_q-embeddings": 1282.4116, "encoder_q-layer.0": 829.7569, "encoder_q-layer.1": 915.6667, "encoder_q-layer.10": 1778.3831, "encoder_q-layer.11": 3541.3267, "encoder_q-layer.2": 1027.0773, "encoder_q-layer.3": 1071.1083, "encoder_q-layer.4": 1197.8868, "encoder_q-layer.5": 1206.7809, "encoder_q-layer.6": 1366.9156, "encoder_q-layer.7": 1519.2922, "encoder_q-layer.8": 1863.1476, "encoder_q-layer.9": 1729.3978, "epoch": 0.49, "inbatch_neg_score": 0.6491, "inbatch_pos_score": 1.3643, "learning_rate": 2.7944444444444445e-05, "loss": 2.7057, "norm_diff": 0.0304, "norm_loss": 0.0, "num_token_doc": 66.761, "num_token_overlap": 17.7964, "num_token_query": 52.204, "num_token_union": 73.6688, "num_word_context": 202.1372, "num_word_doc": 49.7937, "num_word_query": 39.8196, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2393.9206, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6494, "query_norm": 1.5437, "queue_k_norm": 1.5777, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.204, "sent_len_1": 66.761, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6612, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 49700 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.6977, "doc_norm": 1.5798, "encoder_q-embeddings": 1309.8221, "encoder_q-layer.0": 841.7034, "encoder_q-layer.1": 912.7972, "encoder_q-layer.10": 1933.7811, "encoder_q-layer.11": 3483.3589, "encoder_q-layer.2": 1037.1992, "encoder_q-layer.3": 1139.5813, "encoder_q-layer.4": 1221.3271, "encoder_q-layer.5": 1306.9697, "encoder_q-layer.6": 1499.467, "encoder_q-layer.7": 1637.3756, "encoder_q-layer.8": 2001.5613, "encoder_q-layer.9": 1769.8911, "epoch": 0.49, "inbatch_neg_score": 0.6545, "inbatch_pos_score": 1.3799, "learning_rate": 2.788888888888889e-05, "loss": 2.6977, "norm_diff": 0.0284, "norm_loss": 0.0, "num_token_doc": 66.7554, "num_token_overlap": 17.8095, "num_token_query": 52.26, "num_token_union": 73.7189, "num_word_context": 202.3571, "num_word_doc": 49.8337, "num_word_query": 39.8839, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2414.1637, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6553, "query_norm": 1.5514, "queue_k_norm": 1.5793, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.26, "sent_len_1": 66.7554, "sent_len_max_0": 128.0, "sent_len_max_1": 207.97, "stdk": 0.0492, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 49800 }, { "accuracy": 61.2793, "active_queue_size": 16384.0, "cl_loss": 2.6953, "doc_norm": 1.5779, "encoder_q-embeddings": 1412.3016, "encoder_q-layer.0": 884.8369, "encoder_q-layer.1": 1023.3008, "encoder_q-layer.10": 1729.0035, "encoder_q-layer.11": 3524.8645, "encoder_q-layer.2": 1195.751, "encoder_q-layer.3": 1280.191, "encoder_q-layer.4": 1495.4712, "encoder_q-layer.5": 1541.1295, "encoder_q-layer.6": 1699.7843, "encoder_q-layer.7": 1859.5382, "encoder_q-layer.8": 1988.9042, "encoder_q-layer.9": 1745.4794, "epoch": 0.49, "inbatch_neg_score": 0.6589, "inbatch_pos_score": 1.4082, "learning_rate": 2.7833333333333333e-05, "loss": 2.6953, "norm_diff": 0.0111, "norm_loss": 0.0, "num_token_doc": 66.8433, "num_token_overlap": 17.8208, "num_token_query": 52.2674, "num_token_union": 73.7434, "num_word_context": 202.4224, "num_word_doc": 49.8575, "num_word_query": 39.8798, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2568.7823, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6597, "query_norm": 1.5679, "queue_k_norm": 1.5779, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2674, "sent_len_1": 66.8433, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8625, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 49900 }, { "accuracy": 60.1074, "active_queue_size": 16384.0, "cl_loss": 2.7043, "doc_norm": 1.5798, "encoder_q-embeddings": 1381.6019, "encoder_q-layer.0": 877.7112, "encoder_q-layer.1": 970.9426, "encoder_q-layer.10": 1674.6069, "encoder_q-layer.11": 3292.2078, "encoder_q-layer.2": 1087.8113, "encoder_q-layer.3": 1147.1421, "encoder_q-layer.4": 1290.2135, "encoder_q-layer.5": 1350.8102, "encoder_q-layer.6": 1509.453, "encoder_q-layer.7": 1662.816, "encoder_q-layer.8": 1983.0172, "encoder_q-layer.9": 1745.394, "epoch": 0.49, "inbatch_neg_score": 0.6714, "inbatch_pos_score": 1.4082, "learning_rate": 2.777777777777778e-05, "loss": 2.7043, "norm_diff": 0.0106, "norm_loss": 0.0, "num_token_doc": 66.6922, "num_token_overlap": 17.7921, "num_token_query": 52.3051, "num_token_union": 73.6936, "num_word_context": 202.3378, "num_word_doc": 49.7778, "num_word_query": 39.9076, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2408.0983, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6719, "query_norm": 1.5883, "queue_k_norm": 1.5804, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3051, "sent_len_1": 66.6922, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3212, "stdk": 0.0491, "stdq": 0.0471, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 50000 }, { "dev_runtime": 25.9858, "dev_samples_per_second": 1.231, "dev_steps_per_second": 0.038, "epoch": 0.49, "step": 50000, "test_accuracy": 94.20166015625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3361448645591736, "test_doc_norm": 1.5455735921859741, "test_inbatch_neg_score": 0.9534587264060974, "test_inbatch_pos_score": 1.9607853889465332, "test_loss": 0.3361448645591736, "test_loss_align": 1.1165010929107666, "test_loss_unif": 3.1336493492126465, "test_loss_unif_q@queue": 3.1336493492126465, "test_norm_diff": 0.07497774064540863, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.6652405261993408, "test_query_norm": 1.620551347732544, "test_queue_k_norm": 1.5800683498382568, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04353529214859009, "test_stdq": 0.04413451626896858, "test_stdqueue_k": 0.049193330109119415, "test_stdqueue_q": 0.0 }, { "dev_runtime": 25.9858, "dev_samples_per_second": 1.231, "dev_steps_per_second": 0.038, "epoch": 0.49, "eval_beir-arguana_ndcg@10": 0.39398, "eval_beir-arguana_recall@10": 0.66927, "eval_beir-arguana_recall@100": 0.93812, "eval_beir-arguana_recall@20": 0.78307, "eval_beir-avg_ndcg@10": 0.36982099999999996, "eval_beir-avg_recall@10": 0.4429639166666666, "eval_beir-avg_recall@100": 0.62736125, "eval_beir-avg_recall@20": 0.5051065833333332, "eval_beir-cqadupstack_ndcg@10": 0.27871, "eval_beir-cqadupstack_recall@10": 0.3760491666666667, "eval_beir-cqadupstack_recall@100": 0.6063425000000001, "eval_beir-cqadupstack_recall@20": 0.4412158333333333, "eval_beir-fiqa_ndcg@10": 0.24443, "eval_beir-fiqa_recall@10": 0.30153, "eval_beir-fiqa_recall@100": 0.59141, "eval_beir-fiqa_recall@20": 0.38198, "eval_beir-nfcorpus_ndcg@10": 0.29059, "eval_beir-nfcorpus_recall@10": 0.14502, "eval_beir-nfcorpus_recall@100": 0.27253, "eval_beir-nfcorpus_recall@20": 0.17721, "eval_beir-nq_ndcg@10": 0.27832, "eval_beir-nq_recall@10": 0.45744, "eval_beir-nq_recall@100": 0.80127, "eval_beir-nq_recall@20": 0.57935, "eval_beir-quora_ndcg@10": 0.76861, "eval_beir-quora_recall@10": 0.87844, "eval_beir-quora_recall@100": 0.97572, "eval_beir-quora_recall@20": 0.92414, "eval_beir-scidocs_ndcg@10": 0.15765, "eval_beir-scidocs_recall@10": 0.16143, "eval_beir-scidocs_recall@100": 0.36658, "eval_beir-scidocs_recall@20": 0.21947, "eval_beir-scifact_ndcg@10": 0.62593, "eval_beir-scifact_recall@10": 0.78733, "eval_beir-scifact_recall@100": 0.90989, "eval_beir-scifact_recall@20": 0.84244, "eval_beir-trec-covid_ndcg@10": 0.4906, "eval_beir-trec-covid_recall@10": 0.518, "eval_beir-trec-covid_recall@100": 0.3922, "eval_beir-trec-covid_recall@20": 0.509, "eval_beir-webis-touche2020_ndcg@10": 0.16939, "eval_beir-webis-touche2020_recall@10": 0.13513, "eval_beir-webis-touche2020_recall@100": 0.41955, "eval_beir-webis-touche2020_recall@20": 0.19319, "eval_senteval-avg_sts": 0.7572708728515776, "eval_senteval-sickr_spearman": 0.7312603588734713, "eval_senteval-stsb_spearman": 0.7832813868296838, "step": 50000, "test_accuracy": 94.20166015625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3361448645591736, "test_doc_norm": 1.5455735921859741, "test_inbatch_neg_score": 0.9534587264060974, "test_inbatch_pos_score": 1.9607853889465332, "test_loss": 0.3361448645591736, "test_loss_align": 1.1165010929107666, "test_loss_unif": 3.1336493492126465, "test_loss_unif_q@queue": 3.1336493492126465, "test_norm_diff": 0.07497774064540863, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.6652405261993408, "test_query_norm": 1.620551347732544, "test_queue_k_norm": 1.5800683498382568, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04353529214859009, "test_stdq": 0.04413451626896858, "test_stdqueue_k": 0.049193330109119415, "test_stdqueue_q": 0.0 }, { "accuracy": 60.3027, "active_queue_size": 16384.0, "cl_loss": 2.6954, "doc_norm": 1.5841, "encoder_q-embeddings": 1376.4982, "encoder_q-layer.0": 895.2197, "encoder_q-layer.1": 998.319, "encoder_q-layer.10": 1634.608, "encoder_q-layer.11": 3413.666, "encoder_q-layer.2": 1128.2808, "encoder_q-layer.3": 1195.7911, "encoder_q-layer.4": 1277.0908, "encoder_q-layer.5": 1305.6617, "encoder_q-layer.6": 1459.2742, "encoder_q-layer.7": 1640.9308, "encoder_q-layer.8": 2008.813, "encoder_q-layer.9": 1709.1437, "epoch": 0.49, "inbatch_neg_score": 0.6795, "inbatch_pos_score": 1.415, "learning_rate": 2.772222222222222e-05, "loss": 2.6954, "norm_diff": 0.0124, "norm_loss": 0.0, "num_token_doc": 66.7702, "num_token_overlap": 17.811, "num_token_query": 52.1901, "num_token_union": 73.6426, "num_word_context": 202.1834, "num_word_doc": 49.8213, "num_word_query": 39.8107, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2431.8988, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6802, "query_norm": 1.5717, "queue_k_norm": 1.5819, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1901, "sent_len_1": 66.7702, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9512, "stdk": 0.0493, "stdq": 0.0465, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 50100 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.717, "doc_norm": 1.5835, "encoder_q-embeddings": 1397.9297, "encoder_q-layer.0": 905.47, "encoder_q-layer.1": 976.4324, "encoder_q-layer.10": 1658.9954, "encoder_q-layer.11": 3295.7566, "encoder_q-layer.2": 1134.307, "encoder_q-layer.3": 1263.2255, "encoder_q-layer.4": 1380.0546, "encoder_q-layer.5": 1490.489, "encoder_q-layer.6": 1573.6425, "encoder_q-layer.7": 1722.741, "encoder_q-layer.8": 1866.2399, "encoder_q-layer.9": 1712.35, "epoch": 0.49, "inbatch_neg_score": 0.6862, "inbatch_pos_score": 1.4287, "learning_rate": 2.7666666666666667e-05, "loss": 2.717, "norm_diff": 0.0103, "norm_loss": 0.0, "num_token_doc": 66.7643, "num_token_overlap": 17.7935, "num_token_query": 52.2875, "num_token_union": 73.6998, "num_word_context": 202.5166, "num_word_doc": 49.7833, "num_word_query": 39.8878, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2414.371, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6865, "query_norm": 1.5938, "queue_k_norm": 1.5809, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2875, "sent_len_1": 66.7643, "sent_len_max_0": 128.0, "sent_len_max_1": 211.09, "stdk": 0.0492, "stdq": 0.0474, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 50200 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.6949, "doc_norm": 1.5834, "encoder_q-embeddings": 1446.3511, "encoder_q-layer.0": 940.4307, "encoder_q-layer.1": 1032.7428, "encoder_q-layer.10": 1716.4321, "encoder_q-layer.11": 3431.3132, "encoder_q-layer.2": 1173.9032, "encoder_q-layer.3": 1271.6034, "encoder_q-layer.4": 1393.5585, "encoder_q-layer.5": 1470.5139, "encoder_q-layer.6": 1564.345, "encoder_q-layer.7": 1643.864, "encoder_q-layer.8": 1922.556, "encoder_q-layer.9": 1641.5251, "epoch": 0.49, "inbatch_neg_score": 0.6864, "inbatch_pos_score": 1.4199, "learning_rate": 2.761111111111111e-05, "loss": 2.6949, "norm_diff": 0.024, "norm_loss": 0.0, "num_token_doc": 66.5912, "num_token_overlap": 17.7894, "num_token_query": 52.2526, "num_token_union": 73.5812, "num_word_context": 201.8258, "num_word_doc": 49.6807, "num_word_query": 39.8474, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2466.1314, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6865, "query_norm": 1.5594, "queue_k_norm": 1.5843, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2526, "sent_len_1": 66.5912, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4087, "stdk": 0.0492, "stdq": 0.0464, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 50300 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.6967, "doc_norm": 1.5832, "encoder_q-embeddings": 3253.8254, "encoder_q-layer.0": 2188.6636, "encoder_q-layer.1": 2403.8679, "encoder_q-layer.10": 3431.3184, "encoder_q-layer.11": 7264.9697, "encoder_q-layer.2": 2947.1589, "encoder_q-layer.3": 3153.9414, "encoder_q-layer.4": 3488.2102, "encoder_q-layer.5": 3846.3557, "encoder_q-layer.6": 3929.1812, "encoder_q-layer.7": 3759.884, "encoder_q-layer.8": 4064.7656, "encoder_q-layer.9": 3500.9353, "epoch": 0.49, "inbatch_neg_score": 0.6846, "inbatch_pos_score": 1.4209, "learning_rate": 2.7555555555555555e-05, "loss": 2.6967, "norm_diff": 0.0147, "norm_loss": 0.0, "num_token_doc": 66.6077, "num_token_overlap": 17.85, "num_token_query": 52.3745, "num_token_union": 73.6161, "num_word_context": 202.0929, "num_word_doc": 49.7135, "num_word_query": 39.9429, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5438.3316, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6841, "query_norm": 1.5684, "queue_k_norm": 1.5857, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3745, "sent_len_1": 66.6077, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0538, "stdk": 0.0491, "stdq": 0.047, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 50400 }, { "accuracy": 62.5, "active_queue_size": 16384.0, "cl_loss": 2.7071, "doc_norm": 1.5906, "encoder_q-embeddings": 2530.8718, "encoder_q-layer.0": 1621.592, "encoder_q-layer.1": 1767.2755, "encoder_q-layer.10": 3166.3604, "encoder_q-layer.11": 6864.5659, "encoder_q-layer.2": 2081.7881, "encoder_q-layer.3": 2302.5203, "encoder_q-layer.4": 2384.6367, "encoder_q-layer.5": 2503.8662, "encoder_q-layer.6": 2742.8467, "encoder_q-layer.7": 3089.9509, "encoder_q-layer.8": 3502.9563, "encoder_q-layer.9": 3035.106, "epoch": 0.49, "inbatch_neg_score": 0.6845, "inbatch_pos_score": 1.4375, "learning_rate": 2.7500000000000004e-05, "loss": 2.7071, "norm_diff": 0.0406, "norm_loss": 0.0, "num_token_doc": 66.6638, "num_token_overlap": 17.8006, "num_token_query": 52.2422, "num_token_union": 73.6151, "num_word_context": 201.9974, "num_word_doc": 49.7242, "num_word_query": 39.8425, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4560.0429, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6855, "query_norm": 1.55, "queue_k_norm": 1.5868, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2422, "sent_len_1": 66.6638, "sent_len_max_0": 128.0, "sent_len_max_1": 210.59, "stdk": 0.0494, "stdq": 0.0463, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 50500 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.6983, "doc_norm": 1.5853, "encoder_q-embeddings": 3905.2117, "encoder_q-layer.0": 2752.4631, "encoder_q-layer.1": 3304.0288, "encoder_q-layer.10": 3450.6892, "encoder_q-layer.11": 7113.7275, "encoder_q-layer.2": 3996.196, "encoder_q-layer.3": 3888.4619, "encoder_q-layer.4": 4370.7319, "encoder_q-layer.5": 4371.3247, "encoder_q-layer.6": 4437.8525, "encoder_q-layer.7": 4486.9746, "encoder_q-layer.8": 4167.8174, "encoder_q-layer.9": 3410.1338, "epoch": 0.49, "inbatch_neg_score": 0.6884, "inbatch_pos_score": 1.4258, "learning_rate": 2.7444444444444443e-05, "loss": 2.6983, "norm_diff": 0.0343, "norm_loss": 0.0, "num_token_doc": 66.7035, "num_token_overlap": 17.7923, "num_token_query": 52.2932, "num_token_union": 73.6746, "num_word_context": 202.2642, "num_word_doc": 49.7868, "num_word_query": 39.8891, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6172.4344, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6885, "query_norm": 1.551, "queue_k_norm": 1.5865, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2932, "sent_len_1": 66.7035, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7188, "stdk": 0.0491, "stdq": 0.0464, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 50600 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.6969, "doc_norm": 1.5883, "encoder_q-embeddings": 22749.1055, "encoder_q-layer.0": 17271.2285, "encoder_q-layer.1": 19561.498, "encoder_q-layer.10": 3466.5151, "encoder_q-layer.11": 6913.6504, "encoder_q-layer.2": 25122.5391, "encoder_q-layer.3": 27445.3379, "encoder_q-layer.4": 30955.2773, "encoder_q-layer.5": 37467.3398, "encoder_q-layer.6": 35210.0547, "encoder_q-layer.7": 29640.668, "encoder_q-layer.8": 23580.2734, "encoder_q-layer.9": 7072.9233, "epoch": 0.49, "inbatch_neg_score": 0.6866, "inbatch_pos_score": 1.4326, "learning_rate": 2.7388888888888892e-05, "loss": 2.6969, "norm_diff": 0.0313, "norm_loss": 0.0, "num_token_doc": 66.9211, "num_token_overlap": 17.8392, "num_token_query": 52.3126, "num_token_union": 73.7998, "num_word_context": 202.2841, "num_word_doc": 49.9271, "num_word_query": 39.887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 36705.7526, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.687, "query_norm": 1.5571, "queue_k_norm": 1.5867, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3126, "sent_len_1": 66.9211, "sent_len_max_0": 128.0, "sent_len_max_1": 210.2138, "stdk": 0.0492, "stdq": 0.0467, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 50700 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.7071, "doc_norm": 1.5864, "encoder_q-embeddings": 3489.7263, "encoder_q-layer.0": 2484.0278, "encoder_q-layer.1": 2860.7686, "encoder_q-layer.10": 3284.7241, "encoder_q-layer.11": 6756.2842, "encoder_q-layer.2": 3215.7136, "encoder_q-layer.3": 3560.7405, "encoder_q-layer.4": 3750.792, "encoder_q-layer.5": 3824.5027, "encoder_q-layer.6": 3782.1978, "encoder_q-layer.7": 3879.9424, "encoder_q-layer.8": 3967.0122, "encoder_q-layer.9": 3359.397, "epoch": 0.5, "inbatch_neg_score": 0.6925, "inbatch_pos_score": 1.415, "learning_rate": 2.733333333333333e-05, "loss": 2.7071, "norm_diff": 0.0423, "norm_loss": 0.0, "num_token_doc": 66.7083, "num_token_overlap": 17.7494, "num_token_query": 52.0482, "num_token_union": 73.5622, "num_word_context": 201.9691, "num_word_doc": 49.7723, "num_word_query": 39.6841, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5547.846, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6919, "query_norm": 1.5441, "queue_k_norm": 1.5867, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0482, "sent_len_1": 66.7083, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3562, "stdk": 0.0491, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 50800 }, { "accuracy": 61.7676, "active_queue_size": 16384.0, "cl_loss": 2.7, "doc_norm": 1.5884, "encoder_q-embeddings": 3224.5713, "encoder_q-layer.0": 2192.5334, "encoder_q-layer.1": 2458.0403, "encoder_q-layer.10": 3328.6475, "encoder_q-layer.11": 6565.4121, "encoder_q-layer.2": 2724.8574, "encoder_q-layer.3": 2796.9399, "encoder_q-layer.4": 3074.6001, "encoder_q-layer.5": 3233.6235, "encoder_q-layer.6": 3363.9282, "encoder_q-layer.7": 3556.3069, "encoder_q-layer.8": 3914.5737, "encoder_q-layer.9": 3475.054, "epoch": 0.5, "inbatch_neg_score": 0.6879, "inbatch_pos_score": 1.4414, "learning_rate": 2.727777777777778e-05, "loss": 2.7, "norm_diff": 0.0243, "norm_loss": 0.0, "num_token_doc": 66.8925, "num_token_overlap": 17.7592, "num_token_query": 52.1786, "num_token_union": 73.7922, "num_word_context": 202.4483, "num_word_doc": 49.9087, "num_word_query": 39.8037, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5150.0573, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6885, "query_norm": 1.5641, "queue_k_norm": 1.5891, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1786, "sent_len_1": 66.8925, "sent_len_max_0": 128.0, "sent_len_max_1": 208.16, "stdk": 0.0491, "stdq": 0.0471, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 50900 }, { "accuracy": 60.4004, "active_queue_size": 16384.0, "cl_loss": 2.6904, "doc_norm": 1.5891, "encoder_q-embeddings": 6942.1201, "encoder_q-layer.0": 4860.9233, "encoder_q-layer.1": 5461.5073, "encoder_q-layer.10": 3371.2466, "encoder_q-layer.11": 6857.104, "encoder_q-layer.2": 7148.7578, "encoder_q-layer.3": 8028.7734, "encoder_q-layer.4": 9360.1055, "encoder_q-layer.5": 10006.6465, "encoder_q-layer.6": 10060.2666, "encoder_q-layer.7": 8174.2261, "encoder_q-layer.8": 5257.5225, "encoder_q-layer.9": 3608.3928, "epoch": 0.5, "inbatch_neg_score": 0.6842, "inbatch_pos_score": 1.4141, "learning_rate": 2.7222222222222223e-05, "loss": 2.6904, "norm_diff": 0.0477, "norm_loss": 0.0, "num_token_doc": 66.8101, "num_token_overlap": 17.8504, "num_token_query": 52.4228, "num_token_union": 73.8088, "num_word_context": 202.4475, "num_word_doc": 49.865, "num_word_query": 39.9962, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10488.6314, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6855, "query_norm": 1.5414, "queue_k_norm": 1.5907, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4228, "sent_len_1": 66.8101, "sent_len_max_0": 128.0, "sent_len_max_1": 207.0513, "stdk": 0.0492, "stdq": 0.0461, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 51000 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.689, "doc_norm": 1.5906, "encoder_q-embeddings": 4160.6089, "encoder_q-layer.0": 2820.366, "encoder_q-layer.1": 3076.9102, "encoder_q-layer.10": 4054.521, "encoder_q-layer.11": 7751.2974, "encoder_q-layer.2": 3621.104, "encoder_q-layer.3": 4035.0811, "encoder_q-layer.4": 4536.4941, "encoder_q-layer.5": 4777.9937, "encoder_q-layer.6": 4966.1138, "encoder_q-layer.7": 4641.5156, "encoder_q-layer.8": 4320.8472, "encoder_q-layer.9": 3777.0178, "epoch": 0.5, "inbatch_neg_score": 0.6937, "inbatch_pos_score": 1.4248, "learning_rate": 2.716666666666667e-05, "loss": 2.689, "norm_diff": 0.0384, "norm_loss": 0.0, "num_token_doc": 66.6528, "num_token_overlap": 17.7972, "num_token_query": 52.2843, "num_token_union": 73.636, "num_word_context": 202.1192, "num_word_doc": 49.7259, "num_word_query": 39.8791, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6523.7957, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6929, "query_norm": 1.5522, "queue_k_norm": 1.5903, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2843, "sent_len_1": 66.6528, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3, "stdk": 0.0492, "stdq": 0.0463, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 51100 }, { "accuracy": 60.6934, "active_queue_size": 16384.0, "cl_loss": 2.6785, "doc_norm": 1.5953, "encoder_q-embeddings": 4572.2363, "encoder_q-layer.0": 3235.3101, "encoder_q-layer.1": 3737.2832, "encoder_q-layer.10": 3639.6848, "encoder_q-layer.11": 7361.4604, "encoder_q-layer.2": 4741.4639, "encoder_q-layer.3": 4982.6504, "encoder_q-layer.4": 5557.0132, "encoder_q-layer.5": 6170.9883, "encoder_q-layer.6": 5954.0981, "encoder_q-layer.7": 4742.0288, "encoder_q-layer.8": 4656.7729, "encoder_q-layer.9": 3739.7073, "epoch": 0.5, "inbatch_neg_score": 0.6924, "inbatch_pos_score": 1.4336, "learning_rate": 2.7111111111111114e-05, "loss": 2.6785, "norm_diff": 0.0415, "norm_loss": 0.0, "num_token_doc": 66.7263, "num_token_overlap": 17.8683, "num_token_query": 52.4414, "num_token_union": 73.6957, "num_word_context": 202.1202, "num_word_doc": 49.7929, "num_word_query": 40.0134, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7285.5827, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6929, "query_norm": 1.5538, "queue_k_norm": 1.5921, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4414, "sent_len_1": 66.7263, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3862, "stdk": 0.0493, "stdq": 0.0464, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 51200 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.6906, "doc_norm": 1.5933, "encoder_q-embeddings": 4115.5312, "encoder_q-layer.0": 3007.5339, "encoder_q-layer.1": 3055.4783, "encoder_q-layer.10": 3602.4714, "encoder_q-layer.11": 7026.7041, "encoder_q-layer.2": 3271.3562, "encoder_q-layer.3": 3563.4294, "encoder_q-layer.4": 3753.0964, "encoder_q-layer.5": 4084.5859, "encoder_q-layer.6": 4382.2637, "encoder_q-layer.7": 4728.2661, "encoder_q-layer.8": 4443.9297, "encoder_q-layer.9": 3605.7175, "epoch": 0.5, "inbatch_neg_score": 0.6924, "inbatch_pos_score": 1.4297, "learning_rate": 2.7055555555555557e-05, "loss": 2.6906, "norm_diff": 0.0372, "norm_loss": 0.0, "num_token_doc": 66.7622, "num_token_overlap": 17.7893, "num_token_query": 52.2563, "num_token_union": 73.7236, "num_word_context": 202.2609, "num_word_doc": 49.8104, "num_word_query": 39.8763, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6132.2654, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6948, "query_norm": 1.5561, "queue_k_norm": 1.5923, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2563, "sent_len_1": 66.7622, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6425, "stdk": 0.0493, "stdq": 0.0465, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 51300 }, { "accuracy": 60.8398, "active_queue_size": 16384.0, "cl_loss": 2.6913, "doc_norm": 1.596, "encoder_q-embeddings": 1946.5374, "encoder_q-layer.0": 1351.2218, "encoder_q-layer.1": 1537.8149, "encoder_q-layer.10": 1916.1116, "encoder_q-layer.11": 3721.824, "encoder_q-layer.2": 1787.8145, "encoder_q-layer.3": 2108.0273, "encoder_q-layer.4": 2418.6743, "encoder_q-layer.5": 2503.0747, "encoder_q-layer.6": 2537.6611, "encoder_q-layer.7": 2746.7275, "encoder_q-layer.8": 2640.1064, "encoder_q-layer.9": 1975.6909, "epoch": 0.5, "inbatch_neg_score": 0.6873, "inbatch_pos_score": 1.4209, "learning_rate": 2.7000000000000002e-05, "loss": 2.6913, "norm_diff": 0.0613, "norm_loss": 0.0, "num_token_doc": 66.8258, "num_token_overlap": 17.7814, "num_token_query": 52.2172, "num_token_union": 73.7595, "num_word_context": 202.344, "num_word_doc": 49.8627, "num_word_query": 39.8531, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3327.8605, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6875, "query_norm": 1.5347, "queue_k_norm": 1.5931, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2172, "sent_len_1": 66.8258, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3175, "stdk": 0.0493, "stdq": 0.0458, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 51400 }, { "accuracy": 60.2051, "active_queue_size": 16384.0, "cl_loss": 2.6815, "doc_norm": 1.5911, "encoder_q-embeddings": 3171.283, "encoder_q-layer.0": 2125.9365, "encoder_q-layer.1": 2315.1987, "encoder_q-layer.10": 1687.4774, "encoder_q-layer.11": 3378.9062, "encoder_q-layer.2": 2851.3279, "encoder_q-layer.3": 3007.4438, "encoder_q-layer.4": 3063.0085, "encoder_q-layer.5": 3575.7646, "encoder_q-layer.6": 3181.7478, "encoder_q-layer.7": 2530.6692, "encoder_q-layer.8": 2128.5603, "encoder_q-layer.9": 1778.5969, "epoch": 0.5, "inbatch_neg_score": 0.6879, "inbatch_pos_score": 1.4229, "learning_rate": 2.6944444444444445e-05, "loss": 2.6815, "norm_diff": 0.044, "norm_loss": 0.0, "num_token_doc": 66.8531, "num_token_overlap": 17.839, "num_token_query": 52.3653, "num_token_union": 73.7611, "num_word_context": 202.3867, "num_word_doc": 49.8636, "num_word_query": 39.953, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4086.2993, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.688, "query_norm": 1.5471, "queue_k_norm": 1.5926, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3653, "sent_len_1": 66.8531, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6075, "stdk": 0.0491, "stdq": 0.0463, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 51500 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.6941, "doc_norm": 1.5905, "encoder_q-embeddings": 2474.8503, "encoder_q-layer.0": 1633.3097, "encoder_q-layer.1": 1824.0515, "encoder_q-layer.10": 1973.1754, "encoder_q-layer.11": 3685.2671, "encoder_q-layer.2": 2104.9404, "encoder_q-layer.3": 2303.7937, "encoder_q-layer.4": 2453.4446, "encoder_q-layer.5": 2639.5452, "encoder_q-layer.6": 2936.7524, "encoder_q-layer.7": 2473.948, "encoder_q-layer.8": 2470.124, "encoder_q-layer.9": 1965.8997, "epoch": 0.5, "inbatch_neg_score": 0.6846, "inbatch_pos_score": 1.4521, "learning_rate": 2.688888888888889e-05, "loss": 2.6941, "norm_diff": 0.0116, "norm_loss": 0.0, "num_token_doc": 66.9262, "num_token_overlap": 17.8091, "num_token_query": 52.2415, "num_token_union": 73.7803, "num_word_context": 202.5011, "num_word_doc": 49.9153, "num_word_query": 39.8321, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3522.9519, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6836, "query_norm": 1.5796, "queue_k_norm": 1.5938, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2415, "sent_len_1": 66.9262, "sent_len_max_0": 128.0, "sent_len_max_1": 210.4475, "stdk": 0.0491, "stdq": 0.0477, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 51600 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.692, "doc_norm": 1.591, "encoder_q-embeddings": 2031.2854, "encoder_q-layer.0": 1422.5975, "encoder_q-layer.1": 1648.4338, "encoder_q-layer.10": 1992.4291, "encoder_q-layer.11": 3669.6248, "encoder_q-layer.2": 1874.6459, "encoder_q-layer.3": 2024.9187, "encoder_q-layer.4": 2182.3616, "encoder_q-layer.5": 2106.4924, "encoder_q-layer.6": 2084.5403, "encoder_q-layer.7": 1959.0873, "encoder_q-layer.8": 2116.5884, "encoder_q-layer.9": 1809.4979, "epoch": 0.5, "inbatch_neg_score": 0.6896, "inbatch_pos_score": 1.4395, "learning_rate": 2.6833333333333333e-05, "loss": 2.692, "norm_diff": 0.0222, "norm_loss": 0.0, "num_token_doc": 66.8138, "num_token_overlap": 17.8236, "num_token_query": 52.2222, "num_token_union": 73.6992, "num_word_context": 202.4302, "num_word_doc": 49.8435, "num_word_query": 39.8355, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3113.361, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6899, "query_norm": 1.5708, "queue_k_norm": 1.5926, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2222, "sent_len_1": 66.8138, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4988, "stdk": 0.0491, "stdq": 0.0471, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 51700 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.6891, "doc_norm": 1.5936, "encoder_q-embeddings": 2093.353, "encoder_q-layer.0": 1417.5519, "encoder_q-layer.1": 1586.6779, "encoder_q-layer.10": 1689.7743, "encoder_q-layer.11": 3383.2983, "encoder_q-layer.2": 2006.7285, "encoder_q-layer.3": 2239.6909, "encoder_q-layer.4": 2428.5935, "encoder_q-layer.5": 2412.9746, "encoder_q-layer.6": 2291.6423, "encoder_q-layer.7": 2138.3303, "encoder_q-layer.8": 2285.6338, "encoder_q-layer.9": 1744.6035, "epoch": 0.51, "inbatch_neg_score": 0.6905, "inbatch_pos_score": 1.4229, "learning_rate": 2.677777777777778e-05, "loss": 2.6891, "norm_diff": 0.0378, "norm_loss": 0.0, "num_token_doc": 66.8491, "num_token_overlap": 17.8044, "num_token_query": 52.3308, "num_token_union": 73.7806, "num_word_context": 202.2736, "num_word_doc": 49.8742, "num_word_query": 39.9164, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3171.7322, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6899, "query_norm": 1.5557, "queue_k_norm": 1.5946, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3308, "sent_len_1": 66.8491, "sent_len_max_0": 128.0, "sent_len_max_1": 210.6538, "stdk": 0.0492, "stdq": 0.0464, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 51800 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.6892, "doc_norm": 1.5948, "encoder_q-embeddings": 1457.8352, "encoder_q-layer.0": 942.7548, "encoder_q-layer.1": 1106.6647, "encoder_q-layer.10": 1743.6969, "encoder_q-layer.11": 3439.8945, "encoder_q-layer.2": 1290.3966, "encoder_q-layer.3": 1475.7238, "encoder_q-layer.4": 1687.5693, "encoder_q-layer.5": 1780.6644, "encoder_q-layer.6": 1927.726, "encoder_q-layer.7": 1885.3038, "encoder_q-layer.8": 2181.188, "encoder_q-layer.9": 1863.7531, "epoch": 0.51, "inbatch_neg_score": 0.7009, "inbatch_pos_score": 1.4336, "learning_rate": 2.6722222222222228e-05, "loss": 2.6892, "norm_diff": 0.0386, "norm_loss": 0.0, "num_token_doc": 66.7608, "num_token_overlap": 17.7852, "num_token_query": 52.3634, "num_token_union": 73.7939, "num_word_context": 202.3344, "num_word_doc": 49.821, "num_word_query": 39.9322, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2727.2794, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7002, "query_norm": 1.5562, "queue_k_norm": 1.594, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3634, "sent_len_1": 66.7608, "sent_len_max_0": 128.0, "sent_len_max_1": 209.315, "stdk": 0.0492, "stdq": 0.0461, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 51900 }, { "accuracy": 60.791, "active_queue_size": 16384.0, "cl_loss": 2.6761, "doc_norm": 1.5936, "encoder_q-embeddings": 1547.691, "encoder_q-layer.0": 1036.1215, "encoder_q-layer.1": 1153.9298, "encoder_q-layer.10": 1791.2469, "encoder_q-layer.11": 3483.9692, "encoder_q-layer.2": 1306.2709, "encoder_q-layer.3": 1364.6437, "encoder_q-layer.4": 1484.8435, "encoder_q-layer.5": 1603.7567, "encoder_q-layer.6": 1790.6213, "encoder_q-layer.7": 1862.5959, "encoder_q-layer.8": 2085.6726, "encoder_q-layer.9": 1758.7793, "epoch": 0.51, "inbatch_neg_score": 0.7039, "inbatch_pos_score": 1.4385, "learning_rate": 2.6666666666666667e-05, "loss": 2.6761, "norm_diff": 0.0379, "norm_loss": 0.0, "num_token_doc": 66.8518, "num_token_overlap": 17.777, "num_token_query": 52.2454, "num_token_union": 73.7507, "num_word_context": 202.3215, "num_word_doc": 49.8613, "num_word_query": 39.8446, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2643.6445, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7036, "query_norm": 1.5557, "queue_k_norm": 1.5963, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2454, "sent_len_1": 66.8518, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8837, "stdk": 0.0491, "stdq": 0.0459, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 52000 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.6814, "doc_norm": 1.5934, "encoder_q-embeddings": 2270.5156, "encoder_q-layer.0": 1547.304, "encoder_q-layer.1": 1639.3508, "encoder_q-layer.10": 1638.0066, "encoder_q-layer.11": 3304.053, "encoder_q-layer.2": 1873.9901, "encoder_q-layer.3": 1728.3739, "encoder_q-layer.4": 1890.9271, "encoder_q-layer.5": 1825.1688, "encoder_q-layer.6": 1591.9832, "encoder_q-layer.7": 1551.7484, "encoder_q-layer.8": 1743.5505, "encoder_q-layer.9": 1576.0178, "epoch": 0.51, "inbatch_neg_score": 0.7037, "inbatch_pos_score": 1.4385, "learning_rate": 2.6611111111111116e-05, "loss": 2.6814, "norm_diff": 0.0243, "norm_loss": 0.0, "num_token_doc": 66.7428, "num_token_overlap": 17.8276, "num_token_query": 52.2774, "num_token_union": 73.6534, "num_word_context": 202.253, "num_word_doc": 49.8185, "num_word_query": 39.8655, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2860.0359, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7031, "query_norm": 1.5691, "queue_k_norm": 1.5965, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2774, "sent_len_1": 66.7428, "sent_len_max_0": 128.0, "sent_len_max_1": 206.41, "stdk": 0.0491, "stdq": 0.0464, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 52100 }, { "accuracy": 61.2305, "active_queue_size": 16384.0, "cl_loss": 2.6998, "doc_norm": 1.5968, "encoder_q-embeddings": 1341.1621, "encoder_q-layer.0": 860.6425, "encoder_q-layer.1": 978.666, "encoder_q-layer.10": 1717.5907, "encoder_q-layer.11": 3480.4487, "encoder_q-layer.2": 1133.3553, "encoder_q-layer.3": 1224.3555, "encoder_q-layer.4": 1327.5208, "encoder_q-layer.5": 1374.2826, "encoder_q-layer.6": 1529.3235, "encoder_q-layer.7": 1675.8915, "encoder_q-layer.8": 1891.4395, "encoder_q-layer.9": 1683.2251, "epoch": 0.51, "inbatch_neg_score": 0.708, "inbatch_pos_score": 1.4531, "learning_rate": 2.6555555555555555e-05, "loss": 2.6998, "norm_diff": 0.0211, "norm_loss": 0.0, "num_token_doc": 66.8386, "num_token_overlap": 17.8141, "num_token_query": 52.2707, "num_token_union": 73.7365, "num_word_context": 202.1953, "num_word_doc": 49.8497, "num_word_query": 39.8589, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2439.4971, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.707, "query_norm": 1.5757, "queue_k_norm": 1.5971, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2707, "sent_len_1": 66.8386, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0263, "stdk": 0.0492, "stdq": 0.0466, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 52200 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.6795, "doc_norm": 1.5934, "encoder_q-embeddings": 1274.1971, "encoder_q-layer.0": 824.2672, "encoder_q-layer.1": 910.0609, "encoder_q-layer.10": 1684.3615, "encoder_q-layer.11": 3285.7178, "encoder_q-layer.2": 1031.6755, "encoder_q-layer.3": 1111.6555, "encoder_q-layer.4": 1161.6602, "encoder_q-layer.5": 1236.074, "encoder_q-layer.6": 1409.0815, "encoder_q-layer.7": 1561.2284, "encoder_q-layer.8": 1840.6492, "encoder_q-layer.9": 1631.4363, "epoch": 0.51, "inbatch_neg_score": 0.7202, "inbatch_pos_score": 1.458, "learning_rate": 2.6500000000000004e-05, "loss": 2.6795, "norm_diff": 0.0112, "norm_loss": 0.0, "num_token_doc": 66.7672, "num_token_overlap": 17.8346, "num_token_query": 52.347, "num_token_union": 73.7137, "num_word_context": 202.5118, "num_word_doc": 49.7967, "num_word_query": 39.9187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2309.0075, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7188, "query_norm": 1.5992, "queue_k_norm": 1.5961, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.347, "sent_len_1": 66.7672, "sent_len_max_0": 128.0, "sent_len_max_1": 208.91, "stdk": 0.049, "stdq": 0.0471, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 52300 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 2.681, "doc_norm": 1.5937, "encoder_q-embeddings": 1989.5032, "encoder_q-layer.0": 1383.6226, "encoder_q-layer.1": 1564.6245, "encoder_q-layer.10": 1683.1819, "encoder_q-layer.11": 3497.4543, "encoder_q-layer.2": 1707.7577, "encoder_q-layer.3": 1729.7212, "encoder_q-layer.4": 1852.0321, "encoder_q-layer.5": 1812.5178, "encoder_q-layer.6": 1905.551, "encoder_q-layer.7": 1942.9077, "encoder_q-layer.8": 1965.976, "encoder_q-layer.9": 1677.728, "epoch": 0.51, "inbatch_neg_score": 0.7313, "inbatch_pos_score": 1.4453, "learning_rate": 2.6444444444444443e-05, "loss": 2.681, "norm_diff": 0.0132, "norm_loss": 0.0, "num_token_doc": 66.7766, "num_token_overlap": 17.8703, "num_token_query": 52.3896, "num_token_union": 73.7221, "num_word_context": 202.2688, "num_word_doc": 49.8301, "num_word_query": 39.9582, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2946.0979, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7314, "query_norm": 1.5806, "queue_k_norm": 1.5979, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3896, "sent_len_1": 66.7766, "sent_len_max_0": 128.0, "sent_len_max_1": 209.52, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 52400 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.6795, "doc_norm": 1.6003, "encoder_q-embeddings": 1641.3234, "encoder_q-layer.0": 1103.9774, "encoder_q-layer.1": 1237.604, "encoder_q-layer.10": 1704.6283, "encoder_q-layer.11": 3534.6953, "encoder_q-layer.2": 1416.9757, "encoder_q-layer.3": 1512.3147, "encoder_q-layer.4": 1682.1511, "encoder_q-layer.5": 1703.1823, "encoder_q-layer.6": 1805.9752, "encoder_q-layer.7": 1918.2625, "encoder_q-layer.8": 2048.7852, "encoder_q-layer.9": 1731.5518, "epoch": 0.51, "inbatch_neg_score": 0.7377, "inbatch_pos_score": 1.4717, "learning_rate": 2.6388888888888892e-05, "loss": 2.6795, "norm_diff": 0.0189, "norm_loss": 0.0, "num_token_doc": 66.8214, "num_token_overlap": 17.8408, "num_token_query": 52.4154, "num_token_union": 73.7904, "num_word_context": 202.3472, "num_word_doc": 49.8347, "num_word_query": 39.9416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2752.1387, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7383, "query_norm": 1.5815, "queue_k_norm": 1.5978, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4154, "sent_len_1": 66.8214, "sent_len_max_0": 128.0, "sent_len_max_1": 207.2625, "stdk": 0.0493, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 52500 }, { "accuracy": 57.959, "active_queue_size": 16384.0, "cl_loss": 2.6845, "doc_norm": 1.6033, "encoder_q-embeddings": 1483.0627, "encoder_q-layer.0": 975.7773, "encoder_q-layer.1": 1077.1776, "encoder_q-layer.10": 1756.6674, "encoder_q-layer.11": 3575.2485, "encoder_q-layer.2": 1265.8254, "encoder_q-layer.3": 1369.4521, "encoder_q-layer.4": 1437.1492, "encoder_q-layer.5": 1546.3883, "encoder_q-layer.6": 1637.6584, "encoder_q-layer.7": 1747.4092, "encoder_q-layer.8": 1960.9054, "encoder_q-layer.9": 1788.8325, "epoch": 0.51, "inbatch_neg_score": 0.7468, "inbatch_pos_score": 1.4629, "learning_rate": 2.633333333333333e-05, "loss": 2.6845, "norm_diff": 0.006, "norm_loss": 0.0, "num_token_doc": 66.8032, "num_token_overlap": 17.8063, "num_token_query": 52.2804, "num_token_union": 73.7481, "num_word_context": 202.2854, "num_word_doc": 49.8449, "num_word_query": 39.9034, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2587.0319, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7471, "query_norm": 1.5979, "queue_k_norm": 1.6017, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2804, "sent_len_1": 66.8032, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8237, "stdk": 0.0493, "stdq": 0.0464, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 52600 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.6715, "doc_norm": 1.6038, "encoder_q-embeddings": 1460.1455, "encoder_q-layer.0": 967.998, "encoder_q-layer.1": 1141.2899, "encoder_q-layer.10": 1846.0781, "encoder_q-layer.11": 3539.2883, "encoder_q-layer.2": 1319.0697, "encoder_q-layer.3": 1545.4602, "encoder_q-layer.4": 1403.4979, "encoder_q-layer.5": 1348.2498, "encoder_q-layer.6": 1535.8322, "encoder_q-layer.7": 1721.4272, "encoder_q-layer.8": 1949.1218, "encoder_q-layer.9": 1690.3876, "epoch": 0.51, "inbatch_neg_score": 0.756, "inbatch_pos_score": 1.5117, "learning_rate": 2.627777777777778e-05, "loss": 2.6715, "norm_diff": 0.0064, "norm_loss": 0.0, "num_token_doc": 66.8655, "num_token_overlap": 17.8332, "num_token_query": 52.3036, "num_token_union": 73.7765, "num_word_context": 202.4602, "num_word_doc": 49.8884, "num_word_query": 39.8885, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2563.3895, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7554, "query_norm": 1.6053, "queue_k_norm": 1.6027, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3036, "sent_len_1": 66.8655, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0062, "stdk": 0.0493, "stdq": 0.0465, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 52700 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.6861, "doc_norm": 1.6061, "encoder_q-embeddings": 1648.7787, "encoder_q-layer.0": 1074.1885, "encoder_q-layer.1": 1265.7341, "encoder_q-layer.10": 1747.8489, "encoder_q-layer.11": 3577.5012, "encoder_q-layer.2": 1525.7896, "encoder_q-layer.3": 1645.049, "encoder_q-layer.4": 1784.1665, "encoder_q-layer.5": 1824.6172, "encoder_q-layer.6": 1972.1395, "encoder_q-layer.7": 2092.2932, "encoder_q-layer.8": 2098.626, "encoder_q-layer.9": 1721.3573, "epoch": 0.52, "inbatch_neg_score": 0.766, "inbatch_pos_score": 1.5146, "learning_rate": 2.6222222222222226e-05, "loss": 2.6861, "norm_diff": 0.0092, "norm_loss": 0.0, "num_token_doc": 66.8121, "num_token_overlap": 17.7994, "num_token_query": 52.2546, "num_token_union": 73.7285, "num_word_context": 202.4887, "num_word_doc": 49.8631, "num_word_query": 39.8497, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2783.6759, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7666, "query_norm": 1.609, "queue_k_norm": 1.6042, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2546, "sent_len_1": 66.8121, "sent_len_max_0": 128.0, "sent_len_max_1": 206.9288, "stdk": 0.0493, "stdq": 0.0468, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 52800 }, { "accuracy": 60.1074, "active_queue_size": 16384.0, "cl_loss": 2.6766, "doc_norm": 1.6038, "encoder_q-embeddings": 1598.4355, "encoder_q-layer.0": 1058.543, "encoder_q-layer.1": 1191.3049, "encoder_q-layer.10": 1817.7458, "encoder_q-layer.11": 3650.8135, "encoder_q-layer.2": 1428.6031, "encoder_q-layer.3": 1556.8901, "encoder_q-layer.4": 1587.973, "encoder_q-layer.5": 1671.056, "encoder_q-layer.6": 1795.2794, "encoder_q-layer.7": 1829.7684, "encoder_q-layer.8": 1933.3763, "encoder_q-layer.9": 1810.55, "epoch": 0.52, "inbatch_neg_score": 0.7724, "inbatch_pos_score": 1.5, "learning_rate": 2.6166666666666668e-05, "loss": 2.6766, "norm_diff": 0.0134, "norm_loss": 0.0, "num_token_doc": 67.0094, "num_token_overlap": 17.851, "num_token_query": 52.2685, "num_token_union": 73.8152, "num_word_context": 202.5239, "num_word_doc": 49.986, "num_word_query": 39.8762, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2719.0173, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7715, "query_norm": 1.5917, "queue_k_norm": 1.6063, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2685, "sent_len_1": 67.0094, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4025, "stdk": 0.0491, "stdq": 0.0461, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 52900 }, { "accuracy": 59.9121, "active_queue_size": 16384.0, "cl_loss": 2.6769, "doc_norm": 1.6115, "encoder_q-embeddings": 3310.0417, "encoder_q-layer.0": 2389.7952, "encoder_q-layer.1": 3001.9539, "encoder_q-layer.10": 1738.9563, "encoder_q-layer.11": 3628.5022, "encoder_q-layer.2": 3148.7324, "encoder_q-layer.3": 2657.5898, "encoder_q-layer.4": 2934.011, "encoder_q-layer.5": 2884.8206, "encoder_q-layer.6": 2705.9731, "encoder_q-layer.7": 2339.0325, "encoder_q-layer.8": 2144.4792, "encoder_q-layer.9": 1829.9702, "epoch": 0.52, "inbatch_neg_score": 0.774, "inbatch_pos_score": 1.5225, "learning_rate": 2.6111111111111114e-05, "loss": 2.6769, "norm_diff": 0.0106, "norm_loss": 0.0, "num_token_doc": 66.8368, "num_token_overlap": 17.7791, "num_token_query": 52.2745, "num_token_union": 73.7909, "num_word_context": 202.4841, "num_word_doc": 49.865, "num_word_query": 39.8661, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4078.9386, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7749, "query_norm": 1.6051, "queue_k_norm": 1.605, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2745, "sent_len_1": 66.8368, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9913, "stdk": 0.0494, "stdq": 0.047, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 53000 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.6796, "doc_norm": 1.6042, "encoder_q-embeddings": 1558.9397, "encoder_q-layer.0": 1015.3081, "encoder_q-layer.1": 1146.6752, "encoder_q-layer.10": 1667.5013, "encoder_q-layer.11": 3475.7058, "encoder_q-layer.2": 1298.6924, "encoder_q-layer.3": 1413.2635, "encoder_q-layer.4": 1586.3046, "encoder_q-layer.5": 1736.726, "encoder_q-layer.6": 1741.1322, "encoder_q-layer.7": 1793.5862, "encoder_q-layer.8": 1917.2427, "encoder_q-layer.9": 1683.575, "epoch": 0.52, "inbatch_neg_score": 0.7745, "inbatch_pos_score": 1.5156, "learning_rate": 2.6055555555555556e-05, "loss": 2.6796, "norm_diff": 0.0094, "norm_loss": 0.0, "num_token_doc": 66.6833, "num_token_overlap": 17.8097, "num_token_query": 52.2167, "num_token_union": 73.5825, "num_word_context": 202.2065, "num_word_doc": 49.7601, "num_word_query": 39.8438, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2643.8368, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7744, "query_norm": 1.599, "queue_k_norm": 1.6078, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2167, "sent_len_1": 66.6833, "sent_len_max_0": 128.0, "sent_len_max_1": 210.7063, "stdk": 0.049, "stdq": 0.047, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 53100 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6767, "doc_norm": 1.6108, "encoder_q-embeddings": 1293.265, "encoder_q-layer.0": 832.1124, "encoder_q-layer.1": 906.8732, "encoder_q-layer.10": 1908.5667, "encoder_q-layer.11": 3894.5647, "encoder_q-layer.2": 1013.5586, "encoder_q-layer.3": 1081.3816, "encoder_q-layer.4": 1176.6968, "encoder_q-layer.5": 1213.9951, "encoder_q-layer.6": 1425.91, "encoder_q-layer.7": 1713.7041, "encoder_q-layer.8": 2185.7141, "encoder_q-layer.9": 1798.1602, "epoch": 0.52, "inbatch_neg_score": 0.7744, "inbatch_pos_score": 1.5068, "learning_rate": 2.6000000000000002e-05, "loss": 2.6767, "norm_diff": 0.0263, "norm_loss": 0.0, "num_token_doc": 66.744, "num_token_overlap": 17.8141, "num_token_query": 52.382, "num_token_union": 73.7554, "num_word_context": 202.3956, "num_word_doc": 49.8387, "num_word_query": 39.9569, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2567.8664, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7754, "query_norm": 1.5844, "queue_k_norm": 1.6088, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.382, "sent_len_1": 66.744, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7562, "stdk": 0.0493, "stdq": 0.0464, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 53200 }, { "accuracy": 59.6191, "active_queue_size": 16384.0, "cl_loss": 2.6751, "doc_norm": 1.6104, "encoder_q-embeddings": 3484.6479, "encoder_q-layer.0": 2567.0286, "encoder_q-layer.1": 3018.7344, "encoder_q-layer.10": 1685.7161, "encoder_q-layer.11": 3456.927, "encoder_q-layer.2": 3564.7891, "encoder_q-layer.3": 3055.1533, "encoder_q-layer.4": 3166.1753, "encoder_q-layer.5": 3034.9124, "encoder_q-layer.6": 3287.1101, "encoder_q-layer.7": 2988.4124, "encoder_q-layer.8": 2354.0137, "encoder_q-layer.9": 1730.8156, "epoch": 0.52, "inbatch_neg_score": 0.7745, "inbatch_pos_score": 1.5117, "learning_rate": 2.5944444444444444e-05, "loss": 2.6751, "norm_diff": 0.0211, "norm_loss": 0.0, "num_token_doc": 66.7513, "num_token_overlap": 17.8129, "num_token_query": 52.2916, "num_token_union": 73.731, "num_word_context": 202.2541, "num_word_doc": 49.8357, "num_word_query": 39.8903, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4480.0367, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7739, "query_norm": 1.5893, "queue_k_norm": 1.6132, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2916, "sent_len_1": 66.7513, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8812, "stdk": 0.0492, "stdq": 0.0468, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 53300 }, { "accuracy": 58.4473, "active_queue_size": 16384.0, "cl_loss": 2.6819, "doc_norm": 1.6124, "encoder_q-embeddings": 3036.2913, "encoder_q-layer.0": 1989.6931, "encoder_q-layer.1": 2237.6384, "encoder_q-layer.10": 3606.853, "encoder_q-layer.11": 7260.4326, "encoder_q-layer.2": 2575.7961, "encoder_q-layer.3": 2774.2295, "encoder_q-layer.4": 3064.9297, "encoder_q-layer.5": 3331.1633, "encoder_q-layer.6": 3537.5713, "encoder_q-layer.7": 3879.4011, "encoder_q-layer.8": 4006.2756, "encoder_q-layer.9": 3447.426, "epoch": 0.52, "inbatch_neg_score": 0.771, "inbatch_pos_score": 1.4941, "learning_rate": 2.588888888888889e-05, "loss": 2.6819, "norm_diff": 0.04, "norm_loss": 0.0, "num_token_doc": 66.7262, "num_token_overlap": 17.7713, "num_token_query": 52.2024, "num_token_union": 73.6689, "num_word_context": 201.977, "num_word_doc": 49.7916, "num_word_query": 39.8282, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5311.0182, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.77, "query_norm": 1.5724, "queue_k_norm": 1.6142, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2024, "sent_len_1": 66.7262, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1987, "stdk": 0.0492, "stdq": 0.0461, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 53400 }, { "accuracy": 61.7188, "active_queue_size": 16384.0, "cl_loss": 2.6724, "doc_norm": 1.6129, "encoder_q-embeddings": 3259.7966, "encoder_q-layer.0": 2139.2336, "encoder_q-layer.1": 2346.4526, "encoder_q-layer.10": 3895.9429, "encoder_q-layer.11": 7197.0244, "encoder_q-layer.2": 2693.8625, "encoder_q-layer.3": 2761.4963, "encoder_q-layer.4": 2951.6624, "encoder_q-layer.5": 3074.7229, "encoder_q-layer.6": 3289.1226, "encoder_q-layer.7": 3605.7351, "encoder_q-layer.8": 4106.9824, "encoder_q-layer.9": 3534.5298, "epoch": 0.52, "inbatch_neg_score": 0.7671, "inbatch_pos_score": 1.5176, "learning_rate": 2.5833333333333336e-05, "loss": 2.6724, "norm_diff": 0.0392, "norm_loss": 0.0, "num_token_doc": 66.8026, "num_token_overlap": 17.8094, "num_token_query": 52.3443, "num_token_union": 73.7605, "num_word_context": 202.42, "num_word_doc": 49.8429, "num_word_query": 39.925, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5313.1092, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7666, "query_norm": 1.5737, "queue_k_norm": 1.614, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3443, "sent_len_1": 66.8026, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8575, "stdk": 0.0492, "stdq": 0.0463, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 53500 }, { "accuracy": 62.0117, "active_queue_size": 16384.0, "cl_loss": 2.6632, "doc_norm": 1.618, "encoder_q-embeddings": 3677.9878, "encoder_q-layer.0": 2512.7825, "encoder_q-layer.1": 3002.9531, "encoder_q-layer.10": 3326.8523, "encoder_q-layer.11": 7100.5361, "encoder_q-layer.2": 3635.4031, "encoder_q-layer.3": 4007.875, "encoder_q-layer.4": 4236.7212, "encoder_q-layer.5": 4378.9116, "encoder_q-layer.6": 4594.3237, "encoder_q-layer.7": 4362.9097, "encoder_q-layer.8": 4151.2974, "encoder_q-layer.9": 3477.2808, "epoch": 0.52, "inbatch_neg_score": 0.7685, "inbatch_pos_score": 1.5195, "learning_rate": 2.5777777777777778e-05, "loss": 2.6632, "norm_diff": 0.0397, "norm_loss": 0.0, "num_token_doc": 66.5839, "num_token_overlap": 17.8183, "num_token_query": 52.2874, "num_token_union": 73.584, "num_word_context": 202.1388, "num_word_doc": 49.6987, "num_word_query": 39.8926, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6172.0071, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7676, "query_norm": 1.5783, "queue_k_norm": 1.6151, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2874, "sent_len_1": 66.5839, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9238, "stdk": 0.0494, "stdq": 0.0464, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 53600 }, { "accuracy": 58.7402, "active_queue_size": 16384.0, "cl_loss": 2.6824, "doc_norm": 1.6123, "encoder_q-embeddings": 3872.9529, "encoder_q-layer.0": 2713.1416, "encoder_q-layer.1": 2948.0669, "encoder_q-layer.10": 3751.7417, "encoder_q-layer.11": 6698.4409, "encoder_q-layer.2": 3462.7458, "encoder_q-layer.3": 3936.2693, "encoder_q-layer.4": 4097.5967, "encoder_q-layer.5": 4190.3945, "encoder_q-layer.6": 4010.0432, "encoder_q-layer.7": 3826.48, "encoder_q-layer.8": 4245.6631, "encoder_q-layer.9": 3547.5315, "epoch": 0.52, "inbatch_neg_score": 0.7711, "inbatch_pos_score": 1.4941, "learning_rate": 2.5722222222222224e-05, "loss": 2.6824, "norm_diff": 0.0347, "norm_loss": 0.0, "num_token_doc": 66.7964, "num_token_overlap": 17.8713, "num_token_query": 52.3186, "num_token_union": 73.7103, "num_word_context": 202.4425, "num_word_doc": 49.8524, "num_word_query": 39.8972, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5863.6549, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7705, "query_norm": 1.5776, "queue_k_norm": 1.616, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3186, "sent_len_1": 66.7964, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9775, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 53700 }, { "accuracy": 60.6934, "active_queue_size": 16384.0, "cl_loss": 2.6758, "doc_norm": 1.6198, "encoder_q-embeddings": 2458.7422, "encoder_q-layer.0": 1623.2202, "encoder_q-layer.1": 1766.5377, "encoder_q-layer.10": 3480.8359, "encoder_q-layer.11": 7048.4575, "encoder_q-layer.2": 2036.655, "encoder_q-layer.3": 2253.1353, "encoder_q-layer.4": 2426.6584, "encoder_q-layer.5": 2580.8904, "encoder_q-layer.6": 2991.4944, "encoder_q-layer.7": 3233.8723, "encoder_q-layer.8": 3775.5581, "encoder_q-layer.9": 3360.0825, "epoch": 0.53, "inbatch_neg_score": 0.7703, "inbatch_pos_score": 1.5146, "learning_rate": 2.5666666666666666e-05, "loss": 2.6758, "norm_diff": 0.0437, "norm_loss": 0.0, "num_token_doc": 66.7617, "num_token_overlap": 17.8152, "num_token_query": 52.3797, "num_token_union": 73.7316, "num_word_context": 202.2547, "num_word_doc": 49.8035, "num_word_query": 39.9706, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4792.6607, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.771, "query_norm": 1.5761, "queue_k_norm": 1.616, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3797, "sent_len_1": 66.7617, "sent_len_max_0": 128.0, "sent_len_max_1": 211.0163, "stdk": 0.0494, "stdq": 0.0464, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 53800 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.6709, "doc_norm": 1.6203, "encoder_q-embeddings": 2972.0327, "encoder_q-layer.0": 1932.9749, "encoder_q-layer.1": 2090.439, "encoder_q-layer.10": 3186.3877, "encoder_q-layer.11": 6723.374, "encoder_q-layer.2": 2364.1519, "encoder_q-layer.3": 2578.9473, "encoder_q-layer.4": 2698.0813, "encoder_q-layer.5": 2959.0259, "encoder_q-layer.6": 3102.366, "encoder_q-layer.7": 3332.8276, "encoder_q-layer.8": 3728.0591, "encoder_q-layer.9": 3234.416, "epoch": 0.53, "inbatch_neg_score": 0.7679, "inbatch_pos_score": 1.5283, "learning_rate": 2.5611111111111115e-05, "loss": 2.6709, "norm_diff": 0.0343, "norm_loss": 0.0, "num_token_doc": 66.8334, "num_token_overlap": 17.801, "num_token_query": 52.3121, "num_token_union": 73.7618, "num_word_context": 202.5498, "num_word_doc": 49.8775, "num_word_query": 39.8885, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4927.3557, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7681, "query_norm": 1.586, "queue_k_norm": 1.6179, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3121, "sent_len_1": 66.8334, "sent_len_max_0": 128.0, "sent_len_max_1": 209.945, "stdk": 0.0494, "stdq": 0.0469, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 53900 }, { "accuracy": 59.7168, "active_queue_size": 16384.0, "cl_loss": 2.6732, "doc_norm": 1.6153, "encoder_q-embeddings": 2602.0635, "encoder_q-layer.0": 1708.1862, "encoder_q-layer.1": 1880.5321, "encoder_q-layer.10": 3409.2979, "encoder_q-layer.11": 6794.3965, "encoder_q-layer.2": 2114.2812, "encoder_q-layer.3": 2377.5781, "encoder_q-layer.4": 2447.5005, "encoder_q-layer.5": 2612.8062, "encoder_q-layer.6": 2997.5889, "encoder_q-layer.7": 3272.2021, "encoder_q-layer.8": 3670.0698, "encoder_q-layer.9": 3206.9194, "epoch": 0.53, "inbatch_neg_score": 0.7751, "inbatch_pos_score": 1.5107, "learning_rate": 2.5555555555555554e-05, "loss": 2.6732, "norm_diff": 0.0357, "norm_loss": 0.0, "num_token_doc": 66.7507, "num_token_overlap": 17.7958, "num_token_query": 52.2061, "num_token_union": 73.6586, "num_word_context": 201.9775, "num_word_doc": 49.8017, "num_word_query": 39.8112, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4739.9669, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7744, "query_norm": 1.5797, "queue_k_norm": 1.6179, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2061, "sent_len_1": 66.7507, "sent_len_max_0": 128.0, "sent_len_max_1": 210.9087, "stdk": 0.0492, "stdq": 0.0464, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 54000 }, { "accuracy": 60.498, "active_queue_size": 16384.0, "cl_loss": 2.6816, "doc_norm": 1.6189, "encoder_q-embeddings": 2967.1479, "encoder_q-layer.0": 2018.6844, "encoder_q-layer.1": 2208.3093, "encoder_q-layer.10": 3310.9182, "encoder_q-layer.11": 6893.458, "encoder_q-layer.2": 2669.1421, "encoder_q-layer.3": 2701.5149, "encoder_q-layer.4": 2862.1592, "encoder_q-layer.5": 2764.3186, "encoder_q-layer.6": 3168.1494, "encoder_q-layer.7": 3314.0898, "encoder_q-layer.8": 3700.6562, "encoder_q-layer.9": 3264.9175, "epoch": 0.53, "inbatch_neg_score": 0.7614, "inbatch_pos_score": 1.5098, "learning_rate": 2.5500000000000003e-05, "loss": 2.6816, "norm_diff": 0.0416, "norm_loss": 0.0, "num_token_doc": 66.6827, "num_token_overlap": 17.7895, "num_token_query": 52.2505, "num_token_union": 73.6764, "num_word_context": 202.555, "num_word_doc": 49.7833, "num_word_query": 39.8619, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5032.2902, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7607, "query_norm": 1.5773, "queue_k_norm": 1.6176, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2505, "sent_len_1": 66.6827, "sent_len_max_0": 128.0, "sent_len_max_1": 206.7175, "stdk": 0.0493, "stdq": 0.0467, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 54100 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.6776, "doc_norm": 1.6184, "encoder_q-embeddings": 6248.8901, "encoder_q-layer.0": 4019.9209, "encoder_q-layer.1": 5177.5342, "encoder_q-layer.10": 3853.2649, "encoder_q-layer.11": 7064.1284, "encoder_q-layer.2": 6688.5479, "encoder_q-layer.3": 6584.8062, "encoder_q-layer.4": 7648.0908, "encoder_q-layer.5": 7773.3491, "encoder_q-layer.6": 8176.6367, "encoder_q-layer.7": 6505.2153, "encoder_q-layer.8": 5050.5815, "encoder_q-layer.9": 3886.5825, "epoch": 0.53, "inbatch_neg_score": 0.7658, "inbatch_pos_score": 1.5234, "learning_rate": 2.5444444444444442e-05, "loss": 2.6776, "norm_diff": 0.0214, "norm_loss": 0.0, "num_token_doc": 66.8284, "num_token_overlap": 17.7652, "num_token_query": 52.241, "num_token_union": 73.7679, "num_word_context": 202.4725, "num_word_doc": 49.8974, "num_word_query": 39.8575, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9160.496, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7671, "query_norm": 1.597, "queue_k_norm": 1.6178, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.241, "sent_len_1": 66.8284, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1975, "stdk": 0.0493, "stdq": 0.0474, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 54200 }, { "accuracy": 59.2285, "active_queue_size": 16384.0, "cl_loss": 2.6798, "doc_norm": 1.615, "encoder_q-embeddings": 1523.8601, "encoder_q-layer.0": 1036.5493, "encoder_q-layer.1": 1171.7169, "encoder_q-layer.10": 1730.9286, "encoder_q-layer.11": 3657.9685, "encoder_q-layer.2": 1385.9836, "encoder_q-layer.3": 1374.9473, "encoder_q-layer.4": 1456.1699, "encoder_q-layer.5": 1437.6677, "encoder_q-layer.6": 1607.0138, "encoder_q-layer.7": 1718.6989, "encoder_q-layer.8": 1985.4254, "encoder_q-layer.9": 1761.8993, "epoch": 0.53, "inbatch_neg_score": 0.7681, "inbatch_pos_score": 1.498, "learning_rate": 2.538888888888889e-05, "loss": 2.6798, "norm_diff": 0.0415, "norm_loss": 0.0, "num_token_doc": 66.7856, "num_token_overlap": 17.7929, "num_token_query": 52.1203, "num_token_union": 73.6348, "num_word_context": 202.2507, "num_word_doc": 49.8291, "num_word_query": 39.7514, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2624.4803, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7681, "query_norm": 1.5734, "queue_k_norm": 1.6193, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1203, "sent_len_1": 66.7856, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1525, "stdk": 0.0491, "stdq": 0.0464, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 54300 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.6789, "doc_norm": 1.6189, "encoder_q-embeddings": 1356.3096, "encoder_q-layer.0": 846.1978, "encoder_q-layer.1": 932.379, "encoder_q-layer.10": 1613.8962, "encoder_q-layer.11": 3378.2495, "encoder_q-layer.2": 1045.001, "encoder_q-layer.3": 1092.9266, "encoder_q-layer.4": 1193.3525, "encoder_q-layer.5": 1219.5575, "encoder_q-layer.6": 1436.9027, "encoder_q-layer.7": 1536.9868, "encoder_q-layer.8": 1790.1724, "encoder_q-layer.9": 1643.5623, "epoch": 0.53, "inbatch_neg_score": 0.7678, "inbatch_pos_score": 1.5098, "learning_rate": 2.5333333333333337e-05, "loss": 2.6789, "norm_diff": 0.0535, "norm_loss": 0.0, "num_token_doc": 66.7438, "num_token_overlap": 17.8326, "num_token_query": 52.3134, "num_token_union": 73.6773, "num_word_context": 202.2827, "num_word_doc": 49.8065, "num_word_query": 39.9018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2342.4858, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7681, "query_norm": 1.5654, "queue_k_norm": 1.619, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3134, "sent_len_1": 66.7438, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4275, "stdk": 0.0492, "stdq": 0.0461, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 54400 }, { "accuracy": 59.5215, "active_queue_size": 16384.0, "cl_loss": 2.6747, "doc_norm": 1.6167, "encoder_q-embeddings": 3861.9348, "encoder_q-layer.0": 2729.0337, "encoder_q-layer.1": 3531.9075, "encoder_q-layer.10": 1668.3628, "encoder_q-layer.11": 3324.957, "encoder_q-layer.2": 4101.1079, "encoder_q-layer.3": 4700.2744, "encoder_q-layer.4": 5721.1406, "encoder_q-layer.5": 6091.9404, "encoder_q-layer.6": 5310.3618, "encoder_q-layer.7": 3400.8225, "encoder_q-layer.8": 2536.9805, "encoder_q-layer.9": 1970.6873, "epoch": 0.53, "inbatch_neg_score": 0.7648, "inbatch_pos_score": 1.4961, "learning_rate": 2.527777777777778e-05, "loss": 2.6747, "norm_diff": 0.0556, "norm_loss": 0.0, "num_token_doc": 66.6871, "num_token_overlap": 17.7964, "num_token_query": 52.2974, "num_token_union": 73.6788, "num_word_context": 202.3766, "num_word_doc": 49.7521, "num_word_query": 39.8914, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5969.277, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7642, "query_norm": 1.5611, "queue_k_norm": 1.6204, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2974, "sent_len_1": 66.6871, "sent_len_max_0": 128.0, "sent_len_max_1": 210.6413, "stdk": 0.0491, "stdq": 0.0459, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 54500 }, { "accuracy": 60.2051, "active_queue_size": 16384.0, "cl_loss": 2.6864, "doc_norm": 1.6127, "encoder_q-embeddings": 1313.2401, "encoder_q-layer.0": 833.1826, "encoder_q-layer.1": 936.2914, "encoder_q-layer.10": 1782.8473, "encoder_q-layer.11": 3771.6323, "encoder_q-layer.2": 1076.3545, "encoder_q-layer.3": 1123.8124, "encoder_q-layer.4": 1219.9012, "encoder_q-layer.5": 1267.47, "encoder_q-layer.6": 1443.9152, "encoder_q-layer.7": 1635.6892, "encoder_q-layer.8": 1914.5447, "encoder_q-layer.9": 1787.8181, "epoch": 0.53, "inbatch_neg_score": 0.7709, "inbatch_pos_score": 1.4883, "learning_rate": 2.5222222222222225e-05, "loss": 2.6864, "norm_diff": 0.0575, "norm_loss": 0.0, "num_token_doc": 66.6552, "num_token_overlap": 17.7829, "num_token_query": 52.2669, "num_token_union": 73.6525, "num_word_context": 202.2459, "num_word_doc": 49.7372, "num_word_query": 39.8626, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2520.3887, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.77, "query_norm": 1.5552, "queue_k_norm": 1.621, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2669, "sent_len_1": 66.6552, "sent_len_max_0": 128.0, "sent_len_max_1": 211.7887, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 54600 }, { "accuracy": 62.7441, "active_queue_size": 16384.0, "cl_loss": 2.6778, "doc_norm": 1.6236, "encoder_q-embeddings": 1549.9845, "encoder_q-layer.0": 1024.7751, "encoder_q-layer.1": 1171.7861, "encoder_q-layer.10": 1773.8684, "encoder_q-layer.11": 3511.0154, "encoder_q-layer.2": 1415.8992, "encoder_q-layer.3": 1508.3593, "encoder_q-layer.4": 1649.8848, "encoder_q-layer.5": 1723.4598, "encoder_q-layer.6": 1788.5265, "encoder_q-layer.7": 1744.9572, "encoder_q-layer.8": 1981.7681, "encoder_q-layer.9": 1701.6307, "epoch": 0.53, "inbatch_neg_score": 0.7669, "inbatch_pos_score": 1.5303, "learning_rate": 2.5166666666666667e-05, "loss": 2.6778, "norm_diff": 0.0417, "norm_loss": 0.0, "num_token_doc": 66.7562, "num_token_overlap": 17.8609, "num_token_query": 52.414, "num_token_union": 73.7415, "num_word_context": 202.3328, "num_word_doc": 49.7862, "num_word_query": 39.9854, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2654.3578, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7676, "query_norm": 1.5819, "queue_k_norm": 1.6185, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.414, "sent_len_1": 66.7562, "sent_len_max_0": 128.0, "sent_len_max_1": 208.975, "stdk": 0.0494, "stdq": 0.0467, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 54700 }, { "accuracy": 61.8652, "active_queue_size": 16384.0, "cl_loss": 2.6539, "doc_norm": 1.621, "encoder_q-embeddings": 1324.76, "encoder_q-layer.0": 821.116, "encoder_q-layer.1": 906.8984, "encoder_q-layer.10": 1748.1664, "encoder_q-layer.11": 3418.2131, "encoder_q-layer.2": 1037.3401, "encoder_q-layer.3": 1085.5116, "encoder_q-layer.4": 1162.2677, "encoder_q-layer.5": 1240.8281, "encoder_q-layer.6": 1348.8661, "encoder_q-layer.7": 1506.2565, "encoder_q-layer.8": 1794.9519, "encoder_q-layer.9": 1646.3699, "epoch": 0.54, "inbatch_neg_score": 0.76, "inbatch_pos_score": 1.5078, "learning_rate": 2.5111111111111113e-05, "loss": 2.6539, "norm_diff": 0.0387, "norm_loss": 0.0, "num_token_doc": 66.8905, "num_token_overlap": 17.8867, "num_token_query": 52.4023, "num_token_union": 73.7942, "num_word_context": 202.3962, "num_word_doc": 49.9053, "num_word_query": 39.9836, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2337.8888, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7603, "query_norm": 1.5822, "queue_k_norm": 1.622, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4023, "sent_len_1": 66.8905, "sent_len_max_0": 128.0, "sent_len_max_1": 211.2038, "stdk": 0.0493, "stdq": 0.0469, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 54800 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.6683, "doc_norm": 1.6213, "encoder_q-embeddings": 1590.5233, "encoder_q-layer.0": 1036.3412, "encoder_q-layer.1": 1148.0864, "encoder_q-layer.10": 1700.4417, "encoder_q-layer.11": 3456.78, "encoder_q-layer.2": 1255.7102, "encoder_q-layer.3": 1372.1691, "encoder_q-layer.4": 1497.7853, "encoder_q-layer.5": 1542.078, "encoder_q-layer.6": 1752.5767, "encoder_q-layer.7": 1826.65, "encoder_q-layer.8": 1890.051, "encoder_q-layer.9": 1664.4375, "epoch": 0.54, "inbatch_neg_score": 0.7664, "inbatch_pos_score": 1.4932, "learning_rate": 2.5055555555555555e-05, "loss": 2.6683, "norm_diff": 0.0454, "norm_loss": 0.0, "num_token_doc": 66.829, "num_token_overlap": 17.8167, "num_token_query": 52.2782, "num_token_union": 73.754, "num_word_context": 202.3774, "num_word_doc": 49.8956, "num_word_query": 39.8753, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2570.6618, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7661, "query_norm": 1.5758, "queue_k_norm": 1.621, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2782, "sent_len_1": 66.829, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0137, "stdk": 0.0493, "stdq": 0.0463, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 54900 }, { "accuracy": 62.3535, "active_queue_size": 16384.0, "cl_loss": 2.6599, "doc_norm": 1.622, "encoder_q-embeddings": 1418.6047, "encoder_q-layer.0": 931.0488, "encoder_q-layer.1": 1101.7213, "encoder_q-layer.10": 1654.2157, "encoder_q-layer.11": 3264.2466, "encoder_q-layer.2": 1234.2808, "encoder_q-layer.3": 1344.1986, "encoder_q-layer.4": 1535.9214, "encoder_q-layer.5": 1533.0171, "encoder_q-layer.6": 1668.3992, "encoder_q-layer.7": 1598.0634, "encoder_q-layer.8": 1788.8359, "encoder_q-layer.9": 1608.7023, "epoch": 0.54, "inbatch_neg_score": 0.7617, "inbatch_pos_score": 1.5215, "learning_rate": 2.5e-05, "loss": 2.6599, "norm_diff": 0.0369, "norm_loss": 0.0, "num_token_doc": 66.7795, "num_token_overlap": 17.8315, "num_token_query": 52.2722, "num_token_union": 73.6647, "num_word_context": 202.2646, "num_word_doc": 49.8318, "num_word_query": 39.8634, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2440.5204, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7617, "query_norm": 1.5851, "queue_k_norm": 1.6197, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2722, "sent_len_1": 66.7795, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8013, "stdk": 0.0493, "stdq": 0.0468, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 55000 }, { "accuracy": 60.0098, "active_queue_size": 16384.0, "cl_loss": 2.6753, "doc_norm": 1.6166, "encoder_q-embeddings": 2403.0342, "encoder_q-layer.0": 1614.0862, "encoder_q-layer.1": 1917.5463, "encoder_q-layer.10": 1810.5731, "encoder_q-layer.11": 3568.3506, "encoder_q-layer.2": 2326.0928, "encoder_q-layer.3": 2574.3481, "encoder_q-layer.4": 2862.3679, "encoder_q-layer.5": 2968.0784, "encoder_q-layer.6": 2737.3818, "encoder_q-layer.7": 2448.3811, "encoder_q-layer.8": 2176.6741, "encoder_q-layer.9": 1801.6429, "epoch": 0.54, "inbatch_neg_score": 0.7611, "inbatch_pos_score": 1.5117, "learning_rate": 2.4944444444444447e-05, "loss": 2.6753, "norm_diff": 0.0209, "norm_loss": 0.0, "num_token_doc": 66.8861, "num_token_overlap": 17.8135, "num_token_query": 52.2236, "num_token_union": 73.7463, "num_word_context": 202.3706, "num_word_doc": 49.9087, "num_word_query": 39.8044, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3599.0442, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7612, "query_norm": 1.5957, "queue_k_norm": 1.6217, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2236, "sent_len_1": 66.8861, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1275, "stdk": 0.0491, "stdq": 0.0472, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 55100 }, { "accuracy": 58.0566, "active_queue_size": 16384.0, "cl_loss": 2.6673, "doc_norm": 1.618, "encoder_q-embeddings": 2085.6711, "encoder_q-layer.0": 1440.3473, "encoder_q-layer.1": 1592.7308, "encoder_q-layer.10": 1749.0875, "encoder_q-layer.11": 3763.6743, "encoder_q-layer.2": 1873.1545, "encoder_q-layer.3": 1993.1675, "encoder_q-layer.4": 2284.938, "encoder_q-layer.5": 2476.0239, "encoder_q-layer.6": 2674.2517, "encoder_q-layer.7": 2462.7698, "encoder_q-layer.8": 2316.864, "encoder_q-layer.9": 1864.5214, "epoch": 0.54, "inbatch_neg_score": 0.7628, "inbatch_pos_score": 1.499, "learning_rate": 2.488888888888889e-05, "loss": 2.6673, "norm_diff": 0.0384, "norm_loss": 0.0, "num_token_doc": 66.7803, "num_token_overlap": 17.8197, "num_token_query": 52.2855, "num_token_union": 73.7053, "num_word_context": 202.2416, "num_word_doc": 49.8213, "num_word_query": 39.8604, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3342.0338, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7622, "query_norm": 1.5797, "queue_k_norm": 1.6205, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2855, "sent_len_1": 66.7803, "sent_len_max_0": 128.0, "sent_len_max_1": 210.8162, "stdk": 0.0491, "stdq": 0.0463, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 55200 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.664, "doc_norm": 1.6208, "encoder_q-embeddings": 1782.7271, "encoder_q-layer.0": 1155.3687, "encoder_q-layer.1": 1300.8849, "encoder_q-layer.10": 1758.2933, "encoder_q-layer.11": 3514.7664, "encoder_q-layer.2": 1504.1958, "encoder_q-layer.3": 1689.6147, "encoder_q-layer.4": 1868.9336, "encoder_q-layer.5": 2038.2747, "encoder_q-layer.6": 2021.5791, "encoder_q-layer.7": 2001.9647, "encoder_q-layer.8": 2073.5918, "encoder_q-layer.9": 1733.9493, "epoch": 0.54, "inbatch_neg_score": 0.7647, "inbatch_pos_score": 1.5166, "learning_rate": 2.4833333333333335e-05, "loss": 2.664, "norm_diff": 0.0232, "norm_loss": 0.0, "num_token_doc": 66.8153, "num_token_overlap": 17.8386, "num_token_query": 52.2215, "num_token_union": 73.6706, "num_word_context": 202.3071, "num_word_doc": 49.8451, "num_word_query": 39.8281, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2861.9967, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7661, "query_norm": 1.5976, "queue_k_norm": 1.6194, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2215, "sent_len_1": 66.8153, "sent_len_max_0": 128.0, "sent_len_max_1": 209.42, "stdk": 0.0493, "stdq": 0.0468, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 55300 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 2.6802, "doc_norm": 1.6234, "encoder_q-embeddings": 1313.2123, "encoder_q-layer.0": 828.6558, "encoder_q-layer.1": 932.8901, "encoder_q-layer.10": 1794.7041, "encoder_q-layer.11": 3425.4949, "encoder_q-layer.2": 1062.2047, "encoder_q-layer.3": 1080.8385, "encoder_q-layer.4": 1143.9944, "encoder_q-layer.5": 1183.7568, "encoder_q-layer.6": 1332.6678, "encoder_q-layer.7": 1459.8451, "encoder_q-layer.8": 1855.7131, "encoder_q-layer.9": 1654.9395, "epoch": 0.54, "inbatch_neg_score": 0.7759, "inbatch_pos_score": 1.5098, "learning_rate": 2.477777777777778e-05, "loss": 2.6802, "norm_diff": 0.0231, "norm_loss": 0.0, "num_token_doc": 66.7104, "num_token_overlap": 17.7882, "num_token_query": 52.448, "num_token_union": 73.8157, "num_word_context": 202.371, "num_word_doc": 49.7491, "num_word_query": 40.0087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2307.9237, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7734, "query_norm": 1.6003, "queue_k_norm": 1.6217, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.448, "sent_len_1": 66.7104, "sent_len_max_0": 128.0, "sent_len_max_1": 210.6262, "stdk": 0.0493, "stdq": 0.0467, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 55400 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.663, "doc_norm": 1.6191, "encoder_q-embeddings": 1487.9939, "encoder_q-layer.0": 942.0747, "encoder_q-layer.1": 1053.4752, "encoder_q-layer.10": 1747.39, "encoder_q-layer.11": 3482.7502, "encoder_q-layer.2": 1199.11, "encoder_q-layer.3": 1313.7313, "encoder_q-layer.4": 1482.4392, "encoder_q-layer.5": 1550.8242, "encoder_q-layer.6": 1643.1724, "encoder_q-layer.7": 1926.7379, "encoder_q-layer.8": 2083.7422, "encoder_q-layer.9": 1718.1566, "epoch": 0.54, "inbatch_neg_score": 0.7755, "inbatch_pos_score": 1.5127, "learning_rate": 2.4722222222222223e-05, "loss": 2.663, "norm_diff": 0.0163, "norm_loss": 0.0, "num_token_doc": 66.8004, "num_token_overlap": 17.8206, "num_token_query": 52.2528, "num_token_union": 73.6963, "num_word_context": 202.6198, "num_word_doc": 49.8386, "num_word_query": 39.8618, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2591.4453, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7764, "query_norm": 1.6028, "queue_k_norm": 1.6234, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2528, "sent_len_1": 66.8004, "sent_len_max_0": 128.0, "sent_len_max_1": 210.725, "stdk": 0.0492, "stdq": 0.0467, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 55500 }, { "accuracy": 61.2793, "active_queue_size": 16384.0, "cl_loss": 2.6685, "doc_norm": 1.6257, "encoder_q-embeddings": 1636.175, "encoder_q-layer.0": 1054.1051, "encoder_q-layer.1": 1205.2554, "encoder_q-layer.10": 1715.1604, "encoder_q-layer.11": 3461.4204, "encoder_q-layer.2": 1384.9559, "encoder_q-layer.3": 1462.7117, "encoder_q-layer.4": 1558.4683, "encoder_q-layer.5": 1583.8832, "encoder_q-layer.6": 1577.2527, "encoder_q-layer.7": 1679.6129, "encoder_q-layer.8": 1865.5947, "encoder_q-layer.9": 1649.9358, "epoch": 0.54, "inbatch_neg_score": 0.7851, "inbatch_pos_score": 1.5293, "learning_rate": 2.466666666666667e-05, "loss": 2.6685, "norm_diff": 0.0255, "norm_loss": 0.0, "num_token_doc": 66.9103, "num_token_overlap": 17.8388, "num_token_query": 52.3337, "num_token_union": 73.8226, "num_word_context": 202.4632, "num_word_doc": 49.9502, "num_word_query": 39.9173, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2608.2936, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7842, "query_norm": 1.6002, "queue_k_norm": 1.623, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3337, "sent_len_1": 66.9103, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1925, "stdk": 0.0494, "stdq": 0.0463, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 55600 }, { "accuracy": 60.1074, "active_queue_size": 16384.0, "cl_loss": 2.6757, "doc_norm": 1.6254, "encoder_q-embeddings": 1763.8871, "encoder_q-layer.0": 1202.8778, "encoder_q-layer.1": 1327.3275, "encoder_q-layer.10": 1594.5623, "encoder_q-layer.11": 3324.6616, "encoder_q-layer.2": 1576.2903, "encoder_q-layer.3": 1712.4929, "encoder_q-layer.4": 1967.9889, "encoder_q-layer.5": 2222.3757, "encoder_q-layer.6": 2160.5527, "encoder_q-layer.7": 2197.4402, "encoder_q-layer.8": 2123.0808, "encoder_q-layer.9": 1728.8881, "epoch": 0.54, "inbatch_neg_score": 0.7858, "inbatch_pos_score": 1.5391, "learning_rate": 2.461111111111111e-05, "loss": 2.6757, "norm_diff": 0.0134, "norm_loss": 0.0, "num_token_doc": 66.8361, "num_token_overlap": 17.7786, "num_token_query": 52.1071, "num_token_union": 73.7033, "num_word_context": 202.232, "num_word_doc": 49.8843, "num_word_query": 39.7358, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2885.4457, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7847, "query_norm": 1.612, "queue_k_norm": 1.6237, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1071, "sent_len_1": 66.8361, "sent_len_max_0": 128.0, "sent_len_max_1": 209.7225, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 55700 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.6664, "doc_norm": 1.6233, "encoder_q-embeddings": 1324.234, "encoder_q-layer.0": 828.7408, "encoder_q-layer.1": 867.6981, "encoder_q-layer.10": 1808.4709, "encoder_q-layer.11": 3538.896, "encoder_q-layer.2": 970.304, "encoder_q-layer.3": 1034.8267, "encoder_q-layer.4": 1129.0626, "encoder_q-layer.5": 1202.8375, "encoder_q-layer.6": 1400.0206, "encoder_q-layer.7": 1676.7856, "encoder_q-layer.8": 1998.9167, "encoder_q-layer.9": 1781.6479, "epoch": 0.54, "inbatch_neg_score": 0.7891, "inbatch_pos_score": 1.5352, "learning_rate": 2.4555555555555557e-05, "loss": 2.6664, "norm_diff": 0.0074, "norm_loss": 0.0, "num_token_doc": 66.7167, "num_token_overlap": 17.7887, "num_token_query": 52.1913, "num_token_union": 73.6747, "num_word_context": 202.2873, "num_word_doc": 49.7935, "num_word_query": 39.8006, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2405.1182, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.79, "query_norm": 1.6188, "queue_k_norm": 1.6228, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1913, "sent_len_1": 66.7167, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2012, "stdk": 0.0493, "stdq": 0.0472, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 55800 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.672, "doc_norm": 1.6271, "encoder_q-embeddings": 1199.3318, "encoder_q-layer.0": 764.1917, "encoder_q-layer.1": 802.6839, "encoder_q-layer.10": 1593.7394, "encoder_q-layer.11": 3486.5237, "encoder_q-layer.2": 899.6758, "encoder_q-layer.3": 946.2786, "encoder_q-layer.4": 1006.3278, "encoder_q-layer.5": 1089.8031, "encoder_q-layer.6": 1258.6284, "encoder_q-layer.7": 1472.9108, "encoder_q-layer.8": 1713.0129, "encoder_q-layer.9": 1600.2209, "epoch": 0.55, "inbatch_neg_score": 0.7973, "inbatch_pos_score": 1.5518, "learning_rate": 2.45e-05, "loss": 2.672, "norm_diff": 0.0196, "norm_loss": 0.0, "num_token_doc": 66.8588, "num_token_overlap": 17.8013, "num_token_query": 52.2121, "num_token_union": 73.7053, "num_word_context": 202.1456, "num_word_doc": 49.8866, "num_word_query": 39.8307, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2225.4079, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7969, "query_norm": 1.6075, "queue_k_norm": 1.6242, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2121, "sent_len_1": 66.8588, "sent_len_max_0": 128.0, "sent_len_max_1": 208.235, "stdk": 0.0494, "stdq": 0.0469, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 55900 }, { "accuracy": 59.7168, "active_queue_size": 16384.0, "cl_loss": 2.6779, "doc_norm": 1.6255, "encoder_q-embeddings": 1284.8691, "encoder_q-layer.0": 806.5388, "encoder_q-layer.1": 908.8002, "encoder_q-layer.10": 1850.3567, "encoder_q-layer.11": 3837.4387, "encoder_q-layer.2": 1024.376, "encoder_q-layer.3": 1099.6748, "encoder_q-layer.4": 1183.9587, "encoder_q-layer.5": 1194.9912, "encoder_q-layer.6": 1397.8602, "encoder_q-layer.7": 1606.3436, "encoder_q-layer.8": 1862.2933, "encoder_q-layer.9": 1769.707, "epoch": 0.55, "inbatch_neg_score": 0.8013, "inbatch_pos_score": 1.5352, "learning_rate": 2.4444444444444445e-05, "loss": 2.6779, "norm_diff": 0.0342, "norm_loss": 0.0, "num_token_doc": 66.6749, "num_token_overlap": 17.8053, "num_token_query": 52.1844, "num_token_union": 73.5846, "num_word_context": 202.1368, "num_word_doc": 49.7706, "num_word_query": 39.8016, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2452.0545, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8018, "query_norm": 1.5912, "queue_k_norm": 1.6251, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1844, "sent_len_1": 66.6749, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3113, "stdk": 0.0493, "stdq": 0.0462, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 56000 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.6904, "doc_norm": 1.6235, "encoder_q-embeddings": 2661.489, "encoder_q-layer.0": 1790.2544, "encoder_q-layer.1": 2004.4364, "encoder_q-layer.10": 1753.9133, "encoder_q-layer.11": 3758.3186, "encoder_q-layer.2": 2432.9277, "encoder_q-layer.3": 2572.314, "encoder_q-layer.4": 3001.0752, "encoder_q-layer.5": 3164.3477, "encoder_q-layer.6": 3290.3726, "encoder_q-layer.7": 2865.7122, "encoder_q-layer.8": 2559.7407, "encoder_q-layer.9": 1918.1727, "epoch": 0.55, "inbatch_neg_score": 0.8062, "inbatch_pos_score": 1.5479, "learning_rate": 2.4388888888888887e-05, "loss": 2.6904, "norm_diff": 0.0359, "norm_loss": 0.0, "num_token_doc": 66.8009, "num_token_overlap": 17.8212, "num_token_query": 52.4238, "num_token_union": 73.7988, "num_word_context": 202.5563, "num_word_doc": 49.8297, "num_word_query": 39.9536, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3913.8944, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8047, "query_norm": 1.5876, "queue_k_norm": 1.6263, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4238, "sent_len_1": 66.8009, "sent_len_max_0": 128.0, "sent_len_max_1": 209.755, "stdk": 0.0492, "stdq": 0.0462, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 56100 }, { "accuracy": 58.9355, "active_queue_size": 16384.0, "cl_loss": 2.6752, "doc_norm": 1.6241, "encoder_q-embeddings": 1349.3518, "encoder_q-layer.0": 875.8466, "encoder_q-layer.1": 963.3945, "encoder_q-layer.10": 1922.0022, "encoder_q-layer.11": 3796.3599, "encoder_q-layer.2": 1109.2626, "encoder_q-layer.3": 1171.4359, "encoder_q-layer.4": 1304.9081, "encoder_q-layer.5": 1363.6516, "encoder_q-layer.6": 1522.3112, "encoder_q-layer.7": 1617.564, "encoder_q-layer.8": 1963.4729, "encoder_q-layer.9": 1841.7932, "epoch": 0.55, "inbatch_neg_score": 0.8038, "inbatch_pos_score": 1.5322, "learning_rate": 2.4333333333333336e-05, "loss": 2.6752, "norm_diff": 0.0365, "norm_loss": 0.0, "num_token_doc": 66.8938, "num_token_overlap": 17.8495, "num_token_query": 52.3986, "num_token_union": 73.8109, "num_word_context": 202.5846, "num_word_doc": 49.9272, "num_word_query": 39.9829, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2485.7063, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8042, "query_norm": 1.5876, "queue_k_norm": 1.627, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3986, "sent_len_1": 66.8938, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9225, "stdk": 0.0492, "stdq": 0.0463, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 56200 }, { "accuracy": 61.5723, "active_queue_size": 16384.0, "cl_loss": 2.6732, "doc_norm": 1.6297, "encoder_q-embeddings": 2934.9023, "encoder_q-layer.0": 1892.9049, "encoder_q-layer.1": 2165.6233, "encoder_q-layer.10": 3666.2183, "encoder_q-layer.11": 7214.813, "encoder_q-layer.2": 2521.4143, "encoder_q-layer.3": 2781.8188, "encoder_q-layer.4": 2877.7007, "encoder_q-layer.5": 3077.8982, "encoder_q-layer.6": 3519.8223, "encoder_q-layer.7": 3655.6155, "encoder_q-layer.8": 4499.9888, "encoder_q-layer.9": 3636.1377, "epoch": 0.55, "inbatch_neg_score": 0.8041, "inbatch_pos_score": 1.5488, "learning_rate": 2.427777777777778e-05, "loss": 2.6732, "norm_diff": 0.0416, "norm_loss": 0.0, "num_token_doc": 66.9453, "num_token_overlap": 17.7877, "num_token_query": 52.2906, "num_token_union": 73.8477, "num_word_context": 202.3048, "num_word_doc": 49.9608, "num_word_query": 39.8853, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5283.6192, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8042, "query_norm": 1.5882, "queue_k_norm": 1.6272, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2906, "sent_len_1": 66.9453, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5775, "stdk": 0.0494, "stdq": 0.0465, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 56300 }, { "accuracy": 60.4004, "active_queue_size": 16384.0, "cl_loss": 2.6785, "doc_norm": 1.6266, "encoder_q-embeddings": 5746.7534, "encoder_q-layer.0": 4011.011, "encoder_q-layer.1": 4309.0459, "encoder_q-layer.10": 3386.5891, "encoder_q-layer.11": 7040.3535, "encoder_q-layer.2": 5206.1992, "encoder_q-layer.3": 4999.5698, "encoder_q-layer.4": 5455.7012, "encoder_q-layer.5": 5106.1006, "encoder_q-layer.6": 5362.9219, "encoder_q-layer.7": 5096.395, "encoder_q-layer.8": 4732.3789, "encoder_q-layer.9": 3568.2671, "epoch": 0.55, "inbatch_neg_score": 0.8024, "inbatch_pos_score": 1.5322, "learning_rate": 2.4222222222222224e-05, "loss": 2.6785, "norm_diff": 0.0403, "norm_loss": 0.0, "num_token_doc": 66.7996, "num_token_overlap": 17.8046, "num_token_query": 52.2306, "num_token_union": 73.6781, "num_word_context": 202.2649, "num_word_doc": 49.8458, "num_word_query": 39.8254, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7406.7596, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8037, "query_norm": 1.5863, "queue_k_norm": 1.6289, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2306, "sent_len_1": 66.7996, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4925, "stdk": 0.0492, "stdq": 0.0466, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 56400 }, { "accuracy": 60.1074, "active_queue_size": 16384.0, "cl_loss": 2.6701, "doc_norm": 1.6286, "encoder_q-embeddings": 3565.6765, "encoder_q-layer.0": 2409.7344, "encoder_q-layer.1": 2854.3496, "encoder_q-layer.10": 3453.7068, "encoder_q-layer.11": 7274.9199, "encoder_q-layer.2": 3547.7183, "encoder_q-layer.3": 3584.1755, "encoder_q-layer.4": 3907.4536, "encoder_q-layer.5": 4228.0645, "encoder_q-layer.6": 4159.2168, "encoder_q-layer.7": 4349.917, "encoder_q-layer.8": 4202.7852, "encoder_q-layer.9": 3411.6389, "epoch": 0.55, "inbatch_neg_score": 0.811, "inbatch_pos_score": 1.5498, "learning_rate": 2.4166666666666667e-05, "loss": 2.6701, "norm_diff": 0.0403, "norm_loss": 0.0, "num_token_doc": 66.7769, "num_token_overlap": 17.8066, "num_token_query": 52.2836, "num_token_union": 73.7478, "num_word_context": 202.3574, "num_word_doc": 49.8679, "num_word_query": 39.8782, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5989.9951, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.811, "query_norm": 1.5883, "queue_k_norm": 1.6297, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2836, "sent_len_1": 66.7769, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9588, "stdk": 0.0493, "stdq": 0.0465, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 56500 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.6664, "doc_norm": 1.6252, "encoder_q-embeddings": 3035.4924, "encoder_q-layer.0": 1839.7164, "encoder_q-layer.1": 2026.9189, "encoder_q-layer.10": 3416.803, "encoder_q-layer.11": 6663.7754, "encoder_q-layer.2": 2399.0, "encoder_q-layer.3": 2699.76, "encoder_q-layer.4": 2925.7417, "encoder_q-layer.5": 3109.2773, "encoder_q-layer.6": 3311.7214, "encoder_q-layer.7": 3608.2202, "encoder_q-layer.8": 3942.3384, "encoder_q-layer.9": 3352.0947, "epoch": 0.55, "inbatch_neg_score": 0.8106, "inbatch_pos_score": 1.5488, "learning_rate": 2.4111111111111113e-05, "loss": 2.6664, "norm_diff": 0.0424, "norm_loss": 0.0, "num_token_doc": 66.6964, "num_token_overlap": 17.8208, "num_token_query": 52.3124, "num_token_union": 73.6514, "num_word_context": 202.0008, "num_word_doc": 49.7826, "num_word_query": 39.9196, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5082.9312, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8105, "query_norm": 1.5829, "queue_k_norm": 1.6298, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3124, "sent_len_1": 66.6964, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0362, "stdk": 0.0491, "stdq": 0.0462, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 56600 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.6773, "doc_norm": 1.6309, "encoder_q-embeddings": 2890.7542, "encoder_q-layer.0": 2105.3967, "encoder_q-layer.1": 2252.6677, "encoder_q-layer.10": 3433.9656, "encoder_q-layer.11": 6878.1929, "encoder_q-layer.2": 2436.2786, "encoder_q-layer.3": 2513.4531, "encoder_q-layer.4": 2783.3018, "encoder_q-layer.5": 2814.1213, "encoder_q-layer.6": 3400.1582, "encoder_q-layer.7": 3618.6609, "encoder_q-layer.8": 3875.4868, "encoder_q-layer.9": 3298.3584, "epoch": 0.55, "inbatch_neg_score": 0.8116, "inbatch_pos_score": 1.5693, "learning_rate": 2.4055555555555555e-05, "loss": 2.6773, "norm_diff": 0.0322, "norm_loss": 0.0, "num_token_doc": 66.7256, "num_token_overlap": 17.7803, "num_token_query": 52.2958, "num_token_union": 73.7137, "num_word_context": 202.605, "num_word_doc": 49.8165, "num_word_query": 39.874, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5115.3148, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8125, "query_norm": 1.5987, "queue_k_norm": 1.6323, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2958, "sent_len_1": 66.7256, "sent_len_max_0": 128.0, "sent_len_max_1": 209.035, "stdk": 0.0493, "stdq": 0.0469, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 56700 }, { "accuracy": 61.7676, "active_queue_size": 16384.0, "cl_loss": 2.6615, "doc_norm": 1.6307, "encoder_q-embeddings": 3709.9182, "encoder_q-layer.0": 2520.4946, "encoder_q-layer.1": 2770.6929, "encoder_q-layer.10": 3077.6399, "encoder_q-layer.11": 6688.8145, "encoder_q-layer.2": 3076.8152, "encoder_q-layer.3": 3284.0813, "encoder_q-layer.4": 3686.1372, "encoder_q-layer.5": 3472.4431, "encoder_q-layer.6": 3768.5383, "encoder_q-layer.7": 4148.5698, "encoder_q-layer.8": 4187.8906, "encoder_q-layer.9": 3224.0149, "epoch": 0.55, "inbatch_neg_score": 0.8151, "inbatch_pos_score": 1.5732, "learning_rate": 2.4e-05, "loss": 2.6615, "norm_diff": 0.0461, "norm_loss": 0.0, "num_token_doc": 66.8457, "num_token_overlap": 17.819, "num_token_query": 52.3755, "num_token_union": 73.7753, "num_word_context": 202.3522, "num_word_doc": 49.8168, "num_word_query": 39.9264, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5659.8401, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8154, "query_norm": 1.5846, "queue_k_norm": 1.6309, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3755, "sent_len_1": 66.8457, "sent_len_max_0": 128.0, "sent_len_max_1": 212.0075, "stdk": 0.0493, "stdq": 0.0463, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 56800 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.6716, "doc_norm": 1.6279, "encoder_q-embeddings": 2624.0261, "encoder_q-layer.0": 1624.335, "encoder_q-layer.1": 1789.2327, "encoder_q-layer.10": 3668.1624, "encoder_q-layer.11": 7136.7793, "encoder_q-layer.2": 1985.6334, "encoder_q-layer.3": 2178.9165, "encoder_q-layer.4": 2347.4268, "encoder_q-layer.5": 2465.0237, "encoder_q-layer.6": 2824.6177, "encoder_q-layer.7": 3160.7644, "encoder_q-layer.8": 3770.0498, "encoder_q-layer.9": 3433.4355, "epoch": 0.56, "inbatch_neg_score": 0.811, "inbatch_pos_score": 1.5469, "learning_rate": 2.3944444444444443e-05, "loss": 2.6716, "norm_diff": 0.0339, "norm_loss": 0.0, "num_token_doc": 66.5789, "num_token_overlap": 17.7859, "num_token_query": 52.2938, "num_token_union": 73.6082, "num_word_context": 201.9663, "num_word_doc": 49.6821, "num_word_query": 39.9031, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4804.7782, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8105, "query_norm": 1.594, "queue_k_norm": 1.6322, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2938, "sent_len_1": 66.5789, "sent_len_max_0": 128.0, "sent_len_max_1": 206.4787, "stdk": 0.0491, "stdq": 0.0468, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 56900 }, { "accuracy": 61.2793, "active_queue_size": 16384.0, "cl_loss": 2.668, "doc_norm": 1.6304, "encoder_q-embeddings": 3083.2781, "encoder_q-layer.0": 1984.8406, "encoder_q-layer.1": 2188.5205, "encoder_q-layer.10": 3343.4089, "encoder_q-layer.11": 6941.5654, "encoder_q-layer.2": 2434.7314, "encoder_q-layer.3": 2697.3855, "encoder_q-layer.4": 2819.9299, "encoder_q-layer.5": 2877.2434, "encoder_q-layer.6": 3045.2197, "encoder_q-layer.7": 3197.4438, "encoder_q-layer.8": 3855.0154, "encoder_q-layer.9": 3362.5798, "epoch": 0.56, "inbatch_neg_score": 0.8156, "inbatch_pos_score": 1.5732, "learning_rate": 2.3888888888888892e-05, "loss": 2.668, "norm_diff": 0.0322, "norm_loss": 0.0, "num_token_doc": 66.6809, "num_token_overlap": 17.7644, "num_token_query": 52.1936, "num_token_union": 73.646, "num_word_context": 202.3064, "num_word_doc": 49.7709, "num_word_query": 39.8096, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5030.5207, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8164, "query_norm": 1.5982, "queue_k_norm": 1.6324, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1936, "sent_len_1": 66.6809, "sent_len_max_0": 128.0, "sent_len_max_1": 210.1375, "stdk": 0.0492, "stdq": 0.0468, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 57000 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.655, "doc_norm": 1.6357, "encoder_q-embeddings": 3741.1943, "encoder_q-layer.0": 2475.7573, "encoder_q-layer.1": 2789.7625, "encoder_q-layer.10": 3835.2769, "encoder_q-layer.11": 7362.8379, "encoder_q-layer.2": 3344.5322, "encoder_q-layer.3": 3652.9026, "encoder_q-layer.4": 4117.9131, "encoder_q-layer.5": 4567.3911, "encoder_q-layer.6": 4737.2686, "encoder_q-layer.7": 4503.895, "encoder_q-layer.8": 4367.6445, "encoder_q-layer.9": 3698.4548, "epoch": 0.56, "inbatch_neg_score": 0.8191, "inbatch_pos_score": 1.5703, "learning_rate": 2.3833333333333334e-05, "loss": 2.655, "norm_diff": 0.0201, "norm_loss": 0.0, "num_token_doc": 66.7601, "num_token_overlap": 17.8155, "num_token_query": 52.2436, "num_token_union": 73.6461, "num_word_context": 202.1082, "num_word_doc": 49.8277, "num_word_query": 39.8599, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6164.3383, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8198, "query_norm": 1.6156, "queue_k_norm": 1.6325, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2436, "sent_len_1": 66.7601, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9225, "stdk": 0.0494, "stdq": 0.0475, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 57100 }, { "accuracy": 62.0117, "active_queue_size": 16384.0, "cl_loss": 2.6672, "doc_norm": 1.6311, "encoder_q-embeddings": 2626.7039, "encoder_q-layer.0": 1670.6125, "encoder_q-layer.1": 1855.1165, "encoder_q-layer.10": 3517.9893, "encoder_q-layer.11": 6584.7764, "encoder_q-layer.2": 2225.5413, "encoder_q-layer.3": 2302.2527, "encoder_q-layer.4": 2488.7285, "encoder_q-layer.5": 2517.1804, "encoder_q-layer.6": 2872.9487, "encoder_q-layer.7": 3147.7764, "encoder_q-layer.8": 3728.9326, "encoder_q-layer.9": 3296.3289, "epoch": 0.56, "inbatch_neg_score": 0.8136, "inbatch_pos_score": 1.5664, "learning_rate": 2.377777777777778e-05, "loss": 2.6672, "norm_diff": 0.0434, "norm_loss": 0.0, "num_token_doc": 66.6744, "num_token_overlap": 17.8062, "num_token_query": 52.1711, "num_token_union": 73.6118, "num_word_context": 202.1971, "num_word_doc": 49.7966, "num_word_query": 39.7813, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4634.784, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.814, "query_norm": 1.5877, "queue_k_norm": 1.6319, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1711, "sent_len_1": 66.6744, "sent_len_max_0": 128.0, "sent_len_max_1": 205.1138, "stdk": 0.0492, "stdq": 0.0463, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 57200 }, { "accuracy": 61.7676, "active_queue_size": 16384.0, "cl_loss": 2.6623, "doc_norm": 1.6338, "encoder_q-embeddings": 3786.1604, "encoder_q-layer.0": 2553.9983, "encoder_q-layer.1": 2956.7932, "encoder_q-layer.10": 3056.7449, "encoder_q-layer.11": 6414.7373, "encoder_q-layer.2": 3611.481, "encoder_q-layer.3": 3782.4849, "encoder_q-layer.4": 4273.2739, "encoder_q-layer.5": 4263.7476, "encoder_q-layer.6": 4363.4243, "encoder_q-layer.7": 4307.8184, "encoder_q-layer.8": 3970.0112, "encoder_q-layer.9": 3279.4854, "epoch": 0.56, "inbatch_neg_score": 0.8207, "inbatch_pos_score": 1.5664, "learning_rate": 2.3722222222222222e-05, "loss": 2.6623, "norm_diff": 0.0325, "norm_loss": 0.0, "num_token_doc": 66.8805, "num_token_overlap": 17.8035, "num_token_query": 52.2434, "num_token_union": 73.7474, "num_word_context": 202.2451, "num_word_doc": 49.8923, "num_word_query": 39.8349, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5864.5359, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8203, "query_norm": 1.6013, "queue_k_norm": 1.6337, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2434, "sent_len_1": 66.8805, "sent_len_max_0": 128.0, "sent_len_max_1": 210.2837, "stdk": 0.0493, "stdq": 0.0468, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 57300 }, { "accuracy": 59.6191, "active_queue_size": 16384.0, "cl_loss": 2.6742, "doc_norm": 1.6347, "encoder_q-embeddings": 2723.7146, "encoder_q-layer.0": 1719.8859, "encoder_q-layer.1": 1905.8898, "encoder_q-layer.10": 3726.8042, "encoder_q-layer.11": 6813.9614, "encoder_q-layer.2": 2144.2241, "encoder_q-layer.3": 2317.2388, "encoder_q-layer.4": 2494.1936, "encoder_q-layer.5": 2675.3386, "encoder_q-layer.6": 3051.0759, "encoder_q-layer.7": 3453.2288, "encoder_q-layer.8": 3827.4539, "encoder_q-layer.9": 3490.7432, "epoch": 0.56, "inbatch_neg_score": 0.8166, "inbatch_pos_score": 1.5664, "learning_rate": 2.3666666666666668e-05, "loss": 2.6742, "norm_diff": 0.0326, "norm_loss": 0.0, "num_token_doc": 66.7672, "num_token_overlap": 17.76, "num_token_query": 52.1721, "num_token_union": 73.7214, "num_word_context": 202.5587, "num_word_doc": 49.8562, "num_word_query": 39.7853, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4841.3746, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8179, "query_norm": 1.6021, "queue_k_norm": 1.6356, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1721, "sent_len_1": 66.7672, "sent_len_max_0": 128.0, "sent_len_max_1": 205.825, "stdk": 0.0493, "stdq": 0.0468, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 57400 }, { "accuracy": 63.623, "active_queue_size": 16384.0, "cl_loss": 2.6568, "doc_norm": 1.6379, "encoder_q-embeddings": 2583.0801, "encoder_q-layer.0": 1698.8981, "encoder_q-layer.1": 1844.4354, "encoder_q-layer.10": 3149.3206, "encoder_q-layer.11": 6383.8018, "encoder_q-layer.2": 2156.8596, "encoder_q-layer.3": 2371.6763, "encoder_q-layer.4": 2424.5159, "encoder_q-layer.5": 2541.8254, "encoder_q-layer.6": 2878.6685, "encoder_q-layer.7": 3130.241, "encoder_q-layer.8": 3585.0667, "encoder_q-layer.9": 3234.7375, "epoch": 0.56, "inbatch_neg_score": 0.822, "inbatch_pos_score": 1.5908, "learning_rate": 2.361111111111111e-05, "loss": 2.6568, "norm_diff": 0.0288, "norm_loss": 0.0, "num_token_doc": 66.5049, "num_token_overlap": 17.7848, "num_token_query": 52.2561, "num_token_union": 73.5262, "num_word_context": 202.023, "num_word_doc": 49.6324, "num_word_query": 39.8431, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4601.6235, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8237, "query_norm": 1.6092, "queue_k_norm": 1.6348, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2561, "sent_len_1": 66.5049, "sent_len_max_0": 128.0, "sent_len_max_1": 207.595, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 57500 }, { "accuracy": 59.9121, "active_queue_size": 16384.0, "cl_loss": 2.648, "doc_norm": 1.6372, "encoder_q-embeddings": 3071.4507, "encoder_q-layer.0": 2008.5121, "encoder_q-layer.1": 2241.667, "encoder_q-layer.10": 3297.311, "encoder_q-layer.11": 7091.1982, "encoder_q-layer.2": 2753.8015, "encoder_q-layer.3": 2926.5562, "encoder_q-layer.4": 3264.5469, "encoder_q-layer.5": 3314.2715, "encoder_q-layer.6": 3673.7717, "encoder_q-layer.7": 3730.2744, "encoder_q-layer.8": 3863.8198, "encoder_q-layer.9": 3284.5442, "epoch": 0.56, "inbatch_neg_score": 0.8279, "inbatch_pos_score": 1.5586, "learning_rate": 2.3555555555555556e-05, "loss": 2.648, "norm_diff": 0.0407, "norm_loss": 0.0, "num_token_doc": 66.892, "num_token_overlap": 17.8675, "num_token_query": 52.3362, "num_token_union": 73.7669, "num_word_context": 202.3658, "num_word_doc": 49.8676, "num_word_query": 39.9295, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5344.8598, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8271, "query_norm": 1.5966, "queue_k_norm": 1.6362, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3362, "sent_len_1": 66.892, "sent_len_max_0": 128.0, "sent_len_max_1": 209.9613, "stdk": 0.0494, "stdq": 0.0463, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 57600 }, { "accuracy": 60.0098, "active_queue_size": 16384.0, "cl_loss": 2.6621, "doc_norm": 1.6345, "encoder_q-embeddings": 3665.8291, "encoder_q-layer.0": 2382.2864, "encoder_q-layer.1": 2746.125, "encoder_q-layer.10": 3588.2087, "encoder_q-layer.11": 7087.4106, "encoder_q-layer.2": 3222.1042, "encoder_q-layer.3": 3517.6252, "encoder_q-layer.4": 3752.2197, "encoder_q-layer.5": 3697.5977, "encoder_q-layer.6": 4213.9966, "encoder_q-layer.7": 4594.144, "encoder_q-layer.8": 4606.6665, "encoder_q-layer.9": 3605.6057, "epoch": 0.56, "inbatch_neg_score": 0.8268, "inbatch_pos_score": 1.5771, "learning_rate": 2.35e-05, "loss": 2.6621, "norm_diff": 0.0237, "norm_loss": 0.0, "num_token_doc": 66.7823, "num_token_overlap": 17.7376, "num_token_query": 52.1246, "num_token_union": 73.6771, "num_word_context": 202.2017, "num_word_doc": 49.822, "num_word_query": 39.7595, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5953.9028, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8252, "query_norm": 1.6108, "queue_k_norm": 1.6365, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1246, "sent_len_1": 66.7823, "sent_len_max_0": 128.0, "sent_len_max_1": 210.72, "stdk": 0.0492, "stdq": 0.047, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 57700 }, { "accuracy": 61.5723, "active_queue_size": 16384.0, "cl_loss": 2.6589, "doc_norm": 1.6398, "encoder_q-embeddings": 2743.5537, "encoder_q-layer.0": 1827.3715, "encoder_q-layer.1": 2175.3013, "encoder_q-layer.10": 3412.3027, "encoder_q-layer.11": 7102.9854, "encoder_q-layer.2": 2459.0098, "encoder_q-layer.3": 2774.9031, "encoder_q-layer.4": 3251.0403, "encoder_q-layer.5": 3148.3035, "encoder_q-layer.6": 3291.1765, "encoder_q-layer.7": 3601.9243, "encoder_q-layer.8": 3771.6045, "encoder_q-layer.9": 3352.9666, "epoch": 0.56, "inbatch_neg_score": 0.8303, "inbatch_pos_score": 1.582, "learning_rate": 2.3444444444444448e-05, "loss": 2.6589, "norm_diff": 0.0333, "norm_loss": 0.0, "num_token_doc": 66.766, "num_token_overlap": 17.7925, "num_token_query": 52.2355, "num_token_union": 73.6548, "num_word_context": 202.467, "num_word_doc": 49.8175, "num_word_query": 39.8243, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5113.3053, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8306, "query_norm": 1.6066, "queue_k_norm": 1.6356, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2355, "sent_len_1": 66.766, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3088, "stdk": 0.0494, "stdq": 0.0467, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 57800 }, { "accuracy": 60.1074, "active_queue_size": 16384.0, "cl_loss": 2.6715, "doc_norm": 1.6377, "encoder_q-embeddings": 1436.8973, "encoder_q-layer.0": 891.6205, "encoder_q-layer.1": 967.6231, "encoder_q-layer.10": 1746.0762, "encoder_q-layer.11": 3459.887, "encoder_q-layer.2": 1123.6086, "encoder_q-layer.3": 1214.2239, "encoder_q-layer.4": 1266.7954, "encoder_q-layer.5": 1360.6826, "encoder_q-layer.6": 1494.4602, "encoder_q-layer.7": 1639.0372, "encoder_q-layer.8": 1887.7881, "encoder_q-layer.9": 1763.0267, "epoch": 0.57, "inbatch_neg_score": 0.8337, "inbatch_pos_score": 1.5889, "learning_rate": 2.338888888888889e-05, "loss": 2.6715, "norm_diff": 0.022, "norm_loss": 0.0, "num_token_doc": 66.8403, "num_token_overlap": 17.8232, "num_token_query": 52.284, "num_token_union": 73.7264, "num_word_context": 202.4124, "num_word_doc": 49.8692, "num_word_query": 39.8778, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2445.3406, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8345, "query_norm": 1.6157, "queue_k_norm": 1.6362, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.284, "sent_len_1": 66.8403, "sent_len_max_0": 128.0, "sent_len_max_1": 209.635, "stdk": 0.0493, "stdq": 0.0469, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 57900 }, { "accuracy": 62.6465, "active_queue_size": 16384.0, "cl_loss": 2.6396, "doc_norm": 1.6404, "encoder_q-embeddings": 1199.2616, "encoder_q-layer.0": 778.1227, "encoder_q-layer.1": 849.4104, "encoder_q-layer.10": 1671.9935, "encoder_q-layer.11": 3459.4995, "encoder_q-layer.2": 998.5901, "encoder_q-layer.3": 1055.6835, "encoder_q-layer.4": 1215.0277, "encoder_q-layer.5": 1276.323, "encoder_q-layer.6": 1393.9021, "encoder_q-layer.7": 1581.9347, "encoder_q-layer.8": 1724.4545, "encoder_q-layer.9": 1605.7367, "epoch": 0.57, "inbatch_neg_score": 0.8336, "inbatch_pos_score": 1.5977, "learning_rate": 2.3333333333333336e-05, "loss": 2.6396, "norm_diff": 0.0287, "norm_loss": 0.0, "num_token_doc": 66.8471, "num_token_overlap": 17.8661, "num_token_query": 52.3765, "num_token_union": 73.7602, "num_word_context": 202.3221, "num_word_doc": 49.8906, "num_word_query": 39.9421, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2313.6466, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.835, "query_norm": 1.6118, "queue_k_norm": 1.637, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3765, "sent_len_1": 66.8471, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7537, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 58000 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.6504, "doc_norm": 1.6363, "encoder_q-embeddings": 1372.0891, "encoder_q-layer.0": 864.1129, "encoder_q-layer.1": 947.7045, "encoder_q-layer.10": 1819.1676, "encoder_q-layer.11": 3649.8491, "encoder_q-layer.2": 1069.9435, "encoder_q-layer.3": 1195.8568, "encoder_q-layer.4": 1293.0477, "encoder_q-layer.5": 1396.8381, "encoder_q-layer.6": 1520.8992, "encoder_q-layer.7": 1743.7147, "encoder_q-layer.8": 1983.1868, "encoder_q-layer.9": 1709.5496, "epoch": 0.57, "inbatch_neg_score": 0.8453, "inbatch_pos_score": 1.5732, "learning_rate": 2.3277777777777778e-05, "loss": 2.6504, "norm_diff": 0.0134, "norm_loss": 0.0, "num_token_doc": 66.8057, "num_token_overlap": 17.8315, "num_token_query": 52.1877, "num_token_union": 73.6518, "num_word_context": 202.2123, "num_word_doc": 49.8704, "num_word_query": 39.7846, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2541.2661, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8457, "query_norm": 1.6229, "queue_k_norm": 1.6374, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1877, "sent_len_1": 66.8057, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0637, "stdk": 0.0492, "stdq": 0.047, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 58100 }, { "accuracy": 61.6699, "active_queue_size": 16384.0, "cl_loss": 2.653, "doc_norm": 1.6411, "encoder_q-embeddings": 2385.1794, "encoder_q-layer.0": 1797.7509, "encoder_q-layer.1": 2286.1116, "encoder_q-layer.10": 1687.1097, "encoder_q-layer.11": 3554.3779, "encoder_q-layer.2": 2638.3171, "encoder_q-layer.3": 2762.5061, "encoder_q-layer.4": 3424.748, "encoder_q-layer.5": 4086.7131, "encoder_q-layer.6": 3917.1631, "encoder_q-layer.7": 3352.6746, "encoder_q-layer.8": 2912.113, "encoder_q-layer.9": 1934.0541, "epoch": 0.57, "inbatch_neg_score": 0.8509, "inbatch_pos_score": 1.6055, "learning_rate": 2.3222222222222224e-05, "loss": 2.653, "norm_diff": 0.0146, "norm_loss": 0.0, "num_token_doc": 66.8589, "num_token_overlap": 17.8193, "num_token_query": 52.2446, "num_token_union": 73.7149, "num_word_context": 202.417, "num_word_doc": 49.8829, "num_word_query": 39.8495, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4285.224, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8501, "query_norm": 1.6276, "queue_k_norm": 1.6407, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2446, "sent_len_1": 66.8589, "sent_len_max_0": 128.0, "sent_len_max_1": 210.5938, "stdk": 0.0493, "stdq": 0.047, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 58200 }, { "accuracy": 61.5234, "active_queue_size": 16384.0, "cl_loss": 2.6565, "doc_norm": 1.6399, "encoder_q-embeddings": 1336.1584, "encoder_q-layer.0": 929.1226, "encoder_q-layer.1": 1038.4507, "encoder_q-layer.10": 1779.1489, "encoder_q-layer.11": 3501.3904, "encoder_q-layer.2": 1147.9331, "encoder_q-layer.3": 1250.1022, "encoder_q-layer.4": 1342.5033, "encoder_q-layer.5": 1397.9791, "encoder_q-layer.6": 1598.9762, "encoder_q-layer.7": 1777.0465, "encoder_q-layer.8": 2048.6318, "encoder_q-layer.9": 1733.1174, "epoch": 0.57, "inbatch_neg_score": 0.8575, "inbatch_pos_score": 1.6045, "learning_rate": 2.3166666666666666e-05, "loss": 2.6565, "norm_diff": 0.0154, "norm_loss": 0.0, "num_token_doc": 66.7058, "num_token_overlap": 17.8097, "num_token_query": 52.2523, "num_token_union": 73.653, "num_word_context": 202.079, "num_word_doc": 49.7907, "num_word_query": 39.8341, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2498.3408, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8569, "query_norm": 1.6267, "queue_k_norm": 1.6403, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2523, "sent_len_1": 66.7058, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6175, "stdk": 0.0493, "stdq": 0.0467, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 58300 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.6593, "doc_norm": 1.6449, "encoder_q-embeddings": 2476.1855, "encoder_q-layer.0": 1636.6941, "encoder_q-layer.1": 1846.845, "encoder_q-layer.10": 1713.2133, "encoder_q-layer.11": 3425.4585, "encoder_q-layer.2": 2119.3354, "encoder_q-layer.3": 2305.9829, "encoder_q-layer.4": 2306.0596, "encoder_q-layer.5": 2222.5964, "encoder_q-layer.6": 2372.2827, "encoder_q-layer.7": 2290.7786, "encoder_q-layer.8": 2292.4854, "encoder_q-layer.9": 1866.6898, "epoch": 0.57, "inbatch_neg_score": 0.8583, "inbatch_pos_score": 1.6113, "learning_rate": 2.3111111111111112e-05, "loss": 2.6593, "norm_diff": 0.0159, "norm_loss": 0.0, "num_token_doc": 66.7342, "num_token_overlap": 17.8084, "num_token_query": 52.32, "num_token_union": 73.715, "num_word_context": 202.4612, "num_word_doc": 49.7911, "num_word_query": 39.8853, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3360.9166, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8594, "query_norm": 1.6289, "queue_k_norm": 1.6419, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.32, "sent_len_1": 66.7342, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6413, "stdk": 0.0495, "stdq": 0.0467, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 58400 }, { "accuracy": 62.7441, "active_queue_size": 16384.0, "cl_loss": 2.6473, "doc_norm": 1.6441, "encoder_q-embeddings": 1501.756, "encoder_q-layer.0": 997.9383, "encoder_q-layer.1": 1109.825, "encoder_q-layer.10": 1768.2163, "encoder_q-layer.11": 3447.1667, "encoder_q-layer.2": 1279.6329, "encoder_q-layer.3": 1452.999, "encoder_q-layer.4": 1559.2993, "encoder_q-layer.5": 1580.3137, "encoder_q-layer.6": 1704.7753, "encoder_q-layer.7": 1763.7349, "encoder_q-layer.8": 2002.8552, "encoder_q-layer.9": 1696.382, "epoch": 0.57, "inbatch_neg_score": 0.873, "inbatch_pos_score": 1.627, "learning_rate": 2.3055555555555558e-05, "loss": 2.6473, "norm_diff": 0.0105, "norm_loss": 0.0, "num_token_doc": 67.0007, "num_token_overlap": 17.8744, "num_token_query": 52.3801, "num_token_union": 73.84, "num_word_context": 202.4554, "num_word_doc": 49.9938, "num_word_query": 39.9521, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2603.4682, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.874, "query_norm": 1.6418, "queue_k_norm": 1.6434, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3801, "sent_len_1": 67.0007, "sent_len_max_0": 128.0, "sent_len_max_1": 210.1575, "stdk": 0.0494, "stdq": 0.0468, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 58500 }, { "accuracy": 59.2285, "active_queue_size": 16384.0, "cl_loss": 2.6544, "doc_norm": 1.6438, "encoder_q-embeddings": 1422.1776, "encoder_q-layer.0": 933.8613, "encoder_q-layer.1": 1073.4652, "encoder_q-layer.10": 1677.1476, "encoder_q-layer.11": 3594.262, "encoder_q-layer.2": 1237.1389, "encoder_q-layer.3": 1325.921, "encoder_q-layer.4": 1357.4126, "encoder_q-layer.5": 1409.8921, "encoder_q-layer.6": 1531.8149, "encoder_q-layer.7": 1638.3375, "encoder_q-layer.8": 1874.7454, "encoder_q-layer.9": 1628.1104, "epoch": 0.57, "inbatch_neg_score": 0.8833, "inbatch_pos_score": 1.6094, "learning_rate": 2.3000000000000003e-05, "loss": 2.6544, "norm_diff": 0.0115, "norm_loss": 0.0, "num_token_doc": 66.7134, "num_token_overlap": 17.8222, "num_token_query": 52.2802, "num_token_union": 73.6767, "num_word_context": 202.2163, "num_word_doc": 49.7722, "num_word_query": 39.8863, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2525.3515, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8823, "query_norm": 1.6359, "queue_k_norm": 1.6443, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2802, "sent_len_1": 66.7134, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6488, "stdk": 0.0493, "stdq": 0.0465, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 58600 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.6612, "doc_norm": 1.6463, "encoder_q-embeddings": 1350.3093, "encoder_q-layer.0": 862.9537, "encoder_q-layer.1": 949.937, "encoder_q-layer.10": 1686.7759, "encoder_q-layer.11": 3485.0166, "encoder_q-layer.2": 1055.5475, "encoder_q-layer.3": 1111.1132, "encoder_q-layer.4": 1214.7828, "encoder_q-layer.5": 1277.6851, "encoder_q-layer.6": 1429.6592, "encoder_q-layer.7": 1633.4922, "encoder_q-layer.8": 1969.5107, "encoder_q-layer.9": 1700.9163, "epoch": 0.57, "inbatch_neg_score": 0.8932, "inbatch_pos_score": 1.623, "learning_rate": 2.2944444444444446e-05, "loss": 2.6612, "norm_diff": 0.0071, "norm_loss": 0.0, "num_token_doc": 66.7875, "num_token_overlap": 17.7937, "num_token_query": 52.1774, "num_token_union": 73.6854, "num_word_context": 202.0671, "num_word_doc": 49.7933, "num_word_query": 39.783, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2410.1957, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8936, "query_norm": 1.6467, "queue_k_norm": 1.6455, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1774, "sent_len_1": 66.7875, "sent_len_max_0": 128.0, "sent_len_max_1": 209.17, "stdk": 0.0494, "stdq": 0.0467, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 58700 }, { "accuracy": 60.0098, "active_queue_size": 16384.0, "cl_loss": 2.6488, "doc_norm": 1.65, "encoder_q-embeddings": 2186.9131, "encoder_q-layer.0": 1532.7968, "encoder_q-layer.1": 1694.3885, "encoder_q-layer.10": 1617.6456, "encoder_q-layer.11": 3454.1538, "encoder_q-layer.2": 2120.6511, "encoder_q-layer.3": 2295.2576, "encoder_q-layer.4": 2524.5908, "encoder_q-layer.5": 2577.603, "encoder_q-layer.6": 2740.0112, "encoder_q-layer.7": 2680.8655, "encoder_q-layer.8": 2588.1003, "encoder_q-layer.9": 1807.4089, "epoch": 0.57, "inbatch_neg_score": 0.8974, "inbatch_pos_score": 1.6309, "learning_rate": 2.288888888888889e-05, "loss": 2.6488, "norm_diff": 0.0122, "norm_loss": 0.0, "num_token_doc": 66.9668, "num_token_overlap": 17.8786, "num_token_query": 52.3588, "num_token_union": 73.8256, "num_word_context": 202.4132, "num_word_doc": 49.936, "num_word_query": 39.952, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3481.9369, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8975, "query_norm": 1.6378, "queue_k_norm": 1.6485, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3588, "sent_len_1": 66.9668, "sent_len_max_0": 128.0, "sent_len_max_1": 210.72, "stdk": 0.0494, "stdq": 0.0464, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 58800 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.6616, "doc_norm": 1.6498, "encoder_q-embeddings": 1660.1165, "encoder_q-layer.0": 1147.1329, "encoder_q-layer.1": 1280.9548, "encoder_q-layer.10": 1717.4163, "encoder_q-layer.11": 3659.7866, "encoder_q-layer.2": 1390.8309, "encoder_q-layer.3": 1426.4839, "encoder_q-layer.4": 1707.4905, "encoder_q-layer.5": 1741.2213, "encoder_q-layer.6": 1943.4908, "encoder_q-layer.7": 2127.8313, "encoder_q-layer.8": 2447.0977, "encoder_q-layer.9": 1803.4089, "epoch": 0.58, "inbatch_neg_score": 0.9112, "inbatch_pos_score": 1.6475, "learning_rate": 2.2833333333333334e-05, "loss": 2.6616, "norm_diff": 0.0074, "norm_loss": 0.0, "num_token_doc": 66.9056, "num_token_overlap": 17.8209, "num_token_query": 52.2838, "num_token_union": 73.74, "num_word_context": 202.5997, "num_word_doc": 49.9178, "num_word_query": 39.8781, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2902.7949, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9116, "query_norm": 1.6441, "queue_k_norm": 1.6499, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2838, "sent_len_1": 66.9056, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3887, "stdk": 0.0493, "stdq": 0.0465, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 58900 }, { "accuracy": 61.377, "active_queue_size": 16384.0, "cl_loss": 2.6492, "doc_norm": 1.6512, "encoder_q-embeddings": 3399.3425, "encoder_q-layer.0": 2356.4036, "encoder_q-layer.1": 2594.7708, "encoder_q-layer.10": 1808.9434, "encoder_q-layer.11": 3397.8921, "encoder_q-layer.2": 3170.9587, "encoder_q-layer.3": 3411.0037, "encoder_q-layer.4": 3802.886, "encoder_q-layer.5": 3646.2427, "encoder_q-layer.6": 3468.415, "encoder_q-layer.7": 3004.3252, "encoder_q-layer.8": 2510.1199, "encoder_q-layer.9": 1830.0591, "epoch": 0.58, "inbatch_neg_score": 0.9152, "inbatch_pos_score": 1.6631, "learning_rate": 2.277777777777778e-05, "loss": 2.6492, "norm_diff": 0.0213, "norm_loss": 0.0, "num_token_doc": 66.8547, "num_token_overlap": 17.8428, "num_token_query": 52.3603, "num_token_union": 73.8078, "num_word_context": 202.4168, "num_word_doc": 49.8722, "num_word_query": 39.9436, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4496.8797, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9146, "query_norm": 1.6299, "queue_k_norm": 1.6514, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3603, "sent_len_1": 66.8547, "sent_len_max_0": 128.0, "sent_len_max_1": 209.9762, "stdk": 0.0493, "stdq": 0.0459, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 59000 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.6615, "doc_norm": 1.6522, "encoder_q-embeddings": 2060.2698, "encoder_q-layer.0": 1475.1338, "encoder_q-layer.1": 1678.0861, "encoder_q-layer.10": 1815.802, "encoder_q-layer.11": 3674.0454, "encoder_q-layer.2": 1981.8773, "encoder_q-layer.3": 2183.9578, "encoder_q-layer.4": 2097.834, "encoder_q-layer.5": 2100.4221, "encoder_q-layer.6": 2225.6655, "encoder_q-layer.7": 2235.8716, "encoder_q-layer.8": 2230.5801, "encoder_q-layer.9": 1821.302, "epoch": 0.58, "inbatch_neg_score": 0.9143, "inbatch_pos_score": 1.6494, "learning_rate": 2.2722222222222222e-05, "loss": 2.6615, "norm_diff": 0.023, "norm_loss": 0.0, "num_token_doc": 66.7153, "num_token_overlap": 17.784, "num_token_query": 52.1932, "num_token_union": 73.6777, "num_word_context": 202.2718, "num_word_doc": 49.8016, "num_word_query": 39.7932, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3209.9733, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.915, "query_norm": 1.6292, "queue_k_norm": 1.6534, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1932, "sent_len_1": 66.7153, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4925, "stdk": 0.0493, "stdq": 0.0461, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 59100 }, { "accuracy": 61.8652, "active_queue_size": 16384.0, "cl_loss": 2.6499, "doc_norm": 1.655, "encoder_q-embeddings": 1634.5342, "encoder_q-layer.0": 1128.8782, "encoder_q-layer.1": 1284.2352, "encoder_q-layer.10": 1822.5602, "encoder_q-layer.11": 3629.5688, "encoder_q-layer.2": 1528.9609, "encoder_q-layer.3": 1664.2939, "encoder_q-layer.4": 1813.7224, "encoder_q-layer.5": 1869.2307, "encoder_q-layer.6": 2006.3124, "encoder_q-layer.7": 1913.7876, "encoder_q-layer.8": 2055.5698, "encoder_q-layer.9": 1790.5286, "epoch": 0.58, "inbatch_neg_score": 0.9157, "inbatch_pos_score": 1.6621, "learning_rate": 2.2666666666666668e-05, "loss": 2.6499, "norm_diff": 0.0256, "norm_loss": 0.0, "num_token_doc": 66.675, "num_token_overlap": 17.8333, "num_token_query": 52.4211, "num_token_union": 73.7378, "num_word_context": 202.23, "num_word_doc": 49.7433, "num_word_query": 39.9717, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2819.9698, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.915, "query_norm": 1.6294, "queue_k_norm": 1.6545, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4211, "sent_len_1": 66.675, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3363, "stdk": 0.0493, "stdq": 0.0463, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 59200 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.6627, "doc_norm": 1.656, "encoder_q-embeddings": 1503.8795, "encoder_q-layer.0": 998.2018, "encoder_q-layer.1": 1095.6953, "encoder_q-layer.10": 1775.0303, "encoder_q-layer.11": 3721.064, "encoder_q-layer.2": 1226.5665, "encoder_q-layer.3": 1306.9198, "encoder_q-layer.4": 1378.0594, "encoder_q-layer.5": 1418.7902, "encoder_q-layer.6": 1588.1018, "encoder_q-layer.7": 1672.5795, "encoder_q-layer.8": 1951.5466, "encoder_q-layer.9": 1690.2784, "epoch": 0.58, "inbatch_neg_score": 0.9192, "inbatch_pos_score": 1.6494, "learning_rate": 2.2611111111111113e-05, "loss": 2.6627, "norm_diff": 0.0352, "norm_loss": 0.0, "num_token_doc": 66.8974, "num_token_overlap": 17.8029, "num_token_query": 52.2606, "num_token_union": 73.7828, "num_word_context": 202.5485, "num_word_doc": 49.9509, "num_word_query": 39.8487, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2599.5431, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9194, "query_norm": 1.6208, "queue_k_norm": 1.6545, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2606, "sent_len_1": 66.8974, "sent_len_max_0": 128.0, "sent_len_max_1": 208.27, "stdk": 0.0493, "stdq": 0.046, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 59300 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.6525, "doc_norm": 1.6579, "encoder_q-embeddings": 1529.1449, "encoder_q-layer.0": 962.6792, "encoder_q-layer.1": 1084.274, "encoder_q-layer.10": 1912.5242, "encoder_q-layer.11": 3689.5186, "encoder_q-layer.2": 1198.7483, "encoder_q-layer.3": 1291.2312, "encoder_q-layer.4": 1406.6619, "encoder_q-layer.5": 1469.6757, "encoder_q-layer.6": 1674.0286, "encoder_q-layer.7": 1794.2328, "encoder_q-layer.8": 2227.3159, "encoder_q-layer.9": 1858.7858, "epoch": 0.58, "inbatch_neg_score": 0.9151, "inbatch_pos_score": 1.6562, "learning_rate": 2.255555555555556e-05, "loss": 2.6525, "norm_diff": 0.0249, "norm_loss": 0.0, "num_token_doc": 66.8096, "num_token_overlap": 17.886, "num_token_query": 52.4047, "num_token_union": 73.7643, "num_word_context": 202.3204, "num_word_doc": 49.8698, "num_word_query": 39.972, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2664.1137, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.915, "query_norm": 1.6331, "queue_k_norm": 1.6573, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4047, "sent_len_1": 66.8096, "sent_len_max_0": 128.0, "sent_len_max_1": 206.9675, "stdk": 0.0493, "stdq": 0.0468, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 59400 }, { "accuracy": 59.8145, "active_queue_size": 16384.0, "cl_loss": 2.6537, "doc_norm": 1.6589, "encoder_q-embeddings": 1586.1161, "encoder_q-layer.0": 1053.4463, "encoder_q-layer.1": 1189.2393, "encoder_q-layer.10": 1697.3977, "encoder_q-layer.11": 3536.4641, "encoder_q-layer.2": 1368.7428, "encoder_q-layer.3": 1487.1555, "encoder_q-layer.4": 1618.8192, "encoder_q-layer.5": 1654.8665, "encoder_q-layer.6": 1789.1321, "encoder_q-layer.7": 1900.5857, "encoder_q-layer.8": 1953.4445, "encoder_q-layer.9": 1739.1881, "epoch": 0.58, "inbatch_neg_score": 0.9149, "inbatch_pos_score": 1.6523, "learning_rate": 2.25e-05, "loss": 2.6537, "norm_diff": 0.0334, "norm_loss": 0.0, "num_token_doc": 66.7704, "num_token_overlap": 17.8132, "num_token_query": 52.3343, "num_token_union": 73.7099, "num_word_context": 202.3365, "num_word_doc": 49.8169, "num_word_query": 39.9146, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2689.399, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9155, "query_norm": 1.6256, "queue_k_norm": 1.6591, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3343, "sent_len_1": 66.7704, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7475, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 59500 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.6496, "doc_norm": 1.6577, "encoder_q-embeddings": 3362.1082, "encoder_q-layer.0": 2301.7415, "encoder_q-layer.1": 2583.7278, "encoder_q-layer.10": 1854.6688, "encoder_q-layer.11": 3861.3142, "encoder_q-layer.2": 3214.1152, "encoder_q-layer.3": 3439.7095, "encoder_q-layer.4": 3833.4675, "encoder_q-layer.5": 4151.0483, "encoder_q-layer.6": 3681.0862, "encoder_q-layer.7": 3570.2693, "encoder_q-layer.8": 2815.4187, "encoder_q-layer.9": 2164.6763, "epoch": 0.58, "inbatch_neg_score": 0.9061, "inbatch_pos_score": 1.6426, "learning_rate": 2.2444444444444447e-05, "loss": 2.6496, "norm_diff": 0.0381, "norm_loss": 0.0, "num_token_doc": 66.6638, "num_token_overlap": 17.8012, "num_token_query": 52.3229, "num_token_union": 73.711, "num_word_context": 202.2539, "num_word_doc": 49.7549, "num_word_query": 39.916, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4718.7542, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9067, "query_norm": 1.6196, "queue_k_norm": 1.6596, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3229, "sent_len_1": 66.6638, "sent_len_max_0": 128.0, "sent_len_max_1": 206.0062, "stdk": 0.0493, "stdq": 0.0466, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 59600 }, { "accuracy": 61.7676, "active_queue_size": 16384.0, "cl_loss": 2.6713, "doc_norm": 1.6593, "encoder_q-embeddings": 1435.6263, "encoder_q-layer.0": 927.8817, "encoder_q-layer.1": 1019.0005, "encoder_q-layer.10": 1613.2831, "encoder_q-layer.11": 3484.5161, "encoder_q-layer.2": 1194.15, "encoder_q-layer.3": 1264.8282, "encoder_q-layer.4": 1390.2903, "encoder_q-layer.5": 1481.2076, "encoder_q-layer.6": 1659.6886, "encoder_q-layer.7": 1684.4141, "encoder_q-layer.8": 1854.4984, "encoder_q-layer.9": 1651.203, "epoch": 0.58, "inbatch_neg_score": 0.9031, "inbatch_pos_score": 1.6523, "learning_rate": 2.238888888888889e-05, "loss": 2.6713, "norm_diff": 0.0415, "norm_loss": 0.0, "num_token_doc": 66.6755, "num_token_overlap": 17.8118, "num_token_query": 52.381, "num_token_union": 73.6879, "num_word_context": 202.2043, "num_word_doc": 49.7699, "num_word_query": 39.9548, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2507.6998, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9038, "query_norm": 1.6178, "queue_k_norm": 1.6606, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.381, "sent_len_1": 66.6755, "sent_len_max_0": 128.0, "sent_len_max_1": 207.3212, "stdk": 0.0493, "stdq": 0.0465, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 59700 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.6628, "doc_norm": 1.659, "encoder_q-embeddings": 1383.1021, "encoder_q-layer.0": 899.5039, "encoder_q-layer.1": 998.8337, "encoder_q-layer.10": 1926.3986, "encoder_q-layer.11": 3740.6531, "encoder_q-layer.2": 1149.3888, "encoder_q-layer.3": 1234.9691, "encoder_q-layer.4": 1356.7252, "encoder_q-layer.5": 1440.5608, "encoder_q-layer.6": 1610.3242, "encoder_q-layer.7": 1884.5627, "encoder_q-layer.8": 2208.7219, "encoder_q-layer.9": 1956.5469, "epoch": 0.58, "inbatch_neg_score": 0.905, "inbatch_pos_score": 1.6416, "learning_rate": 2.2333333333333335e-05, "loss": 2.6628, "norm_diff": 0.0444, "norm_loss": 0.0, "num_token_doc": 66.737, "num_token_overlap": 17.7913, "num_token_query": 52.2919, "num_token_union": 73.7163, "num_word_context": 202.4511, "num_word_doc": 49.8198, "num_word_query": 39.8956, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2620.0113, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9053, "query_norm": 1.6146, "queue_k_norm": 1.6606, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2919, "sent_len_1": 66.737, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8413, "stdk": 0.0493, "stdq": 0.0465, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 59800 }, { "accuracy": 59.4238, "active_queue_size": 16384.0, "cl_loss": 2.6636, "doc_norm": 1.6575, "encoder_q-embeddings": 2858.458, "encoder_q-layer.0": 1794.2362, "encoder_q-layer.1": 2012.1431, "encoder_q-layer.10": 3597.4417, "encoder_q-layer.11": 7196.4087, "encoder_q-layer.2": 2266.9856, "encoder_q-layer.3": 2357.4036, "encoder_q-layer.4": 2625.3247, "encoder_q-layer.5": 2647.0847, "encoder_q-layer.6": 2955.5791, "encoder_q-layer.7": 3265.1316, "encoder_q-layer.8": 3880.0493, "encoder_q-layer.9": 3544.0459, "epoch": 0.58, "inbatch_neg_score": 0.9059, "inbatch_pos_score": 1.6387, "learning_rate": 2.2277777777777778e-05, "loss": 2.6636, "norm_diff": 0.0408, "norm_loss": 0.0, "num_token_doc": 66.779, "num_token_overlap": 17.8169, "num_token_query": 52.4235, "num_token_union": 73.7578, "num_word_context": 202.4824, "num_word_doc": 49.8059, "num_word_query": 39.9684, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5025.6341, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9048, "query_norm": 1.6168, "queue_k_norm": 1.6602, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4235, "sent_len_1": 66.779, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6213, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 59900 }, { "accuracy": 59.1309, "active_queue_size": 16384.0, "cl_loss": 2.6441, "doc_norm": 1.6592, "encoder_q-embeddings": 3050.1936, "encoder_q-layer.0": 2067.1497, "encoder_q-layer.1": 2309.24, "encoder_q-layer.10": 3467.7251, "encoder_q-layer.11": 7221.3604, "encoder_q-layer.2": 2703.9246, "encoder_q-layer.3": 2902.5312, "encoder_q-layer.4": 3139.0049, "encoder_q-layer.5": 3181.7422, "encoder_q-layer.6": 3403.8757, "encoder_q-layer.7": 3834.1526, "encoder_q-layer.8": 3895.0891, "encoder_q-layer.9": 3475.1157, "epoch": 0.59, "inbatch_neg_score": 0.9005, "inbatch_pos_score": 1.6299, "learning_rate": 2.2222222222222223e-05, "loss": 2.6441, "norm_diff": 0.0551, "norm_loss": 0.0, "num_token_doc": 66.685, "num_token_overlap": 17.7956, "num_token_query": 52.3618, "num_token_union": 73.7168, "num_word_context": 202.2992, "num_word_doc": 49.7413, "num_word_query": 39.9347, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5362.1497, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9009, "query_norm": 1.6042, "queue_k_norm": 1.6622, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3618, "sent_len_1": 66.685, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4538, "stdk": 0.0492, "stdq": 0.046, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 60000 }, { "dev_runtime": 26.8961, "dev_samples_per_second": 1.19, "dev_steps_per_second": 0.037, "epoch": 0.59, "step": 60000, "test_accuracy": 93.85986328125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3351326882839203, "test_doc_norm": 1.6306490898132324, "test_inbatch_neg_score": 1.1829142570495605, "test_inbatch_pos_score": 2.181324005126953, "test_loss": 0.3351326882839203, "test_loss_align": 1.0014610290527344, "test_loss_unif": 2.44789457321167, "test_loss_unif_q@queue": 2.44789457321167, "test_norm_diff": 0.009095370769500732, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.895216703414917, "test_query_norm": 1.6386017799377441, "test_queue_k_norm": 1.6619563102722168, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0437331423163414, "test_stdq": 0.04330170154571533, "test_stdqueue_k": 0.04940096661448479, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.8961, "dev_samples_per_second": 1.19, "dev_steps_per_second": 0.037, "epoch": 0.59, "eval_beir-arguana_ndcg@10": 0.36484, "eval_beir-arguana_recall@10": 0.62091, "eval_beir-arguana_recall@100": 0.92745, "eval_beir-arguana_recall@20": 0.77027, "eval_beir-avg_ndcg@10": 0.3762105, "eval_beir-avg_recall@10": 0.44667575, "eval_beir-avg_recall@100": 0.6319665, "eval_beir-avg_recall@20": 0.5059663333333333, "eval_beir-cqadupstack_ndcg@10": 0.279615, "eval_beir-cqadupstack_recall@10": 0.3772975, "eval_beir-cqadupstack_recall@100": 0.607885, "eval_beir-cqadupstack_recall@20": 0.4450233333333335, "eval_beir-fiqa_ndcg@10": 0.25146, "eval_beir-fiqa_recall@10": 0.31535, "eval_beir-fiqa_recall@100": 0.59577, "eval_beir-fiqa_recall@20": 0.38421, "eval_beir-nfcorpus_ndcg@10": 0.28772, "eval_beir-nfcorpus_recall@10": 0.14178, "eval_beir-nfcorpus_recall@100": 0.27638, "eval_beir-nfcorpus_recall@20": 0.17383, "eval_beir-nq_ndcg@10": 0.27688, "eval_beir-nq_recall@10": 0.45027, "eval_beir-nq_recall@100": 0.79707, "eval_beir-nq_recall@20": 0.56786, "eval_beir-quora_ndcg@10": 0.77059, "eval_beir-quora_recall@10": 0.88081, "eval_beir-quora_recall@100": 0.97704, "eval_beir-quora_recall@20": 0.92477, "eval_beir-scidocs_ndcg@10": 0.15993, "eval_beir-scidocs_recall@10": 0.16648, "eval_beir-scidocs_recall@100": 0.37488, "eval_beir-scidocs_recall@20": 0.22448, "eval_beir-scifact_ndcg@10": 0.63981, "eval_beir-scifact_recall@10": 0.80467, "eval_beir-scifact_recall@100": 0.90656, "eval_beir-scifact_recall@20": 0.84411, "eval_beir-trec-covid_ndcg@10": 0.51446, "eval_beir-trec-covid_recall@10": 0.556, "eval_beir-trec-covid_recall@100": 0.414, "eval_beir-trec-covid_recall@20": 0.52, "eval_beir-webis-touche2020_ndcg@10": 0.2168, "eval_beir-webis-touche2020_recall@10": 0.15319, "eval_beir-webis-touche2020_recall@100": 0.44263, "eval_beir-webis-touche2020_recall@20": 0.20511, "eval_senteval-avg_sts": 0.7449305378614951, "eval_senteval-sickr_spearman": 0.731282164993524, "eval_senteval-stsb_spearman": 0.7585789107294661, "step": 60000, "test_accuracy": 93.85986328125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3351326882839203, "test_doc_norm": 1.6306490898132324, "test_inbatch_neg_score": 1.1829142570495605, "test_inbatch_pos_score": 2.181324005126953, "test_loss": 0.3351326882839203, "test_loss_align": 1.0014610290527344, "test_loss_unif": 2.44789457321167, "test_loss_unif_q@queue": 2.44789457321167, "test_norm_diff": 0.009095370769500732, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.895216703414917, "test_query_norm": 1.6386017799377441, "test_queue_k_norm": 1.6619563102722168, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0437331423163414, "test_stdq": 0.04330170154571533, "test_stdqueue_k": 0.04940096661448479, "test_stdqueue_q": 0.0 }, { "accuracy": 61.1816, "active_queue_size": 16384.0, "cl_loss": 2.6545, "doc_norm": 1.6619, "encoder_q-embeddings": 2717.4875, "encoder_q-layer.0": 1740.1362, "encoder_q-layer.1": 1894.8118, "encoder_q-layer.10": 3435.2314, "encoder_q-layer.11": 6796.5669, "encoder_q-layer.2": 2202.4668, "encoder_q-layer.3": 2305.5767, "encoder_q-layer.4": 2463.1414, "encoder_q-layer.5": 2563.4062, "encoder_q-layer.6": 2917.949, "encoder_q-layer.7": 3115.0139, "encoder_q-layer.8": 3735.8142, "encoder_q-layer.9": 3327.7463, "epoch": 0.59, "inbatch_neg_score": 0.8979, "inbatch_pos_score": 1.6465, "learning_rate": 2.216666666666667e-05, "loss": 2.6545, "norm_diff": 0.0359, "norm_loss": 0.0, "num_token_doc": 66.7056, "num_token_overlap": 17.7738, "num_token_query": 52.2405, "num_token_union": 73.6939, "num_word_context": 202.2372, "num_word_doc": 49.7439, "num_word_query": 39.8168, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4802.25, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8994, "query_norm": 1.626, "queue_k_norm": 1.6601, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2405, "sent_len_1": 66.7056, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4638, "stdk": 0.0494, "stdq": 0.047, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 60100 }, { "accuracy": 58.9355, "active_queue_size": 16384.0, "cl_loss": 2.6464, "doc_norm": 1.661, "encoder_q-embeddings": 2908.938, "encoder_q-layer.0": 1835.7886, "encoder_q-layer.1": 2033.2238, "encoder_q-layer.10": 3381.4797, "encoder_q-layer.11": 7305.8589, "encoder_q-layer.2": 2281.989, "encoder_q-layer.3": 2466.5981, "encoder_q-layer.4": 2606.845, "encoder_q-layer.5": 2812.811, "encoder_q-layer.6": 3099.7031, "encoder_q-layer.7": 3392.1311, "encoder_q-layer.8": 3912.0205, "encoder_q-layer.9": 3481.377, "epoch": 0.59, "inbatch_neg_score": 0.8929, "inbatch_pos_score": 1.6289, "learning_rate": 2.211111111111111e-05, "loss": 2.6464, "norm_diff": 0.0557, "norm_loss": 0.0, "num_token_doc": 66.8587, "num_token_overlap": 17.8102, "num_token_query": 52.385, "num_token_union": 73.8374, "num_word_context": 202.6425, "num_word_doc": 49.86, "num_word_query": 39.9571, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5091.4818, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.894, "query_norm": 1.6053, "queue_k_norm": 1.6611, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.385, "sent_len_1": 66.8587, "sent_len_max_0": 128.0, "sent_len_max_1": 209.9387, "stdk": 0.0493, "stdq": 0.0462, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 60200 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.6533, "doc_norm": 1.6612, "encoder_q-embeddings": 1367.361, "encoder_q-layer.0": 872.858, "encoder_q-layer.1": 977.0065, "encoder_q-layer.10": 1625.9948, "encoder_q-layer.11": 3354.3728, "encoder_q-layer.2": 1112.7527, "encoder_q-layer.3": 1183.5455, "encoder_q-layer.4": 1284.5912, "encoder_q-layer.5": 1298.7626, "encoder_q-layer.6": 1441.4471, "encoder_q-layer.7": 1612.5388, "encoder_q-layer.8": 1849.4138, "encoder_q-layer.9": 1646.7511, "epoch": 0.59, "inbatch_neg_score": 0.8916, "inbatch_pos_score": 1.6338, "learning_rate": 2.2055555555555557e-05, "loss": 2.6533, "norm_diff": 0.0457, "norm_loss": 0.0, "num_token_doc": 66.7916, "num_token_overlap": 17.7564, "num_token_query": 52.2939, "num_token_union": 73.7677, "num_word_context": 202.2917, "num_word_doc": 49.8009, "num_word_query": 39.8784, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2383.1517, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8911, "query_norm": 1.6155, "queue_k_norm": 1.6604, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2939, "sent_len_1": 66.7916, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6175, "stdk": 0.0493, "stdq": 0.0466, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 60300 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 2.6575, "doc_norm": 1.6607, "encoder_q-embeddings": 1317.4448, "encoder_q-layer.0": 844.2365, "encoder_q-layer.1": 905.2194, "encoder_q-layer.10": 1774.8646, "encoder_q-layer.11": 3462.3506, "encoder_q-layer.2": 1040.8158, "encoder_q-layer.3": 1085.8552, "encoder_q-layer.4": 1147.7991, "encoder_q-layer.5": 1181.3157, "encoder_q-layer.6": 1373.2153, "encoder_q-layer.7": 1523.3939, "encoder_q-layer.8": 1972.9862, "encoder_q-layer.9": 1773.8477, "epoch": 0.59, "inbatch_neg_score": 0.8923, "inbatch_pos_score": 1.626, "learning_rate": 2.2000000000000003e-05, "loss": 2.6575, "norm_diff": 0.0422, "norm_loss": 0.0, "num_token_doc": 66.6973, "num_token_overlap": 17.8155, "num_token_query": 52.3606, "num_token_union": 73.6867, "num_word_context": 202.2634, "num_word_doc": 49.7382, "num_word_query": 39.9426, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2388.3673, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8921, "query_norm": 1.6186, "queue_k_norm": 1.6601, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3606, "sent_len_1": 66.6973, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3325, "stdk": 0.0493, "stdq": 0.0467, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 60400 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6425, "doc_norm": 1.6623, "encoder_q-embeddings": 1278.4752, "encoder_q-layer.0": 815.3339, "encoder_q-layer.1": 892.5793, "encoder_q-layer.10": 1987.8975, "encoder_q-layer.11": 3665.3748, "encoder_q-layer.2": 1026.3584, "encoder_q-layer.3": 1078.9023, "encoder_q-layer.4": 1169.6838, "encoder_q-layer.5": 1254.5055, "encoder_q-layer.6": 1411.2561, "encoder_q-layer.7": 1604.7609, "encoder_q-layer.8": 1970.1947, "encoder_q-layer.9": 1831.8971, "epoch": 0.59, "inbatch_neg_score": 0.8903, "inbatch_pos_score": 1.6533, "learning_rate": 2.1944444444444445e-05, "loss": 2.6425, "norm_diff": 0.0363, "norm_loss": 0.0, "num_token_doc": 66.8337, "num_token_overlap": 17.858, "num_token_query": 52.2882, "num_token_union": 73.7496, "num_word_context": 202.5148, "num_word_doc": 49.876, "num_word_query": 39.8571, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2417.9063, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8901, "query_norm": 1.6259, "queue_k_norm": 1.6633, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2882, "sent_len_1": 66.8337, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5325, "stdk": 0.0493, "stdq": 0.0472, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 60500 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.6463, "doc_norm": 1.6571, "encoder_q-embeddings": 1548.1935, "encoder_q-layer.0": 1010.0787, "encoder_q-layer.1": 1122.3955, "encoder_q-layer.10": 1668.0089, "encoder_q-layer.11": 3311.0977, "encoder_q-layer.2": 1315.8539, "encoder_q-layer.3": 1403.9551, "encoder_q-layer.4": 1642.2317, "encoder_q-layer.5": 1720.9246, "encoder_q-layer.6": 1761.2246, "encoder_q-layer.7": 1757.7256, "encoder_q-layer.8": 1900.1047, "encoder_q-layer.9": 1627.4984, "epoch": 0.59, "inbatch_neg_score": 0.8873, "inbatch_pos_score": 1.6367, "learning_rate": 2.188888888888889e-05, "loss": 2.6463, "norm_diff": 0.0436, "norm_loss": 0.0, "num_token_doc": 66.7762, "num_token_overlap": 17.8743, "num_token_query": 52.4001, "num_token_union": 73.7284, "num_word_context": 202.4435, "num_word_doc": 49.8559, "num_word_query": 39.9983, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2606.2183, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8877, "query_norm": 1.6136, "queue_k_norm": 1.6631, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4001, "sent_len_1": 66.7762, "sent_len_max_0": 128.0, "sent_len_max_1": 209.85, "stdk": 0.0491, "stdq": 0.0466, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 60600 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.6457, "doc_norm": 1.6607, "encoder_q-embeddings": 1454.8251, "encoder_q-layer.0": 928.0139, "encoder_q-layer.1": 995.3632, "encoder_q-layer.10": 1622.593, "encoder_q-layer.11": 3389.2227, "encoder_q-layer.2": 1159.0575, "encoder_q-layer.3": 1231.2424, "encoder_q-layer.4": 1379.8234, "encoder_q-layer.5": 1307.4803, "encoder_q-layer.6": 1473.6619, "encoder_q-layer.7": 1560.598, "encoder_q-layer.8": 1876.6013, "encoder_q-layer.9": 1627.7537, "epoch": 0.59, "inbatch_neg_score": 0.8929, "inbatch_pos_score": 1.627, "learning_rate": 2.1833333333333333e-05, "loss": 2.6457, "norm_diff": 0.0465, "norm_loss": 0.0, "num_token_doc": 66.636, "num_token_overlap": 17.7905, "num_token_query": 52.23, "num_token_union": 73.5735, "num_word_context": 202.0353, "num_word_doc": 49.7125, "num_word_query": 39.8262, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2450.7933, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8936, "query_norm": 1.6143, "queue_k_norm": 1.6609, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.23, "sent_len_1": 66.636, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3862, "stdk": 0.0493, "stdq": 0.0465, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 60700 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6473, "doc_norm": 1.6621, "encoder_q-embeddings": 1397.2527, "encoder_q-layer.0": 864.6264, "encoder_q-layer.1": 949.1155, "encoder_q-layer.10": 1718.6006, "encoder_q-layer.11": 3528.1079, "encoder_q-layer.2": 1086.3013, "encoder_q-layer.3": 1163.0887, "encoder_q-layer.4": 1251.0385, "encoder_q-layer.5": 1342.6477, "encoder_q-layer.6": 1475.4526, "encoder_q-layer.7": 1678.0101, "encoder_q-layer.8": 1979.7888, "encoder_q-layer.9": 1714.6718, "epoch": 0.59, "inbatch_neg_score": 0.8954, "inbatch_pos_score": 1.6514, "learning_rate": 2.177777777777778e-05, "loss": 2.6473, "norm_diff": 0.037, "norm_loss": 0.0, "num_token_doc": 66.5105, "num_token_overlap": 17.7771, "num_token_query": 52.219, "num_token_union": 73.5388, "num_word_context": 201.9853, "num_word_doc": 49.6201, "num_word_query": 39.8267, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2503.2812, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.895, "query_norm": 1.6251, "queue_k_norm": 1.6628, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.219, "sent_len_1": 66.5105, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1987, "stdk": 0.0493, "stdq": 0.0469, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 60800 }, { "accuracy": 58.9355, "active_queue_size": 16384.0, "cl_loss": 2.6356, "doc_norm": 1.6595, "encoder_q-embeddings": 1197.6935, "encoder_q-layer.0": 789.3269, "encoder_q-layer.1": 869.0938, "encoder_q-layer.10": 1746.5681, "encoder_q-layer.11": 3482.355, "encoder_q-layer.2": 971.4757, "encoder_q-layer.3": 1018.2077, "encoder_q-layer.4": 1097.2765, "encoder_q-layer.5": 1167.9696, "encoder_q-layer.6": 1305.9457, "encoder_q-layer.7": 1525.3419, "encoder_q-layer.8": 1816.4479, "encoder_q-layer.9": 1672.1565, "epoch": 0.59, "inbatch_neg_score": 0.9008, "inbatch_pos_score": 1.626, "learning_rate": 2.1722222222222225e-05, "loss": 2.6356, "norm_diff": 0.0458, "norm_loss": 0.0, "num_token_doc": 66.7618, "num_token_overlap": 17.8232, "num_token_query": 52.3055, "num_token_union": 73.6617, "num_word_context": 202.1947, "num_word_doc": 49.8005, "num_word_query": 39.8932, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2328.6317, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9009, "query_norm": 1.6137, "queue_k_norm": 1.6628, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3055, "sent_len_1": 66.7618, "sent_len_max_0": 128.0, "sent_len_max_1": 210.6975, "stdk": 0.0492, "stdq": 0.0462, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 60900 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.638, "doc_norm": 1.6645, "encoder_q-embeddings": 1462.9106, "encoder_q-layer.0": 944.9917, "encoder_q-layer.1": 1085.8899, "encoder_q-layer.10": 1890.1346, "encoder_q-layer.11": 3713.0906, "encoder_q-layer.2": 1281.4307, "encoder_q-layer.3": 1380.6108, "encoder_q-layer.4": 1550.7059, "encoder_q-layer.5": 1672.8727, "encoder_q-layer.6": 1762.3684, "encoder_q-layer.7": 1913.8844, "encoder_q-layer.8": 2088.187, "encoder_q-layer.9": 1729.0157, "epoch": 0.6, "inbatch_neg_score": 0.9055, "inbatch_pos_score": 1.6641, "learning_rate": 2.1666666666666667e-05, "loss": 2.638, "norm_diff": 0.0416, "norm_loss": 0.0, "num_token_doc": 66.7102, "num_token_overlap": 17.8192, "num_token_query": 52.2727, "num_token_union": 73.6679, "num_word_context": 202.0859, "num_word_doc": 49.7572, "num_word_query": 39.8852, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2669.8087, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9053, "query_norm": 1.6229, "queue_k_norm": 1.663, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2727, "sent_len_1": 66.7102, "sent_len_max_0": 128.0, "sent_len_max_1": 207.625, "stdk": 0.0494, "stdq": 0.0465, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 61000 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.6541, "doc_norm": 1.6654, "encoder_q-embeddings": 1310.2439, "encoder_q-layer.0": 817.3109, "encoder_q-layer.1": 898.463, "encoder_q-layer.10": 1689.4218, "encoder_q-layer.11": 3542.9624, "encoder_q-layer.2": 1023.4065, "encoder_q-layer.3": 1082.3805, "encoder_q-layer.4": 1143.2606, "encoder_q-layer.5": 1223.6027, "encoder_q-layer.6": 1331.5826, "encoder_q-layer.7": 1490.3824, "encoder_q-layer.8": 1815.3682, "encoder_q-layer.9": 1636.7367, "epoch": 0.6, "inbatch_neg_score": 0.9099, "inbatch_pos_score": 1.6436, "learning_rate": 2.1611111111111113e-05, "loss": 2.6541, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.722, "num_token_overlap": 17.7881, "num_token_query": 52.2233, "num_token_union": 73.6719, "num_word_context": 202.4029, "num_word_doc": 49.8041, "num_word_query": 39.8363, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2392.6861, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9097, "query_norm": 1.6174, "queue_k_norm": 1.6639, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2233, "sent_len_1": 66.722, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9175, "stdk": 0.0494, "stdq": 0.0461, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 61100 }, { "accuracy": 60.3027, "active_queue_size": 16384.0, "cl_loss": 2.6646, "doc_norm": 1.6658, "encoder_q-embeddings": 1413.1624, "encoder_q-layer.0": 900.9772, "encoder_q-layer.1": 1010.8566, "encoder_q-layer.10": 1972.1078, "encoder_q-layer.11": 3614.8225, "encoder_q-layer.2": 1179.9371, "encoder_q-layer.3": 1245.4218, "encoder_q-layer.4": 1384.2882, "encoder_q-layer.5": 1436.896, "encoder_q-layer.6": 1592.0837, "encoder_q-layer.7": 1722.5276, "encoder_q-layer.8": 2078.199, "encoder_q-layer.9": 1853.3418, "epoch": 0.6, "inbatch_neg_score": 0.912, "inbatch_pos_score": 1.6611, "learning_rate": 2.1555555555555555e-05, "loss": 2.6646, "norm_diff": 0.0267, "norm_loss": 0.0, "num_token_doc": 66.5865, "num_token_overlap": 17.6879, "num_token_query": 52.185, "num_token_union": 73.6357, "num_word_context": 201.8684, "num_word_doc": 49.6995, "num_word_query": 39.7837, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2563.1101, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9116, "query_norm": 1.6391, "queue_k_norm": 1.663, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.185, "sent_len_1": 66.5865, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8537, "stdk": 0.0494, "stdq": 0.047, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 61200 }, { "accuracy": 60.1074, "active_queue_size": 16384.0, "cl_loss": 2.6313, "doc_norm": 1.6655, "encoder_q-embeddings": 1942.327, "encoder_q-layer.0": 1293.653, "encoder_q-layer.1": 1469.2495, "encoder_q-layer.10": 1734.8879, "encoder_q-layer.11": 3452.6248, "encoder_q-layer.2": 1747.6895, "encoder_q-layer.3": 1922.4573, "encoder_q-layer.4": 2119.5508, "encoder_q-layer.5": 2223.6038, "encoder_q-layer.6": 2337.3992, "encoder_q-layer.7": 2153.3306, "encoder_q-layer.8": 2024.0135, "encoder_q-layer.9": 1765.1931, "epoch": 0.6, "inbatch_neg_score": 0.915, "inbatch_pos_score": 1.6621, "learning_rate": 2.15e-05, "loss": 2.6313, "norm_diff": 0.0196, "norm_loss": 0.0, "num_token_doc": 66.8365, "num_token_overlap": 17.8056, "num_token_query": 52.289, "num_token_union": 73.7351, "num_word_context": 202.2675, "num_word_doc": 49.8518, "num_word_query": 39.8706, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3013.0343, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9146, "query_norm": 1.6459, "queue_k_norm": 1.6646, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.289, "sent_len_1": 66.8365, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8425, "stdk": 0.0494, "stdq": 0.0471, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 61300 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 2.6528, "doc_norm": 1.6696, "encoder_q-embeddings": 1190.6613, "encoder_q-layer.0": 789.7278, "encoder_q-layer.1": 847.8373, "encoder_q-layer.10": 1656.533, "encoder_q-layer.11": 3555.7173, "encoder_q-layer.2": 927.6312, "encoder_q-layer.3": 997.3341, "encoder_q-layer.4": 1076.7943, "encoder_q-layer.5": 1081.2548, "encoder_q-layer.6": 1235.5022, "encoder_q-layer.7": 1466.6315, "encoder_q-layer.8": 1824.1587, "encoder_q-layer.9": 1674.9301, "epoch": 0.6, "inbatch_neg_score": 0.9243, "inbatch_pos_score": 1.6758, "learning_rate": 2.1444444444444443e-05, "loss": 2.6528, "norm_diff": 0.0338, "norm_loss": 0.0, "num_token_doc": 66.7542, "num_token_overlap": 17.812, "num_token_query": 52.2722, "num_token_union": 73.6938, "num_word_context": 202.1865, "num_word_doc": 49.798, "num_word_query": 39.8833, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2312.4456, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9243, "query_norm": 1.6358, "queue_k_norm": 1.6648, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2722, "sent_len_1": 66.7542, "sent_len_max_0": 128.0, "sent_len_max_1": 207.04, "stdk": 0.0496, "stdq": 0.0464, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 61400 }, { "accuracy": 62.6953, "active_queue_size": 16384.0, "cl_loss": 2.646, "doc_norm": 1.6643, "encoder_q-embeddings": 1646.7096, "encoder_q-layer.0": 1065.616, "encoder_q-layer.1": 1189.3873, "encoder_q-layer.10": 1698.4387, "encoder_q-layer.11": 3405.4282, "encoder_q-layer.2": 1352.1323, "encoder_q-layer.3": 1418.3463, "encoder_q-layer.4": 1577.2048, "encoder_q-layer.5": 1607.907, "encoder_q-layer.6": 1560.9188, "encoder_q-layer.7": 1572.7482, "encoder_q-layer.8": 1791.1165, "encoder_q-layer.9": 1634.4156, "epoch": 0.6, "inbatch_neg_score": 0.9281, "inbatch_pos_score": 1.6904, "learning_rate": 2.138888888888889e-05, "loss": 2.646, "norm_diff": 0.0085, "norm_loss": 0.0, "num_token_doc": 66.8654, "num_token_overlap": 17.7949, "num_token_query": 52.2426, "num_token_union": 73.7237, "num_word_context": 202.3421, "num_word_doc": 49.8671, "num_word_query": 39.832, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2556.4935, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9277, "query_norm": 1.6707, "queue_k_norm": 1.6665, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2426, "sent_len_1": 66.8654, "sent_len_max_0": 128.0, "sent_len_max_1": 210.7, "stdk": 0.0493, "stdq": 0.0479, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 61500 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.6456, "doc_norm": 1.6658, "encoder_q-embeddings": 1406.0359, "encoder_q-layer.0": 915.7458, "encoder_q-layer.1": 1029.9865, "encoder_q-layer.10": 1663.032, "encoder_q-layer.11": 3320.5676, "encoder_q-layer.2": 1247.6171, "encoder_q-layer.3": 1267.6844, "encoder_q-layer.4": 1293.6659, "encoder_q-layer.5": 1318.5576, "encoder_q-layer.6": 1389.8258, "encoder_q-layer.7": 1419.538, "encoder_q-layer.8": 1831.2675, "encoder_q-layer.9": 1588.4089, "epoch": 0.6, "inbatch_neg_score": 0.9353, "inbatch_pos_score": 1.6768, "learning_rate": 2.1333333333333335e-05, "loss": 2.6456, "norm_diff": 0.0197, "norm_loss": 0.0, "num_token_doc": 66.7065, "num_token_overlap": 17.7772, "num_token_query": 52.3386, "num_token_union": 73.7253, "num_word_context": 202.3389, "num_word_doc": 49.7601, "num_word_query": 39.9041, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2383.4597, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.936, "query_norm": 1.6461, "queue_k_norm": 1.6679, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3386, "sent_len_1": 66.7065, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7312, "stdk": 0.0493, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 61600 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 2.6381, "doc_norm": 1.6654, "encoder_q-embeddings": 1383.2506, "encoder_q-layer.0": 894.6412, "encoder_q-layer.1": 963.4472, "encoder_q-layer.10": 1734.371, "encoder_q-layer.11": 3587.8726, "encoder_q-layer.2": 1116.5684, "encoder_q-layer.3": 1187.3387, "encoder_q-layer.4": 1236.9165, "encoder_q-layer.5": 1264.8243, "encoder_q-layer.6": 1438.1693, "encoder_q-layer.7": 1594.4646, "encoder_q-layer.8": 1837.7853, "encoder_q-layer.9": 1716.4257, "epoch": 0.6, "inbatch_neg_score": 0.9399, "inbatch_pos_score": 1.6631, "learning_rate": 2.127777777777778e-05, "loss": 2.6381, "norm_diff": 0.0187, "norm_loss": 0.0, "num_token_doc": 66.7678, "num_token_overlap": 17.7858, "num_token_query": 52.304, "num_token_union": 73.7475, "num_word_context": 202.4961, "num_word_doc": 49.8334, "num_word_query": 39.8942, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2469.7059, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9404, "query_norm": 1.6488, "queue_k_norm": 1.6678, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.304, "sent_len_1": 66.7678, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8212, "stdk": 0.0493, "stdq": 0.0467, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 61700 }, { "accuracy": 61.5723, "active_queue_size": 16384.0, "cl_loss": 2.6447, "doc_norm": 1.6697, "encoder_q-embeddings": 1281.0607, "encoder_q-layer.0": 821.9438, "encoder_q-layer.1": 899.2127, "encoder_q-layer.10": 1676.2141, "encoder_q-layer.11": 3566.2637, "encoder_q-layer.2": 1017.7379, "encoder_q-layer.3": 1089.7764, "encoder_q-layer.4": 1219.1416, "encoder_q-layer.5": 1264.377, "encoder_q-layer.6": 1480.202, "encoder_q-layer.7": 1629.0857, "encoder_q-layer.8": 1935.1078, "encoder_q-layer.9": 1675.5576, "epoch": 0.6, "inbatch_neg_score": 0.942, "inbatch_pos_score": 1.707, "learning_rate": 2.1222222222222223e-05, "loss": 2.6447, "norm_diff": 0.0186, "norm_loss": 0.0, "num_token_doc": 66.6489, "num_token_overlap": 17.8008, "num_token_query": 52.2451, "num_token_union": 73.6288, "num_word_context": 202.0974, "num_word_doc": 49.741, "num_word_query": 39.8296, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2458.5189, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9419, "query_norm": 1.6511, "queue_k_norm": 1.6693, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2451, "sent_len_1": 66.6489, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3713, "stdk": 0.0494, "stdq": 0.0469, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 61800 }, { "accuracy": 60.2051, "active_queue_size": 16384.0, "cl_loss": 2.6373, "doc_norm": 1.6686, "encoder_q-embeddings": 1254.3447, "encoder_q-layer.0": 832.5255, "encoder_q-layer.1": 916.1412, "encoder_q-layer.10": 1693.5933, "encoder_q-layer.11": 3483.7788, "encoder_q-layer.2": 1053.9064, "encoder_q-layer.3": 1149.604, "encoder_q-layer.4": 1249.2288, "encoder_q-layer.5": 1330.3362, "encoder_q-layer.6": 1441.9193, "encoder_q-layer.7": 1494.8601, "encoder_q-layer.8": 1730.0157, "encoder_q-layer.9": 1607.8143, "epoch": 0.6, "inbatch_neg_score": 0.9551, "inbatch_pos_score": 1.6914, "learning_rate": 2.116666666666667e-05, "loss": 2.6373, "norm_diff": 0.0263, "norm_loss": 0.0, "num_token_doc": 66.8045, "num_token_overlap": 17.8457, "num_token_query": 52.4378, "num_token_union": 73.7853, "num_word_context": 202.3683, "num_word_doc": 49.8419, "num_word_query": 39.984, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2356.5853, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9546, "query_norm": 1.6423, "queue_k_norm": 1.6703, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4378, "sent_len_1": 66.8045, "sent_len_max_0": 128.0, "sent_len_max_1": 209.795, "stdk": 0.0493, "stdq": 0.0463, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 61900 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.6252, "doc_norm": 1.6678, "encoder_q-embeddings": 1360.6989, "encoder_q-layer.0": 912.6629, "encoder_q-layer.1": 1009.5251, "encoder_q-layer.10": 1663.2181, "encoder_q-layer.11": 3433.6924, "encoder_q-layer.2": 1199.6593, "encoder_q-layer.3": 1285.4596, "encoder_q-layer.4": 1338.1614, "encoder_q-layer.5": 1511.2266, "encoder_q-layer.6": 1518.9065, "encoder_q-layer.7": 1518.4393, "encoder_q-layer.8": 1741.1403, "encoder_q-layer.9": 1600.3322, "epoch": 0.61, "inbatch_neg_score": 0.9558, "inbatch_pos_score": 1.7197, "learning_rate": 2.111111111111111e-05, "loss": 2.6252, "norm_diff": 0.0084, "norm_loss": 0.0, "num_token_doc": 66.7247, "num_token_overlap": 17.7858, "num_token_query": 52.2662, "num_token_union": 73.6887, "num_word_context": 202.2196, "num_word_doc": 49.8032, "num_word_query": 39.8749, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2434.8828, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9546, "query_norm": 1.6629, "queue_k_norm": 1.6709, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2662, "sent_len_1": 66.7247, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5687, "stdk": 0.0492, "stdq": 0.0475, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 62000 }, { "accuracy": 60.2051, "active_queue_size": 16384.0, "cl_loss": 2.6344, "doc_norm": 1.6714, "encoder_q-embeddings": 1397.0548, "encoder_q-layer.0": 857.4065, "encoder_q-layer.1": 959.9512, "encoder_q-layer.10": 1795.309, "encoder_q-layer.11": 3622.2559, "encoder_q-layer.2": 1107.3247, "encoder_q-layer.3": 1177.6846, "encoder_q-layer.4": 1315.2274, "encoder_q-layer.5": 1344.2104, "encoder_q-layer.6": 1515.3307, "encoder_q-layer.7": 1660.0717, "encoder_q-layer.8": 2003.3665, "encoder_q-layer.9": 1793.0662, "epoch": 0.61, "inbatch_neg_score": 0.9565, "inbatch_pos_score": 1.7002, "learning_rate": 2.1055555555555556e-05, "loss": 2.6344, "norm_diff": 0.0303, "norm_loss": 0.0, "num_token_doc": 66.6793, "num_token_overlap": 17.7864, "num_token_query": 52.2672, "num_token_union": 73.6813, "num_word_context": 202.15, "num_word_doc": 49.7595, "num_word_query": 39.869, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2508.6826, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9561, "query_norm": 1.6438, "queue_k_norm": 1.6723, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2672, "sent_len_1": 66.6793, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6763, "stdk": 0.0493, "stdq": 0.0467, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 62100 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.6291, "doc_norm": 1.6731, "encoder_q-embeddings": 1459.1338, "encoder_q-layer.0": 945.9893, "encoder_q-layer.1": 1036.6069, "encoder_q-layer.10": 1720.2926, "encoder_q-layer.11": 3461.3872, "encoder_q-layer.2": 1133.8698, "encoder_q-layer.3": 1188.9122, "encoder_q-layer.4": 1270.5109, "encoder_q-layer.5": 1311.7861, "encoder_q-layer.6": 1496.9034, "encoder_q-layer.7": 1684.8828, "encoder_q-layer.8": 1940.5348, "encoder_q-layer.9": 1725.2139, "epoch": 0.61, "inbatch_neg_score": 0.9628, "inbatch_pos_score": 1.7041, "learning_rate": 2.1e-05, "loss": 2.6291, "norm_diff": 0.0218, "norm_loss": 0.0, "num_token_doc": 66.996, "num_token_overlap": 17.8826, "num_token_query": 52.414, "num_token_union": 73.8391, "num_word_context": 202.7911, "num_word_doc": 49.9893, "num_word_query": 40.0041, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2518.675, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9624, "query_norm": 1.6513, "queue_k_norm": 1.6747, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.414, "sent_len_1": 66.996, "sent_len_max_0": 128.0, "sent_len_max_1": 210.1625, "stdk": 0.0494, "stdq": 0.047, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 62200 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.6437, "doc_norm": 1.6767, "encoder_q-embeddings": 2467.4705, "encoder_q-layer.0": 1592.0835, "encoder_q-layer.1": 1793.6331, "encoder_q-layer.10": 3653.3735, "encoder_q-layer.11": 7335.8096, "encoder_q-layer.2": 2068.127, "encoder_q-layer.3": 2195.0283, "encoder_q-layer.4": 2386.0198, "encoder_q-layer.5": 2369.4246, "encoder_q-layer.6": 2731.0898, "encoder_q-layer.7": 3152.5332, "encoder_q-layer.8": 3800.6704, "encoder_q-layer.9": 3468.4363, "epoch": 0.61, "inbatch_neg_score": 0.9599, "inbatch_pos_score": 1.7041, "learning_rate": 2.0944444444444445e-05, "loss": 2.6437, "norm_diff": 0.0449, "norm_loss": 0.0, "num_token_doc": 66.7652, "num_token_overlap": 17.7871, "num_token_query": 52.1837, "num_token_union": 73.6676, "num_word_context": 202.1167, "num_word_doc": 49.7904, "num_word_query": 39.8083, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4880.6546, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9604, "query_norm": 1.6318, "queue_k_norm": 1.6751, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1837, "sent_len_1": 66.7652, "sent_len_max_0": 128.0, "sent_len_max_1": 210.7237, "stdk": 0.0495, "stdq": 0.0463, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 62300 }, { "accuracy": 62.1582, "active_queue_size": 16384.0, "cl_loss": 2.6275, "doc_norm": 1.6782, "encoder_q-embeddings": 2904.313, "encoder_q-layer.0": 1993.4938, "encoder_q-layer.1": 2174.8589, "encoder_q-layer.10": 3403.2815, "encoder_q-layer.11": 6542.5254, "encoder_q-layer.2": 2486.1267, "encoder_q-layer.3": 2625.0769, "encoder_q-layer.4": 2822.605, "encoder_q-layer.5": 2802.9995, "encoder_q-layer.6": 3162.0369, "encoder_q-layer.7": 3600.0837, "encoder_q-layer.8": 3837.7695, "encoder_q-layer.9": 3336.7957, "epoch": 0.61, "inbatch_neg_score": 0.9613, "inbatch_pos_score": 1.7129, "learning_rate": 2.088888888888889e-05, "loss": 2.6275, "norm_diff": 0.0421, "norm_loss": 0.0, "num_token_doc": 66.6933, "num_token_overlap": 17.8575, "num_token_query": 52.3223, "num_token_union": 73.6519, "num_word_context": 202.0303, "num_word_doc": 49.7717, "num_word_query": 39.9077, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4932.2132, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9609, "query_norm": 1.6361, "queue_k_norm": 1.6755, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3223, "sent_len_1": 66.6933, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2962, "stdk": 0.0495, "stdq": 0.0465, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 62400 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 2.6325, "doc_norm": 1.6759, "encoder_q-embeddings": 2338.6812, "encoder_q-layer.0": 1504.8064, "encoder_q-layer.1": 1650.4264, "encoder_q-layer.10": 3707.4329, "encoder_q-layer.11": 7396.0244, "encoder_q-layer.2": 1893.5288, "encoder_q-layer.3": 1987.7249, "encoder_q-layer.4": 2071.6272, "encoder_q-layer.5": 2278.3423, "encoder_q-layer.6": 2566.1519, "encoder_q-layer.7": 3088.7183, "encoder_q-layer.8": 3761.4524, "encoder_q-layer.9": 3646.9199, "epoch": 0.61, "inbatch_neg_score": 0.9651, "inbatch_pos_score": 1.7139, "learning_rate": 2.0833333333333336e-05, "loss": 2.6325, "norm_diff": 0.0346, "norm_loss": 0.0, "num_token_doc": 66.7811, "num_token_overlap": 17.7748, "num_token_query": 52.3023, "num_token_union": 73.7557, "num_word_context": 202.4603, "num_word_doc": 49.8384, "num_word_query": 39.9016, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4745.9085, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9644, "query_norm": 1.6413, "queue_k_norm": 1.6759, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3023, "sent_len_1": 66.7811, "sent_len_max_0": 128.0, "sent_len_max_1": 208.58, "stdk": 0.0494, "stdq": 0.0467, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 62500 }, { "accuracy": 61.6211, "active_queue_size": 16384.0, "cl_loss": 2.6335, "doc_norm": 1.6809, "encoder_q-embeddings": 2734.4221, "encoder_q-layer.0": 1750.205, "encoder_q-layer.1": 1844.1362, "encoder_q-layer.10": 3435.6018, "encoder_q-layer.11": 6801.5781, "encoder_q-layer.2": 2085.7422, "encoder_q-layer.3": 2195.4817, "encoder_q-layer.4": 2454.8967, "encoder_q-layer.5": 2368.8235, "encoder_q-layer.6": 2760.9263, "encoder_q-layer.7": 3045.2722, "encoder_q-layer.8": 3463.991, "encoder_q-layer.9": 3246.1262, "epoch": 0.61, "inbatch_neg_score": 0.9658, "inbatch_pos_score": 1.7217, "learning_rate": 2.077777777777778e-05, "loss": 2.6335, "norm_diff": 0.0381, "norm_loss": 0.0, "num_token_doc": 66.9204, "num_token_overlap": 17.8383, "num_token_query": 52.3199, "num_token_union": 73.8185, "num_word_context": 202.5122, "num_word_doc": 49.9609, "num_word_query": 39.9046, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4739.3787, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9668, "query_norm": 1.6429, "queue_k_norm": 1.6778, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3199, "sent_len_1": 66.9204, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0387, "stdk": 0.0496, "stdq": 0.0468, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 62600 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.6392, "doc_norm": 1.6779, "encoder_q-embeddings": 1886.7509, "encoder_q-layer.0": 1402.4507, "encoder_q-layer.1": 1484.6188, "encoder_q-layer.10": 1743.6453, "encoder_q-layer.11": 3371.0591, "encoder_q-layer.2": 1831.7323, "encoder_q-layer.3": 1976.8859, "encoder_q-layer.4": 2138.5881, "encoder_q-layer.5": 2003.0764, "encoder_q-layer.6": 1857.678, "encoder_q-layer.7": 1892.7134, "encoder_q-layer.8": 1919.0854, "encoder_q-layer.9": 1698.4799, "epoch": 0.61, "inbatch_neg_score": 0.9644, "inbatch_pos_score": 1.6973, "learning_rate": 2.0722222222222224e-05, "loss": 2.6392, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.6903, "num_token_overlap": 17.7884, "num_token_query": 52.3111, "num_token_union": 73.6925, "num_word_context": 202.1184, "num_word_doc": 49.7616, "num_word_query": 39.8925, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2941.6577, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9639, "query_norm": 1.63, "queue_k_norm": 1.6801, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3111, "sent_len_1": 66.6903, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8175, "stdk": 0.0494, "stdq": 0.0463, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 62700 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.6486, "doc_norm": 1.6764, "encoder_q-embeddings": 2821.4971, "encoder_q-layer.0": 1900.4451, "encoder_q-layer.1": 2074.9246, "encoder_q-layer.10": 1770.3768, "encoder_q-layer.11": 3514.1667, "encoder_q-layer.2": 2289.7407, "encoder_q-layer.3": 2396.4858, "encoder_q-layer.4": 2579.7053, "encoder_q-layer.5": 2693.5161, "encoder_q-layer.6": 2683.4668, "encoder_q-layer.7": 2766.3904, "encoder_q-layer.8": 2659.5044, "encoder_q-layer.9": 1974.6628, "epoch": 0.61, "inbatch_neg_score": 0.9689, "inbatch_pos_score": 1.7012, "learning_rate": 2.0666666666666666e-05, "loss": 2.6486, "norm_diff": 0.0471, "norm_loss": 0.0, "num_token_doc": 66.7436, "num_token_overlap": 17.7498, "num_token_query": 52.229, "num_token_union": 73.7159, "num_word_context": 202.311, "num_word_doc": 49.778, "num_word_query": 39.8239, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3778.146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9688, "query_norm": 1.6293, "queue_k_norm": 1.678, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.229, "sent_len_1": 66.7436, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6188, "stdk": 0.0493, "stdq": 0.0461, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 62800 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 2.6347, "doc_norm": 1.6805, "encoder_q-embeddings": 3335.5442, "encoder_q-layer.0": 2277.2583, "encoder_q-layer.1": 2826.0159, "encoder_q-layer.10": 1732.293, "encoder_q-layer.11": 3543.512, "encoder_q-layer.2": 3658.6499, "encoder_q-layer.3": 4007.8088, "encoder_q-layer.4": 4153.7725, "encoder_q-layer.5": 4269.4224, "encoder_q-layer.6": 3735.9756, "encoder_q-layer.7": 2691.2502, "encoder_q-layer.8": 2197.0195, "encoder_q-layer.9": 1751.1443, "epoch": 0.61, "inbatch_neg_score": 0.9661, "inbatch_pos_score": 1.7139, "learning_rate": 2.0611111111111112e-05, "loss": 2.6347, "norm_diff": 0.0481, "norm_loss": 0.0, "num_token_doc": 66.9181, "num_token_overlap": 17.8463, "num_token_query": 52.3594, "num_token_union": 73.8258, "num_word_context": 202.8008, "num_word_doc": 49.9602, "num_word_query": 39.9498, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4670.3737, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9663, "query_norm": 1.6324, "queue_k_norm": 1.679, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3594, "sent_len_1": 66.9181, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7237, "stdk": 0.0495, "stdq": 0.0463, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 62900 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.6332, "doc_norm": 1.6834, "encoder_q-embeddings": 1674.0024, "encoder_q-layer.0": 1066.2526, "encoder_q-layer.1": 1211.7932, "encoder_q-layer.10": 1912.8586, "encoder_q-layer.11": 3837.7922, "encoder_q-layer.2": 1450.0518, "encoder_q-layer.3": 1567.5671, "encoder_q-layer.4": 1746.8235, "encoder_q-layer.5": 1870.0822, "encoder_q-layer.6": 2026.1617, "encoder_q-layer.7": 1756.8392, "encoder_q-layer.8": 1935.514, "encoder_q-layer.9": 1782.8373, "epoch": 0.62, "inbatch_neg_score": 0.9641, "inbatch_pos_score": 1.7148, "learning_rate": 2.0555555555555555e-05, "loss": 2.6332, "norm_diff": 0.0407, "norm_loss": 0.0, "num_token_doc": 66.8546, "num_token_overlap": 17.8529, "num_token_query": 52.3623, "num_token_union": 73.7724, "num_word_context": 202.2997, "num_word_doc": 49.8939, "num_word_query": 39.9302, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2822.735, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9629, "query_norm": 1.6428, "queue_k_norm": 1.6811, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3623, "sent_len_1": 66.8546, "sent_len_max_0": 128.0, "sent_len_max_1": 209.155, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 63000 }, { "accuracy": 60.9863, "active_queue_size": 16384.0, "cl_loss": 2.6351, "doc_norm": 1.6819, "encoder_q-embeddings": 1519.4681, "encoder_q-layer.0": 978.876, "encoder_q-layer.1": 1057.3612, "encoder_q-layer.10": 1785.7078, "encoder_q-layer.11": 3573.1475, "encoder_q-layer.2": 1191.6277, "encoder_q-layer.3": 1273.6465, "encoder_q-layer.4": 1336.468, "encoder_q-layer.5": 1380.1238, "encoder_q-layer.6": 1580.9862, "encoder_q-layer.7": 1619.7137, "encoder_q-layer.8": 1876.1404, "encoder_q-layer.9": 1667.3276, "epoch": 0.62, "inbatch_neg_score": 0.9621, "inbatch_pos_score": 1.7158, "learning_rate": 2.05e-05, "loss": 2.6351, "norm_diff": 0.0499, "norm_loss": 0.0, "num_token_doc": 66.6871, "num_token_overlap": 17.8086, "num_token_query": 52.3433, "num_token_union": 73.6875, "num_word_context": 202.1554, "num_word_doc": 49.7911, "num_word_query": 39.9281, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2521.8477, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9629, "query_norm": 1.632, "queue_k_norm": 1.6801, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3433, "sent_len_1": 66.6871, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2788, "stdk": 0.0495, "stdq": 0.0464, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 63100 }, { "accuracy": 61.9629, "active_queue_size": 16384.0, "cl_loss": 2.6217, "doc_norm": 1.6834, "encoder_q-embeddings": 1521.4626, "encoder_q-layer.0": 1012.2794, "encoder_q-layer.1": 1076.2769, "encoder_q-layer.10": 1634.6188, "encoder_q-layer.11": 3433.4873, "encoder_q-layer.2": 1248.7434, "encoder_q-layer.3": 1356.7808, "encoder_q-layer.4": 1355.3422, "encoder_q-layer.5": 1354.1589, "encoder_q-layer.6": 1465.001, "encoder_q-layer.7": 1536.7867, "encoder_q-layer.8": 1847.6785, "encoder_q-layer.9": 1676.4545, "epoch": 0.62, "inbatch_neg_score": 0.962, "inbatch_pos_score": 1.7168, "learning_rate": 2.0444444444444446e-05, "loss": 2.6217, "norm_diff": 0.0434, "norm_loss": 0.0, "num_token_doc": 66.6114, "num_token_overlap": 17.8279, "num_token_query": 52.4497, "num_token_union": 73.6981, "num_word_context": 202.1908, "num_word_doc": 49.7063, "num_word_query": 40.0282, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2474.4721, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9609, "query_norm": 1.64, "queue_k_norm": 1.6822, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4497, "sent_len_1": 66.6114, "sent_len_max_0": 128.0, "sent_len_max_1": 207.7363, "stdk": 0.0495, "stdq": 0.0468, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 63200 }, { "accuracy": 59.0332, "active_queue_size": 16384.0, "cl_loss": 2.6394, "doc_norm": 1.6802, "encoder_q-embeddings": 1296.5027, "encoder_q-layer.0": 858.817, "encoder_q-layer.1": 954.85, "encoder_q-layer.10": 1720.9504, "encoder_q-layer.11": 3576.3169, "encoder_q-layer.2": 1112.3375, "encoder_q-layer.3": 1244.9509, "encoder_q-layer.4": 1390.3516, "encoder_q-layer.5": 1452.9418, "encoder_q-layer.6": 1584.7828, "encoder_q-layer.7": 1667.5714, "encoder_q-layer.8": 1868.0773, "encoder_q-layer.9": 1675.6649, "epoch": 0.62, "inbatch_neg_score": 0.968, "inbatch_pos_score": 1.7031, "learning_rate": 2.0388888888888892e-05, "loss": 2.6394, "norm_diff": 0.0386, "norm_loss": 0.0, "num_token_doc": 66.8529, "num_token_overlap": 17.788, "num_token_query": 52.3377, "num_token_union": 73.7909, "num_word_context": 202.4345, "num_word_doc": 49.8901, "num_word_query": 39.9344, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2523.1239, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9673, "query_norm": 1.6416, "queue_k_norm": 1.6818, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3377, "sent_len_1": 66.8529, "sent_len_max_0": 128.0, "sent_len_max_1": 210.305, "stdk": 0.0493, "stdq": 0.0467, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 63300 }, { "accuracy": 59.4238, "active_queue_size": 16384.0, "cl_loss": 2.635, "doc_norm": 1.6792, "encoder_q-embeddings": 1256.1029, "encoder_q-layer.0": 825.9707, "encoder_q-layer.1": 904.8288, "encoder_q-layer.10": 1779.886, "encoder_q-layer.11": 3834.8706, "encoder_q-layer.2": 1024.6389, "encoder_q-layer.3": 1073.6267, "encoder_q-layer.4": 1188.0131, "encoder_q-layer.5": 1247.3696, "encoder_q-layer.6": 1451.3268, "encoder_q-layer.7": 1561.9054, "encoder_q-layer.8": 1985.6272, "encoder_q-layer.9": 1813.3965, "epoch": 0.62, "inbatch_neg_score": 0.9706, "inbatch_pos_score": 1.7061, "learning_rate": 2.0333333333333334e-05, "loss": 2.635, "norm_diff": 0.0332, "norm_loss": 0.0, "num_token_doc": 66.6323, "num_token_overlap": 17.8085, "num_token_query": 52.2674, "num_token_union": 73.6117, "num_word_context": 202.4796, "num_word_doc": 49.7321, "num_word_query": 39.8736, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2528.9459, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9702, "query_norm": 1.646, "queue_k_norm": 1.6823, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2674, "sent_len_1": 66.6323, "sent_len_max_0": 128.0, "sent_len_max_1": 207.69, "stdk": 0.0493, "stdq": 0.0468, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 63400 }, { "accuracy": 59.2285, "active_queue_size": 16384.0, "cl_loss": 2.6434, "doc_norm": 1.6811, "encoder_q-embeddings": 1458.2369, "encoder_q-layer.0": 899.5889, "encoder_q-layer.1": 1022.8887, "encoder_q-layer.10": 1784.0457, "encoder_q-layer.11": 3786.4131, "encoder_q-layer.2": 1154.4899, "encoder_q-layer.3": 1204.8748, "encoder_q-layer.4": 1284.2515, "encoder_q-layer.5": 1372.1897, "encoder_q-layer.6": 1519.6472, "encoder_q-layer.7": 1760.6559, "encoder_q-layer.8": 2003.7373, "encoder_q-layer.9": 1831.6718, "epoch": 0.62, "inbatch_neg_score": 0.9702, "inbatch_pos_score": 1.708, "learning_rate": 2.027777777777778e-05, "loss": 2.6434, "norm_diff": 0.0483, "norm_loss": 0.0, "num_token_doc": 66.6959, "num_token_overlap": 17.8369, "num_token_query": 52.2137, "num_token_union": 73.5868, "num_word_context": 202.1113, "num_word_doc": 49.7566, "num_word_query": 39.8177, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2631.5075, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9697, "query_norm": 1.6328, "queue_k_norm": 1.6824, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2137, "sent_len_1": 66.6959, "sent_len_max_0": 128.0, "sent_len_max_1": 211.4787, "stdk": 0.0493, "stdq": 0.0463, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 63500 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.6272, "doc_norm": 1.6855, "encoder_q-embeddings": 1540.4003, "encoder_q-layer.0": 966.8668, "encoder_q-layer.1": 1105.386, "encoder_q-layer.10": 1844.6605, "encoder_q-layer.11": 3722.2466, "encoder_q-layer.2": 1240.4857, "encoder_q-layer.3": 1328.338, "encoder_q-layer.4": 1511.3195, "encoder_q-layer.5": 1487.4811, "encoder_q-layer.6": 1728.5347, "encoder_q-layer.7": 1830.5747, "encoder_q-layer.8": 2021.3234, "encoder_q-layer.9": 1756.1877, "epoch": 0.62, "inbatch_neg_score": 0.9751, "inbatch_pos_score": 1.7158, "learning_rate": 2.0222222222222222e-05, "loss": 2.6272, "norm_diff": 0.0362, "norm_loss": 0.0, "num_token_doc": 66.7543, "num_token_overlap": 17.8336, "num_token_query": 52.3284, "num_token_union": 73.7022, "num_word_context": 202.328, "num_word_doc": 49.8204, "num_word_query": 39.9195, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2675.6707, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9751, "query_norm": 1.6492, "queue_k_norm": 1.6831, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3284, "sent_len_1": 66.7543, "sent_len_max_0": 128.0, "sent_len_max_1": 208.19, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 63600 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.6362, "doc_norm": 1.6853, "encoder_q-embeddings": 1576.8156, "encoder_q-layer.0": 1061.368, "encoder_q-layer.1": 1184.5071, "encoder_q-layer.10": 1826.8241, "encoder_q-layer.11": 3632.1875, "encoder_q-layer.2": 1396.8153, "encoder_q-layer.3": 1547.3066, "encoder_q-layer.4": 1751.3175, "encoder_q-layer.5": 1661.816, "encoder_q-layer.6": 1766.8075, "encoder_q-layer.7": 1898.3911, "encoder_q-layer.8": 1931.3455, "encoder_q-layer.9": 1705.7279, "epoch": 0.62, "inbatch_neg_score": 0.9747, "inbatch_pos_score": 1.7217, "learning_rate": 2.0166666666666668e-05, "loss": 2.6362, "norm_diff": 0.0325, "norm_loss": 0.0, "num_token_doc": 66.6241, "num_token_overlap": 17.7255, "num_token_query": 52.214, "num_token_union": 73.6748, "num_word_context": 202.2647, "num_word_doc": 49.7098, "num_word_query": 39.8245, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2713.1638, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9746, "query_norm": 1.6528, "queue_k_norm": 1.6823, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.214, "sent_len_1": 66.6241, "sent_len_max_0": 128.0, "sent_len_max_1": 207.3825, "stdk": 0.0495, "stdq": 0.047, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 63700 }, { "accuracy": 60.498, "active_queue_size": 16384.0, "cl_loss": 2.6346, "doc_norm": 1.6838, "encoder_q-embeddings": 2780.178, "encoder_q-layer.0": 2079.9788, "encoder_q-layer.1": 2354.5183, "encoder_q-layer.10": 1798.7073, "encoder_q-layer.11": 3496.0828, "encoder_q-layer.2": 2514.3518, "encoder_q-layer.3": 2466.7864, "encoder_q-layer.4": 3122.321, "encoder_q-layer.5": 2903.8582, "encoder_q-layer.6": 2997.8894, "encoder_q-layer.7": 3152.2744, "encoder_q-layer.8": 2824.3643, "encoder_q-layer.9": 1814.0098, "epoch": 0.62, "inbatch_neg_score": 0.9757, "inbatch_pos_score": 1.7188, "learning_rate": 2.011111111111111e-05, "loss": 2.6346, "norm_diff": 0.0447, "norm_loss": 0.0, "num_token_doc": 66.818, "num_token_overlap": 17.7641, "num_token_query": 52.1889, "num_token_union": 73.7347, "num_word_context": 202.2737, "num_word_doc": 49.8676, "num_word_query": 39.809, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4010.3287, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9756, "query_norm": 1.639, "queue_k_norm": 1.6849, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1889, "sent_len_1": 66.818, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3363, "stdk": 0.0494, "stdq": 0.0462, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 63800 }, { "accuracy": 61.2793, "active_queue_size": 16384.0, "cl_loss": 2.6349, "doc_norm": 1.6838, "encoder_q-embeddings": 1213.5198, "encoder_q-layer.0": 765.4069, "encoder_q-layer.1": 827.1979, "encoder_q-layer.10": 1629.4434, "encoder_q-layer.11": 3510.5369, "encoder_q-layer.2": 938.5638, "encoder_q-layer.3": 999.4924, "encoder_q-layer.4": 1066.3281, "encoder_q-layer.5": 1091.7733, "encoder_q-layer.6": 1244.9315, "encoder_q-layer.7": 1426.2224, "encoder_q-layer.8": 1775.4524, "encoder_q-layer.9": 1595.4128, "epoch": 0.62, "inbatch_neg_score": 0.9801, "inbatch_pos_score": 1.7227, "learning_rate": 2.0055555555555556e-05, "loss": 2.6349, "norm_diff": 0.0438, "norm_loss": 0.0, "num_token_doc": 66.7507, "num_token_overlap": 17.8179, "num_token_query": 52.2656, "num_token_union": 73.6828, "num_word_context": 202.2191, "num_word_doc": 49.7674, "num_word_query": 39.8488, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2294.3256, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.981, "query_norm": 1.64, "queue_k_norm": 1.6851, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2656, "sent_len_1": 66.7507, "sent_len_max_0": 128.0, "sent_len_max_1": 209.21, "stdk": 0.0494, "stdq": 0.0461, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 63900 }, { "accuracy": 59.3262, "active_queue_size": 16384.0, "cl_loss": 2.6295, "doc_norm": 1.6852, "encoder_q-embeddings": 1692.0728, "encoder_q-layer.0": 1118.6586, "encoder_q-layer.1": 1241.323, "encoder_q-layer.10": 1784.4387, "encoder_q-layer.11": 3651.2913, "encoder_q-layer.2": 1550.8906, "encoder_q-layer.3": 1577.4774, "encoder_q-layer.4": 1733.7922, "encoder_q-layer.5": 1830.3927, "encoder_q-layer.6": 2036.3472, "encoder_q-layer.7": 2034.2899, "encoder_q-layer.8": 2199.2361, "encoder_q-layer.9": 1770.6736, "epoch": 0.62, "inbatch_neg_score": 0.989, "inbatch_pos_score": 1.7285, "learning_rate": 2e-05, "loss": 2.6295, "norm_diff": 0.0312, "norm_loss": 0.0, "num_token_doc": 66.8261, "num_token_overlap": 17.8323, "num_token_query": 52.2163, "num_token_union": 73.6527, "num_word_context": 202.1829, "num_word_doc": 49.8209, "num_word_query": 39.8366, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2858.3466, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9897, "query_norm": 1.654, "queue_k_norm": 1.6867, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2163, "sent_len_1": 66.8261, "sent_len_max_0": 128.0, "sent_len_max_1": 212.0213, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 64000 }, { "accuracy": 62.8418, "active_queue_size": 16384.0, "cl_loss": 2.6205, "doc_norm": 1.6838, "encoder_q-embeddings": 1270.7703, "encoder_q-layer.0": 832.9498, "encoder_q-layer.1": 926.791, "encoder_q-layer.10": 1598.8145, "encoder_q-layer.11": 3378.719, "encoder_q-layer.2": 1062.9746, "encoder_q-layer.3": 1115.9863, "encoder_q-layer.4": 1207.9926, "encoder_q-layer.5": 1267.9247, "encoder_q-layer.6": 1441.5752, "encoder_q-layer.7": 1611.9824, "encoder_q-layer.8": 1824.2139, "encoder_q-layer.9": 1620.7628, "epoch": 0.63, "inbatch_neg_score": 0.9946, "inbatch_pos_score": 1.752, "learning_rate": 1.9944444444444447e-05, "loss": 2.6205, "norm_diff": 0.0229, "norm_loss": 0.0, "num_token_doc": 66.7166, "num_token_overlap": 17.8331, "num_token_query": 52.2876, "num_token_union": 73.6638, "num_word_context": 201.9835, "num_word_doc": 49.787, "num_word_query": 39.874, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2344.9461, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9941, "query_norm": 1.661, "queue_k_norm": 1.6852, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2876, "sent_len_1": 66.7166, "sent_len_max_0": 128.0, "sent_len_max_1": 208.61, "stdk": 0.0493, "stdq": 0.0467, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 64100 }, { "accuracy": 59.6191, "active_queue_size": 16384.0, "cl_loss": 2.6254, "doc_norm": 1.6851, "encoder_q-embeddings": 1440.262, "encoder_q-layer.0": 963.5082, "encoder_q-layer.1": 1065.6993, "encoder_q-layer.10": 1741.7611, "encoder_q-layer.11": 3606.9482, "encoder_q-layer.2": 1300.3619, "encoder_q-layer.3": 1347.2775, "encoder_q-layer.4": 1425.1265, "encoder_q-layer.5": 1475.8093, "encoder_q-layer.6": 1782.0558, "encoder_q-layer.7": 1832.5762, "encoder_q-layer.8": 2075.6646, "encoder_q-layer.9": 1762.8501, "epoch": 0.63, "inbatch_neg_score": 1.0024, "inbatch_pos_score": 1.7314, "learning_rate": 1.988888888888889e-05, "loss": 2.6254, "norm_diff": 0.032, "norm_loss": 0.0, "num_token_doc": 66.6317, "num_token_overlap": 17.8613, "num_token_query": 52.3295, "num_token_union": 73.6107, "num_word_context": 202.1452, "num_word_doc": 49.7359, "num_word_query": 39.9198, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2617.8558, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.001, "query_norm": 1.6531, "queue_k_norm": 1.6875, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3295, "sent_len_1": 66.6317, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3713, "stdk": 0.0493, "stdq": 0.0462, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 64200 }, { "accuracy": 60.6934, "active_queue_size": 16384.0, "cl_loss": 2.621, "doc_norm": 1.689, "encoder_q-embeddings": 1299.2318, "encoder_q-layer.0": 800.2546, "encoder_q-layer.1": 869.2258, "encoder_q-layer.10": 1694.9648, "encoder_q-layer.11": 3411.1001, "encoder_q-layer.2": 989.5283, "encoder_q-layer.3": 1084.0925, "encoder_q-layer.4": 1178.5303, "encoder_q-layer.5": 1201.0665, "encoder_q-layer.6": 1419.4541, "encoder_q-layer.7": 1563.0101, "encoder_q-layer.8": 1881.73, "encoder_q-layer.9": 1667.9896, "epoch": 0.63, "inbatch_neg_score": 1.0025, "inbatch_pos_score": 1.7471, "learning_rate": 1.9833333333333335e-05, "loss": 2.621, "norm_diff": 0.0223, "norm_loss": 0.0, "num_token_doc": 66.773, "num_token_overlap": 17.8601, "num_token_query": 52.3158, "num_token_union": 73.6803, "num_word_context": 202.4548, "num_word_doc": 49.8254, "num_word_query": 39.8984, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2367.9721, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.002, "query_norm": 1.6667, "queue_k_norm": 1.6867, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3158, "sent_len_1": 66.773, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4187, "stdk": 0.0495, "stdq": 0.0467, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 64300 }, { "accuracy": 62.4023, "active_queue_size": 16384.0, "cl_loss": 2.63, "doc_norm": 1.6913, "encoder_q-embeddings": 1472.2897, "encoder_q-layer.0": 1003.7682, "encoder_q-layer.1": 1138.0177, "encoder_q-layer.10": 1626.7637, "encoder_q-layer.11": 3390.176, "encoder_q-layer.2": 1358.271, "encoder_q-layer.3": 1414.5603, "encoder_q-layer.4": 1590.8492, "encoder_q-layer.5": 1895.3019, "encoder_q-layer.6": 2049.3572, "encoder_q-layer.7": 1861.4668, "encoder_q-layer.8": 2090.3628, "encoder_q-layer.9": 1668.7028, "epoch": 0.63, "inbatch_neg_score": 1.0076, "inbatch_pos_score": 1.7734, "learning_rate": 1.9777777777777778e-05, "loss": 2.63, "norm_diff": 0.0213, "norm_loss": 0.0, "num_token_doc": 66.7308, "num_token_overlap": 17.8174, "num_token_query": 52.3315, "num_token_union": 73.7302, "num_word_context": 202.2958, "num_word_doc": 49.8266, "num_word_query": 39.9143, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2672.9771, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0078, "query_norm": 1.67, "queue_k_norm": 1.6877, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3315, "sent_len_1": 66.7308, "sent_len_max_0": 128.0, "sent_len_max_1": 208.94, "stdk": 0.0495, "stdq": 0.0467, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 64400 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.6184, "doc_norm": 1.6906, "encoder_q-embeddings": 1293.0205, "encoder_q-layer.0": 840.9991, "encoder_q-layer.1": 905.9833, "encoder_q-layer.10": 1607.9307, "encoder_q-layer.11": 3407.2927, "encoder_q-layer.2": 1025.2878, "encoder_q-layer.3": 1101.1754, "encoder_q-layer.4": 1235.5173, "encoder_q-layer.5": 1209.6118, "encoder_q-layer.6": 1373.0284, "encoder_q-layer.7": 1623.8892, "encoder_q-layer.8": 1929.6809, "encoder_q-layer.9": 1675.5952, "epoch": 0.63, "inbatch_neg_score": 1.0224, "inbatch_pos_score": 1.7637, "learning_rate": 1.9722222222222224e-05, "loss": 2.6184, "norm_diff": 0.0258, "norm_loss": 0.0, "num_token_doc": 66.7134, "num_token_overlap": 17.8241, "num_token_query": 52.2297, "num_token_union": 73.6084, "num_word_context": 201.9857, "num_word_doc": 49.752, "num_word_query": 39.8202, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2396.9526, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0215, "query_norm": 1.6647, "queue_k_norm": 1.6902, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2297, "sent_len_1": 66.7134, "sent_len_max_0": 128.0, "sent_len_max_1": 210.28, "stdk": 0.0494, "stdq": 0.0461, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 64500 }, { "accuracy": 61.9629, "active_queue_size": 16384.0, "cl_loss": 2.62, "doc_norm": 1.6873, "encoder_q-embeddings": 1575.5204, "encoder_q-layer.0": 1022.8319, "encoder_q-layer.1": 1159.8015, "encoder_q-layer.10": 2023.881, "encoder_q-layer.11": 3765.2673, "encoder_q-layer.2": 1320.5621, "encoder_q-layer.3": 1344.041, "encoder_q-layer.4": 1480.3217, "encoder_q-layer.5": 1586.8462, "encoder_q-layer.6": 1732.4316, "encoder_q-layer.7": 1855.4304, "encoder_q-layer.8": 2196.3782, "encoder_q-layer.9": 1903.6346, "epoch": 0.63, "inbatch_neg_score": 1.0259, "inbatch_pos_score": 1.7725, "learning_rate": 1.9666666666666666e-05, "loss": 2.62, "norm_diff": 0.0171, "norm_loss": 0.0, "num_token_doc": 66.9544, "num_token_overlap": 17.8473, "num_token_query": 52.378, "num_token_union": 73.8167, "num_word_context": 202.4343, "num_word_doc": 49.9377, "num_word_query": 39.9399, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2761.1615, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0254, "query_norm": 1.6702, "queue_k_norm": 1.6915, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.378, "sent_len_1": 66.9544, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6687, "stdk": 0.0492, "stdq": 0.0464, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 64600 }, { "accuracy": 61.6699, "active_queue_size": 16384.0, "cl_loss": 2.6164, "doc_norm": 1.6898, "encoder_q-embeddings": 3343.1316, "encoder_q-layer.0": 2302.5718, "encoder_q-layer.1": 2585.5271, "encoder_q-layer.10": 3341.9268, "encoder_q-layer.11": 6910.165, "encoder_q-layer.2": 2884.0435, "encoder_q-layer.3": 2801.0833, "encoder_q-layer.4": 3067.0586, "encoder_q-layer.5": 3185.6438, "encoder_q-layer.6": 3439.9424, "encoder_q-layer.7": 3584.9963, "encoder_q-layer.8": 3997.8298, "encoder_q-layer.9": 3417.093, "epoch": 0.63, "inbatch_neg_score": 1.034, "inbatch_pos_score": 1.7998, "learning_rate": 1.9611111111111115e-05, "loss": 2.6164, "norm_diff": 0.0081, "norm_loss": 0.0, "num_token_doc": 66.6582, "num_token_overlap": 17.8127, "num_token_query": 52.4452, "num_token_union": 73.6977, "num_word_context": 202.1742, "num_word_doc": 49.7342, "num_word_query": 40.0099, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5355.5547, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0332, "query_norm": 1.6869, "queue_k_norm": 1.6933, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4452, "sent_len_1": 66.6582, "sent_len_max_0": 128.0, "sent_len_max_1": 207.3837, "stdk": 0.0493, "stdq": 0.0471, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 64700 }, { "accuracy": 61.2305, "active_queue_size": 16384.0, "cl_loss": 2.6286, "doc_norm": 1.6955, "encoder_q-embeddings": 2964.0037, "encoder_q-layer.0": 1886.7975, "encoder_q-layer.1": 2061.8501, "encoder_q-layer.10": 3623.2559, "encoder_q-layer.11": 7236.6187, "encoder_q-layer.2": 2477.8052, "encoder_q-layer.3": 2641.1802, "encoder_q-layer.4": 2800.3879, "encoder_q-layer.5": 3050.0242, "encoder_q-layer.6": 3197.728, "encoder_q-layer.7": 3536.6565, "encoder_q-layer.8": 3958.0977, "encoder_q-layer.9": 3491.6909, "epoch": 0.63, "inbatch_neg_score": 1.0397, "inbatch_pos_score": 1.7803, "learning_rate": 1.9555555555555557e-05, "loss": 2.6286, "norm_diff": 0.0281, "norm_loss": 0.0, "num_token_doc": 66.8333, "num_token_overlap": 17.7983, "num_token_query": 52.3193, "num_token_union": 73.7904, "num_word_context": 202.4665, "num_word_doc": 49.8796, "num_word_query": 39.919, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5231.3663, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.04, "query_norm": 1.6673, "queue_k_norm": 1.6944, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3193, "sent_len_1": 66.8333, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5337, "stdk": 0.0495, "stdq": 0.0462, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 64800 }, { "accuracy": 61.6699, "active_queue_size": 16384.0, "cl_loss": 2.6372, "doc_norm": 1.6968, "encoder_q-embeddings": 2493.4167, "encoder_q-layer.0": 1609.436, "encoder_q-layer.1": 1763.2128, "encoder_q-layer.10": 3563.3486, "encoder_q-layer.11": 7592.1509, "encoder_q-layer.2": 2003.6799, "encoder_q-layer.3": 2124.0239, "encoder_q-layer.4": 2366.5354, "encoder_q-layer.5": 2474.6819, "encoder_q-layer.6": 2863.3264, "encoder_q-layer.7": 3458.3979, "encoder_q-layer.8": 3880.1748, "encoder_q-layer.9": 3456.3042, "epoch": 0.63, "inbatch_neg_score": 1.0418, "inbatch_pos_score": 1.7871, "learning_rate": 1.9500000000000003e-05, "loss": 2.6372, "norm_diff": 0.0322, "norm_loss": 0.0, "num_token_doc": 66.6821, "num_token_overlap": 17.7798, "num_token_query": 52.2077, "num_token_union": 73.6277, "num_word_context": 202.3658, "num_word_doc": 49.7546, "num_word_query": 39.8124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4927.3915, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.041, "query_norm": 1.6647, "queue_k_norm": 1.6944, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2077, "sent_len_1": 66.6821, "sent_len_max_0": 128.0, "sent_len_max_1": 209.9487, "stdk": 0.0495, "stdq": 0.0462, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 64900 }, { "accuracy": 62.6465, "active_queue_size": 16384.0, "cl_loss": 2.6343, "doc_norm": 1.6982, "encoder_q-embeddings": 2542.4968, "encoder_q-layer.0": 1709.3489, "encoder_q-layer.1": 1828.9877, "encoder_q-layer.10": 3358.3433, "encoder_q-layer.11": 6843.083, "encoder_q-layer.2": 2095.1355, "encoder_q-layer.3": 2231.333, "encoder_q-layer.4": 2471.3711, "encoder_q-layer.5": 2544.877, "encoder_q-layer.6": 2817.5303, "encoder_q-layer.7": 3165.134, "encoder_q-layer.8": 3718.5923, "encoder_q-layer.9": 3415.3984, "epoch": 0.63, "inbatch_neg_score": 1.0463, "inbatch_pos_score": 1.8115, "learning_rate": 1.9444444444444445e-05, "loss": 2.6343, "norm_diff": 0.0218, "norm_loss": 0.0, "num_token_doc": 66.8275, "num_token_overlap": 17.7874, "num_token_query": 52.2732, "num_token_union": 73.753, "num_word_context": 202.2003, "num_word_doc": 49.8694, "num_word_query": 39.8736, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4804.5206, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0469, "query_norm": 1.6763, "queue_k_norm": 1.6964, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2732, "sent_len_1": 66.8275, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6325, "stdk": 0.0495, "stdq": 0.0468, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 65000 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.6209, "doc_norm": 1.699, "encoder_q-embeddings": 4414.5835, "encoder_q-layer.0": 2818.019, "encoder_q-layer.1": 3311.6538, "encoder_q-layer.10": 3336.218, "encoder_q-layer.11": 7079.0752, "encoder_q-layer.2": 3907.4443, "encoder_q-layer.3": 3922.5559, "encoder_q-layer.4": 4270.6548, "encoder_q-layer.5": 4345.1621, "encoder_q-layer.6": 4834.9844, "encoder_q-layer.7": 5042.1704, "encoder_q-layer.8": 4378.4883, "encoder_q-layer.9": 3257.0859, "epoch": 0.64, "inbatch_neg_score": 1.0467, "inbatch_pos_score": 1.8008, "learning_rate": 1.938888888888889e-05, "loss": 2.6209, "norm_diff": 0.0243, "norm_loss": 0.0, "num_token_doc": 66.7917, "num_token_overlap": 17.7928, "num_token_query": 52.35, "num_token_union": 73.8102, "num_word_context": 202.3288, "num_word_doc": 49.8293, "num_word_query": 39.9533, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6450.126, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.0469, "query_norm": 1.6747, "queue_k_norm": 1.6988, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.35, "sent_len_1": 66.7917, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0375, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 65100 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.6281, "doc_norm": 1.702, "encoder_q-embeddings": 2677.301, "encoder_q-layer.0": 1723.9525, "encoder_q-layer.1": 1866.4664, "encoder_q-layer.10": 3481.7451, "encoder_q-layer.11": 7186.5542, "encoder_q-layer.2": 2186.2588, "encoder_q-layer.3": 2281.157, "encoder_q-layer.4": 2570.0798, "encoder_q-layer.5": 2794.8933, "encoder_q-layer.6": 3136.7644, "encoder_q-layer.7": 3317.7805, "encoder_q-layer.8": 4042.7737, "encoder_q-layer.9": 3503.6609, "epoch": 0.64, "inbatch_neg_score": 1.044, "inbatch_pos_score": 1.7891, "learning_rate": 1.9333333333333333e-05, "loss": 2.6281, "norm_diff": 0.03, "norm_loss": 0.0, "num_token_doc": 66.7478, "num_token_overlap": 17.7895, "num_token_query": 52.2341, "num_token_union": 73.6655, "num_word_context": 202.0039, "num_word_doc": 49.8167, "num_word_query": 39.8432, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4938.8598, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0449, "query_norm": 1.672, "queue_k_norm": 1.6999, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2341, "sent_len_1": 66.7478, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5613, "stdk": 0.0496, "stdq": 0.0469, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 65200 }, { "accuracy": 62.5488, "active_queue_size": 16384.0, "cl_loss": 2.612, "doc_norm": 1.7037, "encoder_q-embeddings": 1248.6515, "encoder_q-layer.0": 837.8502, "encoder_q-layer.1": 930.256, "encoder_q-layer.10": 1696.0096, "encoder_q-layer.11": 3481.7278, "encoder_q-layer.2": 1087.7284, "encoder_q-layer.3": 1158.3242, "encoder_q-layer.4": 1252.8304, "encoder_q-layer.5": 1289.7599, "encoder_q-layer.6": 1443.2156, "encoder_q-layer.7": 1624.6538, "encoder_q-layer.8": 1905.4667, "encoder_q-layer.9": 1657.6338, "epoch": 0.64, "inbatch_neg_score": 1.0443, "inbatch_pos_score": 1.8154, "learning_rate": 1.927777777777778e-05, "loss": 2.612, "norm_diff": 0.0349, "norm_loss": 0.0, "num_token_doc": 66.6731, "num_token_overlap": 17.8145, "num_token_query": 52.2162, "num_token_union": 73.5868, "num_word_context": 202.0879, "num_word_doc": 49.7475, "num_word_query": 39.8307, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2427.7134, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0459, "query_norm": 1.6688, "queue_k_norm": 1.7011, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2162, "sent_len_1": 66.6731, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7637, "stdk": 0.0496, "stdq": 0.0468, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 65300 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.6231, "doc_norm": 1.7069, "encoder_q-embeddings": 1381.7174, "encoder_q-layer.0": 904.1158, "encoder_q-layer.1": 974.953, "encoder_q-layer.10": 1750.7858, "encoder_q-layer.11": 3526.415, "encoder_q-layer.2": 1155.9365, "encoder_q-layer.3": 1270.5029, "encoder_q-layer.4": 1361.6713, "encoder_q-layer.5": 1449.0355, "encoder_q-layer.6": 1550.9877, "encoder_q-layer.7": 1721.0342, "encoder_q-layer.8": 1999.4113, "encoder_q-layer.9": 1773.9788, "epoch": 0.64, "inbatch_neg_score": 1.0445, "inbatch_pos_score": 1.7891, "learning_rate": 1.922222222222222e-05, "loss": 2.6231, "norm_diff": 0.047, "norm_loss": 0.0, "num_token_doc": 66.7004, "num_token_overlap": 17.8192, "num_token_query": 52.2342, "num_token_union": 73.636, "num_word_context": 202.0193, "num_word_doc": 49.7846, "num_word_query": 39.8186, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2491.5454, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0449, "query_norm": 1.6598, "queue_k_norm": 1.7008, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2342, "sent_len_1": 66.7004, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5337, "stdk": 0.0497, "stdq": 0.0465, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 65400 }, { "accuracy": 63.3789, "active_queue_size": 16384.0, "cl_loss": 2.6252, "doc_norm": 1.7061, "encoder_q-embeddings": 1722.681, "encoder_q-layer.0": 1122.1218, "encoder_q-layer.1": 1376.3577, "encoder_q-layer.10": 1997.6992, "encoder_q-layer.11": 3680.6299, "encoder_q-layer.2": 1640.106, "encoder_q-layer.3": 1715.4475, "encoder_q-layer.4": 1802.7775, "encoder_q-layer.5": 2103.2749, "encoder_q-layer.6": 2291.0537, "encoder_q-layer.7": 2703.7808, "encoder_q-layer.8": 2880.9976, "encoder_q-layer.9": 2007.2555, "epoch": 0.64, "inbatch_neg_score": 1.0427, "inbatch_pos_score": 1.8086, "learning_rate": 1.9166666666666667e-05, "loss": 2.6252, "norm_diff": 0.0505, "norm_loss": 0.0, "num_token_doc": 66.8228, "num_token_overlap": 17.833, "num_token_query": 52.2698, "num_token_union": 73.7267, "num_word_context": 202.4612, "num_word_doc": 49.8599, "num_word_query": 39.8751, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3120.9854, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.043, "query_norm": 1.6556, "queue_k_norm": 1.7007, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2698, "sent_len_1": 66.8228, "sent_len_max_0": 128.0, "sent_len_max_1": 208.36, "stdk": 0.0496, "stdq": 0.0463, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 65500 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.6283, "doc_norm": 1.7037, "encoder_q-embeddings": 1941.8575, "encoder_q-layer.0": 1363.1486, "encoder_q-layer.1": 1618.3149, "encoder_q-layer.10": 1812.9238, "encoder_q-layer.11": 3611.7246, "encoder_q-layer.2": 1819.2125, "encoder_q-layer.3": 1963.2698, "encoder_q-layer.4": 2037.6953, "encoder_q-layer.5": 2161.6794, "encoder_q-layer.6": 2238.6226, "encoder_q-layer.7": 2212.1011, "encoder_q-layer.8": 2383.5359, "encoder_q-layer.9": 1832.8055, "epoch": 0.64, "inbatch_neg_score": 1.0393, "inbatch_pos_score": 1.7822, "learning_rate": 1.9111111111111113e-05, "loss": 2.6283, "norm_diff": 0.044, "norm_loss": 0.0, "num_token_doc": 66.8912, "num_token_overlap": 17.8009, "num_token_query": 52.3527, "num_token_union": 73.8416, "num_word_context": 202.4634, "num_word_doc": 49.8888, "num_word_query": 39.9231, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3138.4462, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.041, "query_norm": 1.6598, "queue_k_norm": 1.7039, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3527, "sent_len_1": 66.8912, "sent_len_max_0": 128.0, "sent_len_max_1": 210.44, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 65600 }, { "accuracy": 60.5957, "active_queue_size": 16384.0, "cl_loss": 2.6314, "doc_norm": 1.7049, "encoder_q-embeddings": 1417.9999, "encoder_q-layer.0": 898.5571, "encoder_q-layer.1": 957.9291, "encoder_q-layer.10": 1658.8228, "encoder_q-layer.11": 3445.8977, "encoder_q-layer.2": 1103.8612, "encoder_q-layer.3": 1183.7483, "encoder_q-layer.4": 1341.8239, "encoder_q-layer.5": 1372.9268, "encoder_q-layer.6": 1484.8357, "encoder_q-layer.7": 1656.2911, "encoder_q-layer.8": 1907.2455, "encoder_q-layer.9": 1752.558, "epoch": 0.64, "inbatch_neg_score": 1.0412, "inbatch_pos_score": 1.7881, "learning_rate": 1.905555555555556e-05, "loss": 2.6314, "norm_diff": 0.045, "norm_loss": 0.0, "num_token_doc": 66.7669, "num_token_overlap": 17.7455, "num_token_query": 52.2071, "num_token_union": 73.7472, "num_word_context": 202.4135, "num_word_doc": 49.8393, "num_word_query": 39.8083, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2489.156, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.042, "query_norm": 1.6599, "queue_k_norm": 1.7043, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2071, "sent_len_1": 66.7669, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3663, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 65700 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.6278, "doc_norm": 1.7018, "encoder_q-embeddings": 2908.2896, "encoder_q-layer.0": 2215.9495, "encoder_q-layer.1": 2539.9084, "encoder_q-layer.10": 1787.5065, "encoder_q-layer.11": 3737.9099, "encoder_q-layer.2": 2935.8118, "encoder_q-layer.3": 3203.3066, "encoder_q-layer.4": 3723.4509, "encoder_q-layer.5": 4121.0679, "encoder_q-layer.6": 4266.0391, "encoder_q-layer.7": 4228.0142, "encoder_q-layer.8": 3804.4558, "encoder_q-layer.9": 2301.3743, "epoch": 0.64, "inbatch_neg_score": 1.0458, "inbatch_pos_score": 1.7842, "learning_rate": 1.9e-05, "loss": 2.6278, "norm_diff": 0.0404, "norm_loss": 0.0, "num_token_doc": 66.7133, "num_token_overlap": 17.8018, "num_token_query": 52.2013, "num_token_union": 73.6484, "num_word_context": 202.1584, "num_word_doc": 49.7976, "num_word_query": 39.8203, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4807.4198, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0459, "query_norm": 1.6614, "queue_k_norm": 1.7048, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2013, "sent_len_1": 66.7133, "sent_len_max_0": 128.0, "sent_len_max_1": 207.865, "stdk": 0.0493, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 65800 }, { "accuracy": 61.7676, "active_queue_size": 16384.0, "cl_loss": 2.6231, "doc_norm": 1.7044, "encoder_q-embeddings": 5133.2505, "encoder_q-layer.0": 4009.2012, "encoder_q-layer.1": 3737.3401, "encoder_q-layer.10": 1677.4241, "encoder_q-layer.11": 3612.4426, "encoder_q-layer.2": 4477.23, "encoder_q-layer.3": 4358.0215, "encoder_q-layer.4": 5049.3521, "encoder_q-layer.5": 5479.5571, "encoder_q-layer.6": 6423.8706, "encoder_q-layer.7": 6375.2681, "encoder_q-layer.8": 5155.1733, "encoder_q-layer.9": 2164.2161, "epoch": 0.64, "inbatch_neg_score": 1.0428, "inbatch_pos_score": 1.8047, "learning_rate": 1.8944444444444447e-05, "loss": 2.6231, "norm_diff": 0.0413, "norm_loss": 0.0, "num_token_doc": 66.7162, "num_token_overlap": 17.7749, "num_token_query": 52.2849, "num_token_union": 73.7102, "num_word_context": 202.266, "num_word_doc": 49.7671, "num_word_query": 39.8656, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7151.5316, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.043, "query_norm": 1.6631, "queue_k_norm": 1.7044, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2849, "sent_len_1": 66.7162, "sent_len_max_0": 128.0, "sent_len_max_1": 209.13, "stdk": 0.0494, "stdq": 0.0467, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 65900 }, { "accuracy": 59.7168, "active_queue_size": 16384.0, "cl_loss": 2.6301, "doc_norm": 1.7062, "encoder_q-embeddings": 1529.5367, "encoder_q-layer.0": 1036.7994, "encoder_q-layer.1": 1216.2952, "encoder_q-layer.10": 1803.0725, "encoder_q-layer.11": 3647.8276, "encoder_q-layer.2": 1495.1921, "encoder_q-layer.3": 1510.6584, "encoder_q-layer.4": 1569.7477, "encoder_q-layer.5": 1643.5134, "encoder_q-layer.6": 1668.1387, "encoder_q-layer.7": 1820.3181, "encoder_q-layer.8": 2134.0259, "encoder_q-layer.9": 1772.9879, "epoch": 0.64, "inbatch_neg_score": 1.0452, "inbatch_pos_score": 1.7988, "learning_rate": 1.888888888888889e-05, "loss": 2.6301, "norm_diff": 0.0339, "norm_loss": 0.0, "num_token_doc": 66.8281, "num_token_overlap": 17.8495, "num_token_query": 52.3637, "num_token_union": 73.7529, "num_word_context": 202.5844, "num_word_doc": 49.8858, "num_word_query": 39.9547, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2723.068, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0459, "query_norm": 1.6723, "queue_k_norm": 1.7058, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3637, "sent_len_1": 66.8281, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4288, "stdk": 0.0495, "stdq": 0.047, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 66000 }, { "accuracy": 62.8418, "active_queue_size": 16384.0, "cl_loss": 2.6273, "doc_norm": 1.7054, "encoder_q-embeddings": 1184.3578, "encoder_q-layer.0": 781.9121, "encoder_q-layer.1": 845.6145, "encoder_q-layer.10": 1656.5388, "encoder_q-layer.11": 3386.1528, "encoder_q-layer.2": 955.1282, "encoder_q-layer.3": 1005.824, "encoder_q-layer.4": 1064.3568, "encoder_q-layer.5": 1132.8296, "encoder_q-layer.6": 1287.2401, "encoder_q-layer.7": 1437.6649, "encoder_q-layer.8": 1851.1782, "encoder_q-layer.9": 1589.0879, "epoch": 0.65, "inbatch_neg_score": 1.0447, "inbatch_pos_score": 1.8096, "learning_rate": 1.8833333333333335e-05, "loss": 2.6273, "norm_diff": 0.0427, "norm_loss": 0.0, "num_token_doc": 66.7073, "num_token_overlap": 17.8182, "num_token_query": 52.3571, "num_token_union": 73.6955, "num_word_context": 202.1608, "num_word_doc": 49.7432, "num_word_query": 39.9391, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2293.9966, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0439, "query_norm": 1.6627, "queue_k_norm": 1.7052, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3571, "sent_len_1": 66.7073, "sent_len_max_0": 128.0, "sent_len_max_1": 210.9588, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 66100 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.6188, "doc_norm": 1.7065, "encoder_q-embeddings": 1259.0973, "encoder_q-layer.0": 827.6188, "encoder_q-layer.1": 911.5217, "encoder_q-layer.10": 1749.6768, "encoder_q-layer.11": 3425.8364, "encoder_q-layer.2": 1056.8761, "encoder_q-layer.3": 1115.2659, "encoder_q-layer.4": 1184.089, "encoder_q-layer.5": 1205.6063, "encoder_q-layer.6": 1380.2076, "encoder_q-layer.7": 1457.8676, "encoder_q-layer.8": 1807.5669, "encoder_q-layer.9": 1644.5833, "epoch": 0.65, "inbatch_neg_score": 1.0436, "inbatch_pos_score": 1.792, "learning_rate": 1.8777777777777777e-05, "loss": 2.6188, "norm_diff": 0.0462, "norm_loss": 0.0, "num_token_doc": 66.8471, "num_token_overlap": 17.8433, "num_token_query": 52.298, "num_token_union": 73.7368, "num_word_context": 202.41, "num_word_doc": 49.907, "num_word_query": 39.8864, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2340.385, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0439, "query_norm": 1.6603, "queue_k_norm": 1.7062, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.298, "sent_len_1": 66.8471, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7587, "stdk": 0.0495, "stdq": 0.0464, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 66200 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.6212, "doc_norm": 1.7021, "encoder_q-embeddings": 1427.13, "encoder_q-layer.0": 920.9579, "encoder_q-layer.1": 1034.5953, "encoder_q-layer.10": 1865.0859, "encoder_q-layer.11": 3606.2263, "encoder_q-layer.2": 1154.5566, "encoder_q-layer.3": 1242.6204, "encoder_q-layer.4": 1381.3977, "encoder_q-layer.5": 1433.4551, "encoder_q-layer.6": 1567.4272, "encoder_q-layer.7": 1705.788, "encoder_q-layer.8": 2052.7986, "encoder_q-layer.9": 1693.6158, "epoch": 0.65, "inbatch_neg_score": 1.0503, "inbatch_pos_score": 1.7764, "learning_rate": 1.8722222222222223e-05, "loss": 2.6212, "norm_diff": 0.0361, "norm_loss": 0.0, "num_token_doc": 66.7146, "num_token_overlap": 17.8265, "num_token_query": 52.3387, "num_token_union": 73.6893, "num_word_context": 202.1434, "num_word_doc": 49.8108, "num_word_query": 39.9235, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2546.2387, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0488, "query_norm": 1.6659, "queue_k_norm": 1.7074, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3387, "sent_len_1": 66.7146, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8487, "stdk": 0.0492, "stdq": 0.0465, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 66300 }, { "accuracy": 61.7676, "active_queue_size": 16384.0, "cl_loss": 2.6145, "doc_norm": 1.7125, "encoder_q-embeddings": 1322.2516, "encoder_q-layer.0": 877.7339, "encoder_q-layer.1": 945.5493, "encoder_q-layer.10": 1643.251, "encoder_q-layer.11": 3519.4912, "encoder_q-layer.2": 1101.6158, "encoder_q-layer.3": 1164.811, "encoder_q-layer.4": 1274.0283, "encoder_q-layer.5": 1347.5848, "encoder_q-layer.6": 1492.8564, "encoder_q-layer.7": 1674.2605, "encoder_q-layer.8": 1871.8636, "encoder_q-layer.9": 1677.0831, "epoch": 0.65, "inbatch_neg_score": 1.0504, "inbatch_pos_score": 1.8223, "learning_rate": 1.866666666666667e-05, "loss": 2.6145, "norm_diff": 0.0355, "norm_loss": 0.0, "num_token_doc": 66.7532, "num_token_overlap": 17.863, "num_token_query": 52.3987, "num_token_union": 73.7491, "num_word_context": 202.4699, "num_word_doc": 49.8295, "num_word_query": 39.9698, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2454.0021, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0508, "query_norm": 1.677, "queue_k_norm": 1.7079, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3987, "sent_len_1": 66.7532, "sent_len_max_0": 128.0, "sent_len_max_1": 206.9087, "stdk": 0.0497, "stdq": 0.047, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 66400 }, { "accuracy": 60.2051, "active_queue_size": 16384.0, "cl_loss": 2.624, "doc_norm": 1.7101, "encoder_q-embeddings": 1365.6714, "encoder_q-layer.0": 894.2328, "encoder_q-layer.1": 1003.6124, "encoder_q-layer.10": 1642.1786, "encoder_q-layer.11": 3401.1406, "encoder_q-layer.2": 1144.5385, "encoder_q-layer.3": 1188.6118, "encoder_q-layer.4": 1278.9205, "encoder_q-layer.5": 1332.2682, "encoder_q-layer.6": 1484.8809, "encoder_q-layer.7": 1755.3804, "encoder_q-layer.8": 1876.2722, "encoder_q-layer.9": 1637.9294, "epoch": 0.65, "inbatch_neg_score": 1.0552, "inbatch_pos_score": 1.8154, "learning_rate": 1.861111111111111e-05, "loss": 2.624, "norm_diff": 0.0346, "norm_loss": 0.0, "num_token_doc": 66.8415, "num_token_overlap": 17.8601, "num_token_query": 52.4208, "num_token_union": 73.7771, "num_word_context": 202.2527, "num_word_doc": 49.8534, "num_word_query": 39.9953, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2445.6998, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0547, "query_norm": 1.6754, "queue_k_norm": 1.7082, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4208, "sent_len_1": 66.8415, "sent_len_max_0": 128.0, "sent_len_max_1": 210.1325, "stdk": 0.0495, "stdq": 0.0468, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 66500 }, { "accuracy": 60.9863, "active_queue_size": 16384.0, "cl_loss": 2.6292, "doc_norm": 1.7068, "encoder_q-embeddings": 1340.8801, "encoder_q-layer.0": 841.1135, "encoder_q-layer.1": 900.2589, "encoder_q-layer.10": 1732.7019, "encoder_q-layer.11": 3525.7229, "encoder_q-layer.2": 1023.47, "encoder_q-layer.3": 1108.625, "encoder_q-layer.4": 1167.6857, "encoder_q-layer.5": 1248.5934, "encoder_q-layer.6": 1396.823, "encoder_q-layer.7": 1591.4695, "encoder_q-layer.8": 1950.0928, "encoder_q-layer.9": 1705.4639, "epoch": 0.65, "inbatch_neg_score": 1.0543, "inbatch_pos_score": 1.8008, "learning_rate": 1.8555555555555557e-05, "loss": 2.6292, "norm_diff": 0.0382, "norm_loss": 0.0, "num_token_doc": 66.7493, "num_token_overlap": 17.8182, "num_token_query": 52.3091, "num_token_union": 73.6683, "num_word_context": 202.3364, "num_word_doc": 49.8071, "num_word_query": 39.8906, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2445.4489, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0547, "query_norm": 1.6686, "queue_k_norm": 1.7088, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3091, "sent_len_1": 66.7493, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6362, "stdk": 0.0494, "stdq": 0.0465, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 66600 }, { "accuracy": 61.9629, "active_queue_size": 16384.0, "cl_loss": 2.6246, "doc_norm": 1.7104, "encoder_q-embeddings": 1721.1039, "encoder_q-layer.0": 1174.9568, "encoder_q-layer.1": 1324.9816, "encoder_q-layer.10": 1744.9109, "encoder_q-layer.11": 3653.7285, "encoder_q-layer.2": 1544.9554, "encoder_q-layer.3": 1469.1406, "encoder_q-layer.4": 1551.2303, "encoder_q-layer.5": 1530.8173, "encoder_q-layer.6": 1689.0, "encoder_q-layer.7": 2112.2026, "encoder_q-layer.8": 1964.3696, "encoder_q-layer.9": 1751.4585, "epoch": 0.65, "inbatch_neg_score": 1.0651, "inbatch_pos_score": 1.8271, "learning_rate": 1.85e-05, "loss": 2.6246, "norm_diff": 0.0272, "norm_loss": 0.0, "num_token_doc": 66.8247, "num_token_overlap": 17.8214, "num_token_query": 52.3006, "num_token_union": 73.7405, "num_word_context": 202.5301, "num_word_doc": 49.8634, "num_word_query": 39.88, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2801.2424, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0645, "query_norm": 1.6832, "queue_k_norm": 1.708, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3006, "sent_len_1": 66.8247, "sent_len_max_0": 128.0, "sent_len_max_1": 208.635, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 66700 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.6295, "doc_norm": 1.7107, "encoder_q-embeddings": 1422.411, "encoder_q-layer.0": 896.1667, "encoder_q-layer.1": 1021.3745, "encoder_q-layer.10": 1742.1957, "encoder_q-layer.11": 3653.9219, "encoder_q-layer.2": 1199.8357, "encoder_q-layer.3": 1326.4304, "encoder_q-layer.4": 1408.3505, "encoder_q-layer.5": 1497.4172, "encoder_q-layer.6": 1700.6343, "encoder_q-layer.7": 1801.4882, "encoder_q-layer.8": 2022.5806, "encoder_q-layer.9": 1675.0221, "epoch": 0.65, "inbatch_neg_score": 1.0646, "inbatch_pos_score": 1.8154, "learning_rate": 1.8444444444444445e-05, "loss": 2.6295, "norm_diff": 0.0268, "norm_loss": 0.0, "num_token_doc": 66.9019, "num_token_overlap": 17.7863, "num_token_query": 52.1916, "num_token_union": 73.7428, "num_word_context": 202.5637, "num_word_doc": 49.9234, "num_word_query": 39.8173, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2597.8111, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0654, "query_norm": 1.6839, "queue_k_norm": 1.7089, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1916, "sent_len_1": 66.9019, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8787, "stdk": 0.0495, "stdq": 0.047, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 66800 }, { "accuracy": 61.7188, "active_queue_size": 16384.0, "cl_loss": 2.6167, "doc_norm": 1.7076, "encoder_q-embeddings": 1382.5581, "encoder_q-layer.0": 904.1691, "encoder_q-layer.1": 1007.9104, "encoder_q-layer.10": 1632.8646, "encoder_q-layer.11": 3463.0483, "encoder_q-layer.2": 1156.3286, "encoder_q-layer.3": 1260.9382, "encoder_q-layer.4": 1325.933, "encoder_q-layer.5": 1361.5927, "encoder_q-layer.6": 1631.0381, "encoder_q-layer.7": 1743.5065, "encoder_q-layer.8": 1922.3234, "encoder_q-layer.9": 1659.0159, "epoch": 0.65, "inbatch_neg_score": 1.0704, "inbatch_pos_score": 1.8203, "learning_rate": 1.838888888888889e-05, "loss": 2.6167, "norm_diff": 0.0302, "norm_loss": 0.0, "num_token_doc": 66.8196, "num_token_overlap": 17.8155, "num_token_query": 52.2635, "num_token_union": 73.7481, "num_word_context": 202.3576, "num_word_doc": 49.8474, "num_word_query": 39.8536, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2503.4064, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0703, "query_norm": 1.6774, "queue_k_norm": 1.7122, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2635, "sent_len_1": 66.8196, "sent_len_max_0": 128.0, "sent_len_max_1": 211.0813, "stdk": 0.0493, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 66900 }, { "accuracy": 61.6699, "active_queue_size": 16384.0, "cl_loss": 2.6254, "doc_norm": 1.714, "encoder_q-embeddings": 1813.7532, "encoder_q-layer.0": 1222.908, "encoder_q-layer.1": 1490.9436, "encoder_q-layer.10": 1796.6782, "encoder_q-layer.11": 3495.2319, "encoder_q-layer.2": 1788.0814, "encoder_q-layer.3": 1820.9601, "encoder_q-layer.4": 1933.5647, "encoder_q-layer.5": 2060.7324, "encoder_q-layer.6": 2107.3298, "encoder_q-layer.7": 1844.1082, "encoder_q-layer.8": 1833.1909, "encoder_q-layer.9": 1599.0883, "epoch": 0.65, "inbatch_neg_score": 1.0715, "inbatch_pos_score": 1.835, "learning_rate": 1.8333333333333333e-05, "loss": 2.6254, "norm_diff": 0.0323, "norm_loss": 0.0, "num_token_doc": 66.7251, "num_token_overlap": 17.73, "num_token_query": 52.1621, "num_token_union": 73.6944, "num_word_context": 202.4239, "num_word_doc": 49.8333, "num_word_query": 39.8087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2916.9205, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0732, "query_norm": 1.6817, "queue_k_norm": 1.7102, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1621, "sent_len_1": 66.7251, "sent_len_max_0": 128.0, "sent_len_max_1": 210.62, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 67000 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 2.6069, "doc_norm": 1.7134, "encoder_q-embeddings": 1218.6248, "encoder_q-layer.0": 794.9152, "encoder_q-layer.1": 881.3494, "encoder_q-layer.10": 1757.8297, "encoder_q-layer.11": 3674.0752, "encoder_q-layer.2": 1010.1198, "encoder_q-layer.3": 1058.5814, "encoder_q-layer.4": 1129.119, "encoder_q-layer.5": 1199.6084, "encoder_q-layer.6": 1359.2335, "encoder_q-layer.7": 1528.7931, "encoder_q-layer.8": 1863.9751, "encoder_q-layer.9": 1737.0874, "epoch": 0.66, "inbatch_neg_score": 1.0781, "inbatch_pos_score": 1.8252, "learning_rate": 1.827777777777778e-05, "loss": 2.6069, "norm_diff": 0.0444, "norm_loss": 0.0, "num_token_doc": 66.9082, "num_token_overlap": 17.837, "num_token_query": 52.3322, "num_token_union": 73.7791, "num_word_context": 202.3421, "num_word_doc": 49.9245, "num_word_query": 39.9089, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2417.0992, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0791, "query_norm": 1.669, "queue_k_norm": 1.7136, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3322, "sent_len_1": 66.9082, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5563, "stdk": 0.0496, "stdq": 0.0459, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 67100 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.6261, "doc_norm": 1.7139, "encoder_q-embeddings": 1528.4802, "encoder_q-layer.0": 1044.666, "encoder_q-layer.1": 1199.3807, "encoder_q-layer.10": 1682.4225, "encoder_q-layer.11": 3451.5488, "encoder_q-layer.2": 1395.6556, "encoder_q-layer.3": 1456.2161, "encoder_q-layer.4": 1559.757, "encoder_q-layer.5": 1638.3531, "encoder_q-layer.6": 1712.6497, "encoder_q-layer.7": 1835.6079, "encoder_q-layer.8": 1916.8197, "encoder_q-layer.9": 1662.606, "epoch": 0.66, "inbatch_neg_score": 1.0866, "inbatch_pos_score": 1.834, "learning_rate": 1.8222222222222224e-05, "loss": 2.6261, "norm_diff": 0.0301, "norm_loss": 0.0, "num_token_doc": 66.6397, "num_token_overlap": 17.7902, "num_token_query": 52.3574, "num_token_union": 73.7058, "num_word_context": 202.3119, "num_word_doc": 49.7807, "num_word_query": 39.9718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2647.7148, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.085, "query_norm": 1.6838, "queue_k_norm": 1.7111, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3574, "sent_len_1": 66.6397, "sent_len_max_0": 128.0, "sent_len_max_1": 207.3575, "stdk": 0.0495, "stdq": 0.0465, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 67200 }, { "accuracy": 61.5234, "active_queue_size": 16384.0, "cl_loss": 2.6117, "doc_norm": 1.7101, "encoder_q-embeddings": 2687.3564, "encoder_q-layer.0": 1713.4972, "encoder_q-layer.1": 1903.1501, "encoder_q-layer.10": 3783.0781, "encoder_q-layer.11": 7440.3511, "encoder_q-layer.2": 2196.7373, "encoder_q-layer.3": 2316.9495, "encoder_q-layer.4": 2496.9792, "encoder_q-layer.5": 2713.9529, "encoder_q-layer.6": 3081.5544, "encoder_q-layer.7": 3191.0491, "encoder_q-layer.8": 3965.2925, "encoder_q-layer.9": 3650.728, "epoch": 0.66, "inbatch_neg_score": 1.0905, "inbatch_pos_score": 1.8281, "learning_rate": 1.8166666666666667e-05, "loss": 2.6117, "norm_diff": 0.0336, "norm_loss": 0.0, "num_token_doc": 66.8682, "num_token_overlap": 17.8519, "num_token_query": 52.3256, "num_token_union": 73.7374, "num_word_context": 202.3909, "num_word_doc": 49.9048, "num_word_query": 39.9141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5010.7686, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0898, "query_norm": 1.6765, "queue_k_norm": 1.7135, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3256, "sent_len_1": 66.8682, "sent_len_max_0": 128.0, "sent_len_max_1": 210.8225, "stdk": 0.0493, "stdq": 0.046, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 67300 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.6167, "doc_norm": 1.7137, "encoder_q-embeddings": 2669.0173, "encoder_q-layer.0": 1682.6779, "encoder_q-layer.1": 1950.1239, "encoder_q-layer.10": 3810.1721, "encoder_q-layer.11": 7688.6206, "encoder_q-layer.2": 2175.1304, "encoder_q-layer.3": 2320.8186, "encoder_q-layer.4": 2581.3049, "encoder_q-layer.5": 2661.7561, "encoder_q-layer.6": 2949.8169, "encoder_q-layer.7": 3406.5161, "encoder_q-layer.8": 3910.363, "encoder_q-layer.9": 3530.8232, "epoch": 0.66, "inbatch_neg_score": 1.0962, "inbatch_pos_score": 1.8525, "learning_rate": 1.8111111111111112e-05, "loss": 2.6167, "norm_diff": 0.0153, "norm_loss": 0.0, "num_token_doc": 66.8085, "num_token_overlap": 17.8216, "num_token_query": 52.2953, "num_token_union": 73.7391, "num_word_context": 202.3792, "num_word_doc": 49.8819, "num_word_query": 39.885, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5135.0684, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0967, "query_norm": 1.7006, "queue_k_norm": 1.7143, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2953, "sent_len_1": 66.8085, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2675, "stdk": 0.0494, "stdq": 0.0472, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 67400 }, { "accuracy": 62.3047, "active_queue_size": 16384.0, "cl_loss": 2.61, "doc_norm": 1.7183, "encoder_q-embeddings": 2690.416, "encoder_q-layer.0": 1704.2524, "encoder_q-layer.1": 1849.4652, "encoder_q-layer.10": 3238.207, "encoder_q-layer.11": 6859.5571, "encoder_q-layer.2": 2189.8237, "encoder_q-layer.3": 2311.7622, "encoder_q-layer.4": 2546.2229, "encoder_q-layer.5": 2683.5884, "encoder_q-layer.6": 2975.6335, "encoder_q-layer.7": 3285.437, "encoder_q-layer.8": 3868.8494, "encoder_q-layer.9": 3535.585, "epoch": 0.66, "inbatch_neg_score": 1.0948, "inbatch_pos_score": 1.8545, "learning_rate": 1.8055555555555555e-05, "loss": 2.61, "norm_diff": 0.0323, "norm_loss": 0.0, "num_token_doc": 66.877, "num_token_overlap": 17.8479, "num_token_query": 52.2593, "num_token_union": 73.7306, "num_word_context": 202.4118, "num_word_doc": 49.8776, "num_word_query": 39.8447, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4867.3857, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0967, "query_norm": 1.6859, "queue_k_norm": 1.7152, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2593, "sent_len_1": 66.877, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4338, "stdk": 0.0496, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 67500 }, { "accuracy": 60.2051, "active_queue_size": 16384.0, "cl_loss": 2.6251, "doc_norm": 1.7159, "encoder_q-embeddings": 3254.2314, "encoder_q-layer.0": 2175.3779, "encoder_q-layer.1": 2469.415, "encoder_q-layer.10": 3754.7566, "encoder_q-layer.11": 7449.9893, "encoder_q-layer.2": 2907.2456, "encoder_q-layer.3": 2897.0554, "encoder_q-layer.4": 3192.6956, "encoder_q-layer.5": 3342.8218, "encoder_q-layer.6": 3773.1599, "encoder_q-layer.7": 3751.0112, "encoder_q-layer.8": 4159.5723, "encoder_q-layer.9": 3554.5703, "epoch": 0.66, "inbatch_neg_score": 1.101, "inbatch_pos_score": 1.8486, "learning_rate": 1.8e-05, "loss": 2.6251, "norm_diff": 0.0169, "norm_loss": 0.0, "num_token_doc": 66.8736, "num_token_overlap": 17.8289, "num_token_query": 52.3342, "num_token_union": 73.7718, "num_word_context": 202.453, "num_word_doc": 49.8736, "num_word_query": 39.9018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5539.3204, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1006, "query_norm": 1.699, "queue_k_norm": 1.7178, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3342, "sent_len_1": 66.8736, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4162, "stdk": 0.0494, "stdq": 0.0472, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 67600 }, { "accuracy": 60.2051, "active_queue_size": 16384.0, "cl_loss": 2.62, "doc_norm": 1.7179, "encoder_q-embeddings": 3872.2734, "encoder_q-layer.0": 2616.7224, "encoder_q-layer.1": 3034.7351, "encoder_q-layer.10": 3644.3723, "encoder_q-layer.11": 7674.8975, "encoder_q-layer.2": 3885.5906, "encoder_q-layer.3": 4147.6992, "encoder_q-layer.4": 4415.0718, "encoder_q-layer.5": 4513.3955, "encoder_q-layer.6": 5077.7319, "encoder_q-layer.7": 4703.9307, "encoder_q-layer.8": 4702.5713, "encoder_q-layer.9": 3666.2573, "epoch": 0.66, "inbatch_neg_score": 1.1044, "inbatch_pos_score": 1.8418, "learning_rate": 1.7944444444444443e-05, "loss": 2.62, "norm_diff": 0.0349, "norm_loss": 0.0, "num_token_doc": 66.7734, "num_token_overlap": 17.7899, "num_token_query": 52.3755, "num_token_union": 73.7895, "num_word_context": 202.5884, "num_word_doc": 49.8135, "num_word_query": 39.9572, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6559.6208, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1035, "query_norm": 1.683, "queue_k_norm": 1.7176, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3755, "sent_len_1": 66.7734, "sent_len_max_0": 128.0, "sent_len_max_1": 210.1525, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 67700 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.6125, "doc_norm": 1.7139, "encoder_q-embeddings": 3361.9875, "encoder_q-layer.0": 2265.7515, "encoder_q-layer.1": 2524.4688, "encoder_q-layer.10": 3298.7241, "encoder_q-layer.11": 6937.4346, "encoder_q-layer.2": 2913.4648, "encoder_q-layer.3": 3145.2522, "encoder_q-layer.4": 3403.647, "encoder_q-layer.5": 3635.0806, "encoder_q-layer.6": 4040.1482, "encoder_q-layer.7": 4302.0122, "encoder_q-layer.8": 4467.2593, "encoder_q-layer.9": 3575.2034, "epoch": 0.66, "inbatch_neg_score": 1.1055, "inbatch_pos_score": 1.8574, "learning_rate": 1.788888888888889e-05, "loss": 2.6125, "norm_diff": 0.0306, "norm_loss": 0.0, "num_token_doc": 66.7777, "num_token_overlap": 17.8187, "num_token_query": 52.2055, "num_token_union": 73.6751, "num_word_context": 202.2629, "num_word_doc": 49.8505, "num_word_query": 39.8162, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5650.2889, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1055, "query_norm": 1.6833, "queue_k_norm": 1.7187, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2055, "sent_len_1": 66.7777, "sent_len_max_0": 128.0, "sent_len_max_1": 207.7775, "stdk": 0.0493, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 67800 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.6115, "doc_norm": 1.717, "encoder_q-embeddings": 2539.7427, "encoder_q-layer.0": 1685.6602, "encoder_q-layer.1": 1830.442, "encoder_q-layer.10": 3269.3938, "encoder_q-layer.11": 7095.9639, "encoder_q-layer.2": 2182.2104, "encoder_q-layer.3": 2318.1431, "encoder_q-layer.4": 2460.5698, "encoder_q-layer.5": 2568.8828, "encoder_q-layer.6": 2728.3213, "encoder_q-layer.7": 3105.7703, "encoder_q-layer.8": 3653.478, "encoder_q-layer.9": 3336.4343, "epoch": 0.66, "inbatch_neg_score": 1.1098, "inbatch_pos_score": 1.8623, "learning_rate": 1.7833333333333334e-05, "loss": 2.6115, "norm_diff": 0.0292, "norm_loss": 0.0, "num_token_doc": 66.8152, "num_token_overlap": 17.8529, "num_token_query": 52.3213, "num_token_union": 73.7005, "num_word_context": 202.0461, "num_word_doc": 49.8645, "num_word_query": 39.9238, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4804.9491, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1094, "query_norm": 1.6878, "queue_k_norm": 1.7211, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3213, "sent_len_1": 66.8152, "sent_len_max_0": 128.0, "sent_len_max_1": 207.52, "stdk": 0.0493, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 67900 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 2.6186, "doc_norm": 1.7215, "encoder_q-embeddings": 2491.6689, "encoder_q-layer.0": 1603.9844, "encoder_q-layer.1": 1758.3107, "encoder_q-layer.10": 3249.4417, "encoder_q-layer.11": 7184.4629, "encoder_q-layer.2": 2010.6326, "encoder_q-layer.3": 2065.887, "encoder_q-layer.4": 2264.9243, "encoder_q-layer.5": 2367.3696, "encoder_q-layer.6": 2672.259, "encoder_q-layer.7": 3133.8477, "encoder_q-layer.8": 3662.3711, "encoder_q-layer.9": 3327.3237, "epoch": 0.66, "inbatch_neg_score": 1.1131, "inbatch_pos_score": 1.875, "learning_rate": 1.777777777777778e-05, "loss": 2.6186, "norm_diff": 0.0329, "norm_loss": 0.0, "num_token_doc": 66.7747, "num_token_overlap": 17.8114, "num_token_query": 52.2978, "num_token_union": 73.7554, "num_word_context": 202.325, "num_word_doc": 49.8497, "num_word_query": 39.9036, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4694.2193, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1123, "query_norm": 1.6886, "queue_k_norm": 1.7215, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2978, "sent_len_1": 66.7747, "sent_len_max_0": 128.0, "sent_len_max_1": 207.135, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 68000 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.6208, "doc_norm": 1.7193, "encoder_q-embeddings": 8589.1895, "encoder_q-layer.0": 5557.5718, "encoder_q-layer.1": 6721.4409, "encoder_q-layer.10": 3310.4609, "encoder_q-layer.11": 7180.4912, "encoder_q-layer.2": 7834.4194, "encoder_q-layer.3": 7752.5342, "encoder_q-layer.4": 8161.1826, "encoder_q-layer.5": 8475.833, "encoder_q-layer.6": 8333.3506, "encoder_q-layer.7": 7728.0903, "encoder_q-layer.8": 4561.8169, "encoder_q-layer.9": 3522.1123, "epoch": 0.66, "inbatch_neg_score": 1.114, "inbatch_pos_score": 1.8584, "learning_rate": 1.7722222222222222e-05, "loss": 2.6208, "norm_diff": 0.0335, "norm_loss": 0.0, "num_token_doc": 66.7245, "num_token_overlap": 17.7378, "num_token_query": 52.259, "num_token_union": 73.7531, "num_word_context": 202.206, "num_word_doc": 49.7973, "num_word_query": 39.8517, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10664.3458, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1133, "query_norm": 1.6858, "queue_k_norm": 1.7217, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.259, "sent_len_1": 66.7245, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6975, "stdk": 0.0493, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 68100 }, { "accuracy": 60.791, "active_queue_size": 16384.0, "cl_loss": 2.6215, "doc_norm": 1.7207, "encoder_q-embeddings": 2938.769, "encoder_q-layer.0": 1948.422, "encoder_q-layer.1": 2134.4321, "encoder_q-layer.10": 3712.9214, "encoder_q-layer.11": 7263.2754, "encoder_q-layer.2": 2465.1255, "encoder_q-layer.3": 2627.1182, "encoder_q-layer.4": 2968.0334, "encoder_q-layer.5": 3041.9697, "encoder_q-layer.6": 3376.9297, "encoder_q-layer.7": 3901.8711, "encoder_q-layer.8": 4472.626, "encoder_q-layer.9": 3825.7598, "epoch": 0.67, "inbatch_neg_score": 1.113, "inbatch_pos_score": 1.877, "learning_rate": 1.7666666666666668e-05, "loss": 2.6215, "norm_diff": 0.0293, "norm_loss": 0.0, "num_token_doc": 66.6231, "num_token_overlap": 17.8319, "num_token_query": 52.3262, "num_token_union": 73.6075, "num_word_context": 202.3824, "num_word_doc": 49.6908, "num_word_query": 39.913, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5357.0045, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1143, "query_norm": 1.6914, "queue_k_norm": 1.7209, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3262, "sent_len_1": 66.6231, "sent_len_max_0": 128.0, "sent_len_max_1": 210.9212, "stdk": 0.0494, "stdq": 0.047, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 68200 }, { "accuracy": 60.1074, "active_queue_size": 16384.0, "cl_loss": 2.6047, "doc_norm": 1.7248, "encoder_q-embeddings": 2677.8574, "encoder_q-layer.0": 1698.8961, "encoder_q-layer.1": 1915.8671, "encoder_q-layer.10": 3372.877, "encoder_q-layer.11": 6815.5723, "encoder_q-layer.2": 2173.1199, "encoder_q-layer.3": 2295.4697, "encoder_q-layer.4": 2505.5337, "encoder_q-layer.5": 2537.9268, "encoder_q-layer.6": 2863.3518, "encoder_q-layer.7": 3142.1123, "encoder_q-layer.8": 3599.7935, "encoder_q-layer.9": 3156.5186, "epoch": 0.67, "inbatch_neg_score": 1.1144, "inbatch_pos_score": 1.8633, "learning_rate": 1.761111111111111e-05, "loss": 2.6047, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.7143, "num_token_overlap": 17.8104, "num_token_query": 52.3929, "num_token_union": 73.76, "num_word_context": 202.4392, "num_word_doc": 49.7672, "num_word_query": 39.9596, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4807.8203, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1152, "query_norm": 1.6769, "queue_k_norm": 1.725, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3929, "sent_len_1": 66.7143, "sent_len_max_0": 128.0, "sent_len_max_1": 210.38, "stdk": 0.0496, "stdq": 0.0464, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 68300 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.6155, "doc_norm": 1.7267, "encoder_q-embeddings": 2816.6926, "encoder_q-layer.0": 1788.0295, "encoder_q-layer.1": 2029.2903, "encoder_q-layer.10": 3391.4531, "encoder_q-layer.11": 7090.0977, "encoder_q-layer.2": 2342.5173, "encoder_q-layer.3": 2529.4395, "encoder_q-layer.4": 2642.7048, "encoder_q-layer.5": 3003.1641, "encoder_q-layer.6": 3013.6799, "encoder_q-layer.7": 3259.1794, "encoder_q-layer.8": 3854.6084, "encoder_q-layer.9": 3471.0605, "epoch": 0.67, "inbatch_neg_score": 1.1168, "inbatch_pos_score": 1.876, "learning_rate": 1.7555555555555556e-05, "loss": 2.6155, "norm_diff": 0.0356, "norm_loss": 0.0, "num_token_doc": 66.602, "num_token_overlap": 17.8222, "num_token_query": 52.3194, "num_token_union": 73.6245, "num_word_context": 202.0999, "num_word_doc": 49.717, "num_word_query": 39.9167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5072.5892, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1172, "query_norm": 1.6911, "queue_k_norm": 1.724, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3194, "sent_len_1": 66.602, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3025, "stdk": 0.0496, "stdq": 0.0469, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 68400 }, { "accuracy": 60.4004, "active_queue_size": 16384.0, "cl_loss": 2.6315, "doc_norm": 1.7252, "encoder_q-embeddings": 2748.3193, "encoder_q-layer.0": 1888.2896, "encoder_q-layer.1": 1985.4152, "encoder_q-layer.10": 3332.7202, "encoder_q-layer.11": 6882.7666, "encoder_q-layer.2": 2253.04, "encoder_q-layer.3": 2419.8972, "encoder_q-layer.4": 2477.9836, "encoder_q-layer.5": 2478.6951, "encoder_q-layer.6": 2728.9568, "encoder_q-layer.7": 3108.459, "encoder_q-layer.8": 3728.1084, "encoder_q-layer.9": 3306.0037, "epoch": 0.67, "inbatch_neg_score": 1.1157, "inbatch_pos_score": 1.8701, "learning_rate": 1.75e-05, "loss": 2.6315, "norm_diff": 0.0374, "norm_loss": 0.0, "num_token_doc": 66.7748, "num_token_overlap": 17.7425, "num_token_query": 52.2342, "num_token_union": 73.7586, "num_word_context": 202.367, "num_word_doc": 49.8161, "num_word_query": 39.8347, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4831.5819, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1152, "query_norm": 1.6878, "queue_k_norm": 1.7254, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2342, "sent_len_1": 66.7748, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2088, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 68500 }, { "accuracy": 61.1816, "active_queue_size": 16384.0, "cl_loss": 2.6193, "doc_norm": 1.7255, "encoder_q-embeddings": 2722.885, "encoder_q-layer.0": 1736.5764, "encoder_q-layer.1": 1935.8442, "encoder_q-layer.10": 3632.6821, "encoder_q-layer.11": 7197.4062, "encoder_q-layer.2": 2202.595, "encoder_q-layer.3": 2301.5452, "encoder_q-layer.4": 2455.9426, "encoder_q-layer.5": 2651.7268, "encoder_q-layer.6": 2842.3059, "encoder_q-layer.7": 3361.3677, "encoder_q-layer.8": 3852.8884, "encoder_q-layer.9": 3432.6648, "epoch": 0.67, "inbatch_neg_score": 1.1159, "inbatch_pos_score": 1.8682, "learning_rate": 1.7444444444444448e-05, "loss": 2.6193, "norm_diff": 0.0422, "norm_loss": 0.0, "num_token_doc": 66.8824, "num_token_overlap": 17.8008, "num_token_query": 52.2926, "num_token_union": 73.7797, "num_word_context": 202.6233, "num_word_doc": 49.8693, "num_word_query": 39.8696, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4910.2405, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1162, "query_norm": 1.6833, "queue_k_norm": 1.7263, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2926, "sent_len_1": 66.8824, "sent_len_max_0": 128.0, "sent_len_max_1": 209.475, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 68600 }, { "accuracy": 62.3535, "active_queue_size": 16384.0, "cl_loss": 2.6132, "doc_norm": 1.7288, "encoder_q-embeddings": 1180.178, "encoder_q-layer.0": 783.6704, "encoder_q-layer.1": 837.2899, "encoder_q-layer.10": 1764.6056, "encoder_q-layer.11": 3602.3931, "encoder_q-layer.2": 942.092, "encoder_q-layer.3": 997.4053, "encoder_q-layer.4": 1052.9292, "encoder_q-layer.5": 1140.3104, "encoder_q-layer.6": 1279.309, "encoder_q-layer.7": 1471.9551, "encoder_q-layer.8": 1718.7872, "encoder_q-layer.9": 1662.686, "epoch": 0.67, "inbatch_neg_score": 1.1093, "inbatch_pos_score": 1.873, "learning_rate": 1.738888888888889e-05, "loss": 2.6132, "norm_diff": 0.0497, "norm_loss": 0.0, "num_token_doc": 66.7787, "num_token_overlap": 17.8472, "num_token_query": 52.3233, "num_token_union": 73.6888, "num_word_context": 202.0356, "num_word_doc": 49.817, "num_word_query": 39.9119, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2315.1399, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1104, "query_norm": 1.679, "queue_k_norm": 1.726, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3233, "sent_len_1": 66.7787, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3325, "stdk": 0.0496, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 68700 }, { "accuracy": 62.7441, "active_queue_size": 16384.0, "cl_loss": 2.6164, "doc_norm": 1.7286, "encoder_q-embeddings": 1195.5553, "encoder_q-layer.0": 795.8255, "encoder_q-layer.1": 891.8716, "encoder_q-layer.10": 1691.5449, "encoder_q-layer.11": 3472.2876, "encoder_q-layer.2": 1028.1926, "encoder_q-layer.3": 1085.8838, "encoder_q-layer.4": 1173.0103, "encoder_q-layer.5": 1224.9652, "encoder_q-layer.6": 1356.2343, "encoder_q-layer.7": 1496.662, "encoder_q-layer.8": 1807.943, "encoder_q-layer.9": 1689.1598, "epoch": 0.67, "inbatch_neg_score": 1.1176, "inbatch_pos_score": 1.8672, "learning_rate": 1.7333333333333336e-05, "loss": 2.6164, "norm_diff": 0.0425, "norm_loss": 0.0, "num_token_doc": 66.79, "num_token_overlap": 17.839, "num_token_query": 52.3045, "num_token_union": 73.7355, "num_word_context": 202.5641, "num_word_doc": 49.8439, "num_word_query": 39.8853, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2360.968, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1172, "query_norm": 1.6862, "queue_k_norm": 1.7264, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3045, "sent_len_1": 66.79, "sent_len_max_0": 128.0, "sent_len_max_1": 209.34, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 68800 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.6, "doc_norm": 1.7222, "encoder_q-embeddings": 1410.0582, "encoder_q-layer.0": 895.8287, "encoder_q-layer.1": 997.2594, "encoder_q-layer.10": 1731.2407, "encoder_q-layer.11": 3654.3252, "encoder_q-layer.2": 1169.5986, "encoder_q-layer.3": 1207.4077, "encoder_q-layer.4": 1301.2568, "encoder_q-layer.5": 1341.2537, "encoder_q-layer.6": 1454.9869, "encoder_q-layer.7": 1606.9825, "encoder_q-layer.8": 1894.7068, "encoder_q-layer.9": 1693.6105, "epoch": 0.67, "inbatch_neg_score": 1.1216, "inbatch_pos_score": 1.8701, "learning_rate": 1.7277777777777778e-05, "loss": 2.6, "norm_diff": 0.0317, "norm_loss": 0.0, "num_token_doc": 66.8809, "num_token_overlap": 17.8588, "num_token_query": 52.3168, "num_token_union": 73.7592, "num_word_context": 202.2175, "num_word_doc": 49.9351, "num_word_query": 39.9189, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2532.9428, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1211, "query_norm": 1.6905, "queue_k_norm": 1.7277, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3168, "sent_len_1": 66.8809, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9787, "stdk": 0.0492, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 68900 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.6214, "doc_norm": 1.7279, "encoder_q-embeddings": 2771.9478, "encoder_q-layer.0": 1800.8044, "encoder_q-layer.1": 2332.8215, "encoder_q-layer.10": 1832.8899, "encoder_q-layer.11": 3697.0222, "encoder_q-layer.2": 2959.3247, "encoder_q-layer.3": 3285.3181, "encoder_q-layer.4": 3261.8911, "encoder_q-layer.5": 3480.1282, "encoder_q-layer.6": 3338.2144, "encoder_q-layer.7": 2602.1194, "encoder_q-layer.8": 2360.0066, "encoder_q-layer.9": 1938.2749, "epoch": 0.67, "inbatch_neg_score": 1.1156, "inbatch_pos_score": 1.875, "learning_rate": 1.7222222222222224e-05, "loss": 2.6214, "norm_diff": 0.0347, "norm_loss": 0.0, "num_token_doc": 66.8668, "num_token_overlap": 17.8018, "num_token_query": 52.2517, "num_token_union": 73.7267, "num_word_context": 202.5157, "num_word_doc": 49.8629, "num_word_query": 39.8407, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4087.127, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1162, "query_norm": 1.6933, "queue_k_norm": 1.7279, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2517, "sent_len_1": 66.8668, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1075, "stdk": 0.0495, "stdq": 0.0471, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 69000 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.6099, "doc_norm": 1.7301, "encoder_q-embeddings": 1562.3385, "encoder_q-layer.0": 1019.0344, "encoder_q-layer.1": 1144.0172, "encoder_q-layer.10": 1691.2662, "encoder_q-layer.11": 3483.1973, "encoder_q-layer.2": 1337.6787, "encoder_q-layer.3": 1454.2781, "encoder_q-layer.4": 1673.6956, "encoder_q-layer.5": 1757.7371, "encoder_q-layer.6": 1929.552, "encoder_q-layer.7": 1897.0079, "encoder_q-layer.8": 2173.3992, "encoder_q-layer.9": 1755.5845, "epoch": 0.67, "inbatch_neg_score": 1.119, "inbatch_pos_score": 1.8779, "learning_rate": 1.7166666666666666e-05, "loss": 2.6099, "norm_diff": 0.0361, "norm_loss": 0.0, "num_token_doc": 66.8422, "num_token_overlap": 17.8033, "num_token_query": 52.2055, "num_token_union": 73.7155, "num_word_context": 202.1478, "num_word_doc": 49.9045, "num_word_query": 39.7955, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2723.4812, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1182, "query_norm": 1.694, "queue_k_norm": 1.7286, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2055, "sent_len_1": 66.8422, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1562, "stdk": 0.0496, "stdq": 0.047, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 69100 }, { "accuracy": 62.3047, "active_queue_size": 16384.0, "cl_loss": 2.6259, "doc_norm": 1.7291, "encoder_q-embeddings": 1447.3285, "encoder_q-layer.0": 939.8809, "encoder_q-layer.1": 1044.4027, "encoder_q-layer.10": 1873.0782, "encoder_q-layer.11": 3474.7527, "encoder_q-layer.2": 1190.0659, "encoder_q-layer.3": 1282.9824, "encoder_q-layer.4": 1329.8522, "encoder_q-layer.5": 1386.5536, "encoder_q-layer.6": 1547.3776, "encoder_q-layer.7": 1796.4625, "encoder_q-layer.8": 1899.2587, "encoder_q-layer.9": 1721.5679, "epoch": 0.68, "inbatch_neg_score": 1.1203, "inbatch_pos_score": 1.875, "learning_rate": 1.7111111111111112e-05, "loss": 2.6259, "norm_diff": 0.0375, "norm_loss": 0.0, "num_token_doc": 66.8054, "num_token_overlap": 17.8301, "num_token_query": 52.3664, "num_token_union": 73.7607, "num_word_context": 202.5938, "num_word_doc": 49.834, "num_word_query": 39.9387, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2516.0235, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1211, "query_norm": 1.6916, "queue_k_norm": 1.7288, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3664, "sent_len_1": 66.8054, "sent_len_max_0": 128.0, "sent_len_max_1": 209.115, "stdk": 0.0495, "stdq": 0.0468, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 69200 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.6077, "doc_norm": 1.7239, "encoder_q-embeddings": 1391.4264, "encoder_q-layer.0": 950.0169, "encoder_q-layer.1": 1026.2378, "encoder_q-layer.10": 1592.947, "encoder_q-layer.11": 3365.5383, "encoder_q-layer.2": 1153.7832, "encoder_q-layer.3": 1207.7767, "encoder_q-layer.4": 1360.1116, "encoder_q-layer.5": 1358.0773, "encoder_q-layer.6": 1456.4453, "encoder_q-layer.7": 1563.813, "encoder_q-layer.8": 1743.8679, "encoder_q-layer.9": 1579.1772, "epoch": 0.68, "inbatch_neg_score": 1.1207, "inbatch_pos_score": 1.875, "learning_rate": 1.7055555555555554e-05, "loss": 2.6077, "norm_diff": 0.0352, "norm_loss": 0.0, "num_token_doc": 66.782, "num_token_overlap": 17.8529, "num_token_query": 52.3169, "num_token_union": 73.6983, "num_word_context": 202.4477, "num_word_doc": 49.8095, "num_word_query": 39.8731, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2405.9664, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1201, "query_norm": 1.6886, "queue_k_norm": 1.7282, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3169, "sent_len_1": 66.782, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0188, "stdk": 0.0493, "stdq": 0.0467, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 69300 }, { "accuracy": 61.5234, "active_queue_size": 16384.0, "cl_loss": 2.6136, "doc_norm": 1.7289, "encoder_q-embeddings": 1231.6079, "encoder_q-layer.0": 776.5112, "encoder_q-layer.1": 877.4767, "encoder_q-layer.10": 1728.6577, "encoder_q-layer.11": 3473.2925, "encoder_q-layer.2": 981.4852, "encoder_q-layer.3": 1053.7795, "encoder_q-layer.4": 1139.6559, "encoder_q-layer.5": 1201.4989, "encoder_q-layer.6": 1338.3572, "encoder_q-layer.7": 1500.1824, "encoder_q-layer.8": 1750.5377, "encoder_q-layer.9": 1635.9517, "epoch": 0.68, "inbatch_neg_score": 1.1248, "inbatch_pos_score": 1.875, "learning_rate": 1.7000000000000003e-05, "loss": 2.6136, "norm_diff": 0.0427, "norm_loss": 0.0, "num_token_doc": 66.8769, "num_token_overlap": 17.8126, "num_token_query": 52.3248, "num_token_union": 73.7933, "num_word_context": 202.5474, "num_word_doc": 49.8941, "num_word_query": 39.9014, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2342.935, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.124, "query_norm": 1.6862, "queue_k_norm": 1.7291, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3248, "sent_len_1": 66.8769, "sent_len_max_0": 128.0, "sent_len_max_1": 211.615, "stdk": 0.0494, "stdq": 0.0465, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 69400 }, { "accuracy": 60.4004, "active_queue_size": 16384.0, "cl_loss": 2.6221, "doc_norm": 1.7228, "encoder_q-embeddings": 1246.0308, "encoder_q-layer.0": 802.03, "encoder_q-layer.1": 873.7966, "encoder_q-layer.10": 1758.8663, "encoder_q-layer.11": 3667.7383, "encoder_q-layer.2": 994.5566, "encoder_q-layer.3": 1048.7488, "encoder_q-layer.4": 1112.8359, "encoder_q-layer.5": 1146.1868, "encoder_q-layer.6": 1369.1555, "encoder_q-layer.7": 1640.6881, "encoder_q-layer.8": 1907.2415, "encoder_q-layer.9": 1694.3257, "epoch": 0.68, "inbatch_neg_score": 1.1305, "inbatch_pos_score": 1.876, "learning_rate": 1.6944444444444446e-05, "loss": 2.6221, "norm_diff": 0.0334, "norm_loss": 0.0, "num_token_doc": 66.7185, "num_token_overlap": 17.8012, "num_token_query": 52.2185, "num_token_union": 73.6907, "num_word_context": 202.5766, "num_word_doc": 49.8006, "num_word_query": 39.8433, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2452.1187, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1299, "query_norm": 1.6894, "queue_k_norm": 1.7314, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2185, "sent_len_1": 66.7185, "sent_len_max_0": 128.0, "sent_len_max_1": 207.845, "stdk": 0.0492, "stdq": 0.0464, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 69500 }, { "accuracy": 59.1309, "active_queue_size": 16384.0, "cl_loss": 2.6097, "doc_norm": 1.7284, "encoder_q-embeddings": 1344.0073, "encoder_q-layer.0": 848.8593, "encoder_q-layer.1": 947.5948, "encoder_q-layer.10": 1764.6718, "encoder_q-layer.11": 3498.6951, "encoder_q-layer.2": 1068.4779, "encoder_q-layer.3": 1122.5702, "encoder_q-layer.4": 1220.4791, "encoder_q-layer.5": 1320.9832, "encoder_q-layer.6": 1487.7511, "encoder_q-layer.7": 1593.0486, "encoder_q-layer.8": 1873.109, "encoder_q-layer.9": 1714.6965, "epoch": 0.68, "inbatch_neg_score": 1.1361, "inbatch_pos_score": 1.8887, "learning_rate": 1.688888888888889e-05, "loss": 2.6097, "norm_diff": 0.0312, "norm_loss": 0.0, "num_token_doc": 66.8142, "num_token_overlap": 17.8087, "num_token_query": 52.2221, "num_token_union": 73.6648, "num_word_context": 202.1887, "num_word_doc": 49.8767, "num_word_query": 39.8286, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2439.9866, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1348, "query_norm": 1.6972, "queue_k_norm": 1.7304, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2221, "sent_len_1": 66.8142, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3887, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 69600 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 2.6225, "doc_norm": 1.7326, "encoder_q-embeddings": 1238.7845, "encoder_q-layer.0": 791.6512, "encoder_q-layer.1": 844.9818, "encoder_q-layer.10": 1709.4908, "encoder_q-layer.11": 3428.917, "encoder_q-layer.2": 929.4925, "encoder_q-layer.3": 1005.8643, "encoder_q-layer.4": 1089.9139, "encoder_q-layer.5": 1117.5989, "encoder_q-layer.6": 1309.0283, "encoder_q-layer.7": 1488.728, "encoder_q-layer.8": 1805.7122, "encoder_q-layer.9": 1614.4049, "epoch": 0.68, "inbatch_neg_score": 1.1394, "inbatch_pos_score": 1.8916, "learning_rate": 1.6833333333333334e-05, "loss": 2.6225, "norm_diff": 0.0343, "norm_loss": 0.0, "num_token_doc": 66.6779, "num_token_overlap": 17.7879, "num_token_query": 52.1817, "num_token_union": 73.6209, "num_word_context": 202.0508, "num_word_doc": 49.7432, "num_word_query": 39.7797, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2305.271, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1396, "query_norm": 1.6983, "queue_k_norm": 1.7306, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1817, "sent_len_1": 66.6779, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4588, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 69700 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.6194, "doc_norm": 1.7328, "encoder_q-embeddings": 1295.061, "encoder_q-layer.0": 825.753, "encoder_q-layer.1": 906.582, "encoder_q-layer.10": 1687.4298, "encoder_q-layer.11": 3495.8943, "encoder_q-layer.2": 1033.3406, "encoder_q-layer.3": 1070.2607, "encoder_q-layer.4": 1140.3413, "encoder_q-layer.5": 1187.0043, "encoder_q-layer.6": 1300.6307, "encoder_q-layer.7": 1405.2089, "encoder_q-layer.8": 1786.4171, "encoder_q-layer.9": 1709.6858, "epoch": 0.68, "inbatch_neg_score": 1.1473, "inbatch_pos_score": 1.8906, "learning_rate": 1.677777777777778e-05, "loss": 2.6194, "norm_diff": 0.0362, "norm_loss": 0.0, "num_token_doc": 66.8516, "num_token_overlap": 17.8326, "num_token_query": 52.394, "num_token_union": 73.8071, "num_word_context": 202.6355, "num_word_doc": 49.8946, "num_word_query": 39.9869, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2358.6394, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1465, "query_norm": 1.6965, "queue_k_norm": 1.7315, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.394, "sent_len_1": 66.8516, "sent_len_max_0": 128.0, "sent_len_max_1": 209.265, "stdk": 0.0495, "stdq": 0.0462, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 69800 }, { "accuracy": 61.8652, "active_queue_size": 16384.0, "cl_loss": 2.6069, "doc_norm": 1.7352, "encoder_q-embeddings": 1199.2859, "encoder_q-layer.0": 788.1312, "encoder_q-layer.1": 901.117, "encoder_q-layer.10": 1776.8022, "encoder_q-layer.11": 3659.3005, "encoder_q-layer.2": 1006.0925, "encoder_q-layer.3": 1069.6925, "encoder_q-layer.4": 1153.2212, "encoder_q-layer.5": 1205.3871, "encoder_q-layer.6": 1336.4857, "encoder_q-layer.7": 1539.051, "encoder_q-layer.8": 1979.1316, "encoder_q-layer.9": 1682.2399, "epoch": 0.68, "inbatch_neg_score": 1.1519, "inbatch_pos_score": 1.9023, "learning_rate": 1.6722222222222222e-05, "loss": 2.6069, "norm_diff": 0.0251, "norm_loss": 0.0, "num_token_doc": 66.6929, "num_token_overlap": 17.8147, "num_token_query": 52.1955, "num_token_union": 73.6021, "num_word_context": 202.0539, "num_word_doc": 49.7519, "num_word_query": 39.7942, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2446.1484, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1523, "query_norm": 1.7101, "queue_k_norm": 1.7323, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1955, "sent_len_1": 66.6929, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4238, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 69900 }, { "accuracy": 60.9863, "active_queue_size": 16384.0, "cl_loss": 2.6235, "doc_norm": 1.7328, "encoder_q-embeddings": 1227.629, "encoder_q-layer.0": 767.6464, "encoder_q-layer.1": 839.7347, "encoder_q-layer.10": 1669.8033, "encoder_q-layer.11": 3456.6965, "encoder_q-layer.2": 964.5469, "encoder_q-layer.3": 1036.1326, "encoder_q-layer.4": 1116.0824, "encoder_q-layer.5": 1113.3956, "encoder_q-layer.6": 1326.2334, "encoder_q-layer.7": 1464.9263, "encoder_q-layer.8": 1739.9714, "encoder_q-layer.9": 1661.7073, "epoch": 0.68, "inbatch_neg_score": 1.1581, "inbatch_pos_score": 1.8936, "learning_rate": 1.6666666666666667e-05, "loss": 2.6235, "norm_diff": 0.029, "norm_loss": 0.0, "num_token_doc": 66.5812, "num_token_overlap": 17.7975, "num_token_query": 52.2651, "num_token_union": 73.5723, "num_word_context": 202.0205, "num_word_doc": 49.6644, "num_word_query": 39.8574, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2328.2422, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1572, "query_norm": 1.7037, "queue_k_norm": 1.7339, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2651, "sent_len_1": 66.5812, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0825, "stdk": 0.0494, "stdq": 0.0463, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 70000 }, { "dev_runtime": 26.3637, "dev_samples_per_second": 1.214, "dev_steps_per_second": 0.038, "epoch": 0.68, "step": 70000, "test_accuracy": 94.39697265625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.31526046991348267, "test_doc_norm": 1.7026302814483643, "test_inbatch_neg_score": 1.4524195194244385, "test_inbatch_pos_score": 2.468806743621826, "test_loss": 0.31526046991348267, "test_loss_align": 1.0422990322113037, "test_loss_unif": 1.4446170330047607, "test_loss_unif_q@queue": 1.4446170330047607, "test_norm_diff": 0.04699883237481117, "test_norm_loss": 0.0, "test_q@queue_neg_score": 1.1516295671463013, "test_query_norm": 1.7496291399002075, "test_queue_k_norm": 1.733569860458374, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04379366338253021, "test_stdq": 0.043976251035928726, "test_stdqueue_k": 0.04952038452029228, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.3637, "dev_samples_per_second": 1.214, "dev_steps_per_second": 0.038, "epoch": 0.68, "eval_beir-arguana_ndcg@10": 0.40152, "eval_beir-arguana_recall@10": 0.68421, "eval_beir-arguana_recall@100": 0.94666, "eval_beir-arguana_recall@20": 0.80797, "eval_beir-avg_ndcg@10": 0.3803593333333333, "eval_beir-avg_recall@10": 0.4539435, "eval_beir-avg_recall@100": 0.6338015, "eval_beir-avg_recall@20": 0.5167015833333334, "eval_beir-cqadupstack_ndcg@10": 0.29029333333333335, "eval_beir-cqadupstack_recall@10": 0.38770499999999997, "eval_beir-cqadupstack_recall@100": 0.617445, "eval_beir-cqadupstack_recall@20": 0.4531958333333333, "eval_beir-fiqa_ndcg@10": 0.25873, "eval_beir-fiqa_recall@10": 0.32017, "eval_beir-fiqa_recall@100": 0.60205, "eval_beir-fiqa_recall@20": 0.40426, "eval_beir-nfcorpus_ndcg@10": 0.29154, "eval_beir-nfcorpus_recall@10": 0.14485, "eval_beir-nfcorpus_recall@100": 0.27832, "eval_beir-nfcorpus_recall@20": 0.17658, "eval_beir-nq_ndcg@10": 0.28764, "eval_beir-nq_recall@10": 0.46427, "eval_beir-nq_recall@100": 0.81006, "eval_beir-nq_recall@20": 0.59142, "eval_beir-quora_ndcg@10": 0.77888, "eval_beir-quora_recall@10": 0.8882, "eval_beir-quora_recall@100": 0.9787, "eval_beir-quora_recall@20": 0.92999, "eval_beir-scidocs_ndcg@10": 0.16322, "eval_beir-scidocs_recall@10": 0.16888, "eval_beir-scidocs_recall@100": 0.37633, "eval_beir-scidocs_recall@20": 0.22957, "eval_beir-scifact_ndcg@10": 0.642, "eval_beir-scifact_recall@10": 0.79633, "eval_beir-scifact_recall@100": 0.90156, "eval_beir-scifact_recall@20": 0.84256, "eval_beir-trec-covid_ndcg@10": 0.50641, "eval_beir-trec-covid_recall@10": 0.548, "eval_beir-trec-covid_recall@100": 0.4066, "eval_beir-trec-covid_recall@20": 0.521, "eval_beir-webis-touche2020_ndcg@10": 0.18336, "eval_beir-webis-touche2020_recall@10": 0.13682, "eval_beir-webis-touche2020_recall@100": 0.42029, "eval_beir-webis-touche2020_recall@20": 0.21047, "eval_senteval-avg_sts": 0.747801598327142, "eval_senteval-sickr_spearman": 0.7326501868246844, "eval_senteval-stsb_spearman": 0.7629530098295997, "step": 70000, "test_accuracy": 94.39697265625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.31526046991348267, "test_doc_norm": 1.7026302814483643, "test_inbatch_neg_score": 1.4524195194244385, "test_inbatch_pos_score": 2.468806743621826, "test_loss": 0.31526046991348267, "test_loss_align": 1.0422990322113037, "test_loss_unif": 1.4446170330047607, "test_loss_unif_q@queue": 1.4446170330047607, "test_norm_diff": 0.04699883237481117, "test_norm_loss": 0.0, "test_q@queue_neg_score": 1.1516295671463013, "test_query_norm": 1.7496291399002075, "test_queue_k_norm": 1.733569860458374, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04379366338253021, "test_stdq": 0.043976251035928726, "test_stdqueue_k": 0.04952038452029228, "test_stdqueue_q": 0.0 }, { "accuracy": 62.4023, "active_queue_size": 16384.0, "cl_loss": 2.6159, "doc_norm": 1.7325, "encoder_q-embeddings": 1203.4337, "encoder_q-layer.0": 774.8282, "encoder_q-layer.1": 849.2732, "encoder_q-layer.10": 1671.4116, "encoder_q-layer.11": 3504.0142, "encoder_q-layer.2": 963.0985, "encoder_q-layer.3": 1010.6204, "encoder_q-layer.4": 1079.3074, "encoder_q-layer.5": 1098.6256, "encoder_q-layer.6": 1299.4373, "encoder_q-layer.7": 1379.0221, "encoder_q-layer.8": 1624.7406, "encoder_q-layer.9": 1583.6244, "epoch": 0.68, "inbatch_neg_score": 1.1638, "inbatch_pos_score": 1.917, "learning_rate": 1.661111111111111e-05, "loss": 2.6159, "norm_diff": 0.0218, "norm_loss": 0.0, "num_token_doc": 66.7192, "num_token_overlap": 17.7981, "num_token_query": 52.2802, "num_token_union": 73.6932, "num_word_context": 202.4595, "num_word_doc": 49.8043, "num_word_query": 39.8608, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2280.5268, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1641, "query_norm": 1.7107, "queue_k_norm": 1.7349, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2802, "sent_len_1": 66.7192, "sent_len_max_0": 128.0, "sent_len_max_1": 208.61, "stdk": 0.0494, "stdq": 0.0465, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 70100 }, { "accuracy": 63.8672, "active_queue_size": 16384.0, "cl_loss": 2.6005, "doc_norm": 1.736, "encoder_q-embeddings": 1211.2889, "encoder_q-layer.0": 777.7662, "encoder_q-layer.1": 842.2386, "encoder_q-layer.10": 1601.7015, "encoder_q-layer.11": 3357.8579, "encoder_q-layer.2": 948.2357, "encoder_q-layer.3": 1030.053, "encoder_q-layer.4": 1092.6946, "encoder_q-layer.5": 1126.1879, "encoder_q-layer.6": 1282.7214, "encoder_q-layer.7": 1456.8817, "encoder_q-layer.8": 1689.6871, "encoder_q-layer.9": 1595.6072, "epoch": 0.69, "inbatch_neg_score": 1.1703, "inbatch_pos_score": 1.9434, "learning_rate": 1.655555555555556e-05, "loss": 2.6005, "norm_diff": 0.0208, "norm_loss": 0.0, "num_token_doc": 66.8568, "num_token_overlap": 17.8776, "num_token_query": 52.3925, "num_token_union": 73.793, "num_word_context": 202.5072, "num_word_doc": 49.9059, "num_word_query": 39.9689, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2267.424, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1709, "query_norm": 1.717, "queue_k_norm": 1.736, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3925, "sent_len_1": 66.8568, "sent_len_max_0": 128.0, "sent_len_max_1": 208.695, "stdk": 0.0495, "stdq": 0.0467, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 70200 }, { "accuracy": 60.9863, "active_queue_size": 16384.0, "cl_loss": 2.6029, "doc_norm": 1.7387, "encoder_q-embeddings": 1381.0459, "encoder_q-layer.0": 888.1396, "encoder_q-layer.1": 985.7297, "encoder_q-layer.10": 1716.8813, "encoder_q-layer.11": 3579.5095, "encoder_q-layer.2": 1078.7473, "encoder_q-layer.3": 1094.3958, "encoder_q-layer.4": 1156.0389, "encoder_q-layer.5": 1197.5011, "encoder_q-layer.6": 1403.687, "encoder_q-layer.7": 1566.2527, "encoder_q-layer.8": 1873.8922, "encoder_q-layer.9": 1783.2722, "epoch": 0.69, "inbatch_neg_score": 1.1795, "inbatch_pos_score": 1.9404, "learning_rate": 1.65e-05, "loss": 2.6029, "norm_diff": 0.0167, "norm_loss": 0.0, "num_token_doc": 66.7807, "num_token_overlap": 17.8366, "num_token_query": 52.3318, "num_token_union": 73.7359, "num_word_context": 202.3095, "num_word_doc": 49.8368, "num_word_query": 39.9156, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2496.5226, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1797, "query_norm": 1.7242, "queue_k_norm": 1.7357, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3318, "sent_len_1": 66.7807, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1337, "stdk": 0.0496, "stdq": 0.0471, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 70300 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.6132, "doc_norm": 1.7374, "encoder_q-embeddings": 1233.8728, "encoder_q-layer.0": 800.2161, "encoder_q-layer.1": 896.7256, "encoder_q-layer.10": 2117.5767, "encoder_q-layer.11": 3941.9036, "encoder_q-layer.2": 1010.1045, "encoder_q-layer.3": 1066.6464, "encoder_q-layer.4": 1175.3577, "encoder_q-layer.5": 1253.236, "encoder_q-layer.6": 1416.6903, "encoder_q-layer.7": 1628.8231, "encoder_q-layer.8": 2069.8113, "encoder_q-layer.9": 1953.4594, "epoch": 0.69, "inbatch_neg_score": 1.1807, "inbatch_pos_score": 1.9346, "learning_rate": 1.6444444444444447e-05, "loss": 2.6132, "norm_diff": 0.0226, "norm_loss": 0.0, "num_token_doc": 66.6708, "num_token_overlap": 17.7567, "num_token_query": 52.1462, "num_token_union": 73.6133, "num_word_context": 202.1276, "num_word_doc": 49.7395, "num_word_query": 39.7579, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2566.9311, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1816, "query_norm": 1.7148, "queue_k_norm": 1.7377, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1462, "sent_len_1": 66.6708, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6937, "stdk": 0.0494, "stdq": 0.0467, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 70400 }, { "accuracy": 61.2305, "active_queue_size": 16384.0, "cl_loss": 2.6005, "doc_norm": 1.7371, "encoder_q-embeddings": 1274.9929, "encoder_q-layer.0": 857.3318, "encoder_q-layer.1": 943.548, "encoder_q-layer.10": 1728.1057, "encoder_q-layer.11": 3703.6482, "encoder_q-layer.2": 1151.5972, "encoder_q-layer.3": 1159.7521, "encoder_q-layer.4": 1261.7025, "encoder_q-layer.5": 1257.6073, "encoder_q-layer.6": 1414.1893, "encoder_q-layer.7": 1640.611, "encoder_q-layer.8": 1857.5242, "encoder_q-layer.9": 1613.9357, "epoch": 0.69, "inbatch_neg_score": 1.1844, "inbatch_pos_score": 1.9512, "learning_rate": 1.638888888888889e-05, "loss": 2.6005, "norm_diff": 0.0238, "norm_loss": 0.0, "num_token_doc": 66.6778, "num_token_overlap": 17.7768, "num_token_query": 52.2695, "num_token_union": 73.6944, "num_word_context": 202.2539, "num_word_doc": 49.7679, "num_word_query": 39.8586, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2492.3213, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1846, "query_norm": 1.7132, "queue_k_norm": 1.7399, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2695, "sent_len_1": 66.6778, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3137, "stdk": 0.0494, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 70500 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.6118, "doc_norm": 1.7422, "encoder_q-embeddings": 1388.8573, "encoder_q-layer.0": 914.3209, "encoder_q-layer.1": 1063.3246, "encoder_q-layer.10": 1856.547, "encoder_q-layer.11": 3765.9456, "encoder_q-layer.2": 1248.4222, "encoder_q-layer.3": 1313.5559, "encoder_q-layer.4": 1459.1157, "encoder_q-layer.5": 1582.1827, "encoder_q-layer.6": 1917.6539, "encoder_q-layer.7": 2089.4421, "encoder_q-layer.8": 2082.0312, "encoder_q-layer.9": 1802.1066, "epoch": 0.69, "inbatch_neg_score": 1.1881, "inbatch_pos_score": 1.9424, "learning_rate": 1.6333333333333335e-05, "loss": 2.6118, "norm_diff": 0.0368, "norm_loss": 0.0, "num_token_doc": 66.6855, "num_token_overlap": 17.7998, "num_token_query": 52.1372, "num_token_union": 73.5924, "num_word_context": 202.0208, "num_word_doc": 49.7142, "num_word_query": 39.7627, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2748.5604, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1875, "query_norm": 1.7054, "queue_k_norm": 1.7395, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1372, "sent_len_1": 66.6855, "sent_len_max_0": 128.0, "sent_len_max_1": 210.1, "stdk": 0.0496, "stdq": 0.0464, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 70600 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 2.6116, "doc_norm": 1.744, "encoder_q-embeddings": 2546.1438, "encoder_q-layer.0": 1588.2372, "encoder_q-layer.1": 1690.3167, "encoder_q-layer.10": 3475.624, "encoder_q-layer.11": 6639.8721, "encoder_q-layer.2": 1861.0831, "encoder_q-layer.3": 1980.3344, "encoder_q-layer.4": 2163.4968, "encoder_q-layer.5": 2320.6763, "encoder_q-layer.6": 2542.2805, "encoder_q-layer.7": 2907.6238, "encoder_q-layer.8": 3643.7698, "encoder_q-layer.9": 3219.717, "epoch": 0.69, "inbatch_neg_score": 1.1938, "inbatch_pos_score": 1.9482, "learning_rate": 1.6277777777777777e-05, "loss": 2.6116, "norm_diff": 0.0248, "norm_loss": 0.0, "num_token_doc": 66.6196, "num_token_overlap": 17.7873, "num_token_query": 52.1835, "num_token_union": 73.5727, "num_word_context": 202.3042, "num_word_doc": 49.7135, "num_word_query": 39.8063, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4569.8846, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1934, "query_norm": 1.7192, "queue_k_norm": 1.7406, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1835, "sent_len_1": 66.6196, "sent_len_max_0": 128.0, "sent_len_max_1": 208.955, "stdk": 0.0496, "stdq": 0.047, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 70700 }, { "accuracy": 59.5215, "active_queue_size": 16384.0, "cl_loss": 2.6045, "doc_norm": 1.7435, "encoder_q-embeddings": 2399.9397, "encoder_q-layer.0": 1549.9159, "encoder_q-layer.1": 1694.3774, "encoder_q-layer.10": 3570.176, "encoder_q-layer.11": 7141.9175, "encoder_q-layer.2": 1884.6245, "encoder_q-layer.3": 2017.2526, "encoder_q-layer.4": 2272.8804, "encoder_q-layer.5": 2399.3918, "encoder_q-layer.6": 2781.041, "encoder_q-layer.7": 3215.6021, "encoder_q-layer.8": 3838.8491, "encoder_q-layer.9": 3404.939, "epoch": 0.69, "inbatch_neg_score": 1.1945, "inbatch_pos_score": 1.9424, "learning_rate": 1.6222222222222223e-05, "loss": 2.6045, "norm_diff": 0.0313, "norm_loss": 0.0, "num_token_doc": 66.6565, "num_token_overlap": 17.8219, "num_token_query": 52.264, "num_token_union": 73.6237, "num_word_context": 202.208, "num_word_doc": 49.7372, "num_word_query": 39.8451, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4812.8192, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1953, "query_norm": 1.7122, "queue_k_norm": 1.7442, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.264, "sent_len_1": 66.6565, "sent_len_max_0": 128.0, "sent_len_max_1": 207.57, "stdk": 0.0495, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 70800 }, { "accuracy": 61.7188, "active_queue_size": 16384.0, "cl_loss": 2.6096, "doc_norm": 1.7437, "encoder_q-embeddings": 2736.4529, "encoder_q-layer.0": 1822.4541, "encoder_q-layer.1": 2127.7969, "encoder_q-layer.10": 3248.1179, "encoder_q-layer.11": 6822.9531, "encoder_q-layer.2": 2418.3796, "encoder_q-layer.3": 2622.936, "encoder_q-layer.4": 2853.0835, "encoder_q-layer.5": 2934.5933, "encoder_q-layer.6": 3250.083, "encoder_q-layer.7": 3646.1462, "encoder_q-layer.8": 3907.3547, "encoder_q-layer.9": 3396.4448, "epoch": 0.69, "inbatch_neg_score": 1.1947, "inbatch_pos_score": 1.9531, "learning_rate": 1.6166666666666665e-05, "loss": 2.6096, "norm_diff": 0.0385, "norm_loss": 0.0, "num_token_doc": 66.793, "num_token_overlap": 17.8194, "num_token_query": 52.2931, "num_token_union": 73.7103, "num_word_context": 202.3072, "num_word_doc": 49.8322, "num_word_query": 39.8636, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5062.4738, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1953, "query_norm": 1.7053, "queue_k_norm": 1.7446, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2931, "sent_len_1": 66.793, "sent_len_max_0": 128.0, "sent_len_max_1": 210.6962, "stdk": 0.0495, "stdq": 0.0464, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 70900 }, { "accuracy": 59.6191, "active_queue_size": 16384.0, "cl_loss": 2.5967, "doc_norm": 1.7491, "encoder_q-embeddings": 2712.813, "encoder_q-layer.0": 1799.142, "encoder_q-layer.1": 2055.6836, "encoder_q-layer.10": 3421.9592, "encoder_q-layer.11": 7249.709, "encoder_q-layer.2": 2364.6633, "encoder_q-layer.3": 2570.3733, "encoder_q-layer.4": 2659.3574, "encoder_q-layer.5": 2859.3818, "encoder_q-layer.6": 3187.6775, "encoder_q-layer.7": 3393.1108, "encoder_q-layer.8": 3998.6755, "encoder_q-layer.9": 3417.1248, "epoch": 0.69, "inbatch_neg_score": 1.1929, "inbatch_pos_score": 1.9463, "learning_rate": 1.6111111111111115e-05, "loss": 2.5967, "norm_diff": 0.0347, "norm_loss": 0.0, "num_token_doc": 66.8383, "num_token_overlap": 17.8366, "num_token_query": 52.2744, "num_token_union": 73.7239, "num_word_context": 202.5896, "num_word_doc": 49.8886, "num_word_query": 39.895, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5085.3327, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1934, "query_norm": 1.7144, "queue_k_norm": 1.7454, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2744, "sent_len_1": 66.8383, "sent_len_max_0": 128.0, "sent_len_max_1": 208.87, "stdk": 0.0497, "stdq": 0.047, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 71000 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.6067, "doc_norm": 1.746, "encoder_q-embeddings": 3108.6067, "encoder_q-layer.0": 2064.2925, "encoder_q-layer.1": 2407.7446, "encoder_q-layer.10": 3452.6887, "encoder_q-layer.11": 7391.7812, "encoder_q-layer.2": 2841.4187, "encoder_q-layer.3": 3077.002, "encoder_q-layer.4": 3371.9973, "encoder_q-layer.5": 3396.0156, "encoder_q-layer.6": 3470.9417, "encoder_q-layer.7": 3580.0083, "encoder_q-layer.8": 3971.4839, "encoder_q-layer.9": 3528.3232, "epoch": 0.69, "inbatch_neg_score": 1.1959, "inbatch_pos_score": 1.9375, "learning_rate": 1.6055555555555557e-05, "loss": 2.6067, "norm_diff": 0.0442, "norm_loss": 0.0, "num_token_doc": 66.8334, "num_token_overlap": 17.8093, "num_token_query": 52.3198, "num_token_union": 73.7679, "num_word_context": 202.7014, "num_word_doc": 49.887, "num_word_query": 39.8929, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5477.9894, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1953, "query_norm": 1.7018, "queue_k_norm": 1.7473, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3198, "sent_len_1": 66.8334, "sent_len_max_0": 128.0, "sent_len_max_1": 208.16, "stdk": 0.0495, "stdq": 0.0463, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 71100 }, { "accuracy": 61.7676, "active_queue_size": 16384.0, "cl_loss": 2.6097, "doc_norm": 1.7472, "encoder_q-embeddings": 2468.1465, "encoder_q-layer.0": 1563.0404, "encoder_q-layer.1": 1693.8169, "encoder_q-layer.10": 3228.7014, "encoder_q-layer.11": 6889.7568, "encoder_q-layer.2": 1915.5994, "encoder_q-layer.3": 2076.2344, "encoder_q-layer.4": 2197.2944, "encoder_q-layer.5": 2243.092, "encoder_q-layer.6": 2522.842, "encoder_q-layer.7": 2941.8416, "encoder_q-layer.8": 3460.9387, "encoder_q-layer.9": 3139.5498, "epoch": 0.7, "inbatch_neg_score": 1.1917, "inbatch_pos_score": 1.9375, "learning_rate": 1.6000000000000003e-05, "loss": 2.6097, "norm_diff": 0.0549, "norm_loss": 0.0, "num_token_doc": 66.982, "num_token_overlap": 17.8597, "num_token_query": 52.3066, "num_token_union": 73.8083, "num_word_context": 202.4401, "num_word_doc": 49.9848, "num_word_query": 39.8924, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4606.7543, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1924, "query_norm": 1.6923, "queue_k_norm": 1.7472, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3066, "sent_len_1": 66.982, "sent_len_max_0": 128.0, "sent_len_max_1": 210.14, "stdk": 0.0495, "stdq": 0.046, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 71200 }, { "accuracy": 61.5723, "active_queue_size": 16384.0, "cl_loss": 2.6173, "doc_norm": 1.7476, "encoder_q-embeddings": 2308.7441, "encoder_q-layer.0": 1567.2184, "encoder_q-layer.1": 1649.0731, "encoder_q-layer.10": 3486.7202, "encoder_q-layer.11": 6828.4834, "encoder_q-layer.2": 1866.6646, "encoder_q-layer.3": 1951.1459, "encoder_q-layer.4": 2119.2029, "encoder_q-layer.5": 2108.9822, "encoder_q-layer.6": 2502.6826, "encoder_q-layer.7": 2835.7327, "encoder_q-layer.8": 3507.157, "encoder_q-layer.9": 3160.5322, "epoch": 0.7, "inbatch_neg_score": 1.1955, "inbatch_pos_score": 1.9385, "learning_rate": 1.5944444444444445e-05, "loss": 2.6173, "norm_diff": 0.0549, "norm_loss": 0.0, "num_token_doc": 66.7924, "num_token_overlap": 17.7862, "num_token_query": 52.2615, "num_token_union": 73.717, "num_word_context": 202.2349, "num_word_doc": 49.8346, "num_word_query": 39.8527, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4551.0235, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1953, "query_norm": 1.6927, "queue_k_norm": 1.7469, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2615, "sent_len_1": 66.7924, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1438, "stdk": 0.0495, "stdq": 0.046, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 71300 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.6082, "doc_norm": 1.7463, "encoder_q-embeddings": 5014.0972, "encoder_q-layer.0": 3350.3455, "encoder_q-layer.1": 3780.844, "encoder_q-layer.10": 3379.0249, "encoder_q-layer.11": 7281.3564, "encoder_q-layer.2": 4566.5605, "encoder_q-layer.3": 4464.4092, "encoder_q-layer.4": 4551.4185, "encoder_q-layer.5": 5074.2026, "encoder_q-layer.6": 5050.5635, "encoder_q-layer.7": 4602.8926, "encoder_q-layer.8": 4126.252, "encoder_q-layer.9": 3395.1272, "epoch": 0.7, "inbatch_neg_score": 1.194, "inbatch_pos_score": 1.9453, "learning_rate": 1.588888888888889e-05, "loss": 2.6082, "norm_diff": 0.0351, "norm_loss": 0.0, "num_token_doc": 66.7755, "num_token_overlap": 17.8332, "num_token_query": 52.3731, "num_token_union": 73.7323, "num_word_context": 202.3369, "num_word_doc": 49.8224, "num_word_query": 39.9488, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6902.2297, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1943, "query_norm": 1.7111, "queue_k_norm": 1.7483, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3731, "sent_len_1": 66.7755, "sent_len_max_0": 128.0, "sent_len_max_1": 210.7287, "stdk": 0.0494, "stdq": 0.0469, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 71400 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.6104, "doc_norm": 1.7484, "encoder_q-embeddings": 2741.2224, "encoder_q-layer.0": 1685.8744, "encoder_q-layer.1": 1862.4177, "encoder_q-layer.10": 3603.9631, "encoder_q-layer.11": 7808.6572, "encoder_q-layer.2": 2183.6208, "encoder_q-layer.3": 2428.2996, "encoder_q-layer.4": 2725.7446, "encoder_q-layer.5": 2906.4551, "encoder_q-layer.6": 3230.6555, "encoder_q-layer.7": 3448.9941, "encoder_q-layer.8": 3862.0237, "encoder_q-layer.9": 3543.2666, "epoch": 0.7, "inbatch_neg_score": 1.1942, "inbatch_pos_score": 1.9414, "learning_rate": 1.5833333333333333e-05, "loss": 2.6104, "norm_diff": 0.0502, "norm_loss": 0.0, "num_token_doc": 67.0188, "num_token_overlap": 17.8326, "num_token_query": 52.2407, "num_token_union": 73.8099, "num_word_context": 202.5835, "num_word_doc": 50.0107, "num_word_query": 39.8221, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5155.6803, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1943, "query_norm": 1.6982, "queue_k_norm": 1.7492, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2407, "sent_len_1": 67.0188, "sent_len_max_0": 128.0, "sent_len_max_1": 207.215, "stdk": 0.0495, "stdq": 0.0463, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 71500 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.6305, "doc_norm": 1.7479, "encoder_q-embeddings": 2589.3735, "encoder_q-layer.0": 1697.9816, "encoder_q-layer.1": 1842.4789, "encoder_q-layer.10": 3543.4541, "encoder_q-layer.11": 7210.9028, "encoder_q-layer.2": 2069.3652, "encoder_q-layer.3": 2247.3274, "encoder_q-layer.4": 2630.5242, "encoder_q-layer.5": 2735.0466, "encoder_q-layer.6": 3163.1985, "encoder_q-layer.7": 3643.8132, "encoder_q-layer.8": 4010.9858, "encoder_q-layer.9": 3533.6143, "epoch": 0.7, "inbatch_neg_score": 1.2002, "inbatch_pos_score": 1.9463, "learning_rate": 1.577777777777778e-05, "loss": 2.6305, "norm_diff": 0.0409, "norm_loss": 0.0, "num_token_doc": 66.7567, "num_token_overlap": 17.7721, "num_token_query": 52.1745, "num_token_union": 73.6907, "num_word_context": 202.1719, "num_word_doc": 49.8343, "num_word_query": 39.7949, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4984.4693, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2002, "query_norm": 1.7069, "queue_k_norm": 1.7487, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1745, "sent_len_1": 66.7567, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7825, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 71600 }, { "accuracy": 59.4238, "active_queue_size": 16384.0, "cl_loss": 2.6121, "doc_norm": 1.7485, "encoder_q-embeddings": 3188.9102, "encoder_q-layer.0": 1968.3182, "encoder_q-layer.1": 2302.0649, "encoder_q-layer.10": 3822.4807, "encoder_q-layer.11": 7264.0039, "encoder_q-layer.2": 2585.0938, "encoder_q-layer.3": 2745.5574, "encoder_q-layer.4": 2989.4766, "encoder_q-layer.5": 3150.8059, "encoder_q-layer.6": 3819.3845, "encoder_q-layer.7": 4105.1348, "encoder_q-layer.8": 4313.4668, "encoder_q-layer.9": 3555.3064, "epoch": 0.7, "inbatch_neg_score": 1.1969, "inbatch_pos_score": 1.9424, "learning_rate": 1.5722222222222225e-05, "loss": 2.6121, "norm_diff": 0.0417, "norm_loss": 0.0, "num_token_doc": 66.7273, "num_token_overlap": 17.809, "num_token_query": 52.2031, "num_token_union": 73.6199, "num_word_context": 202.2531, "num_word_doc": 49.7735, "num_word_query": 39.8113, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5522.1031, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1963, "query_norm": 1.7068, "queue_k_norm": 1.7506, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2031, "sent_len_1": 66.7273, "sent_len_max_0": 128.0, "sent_len_max_1": 209.14, "stdk": 0.0494, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 71700 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.5932, "doc_norm": 1.7515, "encoder_q-embeddings": 4371.7749, "encoder_q-layer.0": 3163.1638, "encoder_q-layer.1": 3609.4783, "encoder_q-layer.10": 3352.9312, "encoder_q-layer.11": 7214.043, "encoder_q-layer.2": 4190.2119, "encoder_q-layer.3": 4350.1426, "encoder_q-layer.4": 4489.875, "encoder_q-layer.5": 4868.1982, "encoder_q-layer.6": 4716.5981, "encoder_q-layer.7": 4193.5112, "encoder_q-layer.8": 4665.084, "encoder_q-layer.9": 3482.3984, "epoch": 0.7, "inbatch_neg_score": 1.2004, "inbatch_pos_score": 1.958, "learning_rate": 1.5666666666666667e-05, "loss": 2.5932, "norm_diff": 0.0334, "norm_loss": 0.0, "num_token_doc": 66.7364, "num_token_overlap": 17.8054, "num_token_query": 52.2331, "num_token_union": 73.669, "num_word_context": 202.2482, "num_word_doc": 49.7914, "num_word_query": 39.8203, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6624.333, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.2012, "query_norm": 1.7181, "queue_k_norm": 1.7508, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2331, "sent_len_1": 66.7364, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3363, "stdk": 0.0495, "stdq": 0.0471, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 71800 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.5982, "doc_norm": 1.7513, "encoder_q-embeddings": 2401.1973, "encoder_q-layer.0": 1568.658, "encoder_q-layer.1": 1715.1548, "encoder_q-layer.10": 3359.4053, "encoder_q-layer.11": 7319.3096, "encoder_q-layer.2": 1950.2944, "encoder_q-layer.3": 2109.2024, "encoder_q-layer.4": 2217.6057, "encoder_q-layer.5": 2421.7947, "encoder_q-layer.6": 2706.6553, "encoder_q-layer.7": 3055.1877, "encoder_q-layer.8": 3690.571, "encoder_q-layer.9": 3285.6099, "epoch": 0.7, "inbatch_neg_score": 1.2005, "inbatch_pos_score": 1.9531, "learning_rate": 1.5611111111111113e-05, "loss": 2.5982, "norm_diff": 0.0441, "norm_loss": 0.0, "num_token_doc": 67.0112, "num_token_overlap": 17.8538, "num_token_query": 52.3607, "num_token_union": 73.8468, "num_word_context": 202.4154, "num_word_doc": 49.9642, "num_word_query": 39.9051, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4722.2436, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2002, "query_norm": 1.7071, "queue_k_norm": 1.751, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3607, "sent_len_1": 67.0112, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5588, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 71900 }, { "accuracy": 60.3027, "active_queue_size": 16384.0, "cl_loss": 2.5897, "doc_norm": 1.7498, "encoder_q-embeddings": 3235.1592, "encoder_q-layer.0": 2157.3196, "encoder_q-layer.1": 2592.9128, "encoder_q-layer.10": 4047.0859, "encoder_q-layer.11": 7799.1885, "encoder_q-layer.2": 2987.4285, "encoder_q-layer.3": 2883.4761, "encoder_q-layer.4": 2816.502, "encoder_q-layer.5": 2997.0144, "encoder_q-layer.6": 3337.0254, "encoder_q-layer.7": 3828.5657, "encoder_q-layer.8": 4126.1831, "encoder_q-layer.9": 3611.5029, "epoch": 0.7, "inbatch_neg_score": 1.1982, "inbatch_pos_score": 1.9414, "learning_rate": 1.5555555555555555e-05, "loss": 2.5897, "norm_diff": 0.0509, "norm_loss": 0.0, "num_token_doc": 66.8621, "num_token_overlap": 17.8623, "num_token_query": 52.2913, "num_token_union": 73.716, "num_word_context": 202.1823, "num_word_doc": 49.9051, "num_word_query": 39.8784, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5529.261, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.2002, "query_norm": 1.6989, "queue_k_norm": 1.7512, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2913, "sent_len_1": 66.8621, "sent_len_max_0": 128.0, "sent_len_max_1": 206.7488, "stdk": 0.0495, "stdq": 0.0463, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 72000 }, { "accuracy": 62.0117, "active_queue_size": 16384.0, "cl_loss": 2.6096, "doc_norm": 1.7554, "encoder_q-embeddings": 3622.5559, "encoder_q-layer.0": 2392.0913, "encoder_q-layer.1": 2642.7832, "encoder_q-layer.10": 3821.511, "encoder_q-layer.11": 6984.0181, "encoder_q-layer.2": 3207.2964, "encoder_q-layer.3": 3459.7676, "encoder_q-layer.4": 3983.9634, "encoder_q-layer.5": 4145.1802, "encoder_q-layer.6": 4830.4922, "encoder_q-layer.7": 4138.7852, "encoder_q-layer.8": 3967.6409, "encoder_q-layer.9": 3729.4312, "epoch": 0.7, "inbatch_neg_score": 1.203, "inbatch_pos_score": 1.9736, "learning_rate": 1.55e-05, "loss": 2.6096, "norm_diff": 0.0369, "norm_loss": 0.0, "num_token_doc": 66.7972, "num_token_overlap": 17.772, "num_token_query": 52.0928, "num_token_union": 73.621, "num_word_context": 202.1047, "num_word_doc": 49.8505, "num_word_query": 39.7032, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5925.2619, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.2031, "query_norm": 1.7185, "queue_k_norm": 1.7523, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0928, "sent_len_1": 66.7972, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9112, "stdk": 0.0496, "stdq": 0.0471, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 72100 }, { "accuracy": 63.3301, "active_queue_size": 16384.0, "cl_loss": 2.6056, "doc_norm": 1.7504, "encoder_q-embeddings": 3755.0334, "encoder_q-layer.0": 2345.0989, "encoder_q-layer.1": 2677.0649, "encoder_q-layer.10": 3401.04, "encoder_q-layer.11": 7013.5234, "encoder_q-layer.2": 3001.7258, "encoder_q-layer.3": 3276.8374, "encoder_q-layer.4": 3584.5117, "encoder_q-layer.5": 3777.094, "encoder_q-layer.6": 3746.6208, "encoder_q-layer.7": 3516.9595, "encoder_q-layer.8": 3837.4634, "encoder_q-layer.9": 3300.0083, "epoch": 0.7, "inbatch_neg_score": 1.1965, "inbatch_pos_score": 1.9434, "learning_rate": 1.5444444444444446e-05, "loss": 2.6056, "norm_diff": 0.0531, "norm_loss": 0.0, "num_token_doc": 66.8595, "num_token_overlap": 17.8635, "num_token_query": 52.3955, "num_token_union": 73.7895, "num_word_context": 202.5835, "num_word_doc": 49.9151, "num_word_query": 39.9834, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5598.5158, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1963, "query_norm": 1.6973, "queue_k_norm": 1.7526, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3955, "sent_len_1": 66.8595, "sent_len_max_0": 128.0, "sent_len_max_1": 206.8475, "stdk": 0.0494, "stdq": 0.0463, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 72200 }, { "accuracy": 61.1816, "active_queue_size": 16384.0, "cl_loss": 2.6119, "doc_norm": 1.7541, "encoder_q-embeddings": 2603.4541, "encoder_q-layer.0": 1681.5085, "encoder_q-layer.1": 1856.0295, "encoder_q-layer.10": 3209.6655, "encoder_q-layer.11": 7016.5723, "encoder_q-layer.2": 2097.9802, "encoder_q-layer.3": 2321.0327, "encoder_q-layer.4": 2516.9465, "encoder_q-layer.5": 2550.3057, "encoder_q-layer.6": 2826.2285, "encoder_q-layer.7": 3022.7937, "encoder_q-layer.8": 3617.4106, "encoder_q-layer.9": 3265.8108, "epoch": 0.71, "inbatch_neg_score": 1.2026, "inbatch_pos_score": 1.9326, "learning_rate": 1.538888888888889e-05, "loss": 2.6119, "norm_diff": 0.0591, "norm_loss": 0.0, "num_token_doc": 66.7793, "num_token_overlap": 17.7988, "num_token_query": 52.2389, "num_token_union": 73.7127, "num_word_context": 202.0499, "num_word_doc": 49.8497, "num_word_query": 39.8337, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4824.1093, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2012, "query_norm": 1.6949, "queue_k_norm": 1.7537, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2389, "sent_len_1": 66.7793, "sent_len_max_0": 128.0, "sent_len_max_1": 208.705, "stdk": 0.0495, "stdq": 0.046, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 72300 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6012, "doc_norm": 1.7518, "encoder_q-embeddings": 1513.8019, "encoder_q-layer.0": 1091.1914, "encoder_q-layer.1": 1227.3512, "encoder_q-layer.10": 1798.5616, "encoder_q-layer.11": 3580.8411, "encoder_q-layer.2": 1483.6064, "encoder_q-layer.3": 1656.339, "encoder_q-layer.4": 1842.9174, "encoder_q-layer.5": 2009.7217, "encoder_q-layer.6": 1949.1056, "encoder_q-layer.7": 2022.4072, "encoder_q-layer.8": 1987.6208, "encoder_q-layer.9": 1738.8337, "epoch": 0.71, "inbatch_neg_score": 1.2041, "inbatch_pos_score": 1.9492, "learning_rate": 1.5333333333333334e-05, "loss": 2.6012, "norm_diff": 0.045, "norm_loss": 0.0, "num_token_doc": 66.8701, "num_token_overlap": 17.8312, "num_token_query": 52.2732, "num_token_union": 73.7572, "num_word_context": 202.409, "num_word_doc": 49.9292, "num_word_query": 39.8764, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2800.8045, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2031, "query_norm": 1.7068, "queue_k_norm": 1.7528, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2732, "sent_len_1": 66.8701, "sent_len_max_0": 128.0, "sent_len_max_1": 207.7725, "stdk": 0.0494, "stdq": 0.0465, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 72400 }, { "accuracy": 62.5488, "active_queue_size": 16384.0, "cl_loss": 2.6092, "doc_norm": 1.7545, "encoder_q-embeddings": 1575.2131, "encoder_q-layer.0": 997.5767, "encoder_q-layer.1": 1145.4833, "encoder_q-layer.10": 1673.5007, "encoder_q-layer.11": 3540.7795, "encoder_q-layer.2": 1320.1525, "encoder_q-layer.3": 1418.2194, "encoder_q-layer.4": 1541.7792, "encoder_q-layer.5": 1594.7864, "encoder_q-layer.6": 1757.5087, "encoder_q-layer.7": 1720.8412, "encoder_q-layer.8": 2040.2185, "encoder_q-layer.9": 1727.674, "epoch": 0.71, "inbatch_neg_score": 1.2019, "inbatch_pos_score": 1.958, "learning_rate": 1.527777777777778e-05, "loss": 2.6092, "norm_diff": 0.0421, "norm_loss": 0.0, "num_token_doc": 66.8376, "num_token_overlap": 17.8046, "num_token_query": 52.289, "num_token_union": 73.7902, "num_word_context": 202.1218, "num_word_doc": 49.8649, "num_word_query": 39.8966, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2661.1422, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2012, "query_norm": 1.7124, "queue_k_norm": 1.755, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.289, "sent_len_1": 66.8376, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6387, "stdk": 0.0495, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 72500 }, { "accuracy": 62.3535, "active_queue_size": 16384.0, "cl_loss": 2.6033, "doc_norm": 1.7551, "encoder_q-embeddings": 1246.3689, "encoder_q-layer.0": 799.3621, "encoder_q-layer.1": 884.2984, "encoder_q-layer.10": 1641.9476, "encoder_q-layer.11": 3443.3562, "encoder_q-layer.2": 1011.0852, "encoder_q-layer.3": 1086.9268, "encoder_q-layer.4": 1142.021, "encoder_q-layer.5": 1211.9485, "encoder_q-layer.6": 1367.4042, "encoder_q-layer.7": 1502.6896, "encoder_q-layer.8": 1903.8679, "encoder_q-layer.9": 1601.0615, "epoch": 0.71, "inbatch_neg_score": 1.2003, "inbatch_pos_score": 1.9707, "learning_rate": 1.5222222222222224e-05, "loss": 2.6033, "norm_diff": 0.0424, "norm_loss": 0.0, "num_token_doc": 66.7439, "num_token_overlap": 17.7677, "num_token_query": 52.2985, "num_token_union": 73.7369, "num_word_context": 202.2141, "num_word_doc": 49.7778, "num_word_query": 39.877, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2337.4164, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2012, "query_norm": 1.7126, "queue_k_norm": 1.7533, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2985, "sent_len_1": 66.7439, "sent_len_max_0": 128.0, "sent_len_max_1": 209.7937, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 72600 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.5975, "doc_norm": 1.7548, "encoder_q-embeddings": 1284.5688, "encoder_q-layer.0": 786.748, "encoder_q-layer.1": 869.65, "encoder_q-layer.10": 2093.5312, "encoder_q-layer.11": 3926.5093, "encoder_q-layer.2": 987.6224, "encoder_q-layer.3": 1059.6971, "encoder_q-layer.4": 1141.0814, "encoder_q-layer.5": 1247.6261, "encoder_q-layer.6": 1433.1208, "encoder_q-layer.7": 1703.0403, "encoder_q-layer.8": 2121.8284, "encoder_q-layer.9": 1995.5471, "epoch": 0.71, "inbatch_neg_score": 1.2082, "inbatch_pos_score": 1.959, "learning_rate": 1.5166666666666668e-05, "loss": 2.5975, "norm_diff": 0.0282, "norm_loss": 0.0, "num_token_doc": 66.8828, "num_token_overlap": 17.8227, "num_token_query": 52.3067, "num_token_union": 73.7848, "num_word_context": 202.4868, "num_word_doc": 49.9022, "num_word_query": 39.8976, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2555.0344, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2061, "query_norm": 1.7266, "queue_k_norm": 1.7539, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3067, "sent_len_1": 66.8828, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1825, "stdk": 0.0495, "stdq": 0.0472, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 72700 }, { "accuracy": 61.377, "active_queue_size": 16384.0, "cl_loss": 2.6117, "doc_norm": 1.7546, "encoder_q-embeddings": 1415.161, "encoder_q-layer.0": 937.6465, "encoder_q-layer.1": 1065.08, "encoder_q-layer.10": 1659.1842, "encoder_q-layer.11": 3520.9785, "encoder_q-layer.2": 1242.684, "encoder_q-layer.3": 1344.7914, "encoder_q-layer.4": 1485.9247, "encoder_q-layer.5": 1541.7784, "encoder_q-layer.6": 1653.2603, "encoder_q-layer.7": 1621.4586, "encoder_q-layer.8": 1825.1764, "encoder_q-layer.9": 1685.6747, "epoch": 0.71, "inbatch_neg_score": 1.213, "inbatch_pos_score": 1.9629, "learning_rate": 1.5111111111111112e-05, "loss": 2.6117, "norm_diff": 0.0384, "norm_loss": 0.0, "num_token_doc": 66.7832, "num_token_overlap": 17.8149, "num_token_query": 52.2459, "num_token_union": 73.6966, "num_word_context": 202.2026, "num_word_doc": 49.8084, "num_word_query": 39.8338, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2542.8166, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2129, "query_norm": 1.7163, "queue_k_norm": 1.7545, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2459, "sent_len_1": 66.7832, "sent_len_max_0": 128.0, "sent_len_max_1": 209.9825, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 72800 }, { "accuracy": 61.6211, "active_queue_size": 16384.0, "cl_loss": 2.6056, "doc_norm": 1.7567, "encoder_q-embeddings": 2310.853, "encoder_q-layer.0": 1579.7504, "encoder_q-layer.1": 1852.8185, "encoder_q-layer.10": 1998.837, "encoder_q-layer.11": 3689.2261, "encoder_q-layer.2": 2309.2793, "encoder_q-layer.3": 2548.7598, "encoder_q-layer.4": 2821.6821, "encoder_q-layer.5": 3216.5232, "encoder_q-layer.6": 2841.9214, "encoder_q-layer.7": 2691.4561, "encoder_q-layer.8": 2361.8921, "encoder_q-layer.9": 1802.3235, "epoch": 0.71, "inbatch_neg_score": 1.2131, "inbatch_pos_score": 1.9697, "learning_rate": 1.5055555555555556e-05, "loss": 2.6056, "norm_diff": 0.039, "norm_loss": 0.0, "num_token_doc": 66.8051, "num_token_overlap": 17.8376, "num_token_query": 52.3097, "num_token_union": 73.7125, "num_word_context": 202.2217, "num_word_doc": 49.8635, "num_word_query": 39.9112, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3664.5924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2148, "query_norm": 1.7177, "queue_k_norm": 1.7537, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3097, "sent_len_1": 66.8051, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5375, "stdk": 0.0496, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 72900 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.6058, "doc_norm": 1.7531, "encoder_q-embeddings": 1183.2438, "encoder_q-layer.0": 765.938, "encoder_q-layer.1": 833.4144, "encoder_q-layer.10": 1746.9543, "encoder_q-layer.11": 3454.2939, "encoder_q-layer.2": 918.1786, "encoder_q-layer.3": 993.999, "encoder_q-layer.4": 1096.6152, "encoder_q-layer.5": 1121.287, "encoder_q-layer.6": 1279.8749, "encoder_q-layer.7": 1421.5767, "encoder_q-layer.8": 1717.4233, "encoder_q-layer.9": 1625.3339, "epoch": 0.71, "inbatch_neg_score": 1.2222, "inbatch_pos_score": 1.9727, "learning_rate": 1.5e-05, "loss": 2.6058, "norm_diff": 0.0356, "norm_loss": 0.0, "num_token_doc": 66.771, "num_token_overlap": 17.7899, "num_token_query": 52.2525, "num_token_union": 73.698, "num_word_context": 202.3865, "num_word_doc": 49.8385, "num_word_query": 39.8669, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2279.4107, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2217, "query_norm": 1.7175, "queue_k_norm": 1.7556, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2525, "sent_len_1": 66.771, "sent_len_max_0": 128.0, "sent_len_max_1": 209.435, "stdk": 0.0493, "stdq": 0.0464, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 73000 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.6048, "doc_norm": 1.7575, "encoder_q-embeddings": 1305.7031, "encoder_q-layer.0": 830.0543, "encoder_q-layer.1": 923.3173, "encoder_q-layer.10": 1649.0405, "encoder_q-layer.11": 3367.2781, "encoder_q-layer.2": 1066.4624, "encoder_q-layer.3": 1113.0438, "encoder_q-layer.4": 1251.4764, "encoder_q-layer.5": 1314.7256, "encoder_q-layer.6": 1481.873, "encoder_q-layer.7": 1560.6724, "encoder_q-layer.8": 1833.2053, "encoder_q-layer.9": 1636.8369, "epoch": 0.71, "inbatch_neg_score": 1.2243, "inbatch_pos_score": 1.9854, "learning_rate": 1.4944444444444444e-05, "loss": 2.6048, "norm_diff": 0.0235, "norm_loss": 0.0, "num_token_doc": 66.7134, "num_token_overlap": 17.7969, "num_token_query": 52.2378, "num_token_union": 73.6164, "num_word_context": 202.0958, "num_word_doc": 49.7617, "num_word_query": 39.8217, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2395.6757, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2227, "query_norm": 1.7339, "queue_k_norm": 1.7562, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2378, "sent_len_1": 66.7134, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4412, "stdk": 0.0495, "stdq": 0.0471, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 73100 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.627, "doc_norm": 1.7583, "encoder_q-embeddings": 1630.856, "encoder_q-layer.0": 1111.9011, "encoder_q-layer.1": 1230.6852, "encoder_q-layer.10": 1819.2622, "encoder_q-layer.11": 3519.2458, "encoder_q-layer.2": 1463.3016, "encoder_q-layer.3": 1491.3857, "encoder_q-layer.4": 1590.3046, "encoder_q-layer.5": 1568.7402, "encoder_q-layer.6": 1665.344, "encoder_q-layer.7": 1788.1185, "encoder_q-layer.8": 1903.2439, "encoder_q-layer.9": 1721.53, "epoch": 0.71, "inbatch_neg_score": 1.2257, "inbatch_pos_score": 1.9688, "learning_rate": 1.4888888888888888e-05, "loss": 2.627, "norm_diff": 0.0387, "norm_loss": 0.0, "num_token_doc": 66.7656, "num_token_overlap": 17.7942, "num_token_query": 52.3028, "num_token_union": 73.7387, "num_word_context": 202.5805, "num_word_doc": 49.8641, "num_word_query": 39.9094, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2680.4377, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2266, "query_norm": 1.7196, "queue_k_norm": 1.757, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3028, "sent_len_1": 66.7656, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4613, "stdk": 0.0496, "stdq": 0.0464, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 73200 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.6021, "doc_norm": 1.7569, "encoder_q-embeddings": 1392.165, "encoder_q-layer.0": 878.5121, "encoder_q-layer.1": 1000.3816, "encoder_q-layer.10": 1798.7302, "encoder_q-layer.11": 3685.9199, "encoder_q-layer.2": 1132.9937, "encoder_q-layer.3": 1207.0146, "encoder_q-layer.4": 1324.5668, "encoder_q-layer.5": 1387.7708, "encoder_q-layer.6": 1509.7744, "encoder_q-layer.7": 1667.9751, "encoder_q-layer.8": 1995.6985, "encoder_q-layer.9": 1793.7086, "epoch": 0.72, "inbatch_neg_score": 1.2341, "inbatch_pos_score": 1.9609, "learning_rate": 1.4833333333333336e-05, "loss": 2.6021, "norm_diff": 0.0394, "norm_loss": 0.0, "num_token_doc": 66.7694, "num_token_overlap": 17.7958, "num_token_query": 52.3198, "num_token_union": 73.7303, "num_word_context": 202.1413, "num_word_doc": 49.8, "num_word_query": 39.8811, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2559.1331, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2334, "query_norm": 1.7176, "queue_k_norm": 1.7581, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3198, "sent_len_1": 66.7694, "sent_len_max_0": 128.0, "sent_len_max_1": 210.4663, "stdk": 0.0494, "stdq": 0.0462, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 73300 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.609, "doc_norm": 1.7586, "encoder_q-embeddings": 1277.2262, "encoder_q-layer.0": 823.9373, "encoder_q-layer.1": 915.9221, "encoder_q-layer.10": 1736.4832, "encoder_q-layer.11": 3688.7051, "encoder_q-layer.2": 1028.5625, "encoder_q-layer.3": 1103.2264, "encoder_q-layer.4": 1196.3937, "encoder_q-layer.5": 1221.4983, "encoder_q-layer.6": 1411.7336, "encoder_q-layer.7": 1586.8667, "encoder_q-layer.8": 1864.1356, "encoder_q-layer.9": 1666.4458, "epoch": 0.72, "inbatch_neg_score": 1.2373, "inbatch_pos_score": 1.9863, "learning_rate": 1.477777777777778e-05, "loss": 2.609, "norm_diff": 0.03, "norm_loss": 0.0, "num_token_doc": 66.8617, "num_token_overlap": 17.7863, "num_token_query": 52.3049, "num_token_union": 73.8009, "num_word_context": 202.4372, "num_word_doc": 49.8843, "num_word_query": 39.8889, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2432.8015, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2363, "query_norm": 1.7287, "queue_k_norm": 1.7588, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3049, "sent_len_1": 66.8617, "sent_len_max_0": 128.0, "sent_len_max_1": 209.665, "stdk": 0.0495, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 73400 }, { "accuracy": 60.2051, "active_queue_size": 16384.0, "cl_loss": 2.6071, "doc_norm": 1.7564, "encoder_q-embeddings": 1204.4926, "encoder_q-layer.0": 764.5307, "encoder_q-layer.1": 826.5986, "encoder_q-layer.10": 1728.5031, "encoder_q-layer.11": 3456.0359, "encoder_q-layer.2": 930.4294, "encoder_q-layer.3": 1003.1284, "encoder_q-layer.4": 1124.8143, "encoder_q-layer.5": 1170.7546, "encoder_q-layer.6": 1290.5862, "encoder_q-layer.7": 1454.2263, "encoder_q-layer.8": 1772.8351, "encoder_q-layer.9": 1608.6095, "epoch": 0.72, "inbatch_neg_score": 1.2427, "inbatch_pos_score": 1.9883, "learning_rate": 1.4722222222222224e-05, "loss": 2.6071, "norm_diff": 0.0287, "norm_loss": 0.0, "num_token_doc": 66.7895, "num_token_overlap": 17.7995, "num_token_query": 52.278, "num_token_union": 73.7062, "num_word_context": 202.2289, "num_word_doc": 49.8206, "num_word_query": 39.8622, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2313.1071, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2432, "query_norm": 1.7277, "queue_k_norm": 1.7595, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.278, "sent_len_1": 66.7895, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3663, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 73500 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.5931, "doc_norm": 1.7586, "encoder_q-embeddings": 1366.8206, "encoder_q-layer.0": 865.3265, "encoder_q-layer.1": 957.6136, "encoder_q-layer.10": 1635.3131, "encoder_q-layer.11": 3416.6409, "encoder_q-layer.2": 1116.9775, "encoder_q-layer.3": 1205.3835, "encoder_q-layer.4": 1336.5609, "encoder_q-layer.5": 1343.2916, "encoder_q-layer.6": 1468.5927, "encoder_q-layer.7": 1552.8999, "encoder_q-layer.8": 1824.8615, "encoder_q-layer.9": 1594.6554, "epoch": 0.72, "inbatch_neg_score": 1.2429, "inbatch_pos_score": 1.9873, "learning_rate": 1.4666666666666668e-05, "loss": 2.5931, "norm_diff": 0.0344, "norm_loss": 0.0, "num_token_doc": 66.7163, "num_token_overlap": 17.8106, "num_token_query": 52.208, "num_token_union": 73.641, "num_word_context": 202.29, "num_word_doc": 49.7866, "num_word_query": 39.8106, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2414.7215, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2441, "query_norm": 1.7242, "queue_k_norm": 1.7594, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.208, "sent_len_1": 66.7163, "sent_len_max_0": 128.0, "sent_len_max_1": 209.9837, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 73600 }, { "accuracy": 61.8652, "active_queue_size": 16384.0, "cl_loss": 2.5973, "doc_norm": 1.7607, "encoder_q-embeddings": 1332.8823, "encoder_q-layer.0": 900.4518, "encoder_q-layer.1": 979.2568, "encoder_q-layer.10": 1810.6528, "encoder_q-layer.11": 3592.1597, "encoder_q-layer.2": 1133.1985, "encoder_q-layer.3": 1250.424, "encoder_q-layer.4": 1365.7134, "encoder_q-layer.5": 1464.8591, "encoder_q-layer.6": 1644.3956, "encoder_q-layer.7": 1695.674, "encoder_q-layer.8": 1941.3346, "encoder_q-layer.9": 1741.6591, "epoch": 0.72, "inbatch_neg_score": 1.2463, "inbatch_pos_score": 2.0156, "learning_rate": 1.4611111111111112e-05, "loss": 2.5973, "norm_diff": 0.0238, "norm_loss": 0.0, "num_token_doc": 66.7633, "num_token_overlap": 17.8416, "num_token_query": 52.4044, "num_token_union": 73.782, "num_word_context": 202.5263, "num_word_doc": 49.8366, "num_word_query": 39.9758, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2527.2097, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2451, "query_norm": 1.7369, "queue_k_norm": 1.7623, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4044, "sent_len_1": 66.7633, "sent_len_max_0": 128.0, "sent_len_max_1": 206.29, "stdk": 0.0495, "stdq": 0.0472, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 73700 }, { "accuracy": 61.7676, "active_queue_size": 16384.0, "cl_loss": 2.5967, "doc_norm": 1.7625, "encoder_q-embeddings": 1252.2073, "encoder_q-layer.0": 805.3632, "encoder_q-layer.1": 881.7589, "encoder_q-layer.10": 1850.4943, "encoder_q-layer.11": 3634.989, "encoder_q-layer.2": 1021.0482, "encoder_q-layer.3": 1071.5286, "encoder_q-layer.4": 1163.8823, "encoder_q-layer.5": 1251.7002, "encoder_q-layer.6": 1371.761, "encoder_q-layer.7": 1558.0387, "encoder_q-layer.8": 1923.8469, "encoder_q-layer.9": 1760.6262, "epoch": 0.72, "inbatch_neg_score": 1.2508, "inbatch_pos_score": 2.0039, "learning_rate": 1.4555555555555556e-05, "loss": 2.5967, "norm_diff": 0.0384, "norm_loss": 0.0, "num_token_doc": 66.8538, "num_token_overlap": 17.8342, "num_token_query": 52.2644, "num_token_union": 73.7297, "num_word_context": 202.488, "num_word_doc": 49.8708, "num_word_query": 39.8598, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2434.2032, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.251, "query_norm": 1.724, "queue_k_norm": 1.7622, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2644, "sent_len_1": 66.8538, "sent_len_max_0": 128.0, "sent_len_max_1": 210.51, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 73800 }, { "accuracy": 63.3789, "active_queue_size": 16384.0, "cl_loss": 2.6031, "doc_norm": 1.7673, "encoder_q-embeddings": 1220.4033, "encoder_q-layer.0": 782.3663, "encoder_q-layer.1": 850.6141, "encoder_q-layer.10": 1706.2281, "encoder_q-layer.11": 3653.7529, "encoder_q-layer.2": 945.1786, "encoder_q-layer.3": 1017.4406, "encoder_q-layer.4": 1092.78, "encoder_q-layer.5": 1097.2443, "encoder_q-layer.6": 1271.7495, "encoder_q-layer.7": 1496.3331, "encoder_q-layer.8": 1791.1443, "encoder_q-layer.9": 1645.37, "epoch": 0.72, "inbatch_neg_score": 1.2529, "inbatch_pos_score": 2.0293, "learning_rate": 1.45e-05, "loss": 2.6031, "norm_diff": 0.0451, "norm_loss": 0.0, "num_token_doc": 66.6131, "num_token_overlap": 17.7583, "num_token_query": 52.2172, "num_token_union": 73.6311, "num_word_context": 202.0675, "num_word_doc": 49.6986, "num_word_query": 39.8239, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2360.8823, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2529, "query_norm": 1.7222, "queue_k_norm": 1.7614, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2172, "sent_len_1": 66.6131, "sent_len_max_0": 128.0, "sent_len_max_1": 208.865, "stdk": 0.0497, "stdq": 0.0466, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 73900 }, { "accuracy": 62.3047, "active_queue_size": 16384.0, "cl_loss": 2.6014, "doc_norm": 1.7659, "encoder_q-embeddings": 1291.068, "encoder_q-layer.0": 835.7197, "encoder_q-layer.1": 910.1963, "encoder_q-layer.10": 1722.4652, "encoder_q-layer.11": 3463.2244, "encoder_q-layer.2": 1030.5068, "encoder_q-layer.3": 1121.1381, "encoder_q-layer.4": 1222.7335, "encoder_q-layer.5": 1236.787, "encoder_q-layer.6": 1384.4545, "encoder_q-layer.7": 1541.9241, "encoder_q-layer.8": 1819.1923, "encoder_q-layer.9": 1604.7869, "epoch": 0.72, "inbatch_neg_score": 1.254, "inbatch_pos_score": 2.0176, "learning_rate": 1.4444444444444444e-05, "loss": 2.6014, "norm_diff": 0.0367, "norm_loss": 0.0, "num_token_doc": 66.9319, "num_token_overlap": 17.8721, "num_token_query": 52.3263, "num_token_union": 73.7511, "num_word_context": 202.3568, "num_word_doc": 49.955, "num_word_query": 39.8903, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2407.9263, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2549, "query_norm": 1.7292, "queue_k_norm": 1.7643, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3263, "sent_len_1": 66.9319, "sent_len_max_0": 128.0, "sent_len_max_1": 207.92, "stdk": 0.0496, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 74000 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.6035, "doc_norm": 1.7653, "encoder_q-embeddings": 1442.3883, "encoder_q-layer.0": 903.2243, "encoder_q-layer.1": 991.4381, "encoder_q-layer.10": 1654.9753, "encoder_q-layer.11": 3737.3179, "encoder_q-layer.2": 1138.7577, "encoder_q-layer.3": 1245.6528, "encoder_q-layer.4": 1331.8828, "encoder_q-layer.5": 1360.2269, "encoder_q-layer.6": 1530.3162, "encoder_q-layer.7": 1752.4158, "encoder_q-layer.8": 1963.1337, "encoder_q-layer.9": 1693.8503, "epoch": 0.72, "inbatch_neg_score": 1.2599, "inbatch_pos_score": 2.0234, "learning_rate": 1.438888888888889e-05, "loss": 2.6035, "norm_diff": 0.0286, "norm_loss": 0.0, "num_token_doc": 66.8694, "num_token_overlap": 17.7636, "num_token_query": 52.1956, "num_token_union": 73.7165, "num_word_context": 202.3546, "num_word_doc": 49.8905, "num_word_query": 39.7898, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2575.5437, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2607, "query_norm": 1.7366, "queue_k_norm": 1.7649, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1956, "sent_len_1": 66.8694, "sent_len_max_0": 128.0, "sent_len_max_1": 211.4925, "stdk": 0.0495, "stdq": 0.0471, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 74100 }, { "accuracy": 61.1816, "active_queue_size": 16384.0, "cl_loss": 2.5939, "doc_norm": 1.7652, "encoder_q-embeddings": 1321.6053, "encoder_q-layer.0": 888.5134, "encoder_q-layer.1": 1001.3019, "encoder_q-layer.10": 1722.7822, "encoder_q-layer.11": 3570.6091, "encoder_q-layer.2": 1159.3542, "encoder_q-layer.3": 1305.587, "encoder_q-layer.4": 1392.6606, "encoder_q-layer.5": 1418.2025, "encoder_q-layer.6": 1620.4065, "encoder_q-layer.7": 1718.9064, "encoder_q-layer.8": 1893.1931, "encoder_q-layer.9": 1706.2593, "epoch": 0.72, "inbatch_neg_score": 1.262, "inbatch_pos_score": 2.0137, "learning_rate": 1.4333333333333334e-05, "loss": 2.5939, "norm_diff": 0.0375, "norm_loss": 0.0, "num_token_doc": 66.8154, "num_token_overlap": 17.8137, "num_token_query": 52.2403, "num_token_union": 73.6958, "num_word_context": 202.1275, "num_word_doc": 49.847, "num_word_query": 39.8444, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2519.0471, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2607, "query_norm": 1.7277, "queue_k_norm": 1.766, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2403, "sent_len_1": 66.8154, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3988, "stdk": 0.0495, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 74200 }, { "accuracy": 62.207, "active_queue_size": 16384.0, "cl_loss": 2.5898, "doc_norm": 1.7715, "encoder_q-embeddings": 2646.3474, "encoder_q-layer.0": 1884.7318, "encoder_q-layer.1": 2049.5901, "encoder_q-layer.10": 1904.6248, "encoder_q-layer.11": 3793.3193, "encoder_q-layer.2": 2454.6462, "encoder_q-layer.3": 2731.5947, "encoder_q-layer.4": 2916.5745, "encoder_q-layer.5": 2919.365, "encoder_q-layer.6": 3007.345, "encoder_q-layer.7": 2715.9697, "encoder_q-layer.8": 2340.6057, "encoder_q-layer.9": 1777.5955, "epoch": 0.73, "inbatch_neg_score": 1.261, "inbatch_pos_score": 2.0234, "learning_rate": 1.427777777777778e-05, "loss": 2.5898, "norm_diff": 0.0505, "norm_loss": 0.0, "num_token_doc": 66.9487, "num_token_overlap": 17.836, "num_token_query": 52.3009, "num_token_union": 73.7905, "num_word_context": 202.4848, "num_word_doc": 49.9156, "num_word_query": 39.8859, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3881.5625, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2617, "query_norm": 1.721, "queue_k_norm": 1.7667, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3009, "sent_len_1": 66.9487, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8663, "stdk": 0.0498, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 74300 }, { "accuracy": 60.9863, "active_queue_size": 16384.0, "cl_loss": 2.5958, "doc_norm": 1.7667, "encoder_q-embeddings": 2827.4983, "encoder_q-layer.0": 1913.2827, "encoder_q-layer.1": 2053.2764, "encoder_q-layer.10": 3404.1604, "encoder_q-layer.11": 7079.8062, "encoder_q-layer.2": 2367.3345, "encoder_q-layer.3": 2421.1406, "encoder_q-layer.4": 2757.6379, "encoder_q-layer.5": 2972.8174, "encoder_q-layer.6": 3142.0515, "encoder_q-layer.7": 3203.4756, "encoder_q-layer.8": 3510.3228, "encoder_q-layer.9": 3321.2546, "epoch": 0.73, "inbatch_neg_score": 1.2625, "inbatch_pos_score": 2.0137, "learning_rate": 1.4222222222222224e-05, "loss": 2.5958, "norm_diff": 0.0448, "norm_loss": 0.0, "num_token_doc": 66.7564, "num_token_overlap": 17.8175, "num_token_query": 52.3388, "num_token_union": 73.6896, "num_word_context": 202.2466, "num_word_doc": 49.8271, "num_word_query": 39.9325, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5004.5206, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2617, "query_norm": 1.722, "queue_k_norm": 1.7674, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3388, "sent_len_1": 66.7564, "sent_len_max_0": 128.0, "sent_len_max_1": 210.2413, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 74400 }, { "accuracy": 60.498, "active_queue_size": 16384.0, "cl_loss": 2.6102, "doc_norm": 1.7688, "encoder_q-embeddings": 2698.0332, "encoder_q-layer.0": 1735.64, "encoder_q-layer.1": 1905.9839, "encoder_q-layer.10": 3664.0857, "encoder_q-layer.11": 7477.9639, "encoder_q-layer.2": 2189.2705, "encoder_q-layer.3": 2335.0767, "encoder_q-layer.4": 2543.4448, "encoder_q-layer.5": 2651.937, "encoder_q-layer.6": 3025.4319, "encoder_q-layer.7": 3317.6025, "encoder_q-layer.8": 3972.7297, "encoder_q-layer.9": 3587.9043, "epoch": 0.73, "inbatch_neg_score": 1.2644, "inbatch_pos_score": 2.0156, "learning_rate": 1.4166666666666668e-05, "loss": 2.6102, "norm_diff": 0.0391, "norm_loss": 0.0, "num_token_doc": 66.6925, "num_token_overlap": 17.7792, "num_token_query": 52.2794, "num_token_union": 73.6968, "num_word_context": 202.3551, "num_word_doc": 49.758, "num_word_query": 39.8597, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5019.3895, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2637, "query_norm": 1.7297, "queue_k_norm": 1.7687, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2794, "sent_len_1": 66.6925, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9425, "stdk": 0.0496, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 74500 }, { "accuracy": 62.5488, "active_queue_size": 16384.0, "cl_loss": 2.6154, "doc_norm": 1.7726, "encoder_q-embeddings": 2406.6995, "encoder_q-layer.0": 1549.5173, "encoder_q-layer.1": 1698.5752, "encoder_q-layer.10": 3779.5894, "encoder_q-layer.11": 7155.2183, "encoder_q-layer.2": 1953.8495, "encoder_q-layer.3": 2065.8423, "encoder_q-layer.4": 2315.3337, "encoder_q-layer.5": 2385.0525, "encoder_q-layer.6": 2680.0134, "encoder_q-layer.7": 3124.9587, "encoder_q-layer.8": 3927.4739, "encoder_q-layer.9": 3464.571, "epoch": 0.73, "inbatch_neg_score": 1.2612, "inbatch_pos_score": 2.0195, "learning_rate": 1.4111111111111112e-05, "loss": 2.6154, "norm_diff": 0.0515, "norm_loss": 0.0, "num_token_doc": 66.6659, "num_token_overlap": 17.7319, "num_token_query": 52.1352, "num_token_union": 73.6413, "num_word_context": 202.1408, "num_word_doc": 49.7706, "num_word_query": 39.7615, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4733.6873, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2627, "query_norm": 1.7211, "queue_k_norm": 1.7685, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1352, "sent_len_1": 66.6659, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1438, "stdk": 0.0497, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 74600 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.5946, "doc_norm": 1.7671, "encoder_q-embeddings": 2500.3398, "encoder_q-layer.0": 1575.0679, "encoder_q-layer.1": 1735.8163, "encoder_q-layer.10": 3486.8572, "encoder_q-layer.11": 7293.1875, "encoder_q-layer.2": 2002.3341, "encoder_q-layer.3": 2150.8574, "encoder_q-layer.4": 2288.9482, "encoder_q-layer.5": 2469.5898, "encoder_q-layer.6": 2798.7534, "encoder_q-layer.7": 3140.7522, "encoder_q-layer.8": 3787.614, "encoder_q-layer.9": 3395.4976, "epoch": 0.73, "inbatch_neg_score": 1.2676, "inbatch_pos_score": 2.0176, "learning_rate": 1.4055555555555556e-05, "loss": 2.5946, "norm_diff": 0.0465, "norm_loss": 0.0, "num_token_doc": 66.7258, "num_token_overlap": 17.8019, "num_token_query": 52.2778, "num_token_union": 73.6668, "num_word_context": 202.4275, "num_word_doc": 49.7892, "num_word_query": 39.8608, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4885.9459, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2686, "query_norm": 1.7206, "queue_k_norm": 1.768, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2778, "sent_len_1": 66.7258, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9963, "stdk": 0.0494, "stdq": 0.0463, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 74700 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.6006, "doc_norm": 1.7693, "encoder_q-embeddings": 2608.7468, "encoder_q-layer.0": 1625.0868, "encoder_q-layer.1": 1786.7944, "encoder_q-layer.10": 3282.6814, "encoder_q-layer.11": 7025.1875, "encoder_q-layer.2": 2015.8455, "encoder_q-layer.3": 2113.571, "encoder_q-layer.4": 2300.325, "encoder_q-layer.5": 2353.4651, "encoder_q-layer.6": 2691.2715, "encoder_q-layer.7": 3174.3914, "encoder_q-layer.8": 3675.1812, "encoder_q-layer.9": 3270.188, "epoch": 0.73, "inbatch_neg_score": 1.2719, "inbatch_pos_score": 2.0273, "learning_rate": 1.4000000000000001e-05, "loss": 2.6006, "norm_diff": 0.0465, "norm_loss": 0.0, "num_token_doc": 66.8072, "num_token_overlap": 17.7905, "num_token_query": 52.2457, "num_token_union": 73.7362, "num_word_context": 202.3252, "num_word_doc": 49.8534, "num_word_query": 39.8516, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4808.2622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2715, "query_norm": 1.7228, "queue_k_norm": 1.7702, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2457, "sent_len_1": 66.8072, "sent_len_max_0": 128.0, "sent_len_max_1": 209.345, "stdk": 0.0495, "stdq": 0.0464, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 74800 }, { "accuracy": 62.5977, "active_queue_size": 16384.0, "cl_loss": 2.6025, "doc_norm": 1.7749, "encoder_q-embeddings": 4968.3384, "encoder_q-layer.0": 3335.6521, "encoder_q-layer.1": 4001.8494, "encoder_q-layer.10": 3594.2852, "encoder_q-layer.11": 7183.5103, "encoder_q-layer.2": 4535.1895, "encoder_q-layer.3": 4621.3433, "encoder_q-layer.4": 5150.4912, "encoder_q-layer.5": 5106.0732, "encoder_q-layer.6": 5221.1675, "encoder_q-layer.7": 5260.3252, "encoder_q-layer.8": 4480.9585, "encoder_q-layer.9": 3507.03, "epoch": 0.73, "inbatch_neg_score": 1.269, "inbatch_pos_score": 2.0293, "learning_rate": 1.3944444444444446e-05, "loss": 2.6025, "norm_diff": 0.0441, "norm_loss": 0.0, "num_token_doc": 66.8223, "num_token_overlap": 17.8046, "num_token_query": 52.3129, "num_token_union": 73.7861, "num_word_context": 202.3782, "num_word_doc": 49.8561, "num_word_query": 39.9018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7068.0393, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.2705, "query_norm": 1.7307, "queue_k_norm": 1.7698, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3129, "sent_len_1": 66.8223, "sent_len_max_0": 128.0, "sent_len_max_1": 210.5525, "stdk": 0.0497, "stdq": 0.0468, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 74900 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.5983, "doc_norm": 1.7709, "encoder_q-embeddings": 2502.0027, "encoder_q-layer.0": 1600.0912, "encoder_q-layer.1": 1807.1859, "encoder_q-layer.10": 3927.605, "encoder_q-layer.11": 7124.0596, "encoder_q-layer.2": 2044.1118, "encoder_q-layer.3": 2178.6248, "encoder_q-layer.4": 2281.6055, "encoder_q-layer.5": 2392.7424, "encoder_q-layer.6": 2786.3142, "encoder_q-layer.7": 3127.3193, "encoder_q-layer.8": 3740.594, "encoder_q-layer.9": 3592.1897, "epoch": 0.73, "inbatch_neg_score": 1.269, "inbatch_pos_score": 2.0156, "learning_rate": 1.388888888888889e-05, "loss": 2.5983, "norm_diff": 0.0539, "norm_loss": 0.0, "num_token_doc": 66.7685, "num_token_overlap": 17.8077, "num_token_query": 52.2767, "num_token_union": 73.7001, "num_word_context": 202.1757, "num_word_doc": 49.8097, "num_word_query": 39.8651, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4792.5351, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2705, "query_norm": 1.7169, "queue_k_norm": 1.7708, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2767, "sent_len_1": 66.7685, "sent_len_max_0": 128.0, "sent_len_max_1": 206.7788, "stdk": 0.0495, "stdq": 0.0462, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 75000 }, { "accuracy": 60.3027, "active_queue_size": 16384.0, "cl_loss": 2.5951, "doc_norm": 1.773, "encoder_q-embeddings": 2422.8862, "encoder_q-layer.0": 1594.8759, "encoder_q-layer.1": 1761.2153, "encoder_q-layer.10": 3540.3296, "encoder_q-layer.11": 7242.6353, "encoder_q-layer.2": 1997.0001, "encoder_q-layer.3": 2015.6577, "encoder_q-layer.4": 2195.3386, "encoder_q-layer.5": 2277.7192, "encoder_q-layer.6": 2574.5752, "encoder_q-layer.7": 2934.4702, "encoder_q-layer.8": 3695.3235, "encoder_q-layer.9": 3295.5249, "epoch": 0.73, "inbatch_neg_score": 1.2745, "inbatch_pos_score": 2.0234, "learning_rate": 1.3833333333333334e-05, "loss": 2.5951, "norm_diff": 0.0395, "norm_loss": 0.0, "num_token_doc": 66.8402, "num_token_overlap": 17.8165, "num_token_query": 52.2688, "num_token_union": 73.7401, "num_word_context": 202.384, "num_word_doc": 49.8477, "num_word_query": 39.8624, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4714.8373, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2744, "query_norm": 1.7335, "queue_k_norm": 1.7721, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2688, "sent_len_1": 66.8402, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3288, "stdk": 0.0496, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 75100 }, { "accuracy": 61.5234, "active_queue_size": 16384.0, "cl_loss": 2.6021, "doc_norm": 1.7675, "encoder_q-embeddings": 2440.0889, "encoder_q-layer.0": 1634.6056, "encoder_q-layer.1": 1760.6469, "encoder_q-layer.10": 3582.3169, "encoder_q-layer.11": 7201.2344, "encoder_q-layer.2": 1951.6141, "encoder_q-layer.3": 2057.696, "encoder_q-layer.4": 2246.6011, "encoder_q-layer.5": 2425.6882, "encoder_q-layer.6": 2819.9778, "encoder_q-layer.7": 3357.2898, "encoder_q-layer.8": 3801.8982, "encoder_q-layer.9": 3437.5867, "epoch": 0.73, "inbatch_neg_score": 1.2761, "inbatch_pos_score": 2.0234, "learning_rate": 1.3777777777777778e-05, "loss": 2.6021, "norm_diff": 0.0444, "norm_loss": 0.0, "num_token_doc": 66.6746, "num_token_overlap": 17.8297, "num_token_query": 52.2166, "num_token_union": 73.589, "num_word_context": 202.0871, "num_word_doc": 49.7228, "num_word_query": 39.8228, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4855.3162, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2764, "query_norm": 1.7232, "queue_k_norm": 1.7711, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2166, "sent_len_1": 66.6746, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9875, "stdk": 0.0493, "stdq": 0.0463, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 75200 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.6037, "doc_norm": 1.7711, "encoder_q-embeddings": 2792.3462, "encoder_q-layer.0": 1799.225, "encoder_q-layer.1": 2017.3406, "encoder_q-layer.10": 3590.1431, "encoder_q-layer.11": 7566.0576, "encoder_q-layer.2": 2257.718, "encoder_q-layer.3": 2446.3618, "encoder_q-layer.4": 2737.7625, "encoder_q-layer.5": 3023.2397, "encoder_q-layer.6": 3123.7703, "encoder_q-layer.7": 3396.8823, "encoder_q-layer.8": 3942.739, "encoder_q-layer.9": 3392.4036, "epoch": 0.74, "inbatch_neg_score": 1.2794, "inbatch_pos_score": 2.0371, "learning_rate": 1.3722222222222222e-05, "loss": 2.6037, "norm_diff": 0.0451, "norm_loss": 0.0, "num_token_doc": 66.7517, "num_token_overlap": 17.8024, "num_token_query": 52.2477, "num_token_union": 73.6908, "num_word_context": 202.2842, "num_word_doc": 49.8022, "num_word_query": 39.8477, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5215.7454, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2812, "query_norm": 1.726, "queue_k_norm": 1.772, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2477, "sent_len_1": 66.7517, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6425, "stdk": 0.0495, "stdq": 0.0464, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 75300 }, { "accuracy": 61.377, "active_queue_size": 16384.0, "cl_loss": 2.6076, "doc_norm": 1.773, "encoder_q-embeddings": 2392.3857, "encoder_q-layer.0": 1490.119, "encoder_q-layer.1": 1669.985, "encoder_q-layer.10": 3349.0457, "encoder_q-layer.11": 6854.0703, "encoder_q-layer.2": 1901.9919, "encoder_q-layer.3": 2017.6506, "encoder_q-layer.4": 2156.2, "encoder_q-layer.5": 2341.4875, "encoder_q-layer.6": 2659.314, "encoder_q-layer.7": 3090.5479, "encoder_q-layer.8": 3559.009, "encoder_q-layer.9": 3328.1194, "epoch": 0.74, "inbatch_neg_score": 1.2842, "inbatch_pos_score": 2.0293, "learning_rate": 1.3666666666666666e-05, "loss": 2.6076, "norm_diff": 0.046, "norm_loss": 0.0, "num_token_doc": 66.6973, "num_token_overlap": 17.8054, "num_token_query": 52.2682, "num_token_union": 73.6901, "num_word_context": 202.1815, "num_word_doc": 49.7661, "num_word_query": 39.8738, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4633.371, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2842, "query_norm": 1.727, "queue_k_norm": 1.7735, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2682, "sent_len_1": 66.6973, "sent_len_max_0": 128.0, "sent_len_max_1": 209.06, "stdk": 0.0495, "stdq": 0.0463, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 75400 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.5804, "doc_norm": 1.773, "encoder_q-embeddings": 2702.8672, "encoder_q-layer.0": 1742.9266, "encoder_q-layer.1": 1885.6523, "encoder_q-layer.10": 3424.4478, "encoder_q-layer.11": 7150.9336, "encoder_q-layer.2": 2156.3718, "encoder_q-layer.3": 2343.2422, "encoder_q-layer.4": 2574.3516, "encoder_q-layer.5": 2711.8416, "encoder_q-layer.6": 3154.0112, "encoder_q-layer.7": 3316.0454, "encoder_q-layer.8": 3655.8699, "encoder_q-layer.9": 3354.449, "epoch": 0.74, "inbatch_neg_score": 1.2824, "inbatch_pos_score": 2.041, "learning_rate": 1.3611111111111111e-05, "loss": 2.5804, "norm_diff": 0.0277, "norm_loss": 0.0, "num_token_doc": 66.8262, "num_token_overlap": 17.8302, "num_token_query": 52.2487, "num_token_union": 73.6455, "num_word_context": 202.4502, "num_word_doc": 49.87, "num_word_query": 39.8631, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4990.6093, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2822, "query_norm": 1.7453, "queue_k_norm": 1.7741, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2487, "sent_len_1": 66.8262, "sent_len_max_0": 128.0, "sent_len_max_1": 210.5813, "stdk": 0.0495, "stdq": 0.0473, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 75500 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.5999, "doc_norm": 1.7753, "encoder_q-embeddings": 2477.5515, "encoder_q-layer.0": 1599.5138, "encoder_q-layer.1": 1748.5256, "encoder_q-layer.10": 3376.4292, "encoder_q-layer.11": 7159.4429, "encoder_q-layer.2": 2059.3835, "encoder_q-layer.3": 2151.6045, "encoder_q-layer.4": 2438.2339, "encoder_q-layer.5": 2683.2051, "encoder_q-layer.6": 3050.0073, "encoder_q-layer.7": 3299.2351, "encoder_q-layer.8": 3615.5, "encoder_q-layer.9": 3413.6401, "epoch": 0.74, "inbatch_neg_score": 1.2836, "inbatch_pos_score": 2.0391, "learning_rate": 1.3555555555555557e-05, "loss": 2.5999, "norm_diff": 0.0413, "norm_loss": 0.0, "num_token_doc": 66.8715, "num_token_overlap": 17.7789, "num_token_query": 52.2891, "num_token_union": 73.8264, "num_word_context": 202.4105, "num_word_doc": 49.8918, "num_word_query": 39.899, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4869.3358, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2832, "query_norm": 1.734, "queue_k_norm": 1.7737, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2891, "sent_len_1": 66.8715, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1225, "stdk": 0.0495, "stdq": 0.0467, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 75600 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.5901, "doc_norm": 1.7757, "encoder_q-embeddings": 2663.0527, "encoder_q-layer.0": 1685.6108, "encoder_q-layer.1": 1863.384, "encoder_q-layer.10": 3469.0474, "encoder_q-layer.11": 7201.7534, "encoder_q-layer.2": 2071.8184, "encoder_q-layer.3": 2273.0759, "encoder_q-layer.4": 2384.6641, "encoder_q-layer.5": 2643.0908, "encoder_q-layer.6": 2888.0129, "encoder_q-layer.7": 3243.2178, "encoder_q-layer.8": 3693.0557, "encoder_q-layer.9": 3368.6926, "epoch": 0.74, "inbatch_neg_score": 1.2856, "inbatch_pos_score": 2.0527, "learning_rate": 1.3500000000000001e-05, "loss": 2.5901, "norm_diff": 0.0403, "norm_loss": 0.0, "num_token_doc": 66.7617, "num_token_overlap": 17.8268, "num_token_query": 52.2537, "num_token_union": 73.6681, "num_word_context": 202.148, "num_word_doc": 49.8062, "num_word_query": 39.8574, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4887.8233, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2852, "query_norm": 1.7355, "queue_k_norm": 1.7748, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2537, "sent_len_1": 66.7617, "sent_len_max_0": 128.0, "sent_len_max_1": 207.545, "stdk": 0.0495, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 75700 }, { "accuracy": 63.7695, "active_queue_size": 16384.0, "cl_loss": 2.587, "doc_norm": 1.7753, "encoder_q-embeddings": 2658.2502, "encoder_q-layer.0": 1695.6083, "encoder_q-layer.1": 1874.9077, "encoder_q-layer.10": 3421.1169, "encoder_q-layer.11": 6798.0234, "encoder_q-layer.2": 2141.3545, "encoder_q-layer.3": 2378.8381, "encoder_q-layer.4": 2569.7527, "encoder_q-layer.5": 2642.6199, "encoder_q-layer.6": 2908.5979, "encoder_q-layer.7": 3280.126, "encoder_q-layer.8": 3692.2703, "encoder_q-layer.9": 3304.9761, "epoch": 0.74, "inbatch_neg_score": 1.2881, "inbatch_pos_score": 2.0723, "learning_rate": 1.3444444444444445e-05, "loss": 2.587, "norm_diff": 0.0283, "norm_loss": 0.0, "num_token_doc": 66.9288, "num_token_overlap": 17.8184, "num_token_query": 52.2699, "num_token_union": 73.7809, "num_word_context": 202.4031, "num_word_doc": 49.932, "num_word_query": 39.8593, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4825.2835, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2871, "query_norm": 1.747, "queue_k_norm": 1.7759, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2699, "sent_len_1": 66.9288, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8313, "stdk": 0.0495, "stdq": 0.0473, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 75800 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.6065, "doc_norm": 1.7739, "encoder_q-embeddings": 2576.531, "encoder_q-layer.0": 1664.4479, "encoder_q-layer.1": 1856.886, "encoder_q-layer.10": 3583.928, "encoder_q-layer.11": 7307.1992, "encoder_q-layer.2": 2095.4038, "encoder_q-layer.3": 2208.1101, "encoder_q-layer.4": 2488.3926, "encoder_q-layer.5": 2509.1133, "encoder_q-layer.6": 2877.0959, "encoder_q-layer.7": 3269.8508, "encoder_q-layer.8": 3832.3696, "encoder_q-layer.9": 3500.6033, "epoch": 0.74, "inbatch_neg_score": 1.2933, "inbatch_pos_score": 2.0312, "learning_rate": 1.338888888888889e-05, "loss": 2.6065, "norm_diff": 0.0399, "norm_loss": 0.0, "num_token_doc": 66.7096, "num_token_overlap": 17.7878, "num_token_query": 52.3137, "num_token_union": 73.7081, "num_word_context": 202.3298, "num_word_doc": 49.7773, "num_word_query": 39.8906, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4965.4951, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.293, "query_norm": 1.7341, "queue_k_norm": 1.7757, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3137, "sent_len_1": 66.7096, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9425, "stdk": 0.0494, "stdq": 0.0465, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 75900 }, { "accuracy": 62.1582, "active_queue_size": 16384.0, "cl_loss": 2.5935, "doc_norm": 1.7789, "encoder_q-embeddings": 6712.0669, "encoder_q-layer.0": 4677.8271, "encoder_q-layer.1": 4850.9624, "encoder_q-layer.10": 3651.5996, "encoder_q-layer.11": 7058.8521, "encoder_q-layer.2": 6792.2832, "encoder_q-layer.3": 6231.9951, "encoder_q-layer.4": 5801.7988, "encoder_q-layer.5": 4872.123, "encoder_q-layer.6": 5180.3613, "encoder_q-layer.7": 4817.2754, "encoder_q-layer.8": 4465.2217, "encoder_q-layer.9": 3606.679, "epoch": 0.74, "inbatch_neg_score": 1.2947, "inbatch_pos_score": 2.0508, "learning_rate": 1.3333333333333333e-05, "loss": 2.5935, "norm_diff": 0.0395, "norm_loss": 0.0, "num_token_doc": 66.7872, "num_token_overlap": 17.7963, "num_token_query": 52.2515, "num_token_union": 73.7219, "num_word_context": 202.3506, "num_word_doc": 49.8546, "num_word_query": 39.8842, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8209.8326, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.2939, "query_norm": 1.7395, "queue_k_norm": 1.7768, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2515, "sent_len_1": 66.7872, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0375, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 76000 }, { "accuracy": 61.2793, "active_queue_size": 16384.0, "cl_loss": 2.5929, "doc_norm": 1.776, "encoder_q-embeddings": 3198.2366, "encoder_q-layer.0": 2182.2034, "encoder_q-layer.1": 2431.0166, "encoder_q-layer.10": 3373.1333, "encoder_q-layer.11": 7226.5024, "encoder_q-layer.2": 2873.8171, "encoder_q-layer.3": 3073.1609, "encoder_q-layer.4": 3577.6223, "encoder_q-layer.5": 3812.4028, "encoder_q-layer.6": 4007.8923, "encoder_q-layer.7": 3804.4495, "encoder_q-layer.8": 3777.7405, "encoder_q-layer.9": 3281.2195, "epoch": 0.74, "inbatch_neg_score": 1.3011, "inbatch_pos_score": 2.0488, "learning_rate": 1.3277777777777777e-05, "loss": 2.5929, "norm_diff": 0.034, "norm_loss": 0.0, "num_token_doc": 66.7633, "num_token_overlap": 17.8253, "num_token_query": 52.2827, "num_token_union": 73.6977, "num_word_context": 202.2613, "num_word_doc": 49.8453, "num_word_query": 39.8676, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5586.8374, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3018, "query_norm": 1.742, "queue_k_norm": 1.7775, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2827, "sent_len_1": 66.7633, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8725, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 76100 }, { "accuracy": 61.6699, "active_queue_size": 16384.0, "cl_loss": 2.5964, "doc_norm": 1.7811, "encoder_q-embeddings": 5280.8237, "encoder_q-layer.0": 3527.7651, "encoder_q-layer.1": 3863.4216, "encoder_q-layer.10": 3681.0645, "encoder_q-layer.11": 7528.6299, "encoder_q-layer.2": 4494.1646, "encoder_q-layer.3": 5058.1973, "encoder_q-layer.4": 5771.2827, "encoder_q-layer.5": 5847.5972, "encoder_q-layer.6": 6841.0068, "encoder_q-layer.7": 6474.4033, "encoder_q-layer.8": 5360.2192, "encoder_q-layer.9": 3661.9724, "epoch": 0.74, "inbatch_neg_score": 1.3033, "inbatch_pos_score": 2.0566, "learning_rate": 1.3222222222222221e-05, "loss": 2.5964, "norm_diff": 0.0314, "norm_loss": 0.0, "num_token_doc": 66.7896, "num_token_overlap": 17.7832, "num_token_query": 52.2782, "num_token_union": 73.7637, "num_word_context": 202.5092, "num_word_doc": 49.8425, "num_word_query": 39.8755, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8179.2902, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3047, "query_norm": 1.7497, "queue_k_norm": 1.7784, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2782, "sent_len_1": 66.7896, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1738, "stdk": 0.0497, "stdq": 0.047, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 76200 }, { "accuracy": 60.4004, "active_queue_size": 16384.0, "cl_loss": 2.5862, "doc_norm": 1.7772, "encoder_q-embeddings": 2498.2639, "encoder_q-layer.0": 1621.9078, "encoder_q-layer.1": 1761.6683, "encoder_q-layer.10": 3376.6475, "encoder_q-layer.11": 7089.791, "encoder_q-layer.2": 2002.1959, "encoder_q-layer.3": 2068.616, "encoder_q-layer.4": 2212.0913, "encoder_q-layer.5": 2346.7292, "encoder_q-layer.6": 2704.5249, "encoder_q-layer.7": 2923.3835, "encoder_q-layer.8": 3683.9233, "encoder_q-layer.9": 3359.752, "epoch": 0.74, "inbatch_neg_score": 1.3062, "inbatch_pos_score": 2.0508, "learning_rate": 1.3166666666666665e-05, "loss": 2.5862, "norm_diff": 0.03, "norm_loss": 0.0, "num_token_doc": 66.8156, "num_token_overlap": 17.8392, "num_token_query": 52.3424, "num_token_union": 73.7174, "num_word_context": 202.3232, "num_word_doc": 49.8306, "num_word_query": 39.9114, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4714.5692, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3057, "query_norm": 1.7473, "queue_k_norm": 1.7796, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3424, "sent_len_1": 66.8156, "sent_len_max_0": 128.0, "sent_len_max_1": 211.4613, "stdk": 0.0494, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 76300 }, { "accuracy": 62.2559, "active_queue_size": 16384.0, "cl_loss": 2.5765, "doc_norm": 1.7762, "encoder_q-embeddings": 4800.77, "encoder_q-layer.0": 3164.0505, "encoder_q-layer.1": 3431.3315, "encoder_q-layer.10": 6448.6865, "encoder_q-layer.11": 13917.2441, "encoder_q-layer.2": 3877.7, "encoder_q-layer.3": 4045.6477, "encoder_q-layer.4": 4428.4541, "encoder_q-layer.5": 4646.2842, "encoder_q-layer.6": 5234.8828, "encoder_q-layer.7": 5999.3296, "encoder_q-layer.8": 6973.5767, "encoder_q-layer.9": 6258.5967, "epoch": 0.75, "inbatch_neg_score": 1.3085, "inbatch_pos_score": 2.0527, "learning_rate": 1.3111111111111113e-05, "loss": 2.5765, "norm_diff": 0.0295, "norm_loss": 0.0, "num_token_doc": 66.8337, "num_token_overlap": 17.8444, "num_token_query": 52.386, "num_token_union": 73.7829, "num_word_context": 202.6869, "num_word_doc": 49.9029, "num_word_query": 39.9646, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9239.5028, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3076, "query_norm": 1.7467, "queue_k_norm": 1.7784, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.386, "sent_len_1": 66.8337, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6262, "stdk": 0.0493, "stdq": 0.0467, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 76400 }, { "accuracy": 59.3262, "active_queue_size": 16384.0, "cl_loss": 2.5845, "doc_norm": 1.7784, "encoder_q-embeddings": 6918.5298, "encoder_q-layer.0": 4780.7607, "encoder_q-layer.1": 5447.2441, "encoder_q-layer.10": 8284.7559, "encoder_q-layer.11": 16658.5254, "encoder_q-layer.2": 6613.0068, "encoder_q-layer.3": 6964.25, "encoder_q-layer.4": 7935.5044, "encoder_q-layer.5": 8336.4951, "encoder_q-layer.6": 8325.4316, "encoder_q-layer.7": 8334.9961, "encoder_q-layer.8": 8896.4062, "encoder_q-layer.9": 7964.6079, "epoch": 0.75, "inbatch_neg_score": 1.3138, "inbatch_pos_score": 2.0566, "learning_rate": 1.3055555555555557e-05, "loss": 2.5845, "norm_diff": 0.0236, "norm_loss": 0.0, "num_token_doc": 66.9226, "num_token_overlap": 17.8575, "num_token_query": 52.3503, "num_token_union": 73.8059, "num_word_context": 202.4911, "num_word_doc": 49.9114, "num_word_query": 39.923, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12316.8319, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3135, "query_norm": 1.7548, "queue_k_norm": 1.7807, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3503, "sent_len_1": 66.9226, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5687, "stdk": 0.0494, "stdq": 0.047, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 76500 }, { "accuracy": 61.2793, "active_queue_size": 16384.0, "cl_loss": 2.5906, "doc_norm": 1.7783, "encoder_q-embeddings": 5811.5928, "encoder_q-layer.0": 3878.541, "encoder_q-layer.1": 4295.6172, "encoder_q-layer.10": 6819.668, "encoder_q-layer.11": 14727.4102, "encoder_q-layer.2": 5131.9468, "encoder_q-layer.3": 5467.3105, "encoder_q-layer.4": 6035.0923, "encoder_q-layer.5": 6656.3994, "encoder_q-layer.6": 7090.5293, "encoder_q-layer.7": 7824.062, "encoder_q-layer.8": 7717.2729, "encoder_q-layer.9": 6872.7729, "epoch": 0.75, "inbatch_neg_score": 1.3162, "inbatch_pos_score": 2.0703, "learning_rate": 1.3000000000000001e-05, "loss": 2.5906, "norm_diff": 0.028, "norm_loss": 0.0, "num_token_doc": 66.6799, "num_token_overlap": 17.8719, "num_token_query": 52.3311, "num_token_union": 73.6489, "num_word_context": 202.0072, "num_word_doc": 49.7683, "num_word_query": 39.9203, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10744.6399, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3164, "query_norm": 1.7503, "queue_k_norm": 1.782, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3311, "sent_len_1": 66.6799, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0838, "stdk": 0.0494, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 76600 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.6139, "doc_norm": 1.7801, "encoder_q-embeddings": 4758.0483, "encoder_q-layer.0": 3092.5586, "encoder_q-layer.1": 3467.5657, "encoder_q-layer.10": 7250.2476, "encoder_q-layer.11": 15039.1182, "encoder_q-layer.2": 3963.1917, "encoder_q-layer.3": 4133.1494, "encoder_q-layer.4": 4514.395, "encoder_q-layer.5": 4571.7012, "encoder_q-layer.6": 5573.5103, "encoder_q-layer.7": 6310.7661, "encoder_q-layer.8": 7549.3647, "encoder_q-layer.9": 6963.1299, "epoch": 0.75, "inbatch_neg_score": 1.3248, "inbatch_pos_score": 2.0723, "learning_rate": 1.2944444444444445e-05, "loss": 2.6139, "norm_diff": 0.0257, "norm_loss": 0.0, "num_token_doc": 66.6076, "num_token_overlap": 17.7614, "num_token_query": 52.2661, "num_token_union": 73.6507, "num_word_context": 202.2981, "num_word_doc": 49.6999, "num_word_query": 39.8617, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9698.0124, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3252, "query_norm": 1.7545, "queue_k_norm": 1.7821, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2661, "sent_len_1": 66.6076, "sent_len_max_0": 128.0, "sent_len_max_1": 206.15, "stdk": 0.0494, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 76700 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.6035, "doc_norm": 1.7838, "encoder_q-embeddings": 8438.5137, "encoder_q-layer.0": 6346.9375, "encoder_q-layer.1": 7415.4663, "encoder_q-layer.10": 6752.6484, "encoder_q-layer.11": 14070.7324, "encoder_q-layer.2": 6902.7036, "encoder_q-layer.3": 6491.9185, "encoder_q-layer.4": 6917.9585, "encoder_q-layer.5": 7426.4268, "encoder_q-layer.6": 7839.4565, "encoder_q-layer.7": 7420.2495, "encoder_q-layer.8": 7445.793, "encoder_q-layer.9": 6517.6294, "epoch": 0.75, "inbatch_neg_score": 1.3289, "inbatch_pos_score": 2.0703, "learning_rate": 1.2888888888888889e-05, "loss": 2.6035, "norm_diff": 0.0289, "norm_loss": 0.0, "num_token_doc": 66.5215, "num_token_overlap": 17.7746, "num_token_query": 52.3126, "num_token_union": 73.6041, "num_word_context": 202.2161, "num_word_doc": 49.6339, "num_word_query": 39.8775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11715.8696, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3281, "query_norm": 1.7549, "queue_k_norm": 1.7836, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3126, "sent_len_1": 66.5215, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4538, "stdk": 0.0495, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 76800 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.5978, "doc_norm": 1.7805, "encoder_q-embeddings": 4910.3696, "encoder_q-layer.0": 3322.4294, "encoder_q-layer.1": 3602.6396, "encoder_q-layer.10": 7586.542, "encoder_q-layer.11": 14689.9102, "encoder_q-layer.2": 4035.6985, "encoder_q-layer.3": 4438.9258, "encoder_q-layer.4": 4797.8745, "encoder_q-layer.5": 5431.4082, "encoder_q-layer.6": 5754.1777, "encoder_q-layer.7": 6436.5059, "encoder_q-layer.8": 7674.8115, "encoder_q-layer.9": 7045.4282, "epoch": 0.75, "inbatch_neg_score": 1.3304, "inbatch_pos_score": 2.0645, "learning_rate": 1.2833333333333333e-05, "loss": 2.5978, "norm_diff": 0.0251, "norm_loss": 0.0, "num_token_doc": 66.922, "num_token_overlap": 17.8218, "num_token_query": 52.2666, "num_token_union": 73.7605, "num_word_context": 202.5853, "num_word_doc": 49.9173, "num_word_query": 39.8724, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9798.2062, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3301, "query_norm": 1.7554, "queue_k_norm": 1.7835, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2666, "sent_len_1": 66.922, "sent_len_max_0": 128.0, "sent_len_max_1": 210.8262, "stdk": 0.0494, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 76900 }, { "accuracy": 61.2305, "active_queue_size": 16384.0, "cl_loss": 2.5828, "doc_norm": 1.7838, "encoder_q-embeddings": 5314.1016, "encoder_q-layer.0": 3348.6191, "encoder_q-layer.1": 3789.6282, "encoder_q-layer.10": 7124.1343, "encoder_q-layer.11": 15055.9961, "encoder_q-layer.2": 4186.7422, "encoder_q-layer.3": 4568.124, "encoder_q-layer.4": 5128.8027, "encoder_q-layer.5": 5300.0811, "encoder_q-layer.6": 5909.2505, "encoder_q-layer.7": 6824.4204, "encoder_q-layer.8": 7488.374, "encoder_q-layer.9": 7010.9653, "epoch": 0.75, "inbatch_neg_score": 1.339, "inbatch_pos_score": 2.0957, "learning_rate": 1.2777777777777777e-05, "loss": 2.5828, "norm_diff": 0.0315, "norm_loss": 0.0, "num_token_doc": 66.8245, "num_token_overlap": 17.8711, "num_token_query": 52.3254, "num_token_union": 73.6661, "num_word_context": 202.2842, "num_word_doc": 49.8296, "num_word_query": 39.9073, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10178.5325, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3389, "query_norm": 1.7523, "queue_k_norm": 1.7842, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3254, "sent_len_1": 66.8245, "sent_len_max_0": 128.0, "sent_len_max_1": 210.7688, "stdk": 0.0495, "stdq": 0.0464, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 77000 }, { "accuracy": 60.2051, "active_queue_size": 16384.0, "cl_loss": 2.5834, "doc_norm": 1.782, "encoder_q-embeddings": 2636.8188, "encoder_q-layer.0": 1670.7566, "encoder_q-layer.1": 1809.0599, "encoder_q-layer.10": 3684.0825, "encoder_q-layer.11": 7240.0571, "encoder_q-layer.2": 2100.2649, "encoder_q-layer.3": 2241.2434, "encoder_q-layer.4": 2485.313, "encoder_q-layer.5": 2598.9241, "encoder_q-layer.6": 3053.2869, "encoder_q-layer.7": 3524.9163, "encoder_q-layer.8": 3918.2021, "encoder_q-layer.9": 3548.3665, "epoch": 0.75, "inbatch_neg_score": 1.3454, "inbatch_pos_score": 2.0859, "learning_rate": 1.2722222222222221e-05, "loss": 2.5834, "norm_diff": 0.0223, "norm_loss": 0.0, "num_token_doc": 66.8152, "num_token_overlap": 17.8412, "num_token_query": 52.3585, "num_token_union": 73.7673, "num_word_context": 202.4346, "num_word_doc": 49.8481, "num_word_query": 39.9353, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5012.3908, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3457, "query_norm": 1.7598, "queue_k_norm": 1.787, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3585, "sent_len_1": 66.8152, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8887, "stdk": 0.0493, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 77100 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.5783, "doc_norm": 1.784, "encoder_q-embeddings": 3599.7397, "encoder_q-layer.0": 2426.5637, "encoder_q-layer.1": 2674.2437, "encoder_q-layer.10": 3578.5142, "encoder_q-layer.11": 7655.5601, "encoder_q-layer.2": 2985.3762, "encoder_q-layer.3": 3068.1538, "encoder_q-layer.4": 3312.6875, "encoder_q-layer.5": 3414.9875, "encoder_q-layer.6": 3603.7654, "encoder_q-layer.7": 3894.3806, "encoder_q-layer.8": 3919.2698, "encoder_q-layer.9": 3424.8638, "epoch": 0.75, "inbatch_neg_score": 1.3464, "inbatch_pos_score": 2.0898, "learning_rate": 1.2666666666666668e-05, "loss": 2.5783, "norm_diff": 0.0223, "norm_loss": 0.0, "num_token_doc": 66.8554, "num_token_overlap": 17.7933, "num_token_query": 52.2375, "num_token_union": 73.7024, "num_word_context": 202.4077, "num_word_doc": 49.8816, "num_word_query": 39.8429, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5794.7722, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3457, "query_norm": 1.7618, "queue_k_norm": 1.7879, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2375, "sent_len_1": 66.8554, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0137, "stdk": 0.0494, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 77200 }, { "accuracy": 61.084, "active_queue_size": 16384.0, "cl_loss": 2.5954, "doc_norm": 1.7919, "encoder_q-embeddings": 2661.5464, "encoder_q-layer.0": 1703.4901, "encoder_q-layer.1": 1904.2688, "encoder_q-layer.10": 3504.3713, "encoder_q-layer.11": 7227.3389, "encoder_q-layer.2": 2235.5977, "encoder_q-layer.3": 2230.2009, "encoder_q-layer.4": 2395.2222, "encoder_q-layer.5": 2459.553, "encoder_q-layer.6": 2799.1934, "encoder_q-layer.7": 3188.2751, "encoder_q-layer.8": 3644.7773, "encoder_q-layer.9": 3334.1182, "epoch": 0.75, "inbatch_neg_score": 1.3477, "inbatch_pos_score": 2.1055, "learning_rate": 1.2611111111111113e-05, "loss": 2.5954, "norm_diff": 0.0298, "norm_loss": 0.0, "num_token_doc": 66.6804, "num_token_overlap": 17.7845, "num_token_query": 52.2358, "num_token_union": 73.6738, "num_word_context": 202.3871, "num_word_doc": 49.8102, "num_word_query": 39.8234, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4938.6528, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3477, "query_norm": 1.7622, "queue_k_norm": 1.7892, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2358, "sent_len_1": 66.6804, "sent_len_max_0": 128.0, "sent_len_max_1": 206.5337, "stdk": 0.0497, "stdq": 0.047, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 77300 }, { "accuracy": 61.6699, "active_queue_size": 16384.0, "cl_loss": 2.5952, "doc_norm": 1.7919, "encoder_q-embeddings": 2780.3521, "encoder_q-layer.0": 1773.0217, "encoder_q-layer.1": 1952.3029, "encoder_q-layer.10": 3465.6448, "encoder_q-layer.11": 7304.9897, "encoder_q-layer.2": 2279.1458, "encoder_q-layer.3": 2463.2297, "encoder_q-layer.4": 2642.4807, "encoder_q-layer.5": 2792.1936, "encoder_q-layer.6": 3020.3723, "encoder_q-layer.7": 3244.2153, "encoder_q-layer.8": 3883.0051, "encoder_q-layer.9": 3367.8245, "epoch": 0.76, "inbatch_neg_score": 1.3482, "inbatch_pos_score": 2.1055, "learning_rate": 1.2555555555555557e-05, "loss": 2.5952, "norm_diff": 0.0357, "norm_loss": 0.0, "num_token_doc": 66.8841, "num_token_overlap": 17.8155, "num_token_query": 52.3032, "num_token_union": 73.7689, "num_word_context": 202.2507, "num_word_doc": 49.9076, "num_word_query": 39.8856, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5076.8286, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3496, "query_norm": 1.7562, "queue_k_norm": 1.7886, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3032, "sent_len_1": 66.8841, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0425, "stdk": 0.0497, "stdq": 0.0468, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 77400 }, { "accuracy": 61.5234, "active_queue_size": 16384.0, "cl_loss": 2.5813, "doc_norm": 1.787, "encoder_q-embeddings": 2635.938, "encoder_q-layer.0": 1696.2767, "encoder_q-layer.1": 1893.4865, "encoder_q-layer.10": 3595.7822, "encoder_q-layer.11": 7679.8301, "encoder_q-layer.2": 2300.3691, "encoder_q-layer.3": 2455.6038, "encoder_q-layer.4": 2621.9592, "encoder_q-layer.5": 2640.3359, "encoder_q-layer.6": 2988.1472, "encoder_q-layer.7": 3263.9082, "encoder_q-layer.8": 3629.9297, "encoder_q-layer.9": 3503.0491, "epoch": 0.76, "inbatch_neg_score": 1.3482, "inbatch_pos_score": 2.1035, "learning_rate": 1.25e-05, "loss": 2.5813, "norm_diff": 0.0394, "norm_loss": 0.0, "num_token_doc": 66.789, "num_token_overlap": 17.8387, "num_token_query": 52.2289, "num_token_union": 73.6787, "num_word_context": 202.257, "num_word_doc": 49.8756, "num_word_query": 39.8237, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5019.5725, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3477, "query_norm": 1.7476, "queue_k_norm": 1.7891, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2289, "sent_len_1": 66.789, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4963, "stdk": 0.0494, "stdq": 0.0465, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 77500 }, { "accuracy": 60.5957, "active_queue_size": 16384.0, "cl_loss": 2.5923, "doc_norm": 1.7901, "encoder_q-embeddings": 2454.8313, "encoder_q-layer.0": 1640.243, "encoder_q-layer.1": 1735.8208, "encoder_q-layer.10": 3493.0791, "encoder_q-layer.11": 7455.6973, "encoder_q-layer.2": 1977.6207, "encoder_q-layer.3": 2037.8751, "encoder_q-layer.4": 2207.8213, "encoder_q-layer.5": 2250.6489, "encoder_q-layer.6": 2545.0867, "encoder_q-layer.7": 2892.0793, "encoder_q-layer.8": 3611.1587, "encoder_q-layer.9": 3382.832, "epoch": 0.76, "inbatch_neg_score": 1.3524, "inbatch_pos_score": 2.1016, "learning_rate": 1.2444444444444445e-05, "loss": 2.5923, "norm_diff": 0.0361, "norm_loss": 0.0, "num_token_doc": 66.7773, "num_token_overlap": 17.7915, "num_token_query": 52.2754, "num_token_union": 73.7179, "num_word_context": 202.3345, "num_word_doc": 49.831, "num_word_query": 39.8792, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4779.929, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3535, "query_norm": 1.7539, "queue_k_norm": 1.7909, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2754, "sent_len_1": 66.7773, "sent_len_max_0": 128.0, "sent_len_max_1": 207.185, "stdk": 0.0495, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 77600 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.5802, "doc_norm": 1.7944, "encoder_q-embeddings": 8441.5186, "encoder_q-layer.0": 6274.8584, "encoder_q-layer.1": 6995.3481, "encoder_q-layer.10": 3311.7649, "encoder_q-layer.11": 7180.4653, "encoder_q-layer.2": 8448.3223, "encoder_q-layer.3": 8518.252, "encoder_q-layer.4": 9162.0986, "encoder_q-layer.5": 11906.4785, "encoder_q-layer.6": 9629.8994, "encoder_q-layer.7": 7353.7612, "encoder_q-layer.8": 5725.918, "encoder_q-layer.9": 3713.075, "epoch": 0.76, "inbatch_neg_score": 1.3519, "inbatch_pos_score": 2.0898, "learning_rate": 1.238888888888889e-05, "loss": 2.5802, "norm_diff": 0.0558, "norm_loss": 0.0, "num_token_doc": 66.7978, "num_token_overlap": 17.8392, "num_token_query": 52.2639, "num_token_union": 73.697, "num_word_context": 202.2205, "num_word_doc": 49.8681, "num_word_query": 39.8711, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11560.2662, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3516, "query_norm": 1.7386, "queue_k_norm": 1.7919, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2639, "sent_len_1": 66.7978, "sent_len_max_0": 128.0, "sent_len_max_1": 207.0625, "stdk": 0.0496, "stdq": 0.046, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 77700 }, { "accuracy": 61.7676, "active_queue_size": 16384.0, "cl_loss": 2.5941, "doc_norm": 1.7929, "encoder_q-embeddings": 3272.8665, "encoder_q-layer.0": 2228.9507, "encoder_q-layer.1": 2406.7568, "encoder_q-layer.10": 3923.4045, "encoder_q-layer.11": 7731.0107, "encoder_q-layer.2": 2952.3694, "encoder_q-layer.3": 3264.2417, "encoder_q-layer.4": 3512.3142, "encoder_q-layer.5": 3582.696, "encoder_q-layer.6": 3922.7058, "encoder_q-layer.7": 4408.7769, "encoder_q-layer.8": 4852.6616, "encoder_q-layer.9": 3698.2888, "epoch": 0.76, "inbatch_neg_score": 1.3529, "inbatch_pos_score": 2.1016, "learning_rate": 1.2333333333333334e-05, "loss": 2.5941, "norm_diff": 0.0476, "norm_loss": 0.0, "num_token_doc": 66.7541, "num_token_overlap": 17.8347, "num_token_query": 52.2838, "num_token_union": 73.6936, "num_word_context": 202.2107, "num_word_doc": 49.8073, "num_word_query": 39.8681, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5972.7651, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3535, "query_norm": 1.7453, "queue_k_norm": 1.794, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2838, "sent_len_1": 66.7541, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3875, "stdk": 0.0496, "stdq": 0.0463, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 77800 }, { "accuracy": 60.1074, "active_queue_size": 16384.0, "cl_loss": 2.5982, "doc_norm": 1.7957, "encoder_q-embeddings": 2867.8088, "encoder_q-layer.0": 1861.7263, "encoder_q-layer.1": 2091.1292, "encoder_q-layer.10": 3464.5903, "encoder_q-layer.11": 7392.6489, "encoder_q-layer.2": 2384.1809, "encoder_q-layer.3": 2564.8923, "encoder_q-layer.4": 2776.7332, "encoder_q-layer.5": 2996.5032, "encoder_q-layer.6": 3160.8525, "encoder_q-layer.7": 3167.0093, "encoder_q-layer.8": 3518.0671, "encoder_q-layer.9": 3178.4319, "epoch": 0.76, "inbatch_neg_score": 1.349, "inbatch_pos_score": 2.0996, "learning_rate": 1.2277777777777778e-05, "loss": 2.5982, "norm_diff": 0.0503, "norm_loss": 0.0, "num_token_doc": 66.6946, "num_token_overlap": 17.7958, "num_token_query": 52.2473, "num_token_union": 73.6532, "num_word_context": 202.3409, "num_word_doc": 49.7789, "num_word_query": 39.8635, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5021.5078, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3496, "query_norm": 1.7454, "queue_k_norm": 1.794, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2473, "sent_len_1": 66.6946, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1362, "stdk": 0.0497, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 77900 }, { "accuracy": 59.8145, "active_queue_size": 16384.0, "cl_loss": 2.5935, "doc_norm": 1.7929, "encoder_q-embeddings": 5242.29, "encoder_q-layer.0": 3520.9741, "encoder_q-layer.1": 4052.4473, "encoder_q-layer.10": 4010.9883, "encoder_q-layer.11": 7781.8574, "encoder_q-layer.2": 4995.0264, "encoder_q-layer.3": 5900.2114, "encoder_q-layer.4": 6811.8384, "encoder_q-layer.5": 7707.7441, "encoder_q-layer.6": 7682.3389, "encoder_q-layer.7": 6874.5317, "encoder_q-layer.8": 5478.6934, "encoder_q-layer.9": 3867.5405, "epoch": 0.76, "inbatch_neg_score": 1.3552, "inbatch_pos_score": 2.1113, "learning_rate": 1.2222222222222222e-05, "loss": 2.5935, "norm_diff": 0.0375, "norm_loss": 0.0, "num_token_doc": 66.7154, "num_token_overlap": 17.7658, "num_token_query": 52.1966, "num_token_union": 73.6492, "num_word_context": 202.3783, "num_word_doc": 49.7816, "num_word_query": 39.8037, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8552.478, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3555, "query_norm": 1.7554, "queue_k_norm": 1.7946, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1966, "sent_len_1": 66.7154, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1163, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 78000 }, { "accuracy": 61.8652, "active_queue_size": 16384.0, "cl_loss": 2.588, "doc_norm": 1.7979, "encoder_q-embeddings": 2415.5752, "encoder_q-layer.0": 1605.5231, "encoder_q-layer.1": 1760.4246, "encoder_q-layer.10": 4106.4502, "encoder_q-layer.11": 7815.0972, "encoder_q-layer.2": 2055.3611, "encoder_q-layer.3": 2260.3081, "encoder_q-layer.4": 2526.8508, "encoder_q-layer.5": 2648.1558, "encoder_q-layer.6": 3013.3528, "encoder_q-layer.7": 3350.0764, "encoder_q-layer.8": 3794.623, "encoder_q-layer.9": 3538.7947, "epoch": 0.76, "inbatch_neg_score": 1.3522, "inbatch_pos_score": 2.1074, "learning_rate": 1.2166666666666668e-05, "loss": 2.588, "norm_diff": 0.055, "norm_loss": 0.0, "num_token_doc": 66.8712, "num_token_overlap": 17.8353, "num_token_query": 52.1625, "num_token_union": 73.6512, "num_word_context": 202.2674, "num_word_doc": 49.8823, "num_word_query": 39.7734, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5070.3434, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3535, "query_norm": 1.7429, "queue_k_norm": 1.7961, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1625, "sent_len_1": 66.8712, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1025, "stdk": 0.0497, "stdq": 0.0463, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 78100 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.5707, "doc_norm": 1.7956, "encoder_q-embeddings": 2715.6128, "encoder_q-layer.0": 1718.0858, "encoder_q-layer.1": 1968.4061, "encoder_q-layer.10": 3599.4685, "encoder_q-layer.11": 7669.1914, "encoder_q-layer.2": 2237.7864, "encoder_q-layer.3": 2370.8931, "encoder_q-layer.4": 2548.6812, "encoder_q-layer.5": 2683.7195, "encoder_q-layer.6": 2986.2253, "encoder_q-layer.7": 3340.803, "encoder_q-layer.8": 3959.7708, "encoder_q-layer.9": 3572.2976, "epoch": 0.76, "inbatch_neg_score": 1.3545, "inbatch_pos_score": 2.1035, "learning_rate": 1.2111111111111112e-05, "loss": 2.5707, "norm_diff": 0.0463, "norm_loss": 0.0, "num_token_doc": 66.8145, "num_token_overlap": 17.8726, "num_token_query": 52.4242, "num_token_union": 73.7216, "num_word_context": 202.2631, "num_word_doc": 49.8636, "num_word_query": 39.9608, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5199.783, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3535, "query_norm": 1.7493, "queue_k_norm": 1.7959, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4242, "sent_len_1": 66.8145, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4137, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 78200 }, { "accuracy": 59.5215, "active_queue_size": 16384.0, "cl_loss": 2.5938, "doc_norm": 1.7999, "encoder_q-embeddings": 2616.4595, "encoder_q-layer.0": 1661.787, "encoder_q-layer.1": 1805.7966, "encoder_q-layer.10": 3932.8481, "encoder_q-layer.11": 7704.2617, "encoder_q-layer.2": 2058.7393, "encoder_q-layer.3": 2163.2461, "encoder_q-layer.4": 2372.9639, "encoder_q-layer.5": 2427.019, "encoder_q-layer.6": 2779.9041, "encoder_q-layer.7": 3369.7136, "encoder_q-layer.8": 3911.6814, "encoder_q-layer.9": 3642.1672, "epoch": 0.76, "inbatch_neg_score": 1.3531, "inbatch_pos_score": 2.1074, "learning_rate": 1.2055555555555556e-05, "loss": 2.5938, "norm_diff": 0.0484, "norm_loss": 0.0, "num_token_doc": 66.7377, "num_token_overlap": 17.7848, "num_token_query": 52.2058, "num_token_union": 73.6726, "num_word_context": 202.3499, "num_word_doc": 49.8058, "num_word_query": 39.8161, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5015.1055, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3525, "query_norm": 1.7515, "queue_k_norm": 1.7961, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2058, "sent_len_1": 66.7377, "sent_len_max_0": 128.0, "sent_len_max_1": 206.3025, "stdk": 0.0497, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 78300 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.5783, "doc_norm": 1.7982, "encoder_q-embeddings": 2607.5908, "encoder_q-layer.0": 1768.4562, "encoder_q-layer.1": 1922.9323, "encoder_q-layer.10": 3979.8792, "encoder_q-layer.11": 7938.1743, "encoder_q-layer.2": 2230.6414, "encoder_q-layer.3": 2352.7935, "encoder_q-layer.4": 2391.0823, "encoder_q-layer.5": 2588.4011, "encoder_q-layer.6": 2918.8857, "encoder_q-layer.7": 3211.1406, "encoder_q-layer.8": 4008.0811, "encoder_q-layer.9": 3918.5562, "epoch": 0.77, "inbatch_neg_score": 1.3505, "inbatch_pos_score": 2.1094, "learning_rate": 1.2e-05, "loss": 2.5783, "norm_diff": 0.0408, "norm_loss": 0.0, "num_token_doc": 66.7889, "num_token_overlap": 17.8528, "num_token_query": 52.3232, "num_token_union": 73.7029, "num_word_context": 202.4384, "num_word_doc": 49.8775, "num_word_query": 39.9253, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5214.5497, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3506, "query_norm": 1.7574, "queue_k_norm": 1.7975, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3232, "sent_len_1": 66.7889, "sent_len_max_0": 128.0, "sent_len_max_1": 205.875, "stdk": 0.0496, "stdq": 0.0472, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 78400 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.6, "doc_norm": 1.7977, "encoder_q-embeddings": 3332.7761, "encoder_q-layer.0": 2269.512, "encoder_q-layer.1": 2477.196, "encoder_q-layer.10": 3300.937, "encoder_q-layer.11": 7436.5659, "encoder_q-layer.2": 2828.5562, "encoder_q-layer.3": 2855.6794, "encoder_q-layer.4": 3046.3884, "encoder_q-layer.5": 3088.53, "encoder_q-layer.6": 3464.2429, "encoder_q-layer.7": 3497.5215, "encoder_q-layer.8": 3581.5171, "encoder_q-layer.9": 3325.9771, "epoch": 0.77, "inbatch_neg_score": 1.3554, "inbatch_pos_score": 2.1074, "learning_rate": 1.1944444444444446e-05, "loss": 2.6, "norm_diff": 0.0431, "norm_loss": 0.0, "num_token_doc": 66.807, "num_token_overlap": 17.7725, "num_token_query": 52.2297, "num_token_union": 73.7366, "num_word_context": 202.159, "num_word_doc": 49.8186, "num_word_query": 39.8304, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5391.8766, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3545, "query_norm": 1.7546, "queue_k_norm": 1.7977, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2297, "sent_len_1": 66.807, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6325, "stdk": 0.0496, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 78500 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.5705, "doc_norm": 1.7995, "encoder_q-embeddings": 2603.2664, "encoder_q-layer.0": 1710.2926, "encoder_q-layer.1": 1905.5211, "encoder_q-layer.10": 3635.7532, "encoder_q-layer.11": 7523.3242, "encoder_q-layer.2": 2187.7808, "encoder_q-layer.3": 2385.5994, "encoder_q-layer.4": 2544.9958, "encoder_q-layer.5": 2687.7588, "encoder_q-layer.6": 3148.438, "encoder_q-layer.7": 3220.053, "encoder_q-layer.8": 3924.791, "encoder_q-layer.9": 3425.6841, "epoch": 0.77, "inbatch_neg_score": 1.3535, "inbatch_pos_score": 2.1152, "learning_rate": 1.188888888888889e-05, "loss": 2.5705, "norm_diff": 0.0452, "norm_loss": 0.0, "num_token_doc": 66.6684, "num_token_overlap": 17.7831, "num_token_query": 52.3244, "num_token_union": 73.7206, "num_word_context": 202.1535, "num_word_doc": 49.7362, "num_word_query": 39.9084, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5119.7305, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3535, "query_norm": 1.7543, "queue_k_norm": 1.7985, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3244, "sent_len_1": 66.6684, "sent_len_max_0": 128.0, "sent_len_max_1": 207.7212, "stdk": 0.0497, "stdq": 0.0469, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 78600 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.597, "doc_norm": 1.7958, "encoder_q-embeddings": 2549.2212, "encoder_q-layer.0": 1798.9597, "encoder_q-layer.1": 1933.8163, "encoder_q-layer.10": 3392.9695, "encoder_q-layer.11": 7178.416, "encoder_q-layer.2": 2310.0183, "encoder_q-layer.3": 2440.3457, "encoder_q-layer.4": 2553.5266, "encoder_q-layer.5": 2631.6274, "encoder_q-layer.6": 2962.3428, "encoder_q-layer.7": 3222.2886, "encoder_q-layer.8": 3529.946, "encoder_q-layer.9": 3171.2078, "epoch": 0.77, "inbatch_neg_score": 1.3585, "inbatch_pos_score": 2.1094, "learning_rate": 1.1833333333333334e-05, "loss": 2.597, "norm_diff": 0.0583, "norm_loss": 0.0, "num_token_doc": 66.8016, "num_token_overlap": 17.7902, "num_token_query": 52.2756, "num_token_union": 73.7495, "num_word_context": 202.382, "num_word_doc": 49.829, "num_word_query": 39.8552, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4914.2953, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3574, "query_norm": 1.7375, "queue_k_norm": 1.7978, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2756, "sent_len_1": 66.8016, "sent_len_max_0": 128.0, "sent_len_max_1": 211.3988, "stdk": 0.0494, "stdq": 0.046, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 78700 }, { "accuracy": 61.7188, "active_queue_size": 16384.0, "cl_loss": 2.595, "doc_norm": 1.8002, "encoder_q-embeddings": 3754.9585, "encoder_q-layer.0": 2604.7339, "encoder_q-layer.1": 3170.8792, "encoder_q-layer.10": 3454.4971, "encoder_q-layer.11": 7187.7285, "encoder_q-layer.2": 4034.3354, "encoder_q-layer.3": 4160.7021, "encoder_q-layer.4": 4531.6353, "encoder_q-layer.5": 4360.9878, "encoder_q-layer.6": 4101.4917, "encoder_q-layer.7": 3788.748, "encoder_q-layer.8": 3956.7041, "encoder_q-layer.9": 3413.7393, "epoch": 0.77, "inbatch_neg_score": 1.3567, "inbatch_pos_score": 2.125, "learning_rate": 1.1777777777777778e-05, "loss": 2.595, "norm_diff": 0.0437, "norm_loss": 0.0, "num_token_doc": 66.5861, "num_token_overlap": 17.7714, "num_token_query": 52.1604, "num_token_union": 73.5596, "num_word_context": 201.9835, "num_word_doc": 49.6758, "num_word_query": 39.7796, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6210.0305, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3574, "query_norm": 1.7566, "queue_k_norm": 1.7977, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1604, "sent_len_1": 66.5861, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8787, "stdk": 0.0497, "stdq": 0.047, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 78800 }, { "accuracy": 61.2793, "active_queue_size": 16384.0, "cl_loss": 2.5993, "doc_norm": 1.7987, "encoder_q-embeddings": 2653.9783, "encoder_q-layer.0": 1684.1606, "encoder_q-layer.1": 1848.6219, "encoder_q-layer.10": 3376.3472, "encoder_q-layer.11": 7304.3447, "encoder_q-layer.2": 2105.8042, "encoder_q-layer.3": 2174.1135, "encoder_q-layer.4": 2280.6047, "encoder_q-layer.5": 2421.0918, "encoder_q-layer.6": 2729.3806, "encoder_q-layer.7": 3104.6987, "encoder_q-layer.8": 3667.1738, "encoder_q-layer.9": 3337.3472, "epoch": 0.77, "inbatch_neg_score": 1.3542, "inbatch_pos_score": 2.1172, "learning_rate": 1.1722222222222224e-05, "loss": 2.5993, "norm_diff": 0.0503, "norm_loss": 0.0, "num_token_doc": 66.8374, "num_token_overlap": 17.7964, "num_token_query": 52.2235, "num_token_union": 73.6813, "num_word_context": 202.1186, "num_word_doc": 49.8635, "num_word_query": 39.8125, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4887.0853, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3555, "query_norm": 1.7484, "queue_k_norm": 1.7988, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2235, "sent_len_1": 66.8374, "sent_len_max_0": 128.0, "sent_len_max_1": 210.7312, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 78900 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.5856, "doc_norm": 1.8029, "encoder_q-embeddings": 2699.7856, "encoder_q-layer.0": 1784.2606, "encoder_q-layer.1": 2071.7969, "encoder_q-layer.10": 3770.9084, "encoder_q-layer.11": 7664.8296, "encoder_q-layer.2": 2433.2114, "encoder_q-layer.3": 2515.2952, "encoder_q-layer.4": 2713.4231, "encoder_q-layer.5": 2938.4602, "encoder_q-layer.6": 3271.4629, "encoder_q-layer.7": 3415.1455, "encoder_q-layer.8": 4146.8784, "encoder_q-layer.9": 3637.3169, "epoch": 0.77, "inbatch_neg_score": 1.3533, "inbatch_pos_score": 2.1191, "learning_rate": 1.1666666666666668e-05, "loss": 2.5856, "norm_diff": 0.0508, "norm_loss": 0.0, "num_token_doc": 66.6455, "num_token_overlap": 17.7812, "num_token_query": 52.1827, "num_token_union": 73.6387, "num_word_context": 202.117, "num_word_doc": 49.7531, "num_word_query": 39.8187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5262.7964, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3555, "query_norm": 1.7521, "queue_k_norm": 1.7979, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1827, "sent_len_1": 66.6455, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9938, "stdk": 0.0498, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 79000 }, { "accuracy": 62.6465, "active_queue_size": 16384.0, "cl_loss": 2.5848, "doc_norm": 1.8009, "encoder_q-embeddings": 13500.9814, "encoder_q-layer.0": 9720.1465, "encoder_q-layer.1": 11427.9473, "encoder_q-layer.10": 7613.1724, "encoder_q-layer.11": 15358.6562, "encoder_q-layer.2": 11298.4072, "encoder_q-layer.3": 11962.6553, "encoder_q-layer.4": 11287.9092, "encoder_q-layer.5": 10834.2949, "encoder_q-layer.6": 11387.8506, "encoder_q-layer.7": 13566.1699, "encoder_q-layer.8": 11141.2109, "encoder_q-layer.9": 7428.0723, "epoch": 0.77, "inbatch_neg_score": 1.3566, "inbatch_pos_score": 2.1152, "learning_rate": 1.1611111111111112e-05, "loss": 2.5848, "norm_diff": 0.0538, "norm_loss": 0.0, "num_token_doc": 66.9547, "num_token_overlap": 17.9046, "num_token_query": 52.4969, "num_token_union": 73.8835, "num_word_context": 202.631, "num_word_doc": 49.9906, "num_word_query": 40.0605, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17292.1591, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 1.3574, "query_norm": 1.7471, "queue_k_norm": 1.7992, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4969, "sent_len_1": 66.9547, "sent_len_max_0": 128.0, "sent_len_max_1": 206.7675, "stdk": 0.0497, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 79100 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 2.5979, "doc_norm": 1.7966, "encoder_q-embeddings": 4770.5249, "encoder_q-layer.0": 2986.4453, "encoder_q-layer.1": 3298.7898, "encoder_q-layer.10": 7084.1768, "encoder_q-layer.11": 14421.0566, "encoder_q-layer.2": 3759.0115, "encoder_q-layer.3": 4044.9353, "encoder_q-layer.4": 4332.0127, "encoder_q-layer.5": 4449.9634, "encoder_q-layer.6": 5051.3555, "encoder_q-layer.7": 5954.4404, "encoder_q-layer.8": 7088.9878, "encoder_q-layer.9": 6598.334, "epoch": 0.77, "inbatch_neg_score": 1.3578, "inbatch_pos_score": 2.1191, "learning_rate": 1.1555555555555556e-05, "loss": 2.5979, "norm_diff": 0.0449, "norm_loss": 0.0, "num_token_doc": 66.8375, "num_token_overlap": 17.8394, "num_token_query": 52.3091, "num_token_union": 73.7486, "num_word_context": 202.2676, "num_word_doc": 49.907, "num_word_query": 39.9137, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9380.8298, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3574, "query_norm": 1.7517, "queue_k_norm": 1.7985, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3091, "sent_len_1": 66.8375, "sent_len_max_0": 128.0, "sent_len_max_1": 209.7188, "stdk": 0.0494, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 79200 }, { "accuracy": 60.8398, "active_queue_size": 16384.0, "cl_loss": 2.5838, "doc_norm": 1.7974, "encoder_q-embeddings": 5504.2734, "encoder_q-layer.0": 3754.3633, "encoder_q-layer.1": 4104.064, "encoder_q-layer.10": 7459.5322, "encoder_q-layer.11": 15052.6807, "encoder_q-layer.2": 4948.7676, "encoder_q-layer.3": 5178.0439, "encoder_q-layer.4": 6054.4561, "encoder_q-layer.5": 6398.8511, "encoder_q-layer.6": 6867.5127, "encoder_q-layer.7": 7397.6851, "encoder_q-layer.8": 7791.3945, "encoder_q-layer.9": 7060.0381, "epoch": 0.77, "inbatch_neg_score": 1.3557, "inbatch_pos_score": 2.1035, "learning_rate": 1.1500000000000002e-05, "loss": 2.5838, "norm_diff": 0.054, "norm_loss": 0.0, "num_token_doc": 66.5289, "num_token_overlap": 17.8071, "num_token_query": 52.3583, "num_token_union": 73.6536, "num_word_context": 202.0621, "num_word_doc": 49.652, "num_word_query": 39.9187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10683.4895, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3555, "query_norm": 1.7434, "queue_k_norm": 1.7997, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3583, "sent_len_1": 66.5289, "sent_len_max_0": 128.0, "sent_len_max_1": 206.6337, "stdk": 0.0495, "stdq": 0.0464, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 79300 }, { "accuracy": 60.791, "active_queue_size": 16384.0, "cl_loss": 2.5955, "doc_norm": 1.7993, "encoder_q-embeddings": 5160.8608, "encoder_q-layer.0": 3190.4653, "encoder_q-layer.1": 3502.4893, "encoder_q-layer.10": 7101.4355, "encoder_q-layer.11": 14156.1279, "encoder_q-layer.2": 3953.3564, "encoder_q-layer.3": 4274.1055, "encoder_q-layer.4": 4513.8555, "encoder_q-layer.5": 4731.6626, "encoder_q-layer.6": 5254.5576, "encoder_q-layer.7": 5878.1943, "encoder_q-layer.8": 7352.5645, "encoder_q-layer.9": 6547.3652, "epoch": 0.78, "inbatch_neg_score": 1.3561, "inbatch_pos_score": 2.1211, "learning_rate": 1.1444444444444446e-05, "loss": 2.5955, "norm_diff": 0.0543, "norm_loss": 0.0, "num_token_doc": 66.8172, "num_token_overlap": 17.7643, "num_token_query": 52.2089, "num_token_union": 73.7488, "num_word_context": 202.6028, "num_word_doc": 49.8684, "num_word_query": 39.8012, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9512.7776, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3555, "query_norm": 1.745, "queue_k_norm": 1.799, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2089, "sent_len_1": 66.8172, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0675, "stdk": 0.0495, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 79400 }, { "accuracy": 61.6211, "active_queue_size": 16384.0, "cl_loss": 2.5847, "doc_norm": 1.8034, "encoder_q-embeddings": 5981.645, "encoder_q-layer.0": 4060.1873, "encoder_q-layer.1": 4491.6787, "encoder_q-layer.10": 7098.7021, "encoder_q-layer.11": 14747.6934, "encoder_q-layer.2": 5154.8296, "encoder_q-layer.3": 5477.1553, "encoder_q-layer.4": 6038.7905, "encoder_q-layer.5": 6373.3584, "encoder_q-layer.6": 6935.2266, "encoder_q-layer.7": 7587.856, "encoder_q-layer.8": 7900.3848, "encoder_q-layer.9": 6959.4917, "epoch": 0.78, "inbatch_neg_score": 1.3575, "inbatch_pos_score": 2.1172, "learning_rate": 1.138888888888889e-05, "loss": 2.5847, "norm_diff": 0.055, "norm_loss": 0.0, "num_token_doc": 66.75, "num_token_overlap": 17.8027, "num_token_query": 52.286, "num_token_union": 73.6986, "num_word_context": 202.4136, "num_word_doc": 49.8095, "num_word_query": 39.8894, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10841.6355, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3574, "query_norm": 1.7484, "queue_k_norm": 1.7994, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.286, "sent_len_1": 66.75, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0687, "stdk": 0.0497, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 79500 }, { "accuracy": 60.8398, "active_queue_size": 16384.0, "cl_loss": 2.5966, "doc_norm": 1.8034, "encoder_q-embeddings": 4652.7837, "encoder_q-layer.0": 3065.9465, "encoder_q-layer.1": 3248.5884, "encoder_q-layer.10": 6764.6841, "encoder_q-layer.11": 13780.6133, "encoder_q-layer.2": 3771.9905, "encoder_q-layer.3": 4014.0339, "encoder_q-layer.4": 4303.7778, "encoder_q-layer.5": 4518.8179, "encoder_q-layer.6": 5416.5483, "encoder_q-layer.7": 5990.4663, "encoder_q-layer.8": 6914.0986, "encoder_q-layer.9": 6535.7578, "epoch": 0.78, "inbatch_neg_score": 1.363, "inbatch_pos_score": 2.1211, "learning_rate": 1.1333333333333334e-05, "loss": 2.5966, "norm_diff": 0.0482, "norm_loss": 0.0, "num_token_doc": 66.7485, "num_token_overlap": 17.7913, "num_token_query": 52.3389, "num_token_union": 73.7249, "num_word_context": 202.3067, "num_word_doc": 49.7831, "num_word_query": 39.9018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9150.0601, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3623, "query_norm": 1.7552, "queue_k_norm": 1.7985, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3389, "sent_len_1": 66.7485, "sent_len_max_0": 128.0, "sent_len_max_1": 209.15, "stdk": 0.0497, "stdq": 0.0468, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 79600 }, { "accuracy": 62.4512, "active_queue_size": 16384.0, "cl_loss": 2.5827, "doc_norm": 1.8027, "encoder_q-embeddings": 2651.876, "encoder_q-layer.0": 1751.5254, "encoder_q-layer.1": 1974.0959, "encoder_q-layer.10": 3521.1221, "encoder_q-layer.11": 7514.8579, "encoder_q-layer.2": 2250.2048, "encoder_q-layer.3": 2486.4636, "encoder_q-layer.4": 2631.2256, "encoder_q-layer.5": 2679.0342, "encoder_q-layer.6": 3090.0085, "encoder_q-layer.7": 3399.3811, "encoder_q-layer.8": 3926.7175, "encoder_q-layer.9": 3384.1462, "epoch": 0.78, "inbatch_neg_score": 1.3615, "inbatch_pos_score": 2.1309, "learning_rate": 1.127777777777778e-05, "loss": 2.5827, "norm_diff": 0.0407, "norm_loss": 0.0, "num_token_doc": 66.7155, "num_token_overlap": 17.8179, "num_token_query": 52.3292, "num_token_union": 73.669, "num_word_context": 202.2294, "num_word_doc": 49.7735, "num_word_query": 39.9174, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5165.4344, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3623, "query_norm": 1.762, "queue_k_norm": 1.8005, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3292, "sent_len_1": 66.7155, "sent_len_max_0": 128.0, "sent_len_max_1": 210.2663, "stdk": 0.0497, "stdq": 0.0471, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 79700 }, { "accuracy": 62.4023, "active_queue_size": 16384.0, "cl_loss": 2.5937, "doc_norm": 1.8031, "encoder_q-embeddings": 2590.1367, "encoder_q-layer.0": 1701.9458, "encoder_q-layer.1": 1933.6279, "encoder_q-layer.10": 3741.5012, "encoder_q-layer.11": 7466.8657, "encoder_q-layer.2": 2394.8333, "encoder_q-layer.3": 2582.3958, "encoder_q-layer.4": 2584.2583, "encoder_q-layer.5": 2617.3679, "encoder_q-layer.6": 2923.4675, "encoder_q-layer.7": 3246.0981, "encoder_q-layer.8": 4042.8945, "encoder_q-layer.9": 3473.4368, "epoch": 0.78, "inbatch_neg_score": 1.363, "inbatch_pos_score": 2.1211, "learning_rate": 1.1222222222222224e-05, "loss": 2.5937, "norm_diff": 0.0552, "norm_loss": 0.0, "num_token_doc": 66.761, "num_token_overlap": 17.8113, "num_token_query": 52.2381, "num_token_union": 73.6691, "num_word_context": 202.3125, "num_word_doc": 49.816, "num_word_query": 39.8571, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5088.4866, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3633, "query_norm": 1.7479, "queue_k_norm": 1.7997, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2381, "sent_len_1": 66.761, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3625, "stdk": 0.0497, "stdq": 0.0464, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 79800 }, { "accuracy": 61.7676, "active_queue_size": 16384.0, "cl_loss": 2.5704, "doc_norm": 1.8045, "encoder_q-embeddings": 5306.7109, "encoder_q-layer.0": 3709.5344, "encoder_q-layer.1": 4149.7036, "encoder_q-layer.10": 3435.9004, "encoder_q-layer.11": 7094.4458, "encoder_q-layer.2": 4617.0879, "encoder_q-layer.3": 4721.2354, "encoder_q-layer.4": 5363.4629, "encoder_q-layer.5": 5633.293, "encoder_q-layer.6": 5551.2119, "encoder_q-layer.7": 4469.7417, "encoder_q-layer.8": 3853.6917, "encoder_q-layer.9": 3317.7761, "epoch": 0.78, "inbatch_neg_score": 1.3629, "inbatch_pos_score": 2.1348, "learning_rate": 1.1166666666666668e-05, "loss": 2.5704, "norm_diff": 0.0465, "norm_loss": 0.0, "num_token_doc": 66.9388, "num_token_overlap": 17.8167, "num_token_query": 52.319, "num_token_union": 73.7635, "num_word_context": 202.4883, "num_word_doc": 49.9405, "num_word_query": 39.9145, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7210.2361, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3633, "query_norm": 1.758, "queue_k_norm": 1.8015, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.319, "sent_len_1": 66.9388, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4263, "stdk": 0.0498, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 79900 }, { "accuracy": 62.0605, "active_queue_size": 16384.0, "cl_loss": 2.5754, "doc_norm": 1.7985, "encoder_q-embeddings": 2494.9023, "encoder_q-layer.0": 1691.2898, "encoder_q-layer.1": 1778.3154, "encoder_q-layer.10": 3211.5278, "encoder_q-layer.11": 6932.835, "encoder_q-layer.2": 2013.7799, "encoder_q-layer.3": 2068.1277, "encoder_q-layer.4": 2236.9243, "encoder_q-layer.5": 2301.4932, "encoder_q-layer.6": 2593.1377, "encoder_q-layer.7": 2939.3123, "encoder_q-layer.8": 3326.6558, "encoder_q-layer.9": 3198.6079, "epoch": 0.78, "inbatch_neg_score": 1.3652, "inbatch_pos_score": 2.123, "learning_rate": 1.1111111111111112e-05, "loss": 2.5754, "norm_diff": 0.0449, "norm_loss": 0.0, "num_token_doc": 66.6329, "num_token_overlap": 17.7793, "num_token_query": 52.202, "num_token_union": 73.5747, "num_word_context": 201.9231, "num_word_doc": 49.7129, "num_word_query": 39.8204, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4641.4929, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3643, "query_norm": 1.7536, "queue_k_norm": 1.8009, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.202, "sent_len_1": 66.6329, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3162, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 80000 }, { "dev_runtime": 26.5577, "dev_samples_per_second": 1.205, "dev_steps_per_second": 0.038, "epoch": 0.78, "step": 80000, "test_accuracy": 94.6533203125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3042469620704651, "test_doc_norm": 1.776258945465088, "test_inbatch_neg_score": 1.6642873287200928, "test_inbatch_pos_score": 2.691110610961914, "test_loss": 0.3042469620704651, "test_loss_align": 1.0144414901733398, "test_loss_unif": 0.458503395318985, "test_loss_unif_q@queue": 0.4585034251213074, "test_norm_diff": 0.018641777336597443, "test_norm_loss": 0.0, "test_q@queue_neg_score": 1.3575085401535034, "test_query_norm": 1.7949007749557495, "test_queue_k_norm": 1.8003864288330078, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.044041357934474945, "test_stdq": 0.044134777039289474, "test_stdqueue_k": 0.04956661909818649, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.5577, "dev_samples_per_second": 1.205, "dev_steps_per_second": 0.038, "epoch": 0.78, "eval_beir-arguana_ndcg@10": 0.38735, "eval_beir-arguana_recall@10": 0.66856, "eval_beir-arguana_recall@100": 0.93599, "eval_beir-arguana_recall@20": 0.79232, "eval_beir-avg_ndcg@10": 0.385407, "eval_beir-avg_recall@10": 0.4585796666666667, "eval_beir-avg_recall@100": 0.6398845833333333, "eval_beir-avg_recall@20": 0.5202635, "eval_beir-cqadupstack_ndcg@10": 0.28503999999999996, "eval_beir-cqadupstack_recall@10": 0.3843766666666666, "eval_beir-cqadupstack_recall@100": 0.6186758333333334, "eval_beir-cqadupstack_recall@20": 0.452235, "eval_beir-fiqa_ndcg@10": 0.26533, "eval_beir-fiqa_recall@10": 0.32668, "eval_beir-fiqa_recall@100": 0.60962, "eval_beir-fiqa_recall@20": 0.40784, "eval_beir-nfcorpus_ndcg@10": 0.29199, "eval_beir-nfcorpus_recall@10": 0.14643, "eval_beir-nfcorpus_recall@100": 0.28063, "eval_beir-nfcorpus_recall@20": 0.17547, "eval_beir-nq_ndcg@10": 0.30081, "eval_beir-nq_recall@10": 0.48733, "eval_beir-nq_recall@100": 0.82064, "eval_beir-nq_recall@20": 0.60062, "eval_beir-quora_ndcg@10": 0.78408, "eval_beir-quora_recall@10": 0.89028, "eval_beir-quora_recall@100": 0.97945, "eval_beir-quora_recall@20": 0.93176, "eval_beir-scidocs_ndcg@10": 0.16334, "eval_beir-scidocs_recall@10": 0.17187, "eval_beir-scidocs_recall@100": 0.37912, "eval_beir-scidocs_recall@20": 0.23092, "eval_beir-scifact_ndcg@10": 0.63473, "eval_beir-scifact_recall@10": 0.79411, "eval_beir-scifact_recall@100": 0.90822, "eval_beir-scifact_recall@20": 0.84644, "eval_beir-trec-covid_ndcg@10": 0.54985, "eval_beir-trec-covid_recall@10": 0.58, "eval_beir-trec-covid_recall@100": 0.429, "eval_beir-trec-covid_recall@20": 0.559, "eval_beir-webis-touche2020_ndcg@10": 0.19155, "eval_beir-webis-touche2020_recall@10": 0.13616, "eval_beir-webis-touche2020_recall@100": 0.4375, "eval_beir-webis-touche2020_recall@20": 0.20603, "eval_senteval-avg_sts": 0.7490537628551152, "eval_senteval-sickr_spearman": 0.7330985091255077, "eval_senteval-stsb_spearman": 0.7650090165847226, "step": 80000, "test_accuracy": 94.6533203125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3042469620704651, "test_doc_norm": 1.776258945465088, "test_inbatch_neg_score": 1.6642873287200928, "test_inbatch_pos_score": 2.691110610961914, "test_loss": 0.3042469620704651, "test_loss_align": 1.0144414901733398, "test_loss_unif": 0.458503395318985, "test_loss_unif_q@queue": 0.4585034251213074, "test_norm_diff": 0.018641777336597443, "test_norm_loss": 0.0, "test_q@queue_neg_score": 1.3575085401535034, "test_query_norm": 1.7949007749557495, "test_queue_k_norm": 1.8003864288330078, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.044041357934474945, "test_stdq": 0.044134777039289474, "test_stdqueue_k": 0.04956661909818649, "test_stdqueue_q": 0.0 }, { "accuracy": 61.9629, "active_queue_size": 16384.0, "cl_loss": 2.586, "doc_norm": 1.8048, "encoder_q-embeddings": 2420.1643, "encoder_q-layer.0": 1565.6862, "encoder_q-layer.1": 1685.3387, "encoder_q-layer.10": 3765.8423, "encoder_q-layer.11": 7516.8076, "encoder_q-layer.2": 1906.473, "encoder_q-layer.3": 2097.0581, "encoder_q-layer.4": 2248.958, "encoder_q-layer.5": 2409.7371, "encoder_q-layer.6": 2744.833, "encoder_q-layer.7": 3218.9822, "encoder_q-layer.8": 3801.3965, "encoder_q-layer.9": 3352.5486, "epoch": 0.78, "inbatch_neg_score": 1.3697, "inbatch_pos_score": 2.1133, "learning_rate": 1.1055555555555556e-05, "loss": 2.586, "norm_diff": 0.0555, "norm_loss": 0.0, "num_token_doc": 66.8699, "num_token_overlap": 17.8508, "num_token_query": 52.3975, "num_token_union": 73.8068, "num_word_context": 202.4272, "num_word_doc": 49.9276, "num_word_query": 39.9718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4892.1412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3691, "query_norm": 1.7493, "queue_k_norm": 1.8016, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3975, "sent_len_1": 66.8699, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5375, "stdk": 0.0497, "stdq": 0.0463, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 80100 }, { "accuracy": 61.084, "active_queue_size": 16384.0, "cl_loss": 2.5786, "doc_norm": 1.8002, "encoder_q-embeddings": 2598.8545, "encoder_q-layer.0": 1684.688, "encoder_q-layer.1": 1909.281, "encoder_q-layer.10": 3800.292, "encoder_q-layer.11": 7557.624, "encoder_q-layer.2": 2223.8936, "encoder_q-layer.3": 2341.9485, "encoder_q-layer.4": 2556.7205, "encoder_q-layer.5": 2837.3333, "encoder_q-layer.6": 3142.187, "encoder_q-layer.7": 3583.5178, "encoder_q-layer.8": 4056.7998, "encoder_q-layer.9": 3735.1584, "epoch": 0.78, "inbatch_neg_score": 1.3704, "inbatch_pos_score": 2.1211, "learning_rate": 1.1000000000000001e-05, "loss": 2.5786, "norm_diff": 0.0441, "norm_loss": 0.0, "num_token_doc": 66.5989, "num_token_overlap": 17.7739, "num_token_query": 52.1355, "num_token_union": 73.5489, "num_word_context": 202.017, "num_word_doc": 49.7273, "num_word_query": 39.7467, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5254.202, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3701, "query_norm": 1.7562, "queue_k_norm": 1.8027, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1355, "sent_len_1": 66.5989, "sent_len_max_0": 128.0, "sent_len_max_1": 206.6575, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 80200 }, { "accuracy": 61.5723, "active_queue_size": 16384.0, "cl_loss": 2.5923, "doc_norm": 1.8002, "encoder_q-embeddings": 2624.6743, "encoder_q-layer.0": 1738.6515, "encoder_q-layer.1": 1898.6907, "encoder_q-layer.10": 3711.4934, "encoder_q-layer.11": 7666.2783, "encoder_q-layer.2": 2211.425, "encoder_q-layer.3": 2299.7878, "encoder_q-layer.4": 2552.5, "encoder_q-layer.5": 2639.3657, "encoder_q-layer.6": 2960.0542, "encoder_q-layer.7": 3124.7878, "encoder_q-layer.8": 3970.2542, "encoder_q-layer.9": 3538.1741, "epoch": 0.78, "inbatch_neg_score": 1.376, "inbatch_pos_score": 2.1367, "learning_rate": 1.0944444444444445e-05, "loss": 2.5923, "norm_diff": 0.0351, "norm_loss": 0.0, "num_token_doc": 66.7564, "num_token_overlap": 17.813, "num_token_query": 52.2786, "num_token_union": 73.6756, "num_word_context": 202.0948, "num_word_doc": 49.8164, "num_word_query": 39.8838, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5095.245, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.375, "query_norm": 1.7651, "queue_k_norm": 1.8009, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2786, "sent_len_1": 66.7564, "sent_len_max_0": 128.0, "sent_len_max_1": 209.435, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 80300 }, { "accuracy": 61.5723, "active_queue_size": 16384.0, "cl_loss": 2.57, "doc_norm": 1.8047, "encoder_q-embeddings": 2302.436, "encoder_q-layer.0": 1504.7927, "encoder_q-layer.1": 1711.6698, "encoder_q-layer.10": 3245.5962, "encoder_q-layer.11": 7192.2139, "encoder_q-layer.2": 1883.64, "encoder_q-layer.3": 2013.608, "encoder_q-layer.4": 2175.0583, "encoder_q-layer.5": 2286.178, "encoder_q-layer.6": 2689.8926, "encoder_q-layer.7": 3078.7876, "encoder_q-layer.8": 3609.1567, "encoder_q-layer.9": 3173.1133, "epoch": 0.78, "inbatch_neg_score": 1.3813, "inbatch_pos_score": 2.1367, "learning_rate": 1.088888888888889e-05, "loss": 2.57, "norm_diff": 0.0484, "norm_loss": 0.0, "num_token_doc": 66.8999, "num_token_overlap": 17.8714, "num_token_query": 52.4199, "num_token_union": 73.7797, "num_word_context": 202.4519, "num_word_doc": 49.9277, "num_word_query": 39.9502, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4770.1128, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3809, "query_norm": 1.7563, "queue_k_norm": 1.8017, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4199, "sent_len_1": 66.8999, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7413, "stdk": 0.0496, "stdq": 0.0464, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 80400 }, { "accuracy": 60.5957, "active_queue_size": 16384.0, "cl_loss": 2.5873, "doc_norm": 1.8043, "encoder_q-embeddings": 3993.8198, "encoder_q-layer.0": 2735.2175, "encoder_q-layer.1": 3363.2844, "encoder_q-layer.10": 3595.6191, "encoder_q-layer.11": 7890.4292, "encoder_q-layer.2": 4045.3359, "encoder_q-layer.3": 4245.5537, "encoder_q-layer.4": 4661.7925, "encoder_q-layer.5": 4925.1372, "encoder_q-layer.6": 5386.8784, "encoder_q-layer.7": 5129.8262, "encoder_q-layer.8": 3940.3875, "encoder_q-layer.9": 3585.4551, "epoch": 0.79, "inbatch_neg_score": 1.3842, "inbatch_pos_score": 2.1367, "learning_rate": 1.0833333333333334e-05, "loss": 2.5873, "norm_diff": 0.0424, "norm_loss": 0.0, "num_token_doc": 66.9298, "num_token_overlap": 17.7909, "num_token_query": 52.2509, "num_token_union": 73.8145, "num_word_context": 202.4685, "num_word_doc": 49.9289, "num_word_query": 39.8204, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6696.6094, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3838, "query_norm": 1.7619, "queue_k_norm": 1.8054, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2509, "sent_len_1": 66.9298, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9487, "stdk": 0.0496, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 80500 }, { "accuracy": 61.4746, "active_queue_size": 16384.0, "cl_loss": 2.588, "doc_norm": 1.8062, "encoder_q-embeddings": 3573.104, "encoder_q-layer.0": 2670.2449, "encoder_q-layer.1": 2995.2319, "encoder_q-layer.10": 3776.1663, "encoder_q-layer.11": 7592.0186, "encoder_q-layer.2": 3114.708, "encoder_q-layer.3": 3386.7039, "encoder_q-layer.4": 3601.073, "encoder_q-layer.5": 3544.8503, "encoder_q-layer.6": 3717.8298, "encoder_q-layer.7": 3478.1777, "encoder_q-layer.8": 3631.3347, "encoder_q-layer.9": 3429.0781, "epoch": 0.79, "inbatch_neg_score": 1.3844, "inbatch_pos_score": 2.1523, "learning_rate": 1.0777777777777778e-05, "loss": 2.588, "norm_diff": 0.0415, "norm_loss": 0.0, "num_token_doc": 66.7838, "num_token_overlap": 17.778, "num_token_query": 52.1831, "num_token_union": 73.7193, "num_word_context": 202.2462, "num_word_doc": 49.8383, "num_word_query": 39.7846, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5731.9657, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3848, "query_norm": 1.7647, "queue_k_norm": 1.804, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1831, "sent_len_1": 66.7838, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5737, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 80600 }, { "accuracy": 62.1582, "active_queue_size": 16384.0, "cl_loss": 2.5891, "doc_norm": 1.8038, "encoder_q-embeddings": 4458.3906, "encoder_q-layer.0": 3166.8865, "encoder_q-layer.1": 3596.0332, "encoder_q-layer.10": 3348.1438, "encoder_q-layer.11": 7219.251, "encoder_q-layer.2": 4302.2891, "encoder_q-layer.3": 4753.7646, "encoder_q-layer.4": 5461.5332, "encoder_q-layer.5": 5927.4824, "encoder_q-layer.6": 6213.5903, "encoder_q-layer.7": 5232.3477, "encoder_q-layer.8": 4274.9688, "encoder_q-layer.9": 3316.6914, "epoch": 0.79, "inbatch_neg_score": 1.3906, "inbatch_pos_score": 2.1562, "learning_rate": 1.0722222222222222e-05, "loss": 2.5891, "norm_diff": 0.0315, "norm_loss": 0.0, "num_token_doc": 66.7016, "num_token_overlap": 17.8083, "num_token_query": 52.2728, "num_token_union": 73.643, "num_word_context": 202.0436, "num_word_doc": 49.7463, "num_word_query": 39.8639, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7158.6906, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3896, "query_norm": 1.7723, "queue_k_norm": 1.8023, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2728, "sent_len_1": 66.7016, "sent_len_max_0": 128.0, "sent_len_max_1": 210.1725, "stdk": 0.0495, "stdq": 0.0471, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 80700 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.5868, "doc_norm": 1.8031, "encoder_q-embeddings": 4233.6226, "encoder_q-layer.0": 3013.4595, "encoder_q-layer.1": 3831.957, "encoder_q-layer.10": 3403.0723, "encoder_q-layer.11": 7185.0615, "encoder_q-layer.2": 4576.0811, "encoder_q-layer.3": 4344.6802, "encoder_q-layer.4": 4465.1128, "encoder_q-layer.5": 4259.8677, "encoder_q-layer.6": 3677.3723, "encoder_q-layer.7": 3536.0835, "encoder_q-layer.8": 3756.4453, "encoder_q-layer.9": 3447.8303, "epoch": 0.79, "inbatch_neg_score": 1.3923, "inbatch_pos_score": 2.1387, "learning_rate": 1.0666666666666667e-05, "loss": 2.5868, "norm_diff": 0.0432, "norm_loss": 0.0, "num_token_doc": 66.7611, "num_token_overlap": 17.8169, "num_token_query": 52.259, "num_token_union": 73.6809, "num_word_context": 202.1392, "num_word_doc": 49.8062, "num_word_query": 39.8591, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6285.7558, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3926, "query_norm": 1.7599, "queue_k_norm": 1.8051, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.259, "sent_len_1": 66.7611, "sent_len_max_0": 128.0, "sent_len_max_1": 211.3988, "stdk": 0.0495, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 80800 }, { "accuracy": 59.7168, "active_queue_size": 16384.0, "cl_loss": 2.5886, "doc_norm": 1.807, "encoder_q-embeddings": 3103.3259, "encoder_q-layer.0": 2149.5867, "encoder_q-layer.1": 2466.2307, "encoder_q-layer.10": 3545.2158, "encoder_q-layer.11": 7179.7451, "encoder_q-layer.2": 2828.8914, "encoder_q-layer.3": 3007.1614, "encoder_q-layer.4": 3357.0872, "encoder_q-layer.5": 3616.0562, "encoder_q-layer.6": 3628.7283, "encoder_q-layer.7": 3256.9221, "encoder_q-layer.8": 3739.2363, "encoder_q-layer.9": 3403.2778, "epoch": 0.79, "inbatch_neg_score": 1.3955, "inbatch_pos_score": 2.1406, "learning_rate": 1.0611111111111111e-05, "loss": 2.5886, "norm_diff": 0.0457, "norm_loss": 0.0, "num_token_doc": 66.6807, "num_token_overlap": 17.7841, "num_token_query": 52.1802, "num_token_union": 73.6227, "num_word_context": 202.0942, "num_word_doc": 49.7567, "num_word_query": 39.8056, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5407.9203, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3955, "query_norm": 1.7614, "queue_k_norm": 1.8055, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1802, "sent_len_1": 66.6807, "sent_len_max_0": 128.0, "sent_len_max_1": 207.7675, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 80900 }, { "accuracy": 64.0137, "active_queue_size": 16384.0, "cl_loss": 2.5743, "doc_norm": 1.8102, "encoder_q-embeddings": 2363.4387, "encoder_q-layer.0": 1528.9911, "encoder_q-layer.1": 1648.1372, "encoder_q-layer.10": 3405.9458, "encoder_q-layer.11": 6800.2686, "encoder_q-layer.2": 1844.0967, "encoder_q-layer.3": 1959.3506, "encoder_q-layer.4": 2068.4614, "encoder_q-layer.5": 2149.4971, "encoder_q-layer.6": 2382.697, "encoder_q-layer.7": 2829.7788, "encoder_q-layer.8": 3213.1777, "encoder_q-layer.9": 3224.6621, "epoch": 0.79, "inbatch_neg_score": 1.395, "inbatch_pos_score": 2.1641, "learning_rate": 1.0555555555555555e-05, "loss": 2.5743, "norm_diff": 0.0487, "norm_loss": 0.0, "num_token_doc": 66.8347, "num_token_overlap": 17.8536, "num_token_query": 52.3848, "num_token_union": 73.7442, "num_word_context": 202.498, "num_word_doc": 49.8801, "num_word_query": 39.9544, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4454.5752, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3955, "query_norm": 1.7615, "queue_k_norm": 1.806, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3848, "sent_len_1": 66.8347, "sent_len_max_0": 128.0, "sent_len_max_1": 209.8075, "stdk": 0.0498, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 81000 }, { "accuracy": 62.5, "active_queue_size": 16384.0, "cl_loss": 2.5816, "doc_norm": 1.8075, "encoder_q-embeddings": 2180.8083, "encoder_q-layer.0": 1472.4851, "encoder_q-layer.1": 1577.2999, "encoder_q-layer.10": 3269.3582, "encoder_q-layer.11": 6822.4316, "encoder_q-layer.2": 1767.3297, "encoder_q-layer.3": 1864.4683, "encoder_q-layer.4": 2057.3376, "encoder_q-layer.5": 2054.1841, "encoder_q-layer.6": 2365.7991, "encoder_q-layer.7": 2783.4868, "encoder_q-layer.8": 3342.0005, "encoder_q-layer.9": 3165.2395, "epoch": 0.79, "inbatch_neg_score": 1.3964, "inbatch_pos_score": 2.1641, "learning_rate": 1.05e-05, "loss": 2.5816, "norm_diff": 0.0408, "norm_loss": 0.0, "num_token_doc": 66.9508, "num_token_overlap": 17.8352, "num_token_query": 52.2797, "num_token_union": 73.7967, "num_word_context": 202.2673, "num_word_doc": 49.9774, "num_word_query": 39.8725, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4454.0138, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3965, "query_norm": 1.7667, "queue_k_norm": 1.8077, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2797, "sent_len_1": 66.9508, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7887, "stdk": 0.0496, "stdq": 0.0469, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 81100 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.5904, "doc_norm": 1.8046, "encoder_q-embeddings": 2704.1575, "encoder_q-layer.0": 1637.957, "encoder_q-layer.1": 1812.5452, "encoder_q-layer.10": 3682.0859, "encoder_q-layer.11": 7230.5674, "encoder_q-layer.2": 2092.2146, "encoder_q-layer.3": 2289.1008, "encoder_q-layer.4": 2461.0879, "encoder_q-layer.5": 2641.9026, "encoder_q-layer.6": 2924.2803, "encoder_q-layer.7": 3215.1572, "encoder_q-layer.8": 3876.676, "encoder_q-layer.9": 3553.3665, "epoch": 0.79, "inbatch_neg_score": 1.3988, "inbatch_pos_score": 2.1523, "learning_rate": 1.0444444444444445e-05, "loss": 2.5904, "norm_diff": 0.0389, "norm_loss": 0.0, "num_token_doc": 66.6704, "num_token_overlap": 17.7559, "num_token_query": 52.2137, "num_token_union": 73.6266, "num_word_context": 202.3945, "num_word_doc": 49.7458, "num_word_query": 39.8236, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5000.0686, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3984, "query_norm": 1.7657, "queue_k_norm": 1.8083, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2137, "sent_len_1": 66.6704, "sent_len_max_0": 128.0, "sent_len_max_1": 207.435, "stdk": 0.0494, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 81200 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 2.5732, "doc_norm": 1.8087, "encoder_q-embeddings": 2946.4475, "encoder_q-layer.0": 1959.7604, "encoder_q-layer.1": 2217.4094, "encoder_q-layer.10": 3469.897, "encoder_q-layer.11": 7347.6938, "encoder_q-layer.2": 2563.343, "encoder_q-layer.3": 2683.106, "encoder_q-layer.4": 2968.8491, "encoder_q-layer.5": 3058.5354, "encoder_q-layer.6": 3414.0027, "encoder_q-layer.7": 3383.8232, "encoder_q-layer.8": 4049.5161, "encoder_q-layer.9": 3354.4568, "epoch": 0.79, "inbatch_neg_score": 1.3988, "inbatch_pos_score": 2.1426, "learning_rate": 1.038888888888889e-05, "loss": 2.5732, "norm_diff": 0.0544, "norm_loss": 0.0, "num_token_doc": 66.906, "num_token_overlap": 17.8498, "num_token_query": 52.2904, "num_token_union": 73.7356, "num_word_context": 202.4964, "num_word_doc": 49.9021, "num_word_query": 39.8883, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5302.2098, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3984, "query_norm": 1.7543, "queue_k_norm": 1.8095, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2904, "sent_len_1": 66.906, "sent_len_max_0": 128.0, "sent_len_max_1": 210.77, "stdk": 0.0496, "stdq": 0.0463, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 81300 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.5847, "doc_norm": 1.806, "encoder_q-embeddings": 2495.3838, "encoder_q-layer.0": 1633.5303, "encoder_q-layer.1": 1784.3722, "encoder_q-layer.10": 3846.2183, "encoder_q-layer.11": 7989.98, "encoder_q-layer.2": 2066.7502, "encoder_q-layer.3": 2163.9326, "encoder_q-layer.4": 2462.2954, "encoder_q-layer.5": 2542.2852, "encoder_q-layer.6": 2931.7983, "encoder_q-layer.7": 3494.5562, "encoder_q-layer.8": 4191.2471, "encoder_q-layer.9": 3841.6785, "epoch": 0.79, "inbatch_neg_score": 1.4011, "inbatch_pos_score": 2.1387, "learning_rate": 1.0333333333333333e-05, "loss": 2.5847, "norm_diff": 0.0462, "norm_loss": 0.0, "num_token_doc": 66.7384, "num_token_overlap": 17.7827, "num_token_query": 52.2586, "num_token_union": 73.6864, "num_word_context": 202.3271, "num_word_doc": 49.7683, "num_word_query": 39.8469, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5233.7349, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4004, "query_norm": 1.7598, "queue_k_norm": 1.8092, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2586, "sent_len_1": 66.7384, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4875, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 81400 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.579, "doc_norm": 1.8142, "encoder_q-embeddings": 2550.4192, "encoder_q-layer.0": 1713.4167, "encoder_q-layer.1": 1952.8622, "encoder_q-layer.10": 3550.2534, "encoder_q-layer.11": 7371.0771, "encoder_q-layer.2": 2131.4053, "encoder_q-layer.3": 2228.6274, "encoder_q-layer.4": 2481.2805, "encoder_q-layer.5": 2542.7234, "encoder_q-layer.6": 2933.0725, "encoder_q-layer.7": 3223.291, "encoder_q-layer.8": 3587.1831, "encoder_q-layer.9": 3327.7239, "epoch": 0.8, "inbatch_neg_score": 1.4013, "inbatch_pos_score": 2.1562, "learning_rate": 1.0277777777777777e-05, "loss": 2.579, "norm_diff": 0.0577, "norm_loss": 0.0, "num_token_doc": 66.7533, "num_token_overlap": 17.8384, "num_token_query": 52.4064, "num_token_union": 73.7529, "num_word_context": 202.4204, "num_word_doc": 49.8, "num_word_query": 39.9775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4949.9193, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4023, "query_norm": 1.7565, "queue_k_norm": 1.809, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4064, "sent_len_1": 66.7533, "sent_len_max_0": 128.0, "sent_len_max_1": 209.105, "stdk": 0.0498, "stdq": 0.0464, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 81500 }, { "accuracy": 61.6211, "active_queue_size": 16384.0, "cl_loss": 2.5693, "doc_norm": 1.8077, "encoder_q-embeddings": 2595.55, "encoder_q-layer.0": 1598.8496, "encoder_q-layer.1": 1688.5892, "encoder_q-layer.10": 3238.4436, "encoder_q-layer.11": 7154.1724, "encoder_q-layer.2": 1923.5924, "encoder_q-layer.3": 2034.4032, "encoder_q-layer.4": 2211.2295, "encoder_q-layer.5": 2314.7366, "encoder_q-layer.6": 2606.22, "encoder_q-layer.7": 3021.8037, "encoder_q-layer.8": 3457.0725, "encoder_q-layer.9": 3240.1741, "epoch": 0.8, "inbatch_neg_score": 1.403, "inbatch_pos_score": 2.1562, "learning_rate": 1.0222222222222223e-05, "loss": 2.5693, "norm_diff": 0.041, "norm_loss": 0.0, "num_token_doc": 66.8186, "num_token_overlap": 17.8272, "num_token_query": 52.3527, "num_token_union": 73.7638, "num_word_context": 202.6791, "num_word_doc": 49.8945, "num_word_query": 39.9398, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4783.873, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4023, "query_norm": 1.7667, "queue_k_norm": 1.8096, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3527, "sent_len_1": 66.8186, "sent_len_max_0": 128.0, "sent_len_max_1": 206.4412, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 81600 }, { "accuracy": 62.2559, "active_queue_size": 16384.0, "cl_loss": 2.5868, "doc_norm": 1.8087, "encoder_q-embeddings": 4591.812, "encoder_q-layer.0": 3083.8098, "encoder_q-layer.1": 3301.9294, "encoder_q-layer.10": 6657.6235, "encoder_q-layer.11": 14727.8037, "encoder_q-layer.2": 3751.9861, "encoder_q-layer.3": 3942.7214, "encoder_q-layer.4": 4439.999, "encoder_q-layer.5": 4474.4268, "encoder_q-layer.6": 5027.6328, "encoder_q-layer.7": 5753.2173, "encoder_q-layer.8": 7327.8613, "encoder_q-layer.9": 6464.2261, "epoch": 0.8, "inbatch_neg_score": 1.4047, "inbatch_pos_score": 2.1621, "learning_rate": 1.0166666666666667e-05, "loss": 2.5868, "norm_diff": 0.0409, "norm_loss": 0.0, "num_token_doc": 66.7548, "num_token_overlap": 17.818, "num_token_query": 52.2535, "num_token_union": 73.6955, "num_word_context": 202.4165, "num_word_doc": 49.8101, "num_word_query": 39.8465, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9352.9658, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4043, "query_norm": 1.7678, "queue_k_norm": 1.811, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2535, "sent_len_1": 66.7548, "sent_len_max_0": 128.0, "sent_len_max_1": 210.5625, "stdk": 0.0495, "stdq": 0.047, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 81700 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.5805, "doc_norm": 1.81, "encoder_q-embeddings": 4632.4814, "encoder_q-layer.0": 3077.8965, "encoder_q-layer.1": 3458.677, "encoder_q-layer.10": 7212.3516, "encoder_q-layer.11": 15010.0869, "encoder_q-layer.2": 3934.9456, "encoder_q-layer.3": 4125.4551, "encoder_q-layer.4": 4675.8853, "encoder_q-layer.5": 4700.4893, "encoder_q-layer.6": 5451.2622, "encoder_q-layer.7": 6115.8604, "encoder_q-layer.8": 7140.0825, "encoder_q-layer.9": 6680.6558, "epoch": 0.8, "inbatch_neg_score": 1.4032, "inbatch_pos_score": 2.1504, "learning_rate": 1.0111111111111111e-05, "loss": 2.5805, "norm_diff": 0.0517, "norm_loss": 0.0, "num_token_doc": 66.778, "num_token_overlap": 17.8174, "num_token_query": 52.3269, "num_token_union": 73.7443, "num_word_context": 202.2628, "num_word_doc": 49.7898, "num_word_query": 39.9033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9760.6327, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4033, "query_norm": 1.7583, "queue_k_norm": 1.8098, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3269, "sent_len_1": 66.778, "sent_len_max_0": 128.0, "sent_len_max_1": 210.955, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 81800 }, { "accuracy": 61.9629, "active_queue_size": 16384.0, "cl_loss": 2.5864, "doc_norm": 1.8116, "encoder_q-embeddings": 5339.1226, "encoder_q-layer.0": 3439.9597, "encoder_q-layer.1": 3941.355, "encoder_q-layer.10": 7156.416, "encoder_q-layer.11": 15720.9629, "encoder_q-layer.2": 4624.1875, "encoder_q-layer.3": 4627.5835, "encoder_q-layer.4": 4891.5811, "encoder_q-layer.5": 5314.2671, "encoder_q-layer.6": 5759.2407, "encoder_q-layer.7": 6607.1763, "encoder_q-layer.8": 7818.9497, "encoder_q-layer.9": 6849.4702, "epoch": 0.8, "inbatch_neg_score": 1.4072, "inbatch_pos_score": 2.168, "learning_rate": 1.0055555555555555e-05, "loss": 2.5864, "norm_diff": 0.0493, "norm_loss": 0.0, "num_token_doc": 66.7755, "num_token_overlap": 17.786, "num_token_query": 52.3301, "num_token_union": 73.7449, "num_word_context": 202.4506, "num_word_doc": 49.8093, "num_word_query": 39.9187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10402.8822, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4062, "query_norm": 1.7623, "queue_k_norm": 1.8117, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3301, "sent_len_1": 66.7755, "sent_len_max_0": 128.0, "sent_len_max_1": 211.3125, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 81900 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.5742, "doc_norm": 1.8101, "encoder_q-embeddings": 5031.3501, "encoder_q-layer.0": 3284.7278, "encoder_q-layer.1": 3704.3247, "encoder_q-layer.10": 7018.9829, "encoder_q-layer.11": 14727.1934, "encoder_q-layer.2": 4235.0981, "encoder_q-layer.3": 4343.3262, "encoder_q-layer.4": 4647.9131, "encoder_q-layer.5": 4986.2085, "encoder_q-layer.6": 5434.9307, "encoder_q-layer.7": 6395.5259, "encoder_q-layer.8": 7498.8169, "encoder_q-layer.9": 6761.9922, "epoch": 0.8, "inbatch_neg_score": 1.4036, "inbatch_pos_score": 2.1543, "learning_rate": 1e-05, "loss": 2.5742, "norm_diff": 0.0419, "norm_loss": 0.0, "num_token_doc": 66.7393, "num_token_overlap": 17.833, "num_token_query": 52.3786, "num_token_union": 73.7403, "num_word_context": 202.585, "num_word_doc": 49.8229, "num_word_query": 39.9662, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9835.251, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4023, "query_norm": 1.7682, "queue_k_norm": 1.8129, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3786, "sent_len_1": 66.7393, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6238, "stdk": 0.0495, "stdq": 0.047, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 82000 }, { "accuracy": 62.4023, "active_queue_size": 16384.0, "cl_loss": 2.5747, "doc_norm": 1.8142, "encoder_q-embeddings": 4679.688, "encoder_q-layer.0": 3109.0913, "encoder_q-layer.1": 3337.3818, "encoder_q-layer.10": 6617.0752, "encoder_q-layer.11": 14016.8994, "encoder_q-layer.2": 3701.6804, "encoder_q-layer.3": 3940.8086, "encoder_q-layer.4": 4379.4971, "encoder_q-layer.5": 4437.4932, "encoder_q-layer.6": 4994.6519, "encoder_q-layer.7": 6085.6943, "encoder_q-layer.8": 6990.3154, "encoder_q-layer.9": 6468.1089, "epoch": 0.8, "inbatch_neg_score": 1.4035, "inbatch_pos_score": 2.166, "learning_rate": 9.944444444444445e-06, "loss": 2.5747, "norm_diff": 0.0518, "norm_loss": 0.0, "num_token_doc": 66.8638, "num_token_overlap": 17.8347, "num_token_query": 52.3645, "num_token_union": 73.7916, "num_word_context": 202.3127, "num_word_doc": 49.8904, "num_word_query": 39.939, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9160.149, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4043, "query_norm": 1.7625, "queue_k_norm": 1.813, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3645, "sent_len_1": 66.8638, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4837, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 82100 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.5747, "doc_norm": 1.8118, "encoder_q-embeddings": 5138.4424, "encoder_q-layer.0": 3340.3337, "encoder_q-layer.1": 3621.8252, "encoder_q-layer.10": 6965.5381, "encoder_q-layer.11": 14839.6592, "encoder_q-layer.2": 3991.4456, "encoder_q-layer.3": 4223.0762, "encoder_q-layer.4": 4502.626, "encoder_q-layer.5": 4757.4204, "encoder_q-layer.6": 5431.2075, "encoder_q-layer.7": 6200.4604, "encoder_q-layer.8": 7718.9453, "encoder_q-layer.9": 6956.4917, "epoch": 0.8, "inbatch_neg_score": 1.4031, "inbatch_pos_score": 2.166, "learning_rate": 9.888888888888889e-06, "loss": 2.5747, "norm_diff": 0.053, "norm_loss": 0.0, "num_token_doc": 66.8745, "num_token_overlap": 17.8176, "num_token_query": 52.2689, "num_token_union": 73.7386, "num_word_context": 202.3751, "num_word_doc": 49.9201, "num_word_query": 39.8576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9733.7435, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4033, "query_norm": 1.7589, "queue_k_norm": 1.8112, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2689, "sent_len_1": 66.8745, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9137, "stdk": 0.0496, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 82200 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.5846, "doc_norm": 1.8125, "encoder_q-embeddings": 6315.311, "encoder_q-layer.0": 4223.334, "encoder_q-layer.1": 4797.6387, "encoder_q-layer.10": 7422.3154, "encoder_q-layer.11": 14513.4912, "encoder_q-layer.2": 5710.209, "encoder_q-layer.3": 6372.7783, "encoder_q-layer.4": 7027.0171, "encoder_q-layer.5": 7560.5337, "encoder_q-layer.6": 8067.9976, "encoder_q-layer.7": 8118.3091, "encoder_q-layer.8": 8818.3379, "encoder_q-layer.9": 7156.9292, "epoch": 0.8, "inbatch_neg_score": 1.4016, "inbatch_pos_score": 2.1523, "learning_rate": 9.833333333333333e-06, "loss": 2.5846, "norm_diff": 0.0533, "norm_loss": 0.0, "num_token_doc": 66.7856, "num_token_overlap": 17.7567, "num_token_query": 52.1123, "num_token_union": 73.6472, "num_word_context": 202.269, "num_word_doc": 49.8384, "num_word_query": 39.7421, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11421.4693, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4014, "query_norm": 1.7592, "queue_k_norm": 1.812, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1123, "sent_len_1": 66.7856, "sent_len_max_0": 128.0, "sent_len_max_1": 209.33, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 82300 }, { "accuracy": 62.3535, "active_queue_size": 16384.0, "cl_loss": 2.561, "doc_norm": 1.8088, "encoder_q-embeddings": 4918.2568, "encoder_q-layer.0": 3139.0198, "encoder_q-layer.1": 3536.8657, "encoder_q-layer.10": 7065.1152, "encoder_q-layer.11": 14613.7373, "encoder_q-layer.2": 4030.3977, "encoder_q-layer.3": 4203.2983, "encoder_q-layer.4": 4679.4219, "encoder_q-layer.5": 5091.9746, "encoder_q-layer.6": 5535.9492, "encoder_q-layer.7": 6202.4863, "encoder_q-layer.8": 7013.8379, "encoder_q-layer.9": 6461.7476, "epoch": 0.8, "inbatch_neg_score": 1.4038, "inbatch_pos_score": 2.1777, "learning_rate": 9.777777777777779e-06, "loss": 2.561, "norm_diff": 0.045, "norm_loss": 0.0, "num_token_doc": 66.7615, "num_token_overlap": 17.8736, "num_token_query": 52.2628, "num_token_union": 73.6738, "num_word_context": 202.1664, "num_word_doc": 49.8318, "num_word_query": 39.8502, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9589.7787, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4043, "query_norm": 1.7638, "queue_k_norm": 1.8132, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2628, "sent_len_1": 66.7615, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5275, "stdk": 0.0494, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 82400 }, { "accuracy": 60.3027, "active_queue_size": 16384.0, "cl_loss": 2.5763, "doc_norm": 1.8128, "encoder_q-embeddings": 53820.7812, "encoder_q-layer.0": 39422.043, "encoder_q-layer.1": 46208.543, "encoder_q-layer.10": 6967.9751, "encoder_q-layer.11": 14785.0117, "encoder_q-layer.2": 52167.5781, "encoder_q-layer.3": 53432.2656, "encoder_q-layer.4": 65397.6953, "encoder_q-layer.5": 70666.6016, "encoder_q-layer.6": 60689.1406, "encoder_q-layer.7": 53928.375, "encoder_q-layer.8": 25545.7461, "encoder_q-layer.9": 9196.25, "epoch": 0.81, "inbatch_neg_score": 1.405, "inbatch_pos_score": 2.1465, "learning_rate": 9.722222222222223e-06, "loss": 2.5763, "norm_diff": 0.0555, "norm_loss": 0.0, "num_token_doc": 66.5898, "num_token_overlap": 17.8086, "num_token_query": 52.211, "num_token_union": 73.5661, "num_word_context": 202.1934, "num_word_doc": 49.6964, "num_word_query": 39.809, "postclip_grad_norm": 1.0, "preclip_grad_norm": 72272.7301, "preclip_grad_norm_avg": 0.0007, "q@queue_neg_score": 1.4053, "query_norm": 1.7573, "queue_k_norm": 1.8117, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.211, "sent_len_1": 66.5898, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8862, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 82500 }, { "accuracy": 61.7676, "active_queue_size": 16384.0, "cl_loss": 2.5849, "doc_norm": 1.8179, "encoder_q-embeddings": 5921.9355, "encoder_q-layer.0": 3879.9802, "encoder_q-layer.1": 4475.2539, "encoder_q-layer.10": 7138.3359, "encoder_q-layer.11": 15000.166, "encoder_q-layer.2": 5184.8555, "encoder_q-layer.3": 5548.9111, "encoder_q-layer.4": 6303.4453, "encoder_q-layer.5": 6963.0342, "encoder_q-layer.6": 7895.354, "encoder_q-layer.7": 7862.6963, "encoder_q-layer.8": 8204.1367, "encoder_q-layer.9": 7135.9219, "epoch": 0.81, "inbatch_neg_score": 1.4059, "inbatch_pos_score": 2.1641, "learning_rate": 9.666666666666667e-06, "loss": 2.5849, "norm_diff": 0.0664, "norm_loss": 0.0, "num_token_doc": 66.7314, "num_token_overlap": 17.8381, "num_token_query": 52.3368, "num_token_union": 73.6994, "num_word_context": 202.3883, "num_word_doc": 49.7969, "num_word_query": 39.9089, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11079.2286, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4062, "query_norm": 1.7515, "queue_k_norm": 1.813, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3368, "sent_len_1": 66.7314, "sent_len_max_0": 128.0, "sent_len_max_1": 209.885, "stdk": 0.0498, "stdq": 0.0462, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 82600 }, { "accuracy": 61.4746, "active_queue_size": 16384.0, "cl_loss": 2.5731, "doc_norm": 1.815, "encoder_q-embeddings": 4778.207, "encoder_q-layer.0": 3182.5564, "encoder_q-layer.1": 3515.0188, "encoder_q-layer.10": 7516.1572, "encoder_q-layer.11": 14917.625, "encoder_q-layer.2": 4121.2949, "encoder_q-layer.3": 4250.4351, "encoder_q-layer.4": 4657.9609, "encoder_q-layer.5": 4629.5513, "encoder_q-layer.6": 5205.0854, "encoder_q-layer.7": 6040.856, "encoder_q-layer.8": 7253.1528, "encoder_q-layer.9": 6770.5, "epoch": 0.81, "inbatch_neg_score": 1.4085, "inbatch_pos_score": 2.1602, "learning_rate": 9.61111111111111e-06, "loss": 2.5731, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.8456, "num_token_overlap": 17.8153, "num_token_query": 52.2818, "num_token_union": 73.7712, "num_word_context": 202.5863, "num_word_doc": 49.9052, "num_word_query": 39.8859, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9740.0082, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4082, "query_norm": 1.7671, "queue_k_norm": 1.8125, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2818, "sent_len_1": 66.8456, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1912, "stdk": 0.0497, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 82700 }, { "accuracy": 61.6699, "active_queue_size": 16384.0, "cl_loss": 2.595, "doc_norm": 1.8162, "encoder_q-embeddings": 6480.9653, "encoder_q-layer.0": 4433.2637, "encoder_q-layer.1": 5128.1978, "encoder_q-layer.10": 7049.5933, "encoder_q-layer.11": 15025.9141, "encoder_q-layer.2": 6070.0195, "encoder_q-layer.3": 6494.5591, "encoder_q-layer.4": 6927.5469, "encoder_q-layer.5": 7146.9077, "encoder_q-layer.6": 7279.4546, "encoder_q-layer.7": 7010.3701, "encoder_q-layer.8": 7952.8115, "encoder_q-layer.9": 6823.5742, "epoch": 0.81, "inbatch_neg_score": 1.4098, "inbatch_pos_score": 2.1797, "learning_rate": 9.555555555555556e-06, "loss": 2.595, "norm_diff": 0.0505, "norm_loss": 0.0, "num_token_doc": 66.7706, "num_token_overlap": 17.7328, "num_token_query": 52.1366, "num_token_union": 73.6795, "num_word_context": 202.0545, "num_word_doc": 49.8063, "num_word_query": 39.7594, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11207.8218, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4102, "query_norm": 1.7657, "queue_k_norm": 1.8141, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1366, "sent_len_1": 66.7706, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9225, "stdk": 0.0497, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 82800 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.5699, "doc_norm": 1.8162, "encoder_q-embeddings": 4684.3115, "encoder_q-layer.0": 3101.7021, "encoder_q-layer.1": 3473.4675, "encoder_q-layer.10": 7388.855, "encoder_q-layer.11": 14297.7715, "encoder_q-layer.2": 4007.4773, "encoder_q-layer.3": 4133.9888, "encoder_q-layer.4": 4428.6357, "encoder_q-layer.5": 4915.4561, "encoder_q-layer.6": 5289.0273, "encoder_q-layer.7": 6182.5635, "encoder_q-layer.8": 7194.4072, "encoder_q-layer.9": 6631.7686, "epoch": 0.81, "inbatch_neg_score": 1.4109, "inbatch_pos_score": 2.166, "learning_rate": 9.5e-06, "loss": 2.5699, "norm_diff": 0.0563, "norm_loss": 0.0, "num_token_doc": 66.9232, "num_token_overlap": 17.8139, "num_token_query": 52.3672, "num_token_union": 73.8198, "num_word_context": 202.4882, "num_word_doc": 49.9277, "num_word_query": 39.9294, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9483.0472, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4121, "query_norm": 1.7599, "queue_k_norm": 1.8146, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3672, "sent_len_1": 66.9232, "sent_len_max_0": 128.0, "sent_len_max_1": 210.1387, "stdk": 0.0497, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 82900 }, { "accuracy": 62.6953, "active_queue_size": 16384.0, "cl_loss": 2.5788, "doc_norm": 1.8151, "encoder_q-embeddings": 4650.5444, "encoder_q-layer.0": 2961.5364, "encoder_q-layer.1": 3286.874, "encoder_q-layer.10": 6913.0142, "encoder_q-layer.11": 14550.6357, "encoder_q-layer.2": 3768.8645, "encoder_q-layer.3": 3964.1621, "encoder_q-layer.4": 4418.4785, "encoder_q-layer.5": 4364.6182, "encoder_q-layer.6": 4982.5356, "encoder_q-layer.7": 5738.4644, "encoder_q-layer.8": 7122.6182, "encoder_q-layer.9": 6658.7671, "epoch": 0.81, "inbatch_neg_score": 1.4135, "inbatch_pos_score": 2.1758, "learning_rate": 9.444444444444445e-06, "loss": 2.5788, "norm_diff": 0.0581, "norm_loss": 0.0, "num_token_doc": 66.8195, "num_token_overlap": 17.8069, "num_token_query": 52.2179, "num_token_union": 73.6694, "num_word_context": 202.1527, "num_word_doc": 49.871, "num_word_query": 39.8231, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9404.1152, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4141, "query_norm": 1.7569, "queue_k_norm": 1.8137, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2179, "sent_len_1": 66.8195, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1, "stdk": 0.0496, "stdq": 0.0463, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 83000 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.5668, "doc_norm": 1.8139, "encoder_q-embeddings": 5736.8887, "encoder_q-layer.0": 3651.3088, "encoder_q-layer.1": 4248.4385, "encoder_q-layer.10": 7149.0635, "encoder_q-layer.11": 15710.5781, "encoder_q-layer.2": 5116.3301, "encoder_q-layer.3": 5320.8438, "encoder_q-layer.4": 5615.8955, "encoder_q-layer.5": 5701.1924, "encoder_q-layer.6": 6383.8408, "encoder_q-layer.7": 6630.2246, "encoder_q-layer.8": 7483.3325, "encoder_q-layer.9": 6757.4004, "epoch": 0.81, "inbatch_neg_score": 1.4161, "inbatch_pos_score": 2.1719, "learning_rate": 9.388888888888889e-06, "loss": 2.5668, "norm_diff": 0.0524, "norm_loss": 0.0, "num_token_doc": 66.8916, "num_token_overlap": 17.8586, "num_token_query": 52.4241, "num_token_union": 73.8204, "num_word_context": 202.5721, "num_word_doc": 49.937, "num_word_query": 39.9906, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10624.6247, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.416, "query_norm": 1.7615, "queue_k_norm": 1.815, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4241, "sent_len_1": 66.8916, "sent_len_max_0": 128.0, "sent_len_max_1": 207.3413, "stdk": 0.0495, "stdq": 0.0464, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 83100 }, { "accuracy": 60.4004, "active_queue_size": 16384.0, "cl_loss": 2.5745, "doc_norm": 1.8117, "encoder_q-embeddings": 3274.3235, "encoder_q-layer.0": 2292.5259, "encoder_q-layer.1": 2607.9705, "encoder_q-layer.10": 3592.8784, "encoder_q-layer.11": 7450.9575, "encoder_q-layer.2": 2975.5376, "encoder_q-layer.3": 3087.6985, "encoder_q-layer.4": 3410.5505, "encoder_q-layer.5": 3217.7043, "encoder_q-layer.6": 3298.6118, "encoder_q-layer.7": 3551.9749, "encoder_q-layer.8": 3883.6433, "encoder_q-layer.9": 3216.9346, "epoch": 0.81, "inbatch_neg_score": 1.4177, "inbatch_pos_score": 2.1621, "learning_rate": 9.333333333333334e-06, "loss": 2.5745, "norm_diff": 0.0528, "norm_loss": 0.0, "num_token_doc": 66.9319, "num_token_overlap": 17.8011, "num_token_query": 52.1354, "num_token_union": 73.7006, "num_word_context": 202.2158, "num_word_doc": 49.9198, "num_word_query": 39.7307, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5596.5749, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.418, "query_norm": 1.7589, "queue_k_norm": 1.8162, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1354, "sent_len_1": 66.9319, "sent_len_max_0": 128.0, "sent_len_max_1": 211.6275, "stdk": 0.0494, "stdq": 0.0462, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 83200 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.564, "doc_norm": 1.8112, "encoder_q-embeddings": 2675.9417, "encoder_q-layer.0": 1757.3475, "encoder_q-layer.1": 1914.8438, "encoder_q-layer.10": 4058.0325, "encoder_q-layer.11": 7816.4146, "encoder_q-layer.2": 2186.7852, "encoder_q-layer.3": 2330.3899, "encoder_q-layer.4": 2544.7285, "encoder_q-layer.5": 2626.4866, "encoder_q-layer.6": 2930.9407, "encoder_q-layer.7": 3357.1763, "encoder_q-layer.8": 4039.1265, "encoder_q-layer.9": 3617.0325, "epoch": 0.81, "inbatch_neg_score": 1.4204, "inbatch_pos_score": 2.1699, "learning_rate": 9.277777777777778e-06, "loss": 2.564, "norm_diff": 0.0405, "norm_loss": 0.0, "num_token_doc": 66.8096, "num_token_overlap": 17.8395, "num_token_query": 52.4449, "num_token_union": 73.8098, "num_word_context": 202.2505, "num_word_doc": 49.8381, "num_word_query": 39.993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5264.2272, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4209, "query_norm": 1.7707, "queue_k_norm": 1.8154, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4449, "sent_len_1": 66.8096, "sent_len_max_0": 128.0, "sent_len_max_1": 207.41, "stdk": 0.0494, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 83300 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.5802, "doc_norm": 1.8163, "encoder_q-embeddings": 2549.0955, "encoder_q-layer.0": 1601.8286, "encoder_q-layer.1": 1771.6427, "encoder_q-layer.10": 3758.4753, "encoder_q-layer.11": 7440.5825, "encoder_q-layer.2": 1992.824, "encoder_q-layer.3": 2096.3772, "encoder_q-layer.4": 2279.9863, "encoder_q-layer.5": 2483.6619, "encoder_q-layer.6": 2936.0886, "encoder_q-layer.7": 3182.074, "encoder_q-layer.8": 3925.7847, "encoder_q-layer.9": 3730.502, "epoch": 0.81, "inbatch_neg_score": 1.4214, "inbatch_pos_score": 2.1836, "learning_rate": 9.222222222222222e-06, "loss": 2.5802, "norm_diff": 0.0417, "norm_loss": 0.0, "num_token_doc": 66.7202, "num_token_overlap": 17.7912, "num_token_query": 52.2748, "num_token_union": 73.701, "num_word_context": 202.4229, "num_word_doc": 49.7834, "num_word_query": 39.8762, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4977.1653, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4229, "query_norm": 1.7746, "queue_k_norm": 1.8147, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2748, "sent_len_1": 66.7202, "sent_len_max_0": 128.0, "sent_len_max_1": 208.435, "stdk": 0.0496, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 83400 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.5852, "doc_norm": 1.8146, "encoder_q-embeddings": 2560.0942, "encoder_q-layer.0": 1687.6716, "encoder_q-layer.1": 1841.855, "encoder_q-layer.10": 3654.262, "encoder_q-layer.11": 7694.7837, "encoder_q-layer.2": 2077.593, "encoder_q-layer.3": 2136.3342, "encoder_q-layer.4": 2329.2817, "encoder_q-layer.5": 2429.561, "encoder_q-layer.6": 2740.3523, "encoder_q-layer.7": 2993.2209, "encoder_q-layer.8": 3572.4082, "encoder_q-layer.9": 3394.179, "epoch": 0.82, "inbatch_neg_score": 1.4293, "inbatch_pos_score": 2.1797, "learning_rate": 9.166666666666666e-06, "loss": 2.5852, "norm_diff": 0.0492, "norm_loss": 0.0, "num_token_doc": 66.809, "num_token_overlap": 17.7712, "num_token_query": 52.1697, "num_token_union": 73.6862, "num_word_context": 202.3715, "num_word_doc": 49.8446, "num_word_query": 39.8002, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5010.2161, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4287, "query_norm": 1.7654, "queue_k_norm": 1.8165, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1697, "sent_len_1": 66.809, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6425, "stdk": 0.0495, "stdq": 0.0463, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 83500 }, { "accuracy": 62.6465, "active_queue_size": 16384.0, "cl_loss": 2.5699, "doc_norm": 1.8163, "encoder_q-embeddings": 2365.3237, "encoder_q-layer.0": 1467.9213, "encoder_q-layer.1": 1634.0195, "encoder_q-layer.10": 3433.1331, "encoder_q-layer.11": 7338.9312, "encoder_q-layer.2": 1813.3132, "encoder_q-layer.3": 1960.634, "encoder_q-layer.4": 2161.4648, "encoder_q-layer.5": 2301.4797, "encoder_q-layer.6": 2614.793, "encoder_q-layer.7": 3057.1562, "encoder_q-layer.8": 3806.0015, "encoder_q-layer.9": 3348.4131, "epoch": 0.82, "inbatch_neg_score": 1.4275, "inbatch_pos_score": 2.1934, "learning_rate": 9.111111111111112e-06, "loss": 2.5699, "norm_diff": 0.0417, "norm_loss": 0.0, "num_token_doc": 66.863, "num_token_overlap": 17.8588, "num_token_query": 52.3169, "num_token_union": 73.7418, "num_word_context": 202.2943, "num_word_doc": 49.9094, "num_word_query": 39.927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4825.0808, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4277, "query_norm": 1.7745, "queue_k_norm": 1.8182, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3169, "sent_len_1": 66.863, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3425, "stdk": 0.0495, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 83600 }, { "accuracy": 60.4004, "active_queue_size": 16384.0, "cl_loss": 2.566, "doc_norm": 1.817, "encoder_q-embeddings": 2533.4404, "encoder_q-layer.0": 1701.1372, "encoder_q-layer.1": 1851.7014, "encoder_q-layer.10": 3419.8569, "encoder_q-layer.11": 7232.8989, "encoder_q-layer.2": 2186.0691, "encoder_q-layer.3": 2263.1831, "encoder_q-layer.4": 2451.1309, "encoder_q-layer.5": 2596.5466, "encoder_q-layer.6": 2942.6624, "encoder_q-layer.7": 3352.8191, "encoder_q-layer.8": 3792.2361, "encoder_q-layer.9": 3261.0764, "epoch": 0.82, "inbatch_neg_score": 1.4309, "inbatch_pos_score": 2.1934, "learning_rate": 9.055555555555556e-06, "loss": 2.566, "norm_diff": 0.0454, "norm_loss": 0.0, "num_token_doc": 66.7775, "num_token_overlap": 17.8654, "num_token_query": 52.3835, "num_token_union": 73.7059, "num_word_context": 202.2396, "num_word_doc": 49.827, "num_word_query": 39.9494, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4992.4855, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4326, "query_norm": 1.7716, "queue_k_norm": 1.8178, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3835, "sent_len_1": 66.7775, "sent_len_max_0": 128.0, "sent_len_max_1": 207.62, "stdk": 0.0496, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 83700 }, { "accuracy": 62.5, "active_queue_size": 16384.0, "cl_loss": 2.5756, "doc_norm": 1.8185, "encoder_q-embeddings": 2845.4536, "encoder_q-layer.0": 1822.8599, "encoder_q-layer.1": 2106.105, "encoder_q-layer.10": 3355.1345, "encoder_q-layer.11": 7448.2236, "encoder_q-layer.2": 2449.3911, "encoder_q-layer.3": 2638.0303, "encoder_q-layer.4": 2853.9263, "encoder_q-layer.5": 3144.1538, "encoder_q-layer.6": 3079.2844, "encoder_q-layer.7": 3359.311, "encoder_q-layer.8": 3797.9155, "encoder_q-layer.9": 3517.3706, "epoch": 0.82, "inbatch_neg_score": 1.4385, "inbatch_pos_score": 2.1992, "learning_rate": 9e-06, "loss": 2.5756, "norm_diff": 0.0452, "norm_loss": 0.0, "num_token_doc": 66.6342, "num_token_overlap": 17.7804, "num_token_query": 52.1674, "num_token_union": 73.5535, "num_word_context": 202.0382, "num_word_doc": 49.7082, "num_word_query": 39.7853, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5294.7221, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4375, "query_norm": 1.7733, "queue_k_norm": 1.8186, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1674, "sent_len_1": 66.6342, "sent_len_max_0": 128.0, "sent_len_max_1": 207.3688, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 83800 }, { "accuracy": 61.6699, "active_queue_size": 16384.0, "cl_loss": 2.572, "doc_norm": 1.8224, "encoder_q-embeddings": 6753.3833, "encoder_q-layer.0": 5088.4854, "encoder_q-layer.1": 6107.3936, "encoder_q-layer.10": 3712.9004, "encoder_q-layer.11": 7477.6943, "encoder_q-layer.2": 6628.8423, "encoder_q-layer.3": 7031.4155, "encoder_q-layer.4": 6610.9126, "encoder_q-layer.5": 6760.9521, "encoder_q-layer.6": 6700.9688, "encoder_q-layer.7": 6291.5669, "encoder_q-layer.8": 5684.2808, "encoder_q-layer.9": 3974.7419, "epoch": 0.82, "inbatch_neg_score": 1.4398, "inbatch_pos_score": 2.2031, "learning_rate": 8.944444444444444e-06, "loss": 2.572, "norm_diff": 0.0381, "norm_loss": 0.0, "num_token_doc": 66.7896, "num_token_overlap": 17.7904, "num_token_query": 52.1949, "num_token_union": 73.6976, "num_word_context": 202.3924, "num_word_doc": 49.8201, "num_word_query": 39.7861, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9219.1497, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4414, "query_norm": 1.7843, "queue_k_norm": 1.8184, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1949, "sent_len_1": 66.7896, "sent_len_max_0": 128.0, "sent_len_max_1": 210.9988, "stdk": 0.0498, "stdq": 0.047, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 83900 }, { "accuracy": 61.1816, "active_queue_size": 16384.0, "cl_loss": 2.5704, "doc_norm": 1.8182, "encoder_q-embeddings": 2574.4238, "encoder_q-layer.0": 1686.3162, "encoder_q-layer.1": 1900.1038, "encoder_q-layer.10": 3611.3945, "encoder_q-layer.11": 7152.521, "encoder_q-layer.2": 2222.1741, "encoder_q-layer.3": 2407.7366, "encoder_q-layer.4": 2514.9736, "encoder_q-layer.5": 2568.6584, "encoder_q-layer.6": 2917.0986, "encoder_q-layer.7": 3108.1963, "encoder_q-layer.8": 3562.4014, "encoder_q-layer.9": 3328.0215, "epoch": 0.82, "inbatch_neg_score": 1.4452, "inbatch_pos_score": 2.209, "learning_rate": 8.88888888888889e-06, "loss": 2.5704, "norm_diff": 0.0341, "norm_loss": 0.0, "num_token_doc": 66.7585, "num_token_overlap": 17.835, "num_token_query": 52.356, "num_token_union": 73.7179, "num_word_context": 202.1822, "num_word_doc": 49.8044, "num_word_query": 39.9364, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4936.704, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4453, "query_norm": 1.7841, "queue_k_norm": 1.8198, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.356, "sent_len_1": 66.7585, "sent_len_max_0": 128.0, "sent_len_max_1": 211.3762, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 84000 }, { "accuracy": 62.3047, "active_queue_size": 16384.0, "cl_loss": 2.5721, "doc_norm": 1.823, "encoder_q-embeddings": 2433.4558, "encoder_q-layer.0": 1635.772, "encoder_q-layer.1": 1867.668, "encoder_q-layer.10": 3526.8989, "encoder_q-layer.11": 7147.1934, "encoder_q-layer.2": 2161.7329, "encoder_q-layer.3": 2312.8418, "encoder_q-layer.4": 2577.7087, "encoder_q-layer.5": 2649.365, "encoder_q-layer.6": 3056.9873, "encoder_q-layer.7": 3264.2285, "encoder_q-layer.8": 3664.3916, "encoder_q-layer.9": 3435.3975, "epoch": 0.82, "inbatch_neg_score": 1.4464, "inbatch_pos_score": 2.2168, "learning_rate": 8.833333333333334e-06, "loss": 2.5721, "norm_diff": 0.0376, "norm_loss": 0.0, "num_token_doc": 66.7622, "num_token_overlap": 17.8404, "num_token_query": 52.3609, "num_token_union": 73.7211, "num_word_context": 202.3242, "num_word_doc": 49.7949, "num_word_query": 39.9081, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4894.6114, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4473, "query_norm": 1.7854, "queue_k_norm": 1.8212, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3609, "sent_len_1": 66.7622, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8088, "stdk": 0.0497, "stdq": 0.0469, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 84100 }, { "accuracy": 61.9629, "active_queue_size": 16384.0, "cl_loss": 2.584, "doc_norm": 1.8188, "encoder_q-embeddings": 2345.3574, "encoder_q-layer.0": 1557.2163, "encoder_q-layer.1": 1714.1958, "encoder_q-layer.10": 3411.5295, "encoder_q-layer.11": 7195.6245, "encoder_q-layer.2": 1875.3409, "encoder_q-layer.3": 1986.2637, "encoder_q-layer.4": 2173.3298, "encoder_q-layer.5": 2311.9883, "encoder_q-layer.6": 2538.7075, "encoder_q-layer.7": 2938.2971, "encoder_q-layer.8": 3722.3577, "encoder_q-layer.9": 3383.0808, "epoch": 0.82, "inbatch_neg_score": 1.4485, "inbatch_pos_score": 2.2031, "learning_rate": 8.777777777777778e-06, "loss": 2.584, "norm_diff": 0.04, "norm_loss": 0.0, "num_token_doc": 66.7784, "num_token_overlap": 17.7969, "num_token_query": 52.2433, "num_token_union": 73.7022, "num_word_context": 202.338, "num_word_doc": 49.8307, "num_word_query": 39.8631, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4761.2811, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4492, "query_norm": 1.7788, "queue_k_norm": 1.8211, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2433, "sent_len_1": 66.7784, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6675, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 84200 }, { "accuracy": 60.4004, "active_queue_size": 16384.0, "cl_loss": 2.5853, "doc_norm": 1.8208, "encoder_q-embeddings": 1531.8175, "encoder_q-layer.0": 1052.1427, "encoder_q-layer.1": 1187.1135, "encoder_q-layer.10": 2020.1836, "encoder_q-layer.11": 4087.728, "encoder_q-layer.2": 1356.6503, "encoder_q-layer.3": 1448.8822, "encoder_q-layer.4": 1606.0691, "encoder_q-layer.5": 1637.3717, "encoder_q-layer.6": 1713.8826, "encoder_q-layer.7": 1825.2513, "encoder_q-layer.8": 2066.4009, "encoder_q-layer.9": 1808.3688, "epoch": 0.82, "inbatch_neg_score": 1.4539, "inbatch_pos_score": 2.207, "learning_rate": 8.722222222222224e-06, "loss": 2.5853, "norm_diff": 0.0339, "norm_loss": 0.0, "num_token_doc": 66.665, "num_token_overlap": 17.7703, "num_token_query": 52.0933, "num_token_union": 73.5891, "num_word_context": 202.2508, "num_word_doc": 49.7814, "num_word_query": 39.7349, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2868.1953, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4531, "query_norm": 1.7869, "queue_k_norm": 1.8217, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0933, "sent_len_1": 66.665, "sent_len_max_0": 128.0, "sent_len_max_1": 206.8512, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 84300 }, { "accuracy": 60.8398, "active_queue_size": 16384.0, "cl_loss": 2.5812, "doc_norm": 1.8239, "encoder_q-embeddings": 2312.9026, "encoder_q-layer.0": 1646.0403, "encoder_q-layer.1": 1969.4626, "encoder_q-layer.10": 1811.6875, "encoder_q-layer.11": 3707.5156, "encoder_q-layer.2": 2626.3127, "encoder_q-layer.3": 2728.0042, "encoder_q-layer.4": 3146.9426, "encoder_q-layer.5": 3469.6936, "encoder_q-layer.6": 3815.1821, "encoder_q-layer.7": 3369.4919, "encoder_q-layer.8": 2160.9263, "encoder_q-layer.9": 1759.7417, "epoch": 0.82, "inbatch_neg_score": 1.4552, "inbatch_pos_score": 2.2148, "learning_rate": 8.666666666666668e-06, "loss": 2.5812, "norm_diff": 0.0417, "norm_loss": 0.0, "num_token_doc": 66.6084, "num_token_overlap": 17.7602, "num_token_query": 52.1945, "num_token_union": 73.5593, "num_word_context": 202.086, "num_word_doc": 49.7225, "num_word_query": 39.8112, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4110.0808, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4551, "query_norm": 1.7822, "queue_k_norm": 1.8215, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1945, "sent_len_1": 66.6084, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7812, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 84400 }, { "accuracy": 61.6211, "active_queue_size": 16384.0, "cl_loss": 2.5897, "doc_norm": 1.8231, "encoder_q-embeddings": 1204.694, "encoder_q-layer.0": 802.4384, "encoder_q-layer.1": 874.609, "encoder_q-layer.10": 1759.652, "encoder_q-layer.11": 3747.7969, "encoder_q-layer.2": 1011.7432, "encoder_q-layer.3": 1032.4377, "encoder_q-layer.4": 1143.8672, "encoder_q-layer.5": 1179.0054, "encoder_q-layer.6": 1385.17, "encoder_q-layer.7": 1548.941, "encoder_q-layer.8": 1829.5201, "encoder_q-layer.9": 1719.1427, "epoch": 0.82, "inbatch_neg_score": 1.4601, "inbatch_pos_score": 2.2168, "learning_rate": 8.611111111111112e-06, "loss": 2.5897, "norm_diff": 0.0371, "norm_loss": 0.0, "num_token_doc": 66.7112, "num_token_overlap": 17.775, "num_token_query": 52.2468, "num_token_union": 73.6818, "num_word_context": 202.3407, "num_word_doc": 49.7726, "num_word_query": 39.8446, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2441.7543, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.46, "query_norm": 1.7861, "queue_k_norm": 1.823, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2468, "sent_len_1": 66.7112, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5625, "stdk": 0.0496, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 84500 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.5851, "doc_norm": 1.8219, "encoder_q-embeddings": 1249.9045, "encoder_q-layer.0": 806.058, "encoder_q-layer.1": 898.4869, "encoder_q-layer.10": 1806.1033, "encoder_q-layer.11": 3771.002, "encoder_q-layer.2": 1019.6844, "encoder_q-layer.3": 1098.2006, "encoder_q-layer.4": 1167.2145, "encoder_q-layer.5": 1253.8213, "encoder_q-layer.6": 1417.1462, "encoder_q-layer.7": 1590.8274, "encoder_q-layer.8": 1885.3668, "encoder_q-layer.9": 1679.9583, "epoch": 0.83, "inbatch_neg_score": 1.4618, "inbatch_pos_score": 2.2012, "learning_rate": 8.555555555555556e-06, "loss": 2.5851, "norm_diff": 0.0392, "norm_loss": 0.0, "num_token_doc": 66.6766, "num_token_overlap": 17.7579, "num_token_query": 52.2234, "num_token_union": 73.6709, "num_word_context": 202.4288, "num_word_doc": 49.7692, "num_word_query": 39.827, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2498.5962, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4619, "query_norm": 1.7828, "queue_k_norm": 1.8234, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2234, "sent_len_1": 66.6766, "sent_len_max_0": 128.0, "sent_len_max_1": 207.6413, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 84600 }, { "accuracy": 61.2305, "active_queue_size": 16384.0, "cl_loss": 2.577, "doc_norm": 1.8258, "encoder_q-embeddings": 1387.7916, "encoder_q-layer.0": 922.0066, "encoder_q-layer.1": 1060.0179, "encoder_q-layer.10": 1938.6935, "encoder_q-layer.11": 3796.1455, "encoder_q-layer.2": 1174.5027, "encoder_q-layer.3": 1277.0956, "encoder_q-layer.4": 1383.5872, "encoder_q-layer.5": 1487.8517, "encoder_q-layer.6": 1623.67, "encoder_q-layer.7": 1937.592, "encoder_q-layer.8": 2068.3491, "encoder_q-layer.9": 1754.4053, "epoch": 0.83, "inbatch_neg_score": 1.4636, "inbatch_pos_score": 2.2188, "learning_rate": 8.500000000000002e-06, "loss": 2.577, "norm_diff": 0.0348, "norm_loss": 0.0, "num_token_doc": 66.6877, "num_token_overlap": 17.8006, "num_token_query": 52.3401, "num_token_union": 73.6626, "num_word_context": 202.2372, "num_word_doc": 49.761, "num_word_query": 39.9314, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2697.2845, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4629, "query_norm": 1.7911, "queue_k_norm": 1.824, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3401, "sent_len_1": 66.6877, "sent_len_max_0": 128.0, "sent_len_max_1": 210.5662, "stdk": 0.0496, "stdq": 0.0471, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 84700 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.5769, "doc_norm": 1.8228, "encoder_q-embeddings": 1202.7992, "encoder_q-layer.0": 784.4984, "encoder_q-layer.1": 847.4388, "encoder_q-layer.10": 1868.7931, "encoder_q-layer.11": 3790.3401, "encoder_q-layer.2": 957.2361, "encoder_q-layer.3": 988.6262, "encoder_q-layer.4": 1086.8513, "encoder_q-layer.5": 1180.4956, "encoder_q-layer.6": 1344.9685, "encoder_q-layer.7": 1559.5637, "encoder_q-layer.8": 1810.2424, "encoder_q-layer.9": 1733.9705, "epoch": 0.83, "inbatch_neg_score": 1.4649, "inbatch_pos_score": 2.2051, "learning_rate": 8.444444444444446e-06, "loss": 2.5769, "norm_diff": 0.0427, "norm_loss": 0.0, "num_token_doc": 66.639, "num_token_overlap": 17.7756, "num_token_query": 52.3121, "num_token_union": 73.6688, "num_word_context": 202.2314, "num_word_doc": 49.7158, "num_word_query": 39.891, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2477.0865, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4648, "query_norm": 1.7801, "queue_k_norm": 1.8254, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3121, "sent_len_1": 66.639, "sent_len_max_0": 128.0, "sent_len_max_1": 208.885, "stdk": 0.0495, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 84800 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.5758, "doc_norm": 1.8264, "encoder_q-embeddings": 1259.3323, "encoder_q-layer.0": 819.1841, "encoder_q-layer.1": 905.7159, "encoder_q-layer.10": 1723.0566, "encoder_q-layer.11": 3776.9673, "encoder_q-layer.2": 1059.5688, "encoder_q-layer.3": 1116.1379, "encoder_q-layer.4": 1246.8291, "encoder_q-layer.5": 1405.3602, "encoder_q-layer.6": 1498.8752, "encoder_q-layer.7": 1515.4398, "encoder_q-layer.8": 1838.9346, "encoder_q-layer.9": 1612.3146, "epoch": 0.83, "inbatch_neg_score": 1.4645, "inbatch_pos_score": 2.2148, "learning_rate": 8.38888888888889e-06, "loss": 2.5758, "norm_diff": 0.0513, "norm_loss": 0.0, "num_token_doc": 66.7471, "num_token_overlap": 17.7951, "num_token_query": 52.3408, "num_token_union": 73.769, "num_word_context": 202.3969, "num_word_doc": 49.8031, "num_word_query": 39.9357, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2458.6236, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4648, "query_norm": 1.7751, "queue_k_norm": 1.8252, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3408, "sent_len_1": 66.7471, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2188, "stdk": 0.0496, "stdq": 0.0463, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 84900 }, { "accuracy": 59.7168, "active_queue_size": 16384.0, "cl_loss": 2.5677, "doc_norm": 1.8237, "encoder_q-embeddings": 1233.5846, "encoder_q-layer.0": 840.4923, "encoder_q-layer.1": 936.7908, "encoder_q-layer.10": 1985.4683, "encoder_q-layer.11": 3987.7805, "encoder_q-layer.2": 1056.1184, "encoder_q-layer.3": 1122.3488, "encoder_q-layer.4": 1218.5923, "encoder_q-layer.5": 1301.5206, "encoder_q-layer.6": 1492.7817, "encoder_q-layer.7": 1727.1343, "encoder_q-layer.8": 2123.1748, "encoder_q-layer.9": 1888.0773, "epoch": 0.83, "inbatch_neg_score": 1.4628, "inbatch_pos_score": 2.207, "learning_rate": 8.333333333333334e-06, "loss": 2.5677, "norm_diff": 0.0407, "norm_loss": 0.0, "num_token_doc": 66.8506, "num_token_overlap": 17.8426, "num_token_query": 52.3876, "num_token_union": 73.7702, "num_word_context": 202.4535, "num_word_doc": 49.8676, "num_word_query": 39.945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2623.3749, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4629, "query_norm": 1.783, "queue_k_norm": 1.826, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3876, "sent_len_1": 66.8506, "sent_len_max_0": 128.0, "sent_len_max_1": 210.3963, "stdk": 0.0495, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 85000 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.5802, "doc_norm": 1.821, "encoder_q-embeddings": 1409.6654, "encoder_q-layer.0": 918.1901, "encoder_q-layer.1": 994.7557, "encoder_q-layer.10": 1696.7162, "encoder_q-layer.11": 3723.5415, "encoder_q-layer.2": 1146.6687, "encoder_q-layer.3": 1152.1045, "encoder_q-layer.4": 1309.8331, "encoder_q-layer.5": 1353.714, "encoder_q-layer.6": 1548.7728, "encoder_q-layer.7": 1587.4718, "encoder_q-layer.8": 1881.6025, "encoder_q-layer.9": 1710.9445, "epoch": 0.83, "inbatch_neg_score": 1.4633, "inbatch_pos_score": 2.1973, "learning_rate": 8.27777777777778e-06, "loss": 2.5802, "norm_diff": 0.0556, "norm_loss": 0.0, "num_token_doc": 66.7937, "num_token_overlap": 17.8096, "num_token_query": 52.3337, "num_token_union": 73.7345, "num_word_context": 202.2731, "num_word_doc": 49.8332, "num_word_query": 39.9133, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2567.9, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4639, "query_norm": 1.7655, "queue_k_norm": 1.8274, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3337, "sent_len_1": 66.7937, "sent_len_max_0": 128.0, "sent_len_max_1": 207.585, "stdk": 0.0493, "stdq": 0.046, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 85100 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.5656, "doc_norm": 1.8253, "encoder_q-embeddings": 1194.1333, "encoder_q-layer.0": 790.0929, "encoder_q-layer.1": 879.9792, "encoder_q-layer.10": 1809.7277, "encoder_q-layer.11": 3757.803, "encoder_q-layer.2": 990.4109, "encoder_q-layer.3": 1023.1232, "encoder_q-layer.4": 1155.3448, "encoder_q-layer.5": 1181.8499, "encoder_q-layer.6": 1420.3925, "encoder_q-layer.7": 1601.8765, "encoder_q-layer.8": 1903.8521, "encoder_q-layer.9": 1768.9039, "epoch": 0.83, "inbatch_neg_score": 1.4644, "inbatch_pos_score": 2.2188, "learning_rate": 8.222222222222223e-06, "loss": 2.5656, "norm_diff": 0.0362, "norm_loss": 0.0, "num_token_doc": 66.8193, "num_token_overlap": 17.8318, "num_token_query": 52.2852, "num_token_union": 73.6944, "num_word_context": 202.4082, "num_word_doc": 49.8522, "num_word_query": 39.8797, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2501.4043, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4648, "query_norm": 1.7891, "queue_k_norm": 1.8269, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2852, "sent_len_1": 66.8193, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4963, "stdk": 0.0495, "stdq": 0.0472, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 85200 }, { "accuracy": 62.0117, "active_queue_size": 16384.0, "cl_loss": 2.5649, "doc_norm": 1.83, "encoder_q-embeddings": 1241.036, "encoder_q-layer.0": 812.0856, "encoder_q-layer.1": 896.7781, "encoder_q-layer.10": 1693.2512, "encoder_q-layer.11": 3620.752, "encoder_q-layer.2": 1051.4342, "encoder_q-layer.3": 1087.4625, "encoder_q-layer.4": 1152.8281, "encoder_q-layer.5": 1209.8562, "encoder_q-layer.6": 1422.7043, "encoder_q-layer.7": 1598.3977, "encoder_q-layer.8": 1830.2903, "encoder_q-layer.9": 1682.3734, "epoch": 0.83, "inbatch_neg_score": 1.4615, "inbatch_pos_score": 2.2246, "learning_rate": 8.166666666666668e-06, "loss": 2.5649, "norm_diff": 0.0603, "norm_loss": 0.0, "num_token_doc": 66.7164, "num_token_overlap": 17.7977, "num_token_query": 52.3563, "num_token_union": 73.7397, "num_word_context": 202.5681, "num_word_doc": 49.8104, "num_word_query": 39.9468, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2432.9451, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4619, "query_norm": 1.7698, "queue_k_norm": 1.8282, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3563, "sent_len_1": 66.7164, "sent_len_max_0": 128.0, "sent_len_max_1": 206.1138, "stdk": 0.0497, "stdq": 0.0463, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 85300 }, { "accuracy": 60.791, "active_queue_size": 16384.0, "cl_loss": 2.5716, "doc_norm": 1.826, "encoder_q-embeddings": 1537.9041, "encoder_q-layer.0": 1026.4198, "encoder_q-layer.1": 1146.8263, "encoder_q-layer.10": 1716.9189, "encoder_q-layer.11": 3680.1943, "encoder_q-layer.2": 1385.9141, "encoder_q-layer.3": 1567.0365, "encoder_q-layer.4": 1600.9761, "encoder_q-layer.5": 1657.3641, "encoder_q-layer.6": 1617.056, "encoder_q-layer.7": 1767.3088, "encoder_q-layer.8": 1819.7665, "encoder_q-layer.9": 1653.3802, "epoch": 0.83, "inbatch_neg_score": 1.4627, "inbatch_pos_score": 2.2168, "learning_rate": 8.111111111111112e-06, "loss": 2.5716, "norm_diff": 0.0508, "norm_loss": 0.0, "num_token_doc": 66.8704, "num_token_overlap": 17.8175, "num_token_query": 52.1803, "num_token_union": 73.7143, "num_word_context": 202.3693, "num_word_doc": 49.8819, "num_word_query": 39.7826, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2691.3562, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4629, "query_norm": 1.7752, "queue_k_norm": 1.8278, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1803, "sent_len_1": 66.8704, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2587, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 85400 }, { "accuracy": 62.8906, "active_queue_size": 16384.0, "cl_loss": 2.575, "doc_norm": 1.8334, "encoder_q-embeddings": 3406.2266, "encoder_q-layer.0": 2548.3665, "encoder_q-layer.1": 3286.2246, "encoder_q-layer.10": 1828.6028, "encoder_q-layer.11": 4112.8403, "encoder_q-layer.2": 4013.6963, "encoder_q-layer.3": 4327.3125, "encoder_q-layer.4": 4810.3174, "encoder_q-layer.5": 5372.0278, "encoder_q-layer.6": 4667.7524, "encoder_q-layer.7": 4924.6035, "encoder_q-layer.8": 3999.969, "encoder_q-layer.9": 2121.5188, "epoch": 0.83, "inbatch_neg_score": 1.4622, "inbatch_pos_score": 2.2402, "learning_rate": 8.055555555555557e-06, "loss": 2.575, "norm_diff": 0.0499, "norm_loss": 0.0, "num_token_doc": 66.6773, "num_token_overlap": 17.7888, "num_token_query": 52.2334, "num_token_union": 73.6369, "num_word_context": 202.2198, "num_word_doc": 49.7506, "num_word_query": 39.8382, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5904.876, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4639, "query_norm": 1.7835, "queue_k_norm": 1.8275, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2334, "sent_len_1": 66.6773, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4863, "stdk": 0.0499, "stdq": 0.047, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 85500 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.5682, "doc_norm": 1.8267, "encoder_q-embeddings": 1475.863, "encoder_q-layer.0": 998.785, "encoder_q-layer.1": 1104.0504, "encoder_q-layer.10": 1754.5096, "encoder_q-layer.11": 3742.4458, "encoder_q-layer.2": 1246.8468, "encoder_q-layer.3": 1393.2998, "encoder_q-layer.4": 1561.0596, "encoder_q-layer.5": 1600.0089, "encoder_q-layer.6": 1641.3124, "encoder_q-layer.7": 1695.0488, "encoder_q-layer.8": 1852.6439, "encoder_q-layer.9": 1738.5007, "epoch": 0.84, "inbatch_neg_score": 1.4627, "inbatch_pos_score": 2.2148, "learning_rate": 8.000000000000001e-06, "loss": 2.5682, "norm_diff": 0.0561, "norm_loss": 0.0, "num_token_doc": 66.7958, "num_token_overlap": 17.8385, "num_token_query": 52.327, "num_token_union": 73.7143, "num_word_context": 202.3467, "num_word_doc": 49.8292, "num_word_query": 39.9094, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2622.4894, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4619, "query_norm": 1.7706, "queue_k_norm": 1.8301, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.327, "sent_len_1": 66.7958, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1538, "stdk": 0.0495, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 85600 }, { "accuracy": 60.6934, "active_queue_size": 16384.0, "cl_loss": 2.5777, "doc_norm": 1.8261, "encoder_q-embeddings": 1453.9023, "encoder_q-layer.0": 972.4342, "encoder_q-layer.1": 1103.7618, "encoder_q-layer.10": 1802.1658, "encoder_q-layer.11": 3657.7354, "encoder_q-layer.2": 1242.8528, "encoder_q-layer.3": 1352.9131, "encoder_q-layer.4": 1464.7493, "encoder_q-layer.5": 1570.0424, "encoder_q-layer.6": 1767.5137, "encoder_q-layer.7": 1916.2285, "encoder_q-layer.8": 1931.7646, "encoder_q-layer.9": 1661.2126, "epoch": 0.84, "inbatch_neg_score": 1.4635, "inbatch_pos_score": 2.2227, "learning_rate": 7.944444444444445e-06, "loss": 2.5777, "norm_diff": 0.055, "norm_loss": 0.0, "num_token_doc": 66.8705, "num_token_overlap": 17.8095, "num_token_query": 52.2632, "num_token_union": 73.7501, "num_word_context": 202.6539, "num_word_doc": 49.8827, "num_word_query": 39.8643, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2660.2299, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4629, "query_norm": 1.7711, "queue_k_norm": 1.8307, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2632, "sent_len_1": 66.8705, "sent_len_max_0": 128.0, "sent_len_max_1": 209.365, "stdk": 0.0494, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 85700 }, { "accuracy": 61.6699, "active_queue_size": 16384.0, "cl_loss": 2.5759, "doc_norm": 1.8311, "encoder_q-embeddings": 1298.2325, "encoder_q-layer.0": 814.3777, "encoder_q-layer.1": 894.45, "encoder_q-layer.10": 1842.5714, "encoder_q-layer.11": 3863.8157, "encoder_q-layer.2": 1008.3391, "encoder_q-layer.3": 1070.146, "encoder_q-layer.4": 1184.509, "encoder_q-layer.5": 1303.35, "encoder_q-layer.6": 1479.8922, "encoder_q-layer.7": 1622.0568, "encoder_q-layer.8": 1902.7494, "encoder_q-layer.9": 1777.8386, "epoch": 0.84, "inbatch_neg_score": 1.4651, "inbatch_pos_score": 2.2324, "learning_rate": 7.88888888888889e-06, "loss": 2.5759, "norm_diff": 0.0545, "norm_loss": 0.0, "num_token_doc": 66.7293, "num_token_overlap": 17.8037, "num_token_query": 52.2421, "num_token_union": 73.6833, "num_word_context": 202.2472, "num_word_doc": 49.7998, "num_word_query": 39.8497, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2525.5577, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4648, "query_norm": 1.7766, "queue_k_norm": 1.8306, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2421, "sent_len_1": 66.7293, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2875, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 85800 }, { "accuracy": 61.1816, "active_queue_size": 16384.0, "cl_loss": 2.5667, "doc_norm": 1.8326, "encoder_q-embeddings": 1185.6785, "encoder_q-layer.0": 766.5556, "encoder_q-layer.1": 836.3497, "encoder_q-layer.10": 1859.5748, "encoder_q-layer.11": 3979.7637, "encoder_q-layer.2": 952.0905, "encoder_q-layer.3": 994.8513, "encoder_q-layer.4": 1113.9071, "encoder_q-layer.5": 1178.9258, "encoder_q-layer.6": 1433.5875, "encoder_q-layer.7": 1629.7534, "encoder_q-layer.8": 1928.8304, "encoder_q-layer.9": 1790.9658, "epoch": 0.84, "inbatch_neg_score": 1.4604, "inbatch_pos_score": 2.2188, "learning_rate": 7.833333333333333e-06, "loss": 2.5667, "norm_diff": 0.0628, "norm_loss": 0.0, "num_token_doc": 66.9176, "num_token_overlap": 17.8788, "num_token_query": 52.3773, "num_token_union": 73.7942, "num_word_context": 202.499, "num_word_doc": 49.9491, "num_word_query": 39.945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2553.1414, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4609, "query_norm": 1.7698, "queue_k_norm": 1.8296, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3773, "sent_len_1": 66.9176, "sent_len_max_0": 128.0, "sent_len_max_1": 207.2788, "stdk": 0.0498, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 85900 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.5747, "doc_norm": 1.8298, "encoder_q-embeddings": 1293.4814, "encoder_q-layer.0": 836.3209, "encoder_q-layer.1": 895.7809, "encoder_q-layer.10": 1855.681, "encoder_q-layer.11": 3787.4614, "encoder_q-layer.2": 1050.0461, "encoder_q-layer.3": 1096.8961, "encoder_q-layer.4": 1198.9943, "encoder_q-layer.5": 1203.3284, "encoder_q-layer.6": 1351.2621, "encoder_q-layer.7": 1481.0771, "encoder_q-layer.8": 1783.6886, "encoder_q-layer.9": 1696.5985, "epoch": 0.84, "inbatch_neg_score": 1.4617, "inbatch_pos_score": 2.2031, "learning_rate": 7.777777777777777e-06, "loss": 2.5747, "norm_diff": 0.0589, "norm_loss": 0.0, "num_token_doc": 66.6585, "num_token_overlap": 17.7703, "num_token_query": 52.1704, "num_token_union": 73.6098, "num_word_context": 202.1461, "num_word_doc": 49.7291, "num_word_query": 39.7831, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2506.4326, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4629, "query_norm": 1.7709, "queue_k_norm": 1.8316, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1704, "sent_len_1": 66.6585, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4462, "stdk": 0.0496, "stdq": 0.0464, "stdqueue_k": 0.0498, "stdqueue_q": 0.0, "step": 86000 }, { "accuracy": 62.3535, "active_queue_size": 16384.0, "cl_loss": 2.569, "doc_norm": 1.8329, "encoder_q-embeddings": 1193.6521, "encoder_q-layer.0": 807.5563, "encoder_q-layer.1": 895.2394, "encoder_q-layer.10": 1982.1841, "encoder_q-layer.11": 3993.8396, "encoder_q-layer.2": 1033.3928, "encoder_q-layer.3": 1086.8698, "encoder_q-layer.4": 1192.2512, "encoder_q-layer.5": 1258.217, "encoder_q-layer.6": 1432.6399, "encoder_q-layer.7": 1513.767, "encoder_q-layer.8": 1890.8011, "encoder_q-layer.9": 1779.3417, "epoch": 0.84, "inbatch_neg_score": 1.4606, "inbatch_pos_score": 2.2188, "learning_rate": 7.722222222222223e-06, "loss": 2.569, "norm_diff": 0.0676, "norm_loss": 0.0, "num_token_doc": 66.9251, "num_token_overlap": 17.8897, "num_token_query": 52.3611, "num_token_union": 73.7893, "num_word_context": 202.2116, "num_word_doc": 49.917, "num_word_query": 39.9362, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2551.7056, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4609, "query_norm": 1.7653, "queue_k_norm": 1.8303, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3611, "sent_len_1": 66.9251, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6538, "stdk": 0.0498, "stdq": 0.0462, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 86100 }, { "accuracy": 62.6953, "active_queue_size": 16384.0, "cl_loss": 2.5707, "doc_norm": 1.8316, "encoder_q-embeddings": 1605.9272, "encoder_q-layer.0": 1086.0908, "encoder_q-layer.1": 1240.811, "encoder_q-layer.10": 1716.6503, "encoder_q-layer.11": 3608.4199, "encoder_q-layer.2": 1474.2518, "encoder_q-layer.3": 1481.8096, "encoder_q-layer.4": 1706.1052, "encoder_q-layer.5": 1686.8743, "encoder_q-layer.6": 1892.8872, "encoder_q-layer.7": 1882.0121, "encoder_q-layer.8": 1893.3217, "encoder_q-layer.9": 1652.314, "epoch": 0.84, "inbatch_neg_score": 1.4635, "inbatch_pos_score": 2.2363, "learning_rate": 7.666666666666667e-06, "loss": 2.5707, "norm_diff": 0.049, "norm_loss": 0.0, "num_token_doc": 66.6058, "num_token_overlap": 17.8097, "num_token_query": 52.285, "num_token_union": 73.6231, "num_word_context": 202.2143, "num_word_doc": 49.7264, "num_word_query": 39.871, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2748.7436, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4629, "query_norm": 1.7826, "queue_k_norm": 1.8299, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.285, "sent_len_1": 66.6058, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8225, "stdk": 0.0497, "stdq": 0.047, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 86200 }, { "accuracy": 58.1543, "active_queue_size": 16384.0, "cl_loss": 2.5745, "doc_norm": 1.8262, "encoder_q-embeddings": 2513.3979, "encoder_q-layer.0": 1595.4025, "encoder_q-layer.1": 1708.6206, "encoder_q-layer.10": 3810.2393, "encoder_q-layer.11": 7912.3247, "encoder_q-layer.2": 1936.9685, "encoder_q-layer.3": 2049.3479, "encoder_q-layer.4": 2216.9797, "encoder_q-layer.5": 2427.6101, "encoder_q-layer.6": 2695.3447, "encoder_q-layer.7": 3066.8379, "encoder_q-layer.8": 3894.4668, "encoder_q-layer.9": 3535.8821, "epoch": 0.84, "inbatch_neg_score": 1.4614, "inbatch_pos_score": 2.1914, "learning_rate": 7.611111111111112e-06, "loss": 2.5745, "norm_diff": 0.0583, "norm_loss": 0.0, "num_token_doc": 66.6805, "num_token_overlap": 17.76, "num_token_query": 52.2142, "num_token_union": 73.6607, "num_word_context": 202.1557, "num_word_doc": 49.7583, "num_word_query": 39.8188, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5073.9688, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4609, "query_norm": 1.7679, "queue_k_norm": 1.83, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2142, "sent_len_1": 66.6805, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0087, "stdk": 0.0494, "stdq": 0.0464, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 86300 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.5766, "doc_norm": 1.8289, "encoder_q-embeddings": 2786.9197, "encoder_q-layer.0": 1801.0262, "encoder_q-layer.1": 1962.2559, "encoder_q-layer.10": 3461.7703, "encoder_q-layer.11": 7468.4565, "encoder_q-layer.2": 2331.6001, "encoder_q-layer.3": 2497.7681, "encoder_q-layer.4": 2734.0769, "encoder_q-layer.5": 3030.9893, "encoder_q-layer.6": 3461.2458, "encoder_q-layer.7": 3646.3276, "encoder_q-layer.8": 3916.9534, "encoder_q-layer.9": 3478.259, "epoch": 0.84, "inbatch_neg_score": 1.4585, "inbatch_pos_score": 2.2051, "learning_rate": 7.555555555555556e-06, "loss": 2.5766, "norm_diff": 0.0617, "norm_loss": 0.0, "num_token_doc": 66.8167, "num_token_overlap": 17.8324, "num_token_query": 52.3118, "num_token_union": 73.7683, "num_word_context": 202.3512, "num_word_doc": 49.839, "num_word_query": 39.8956, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5298.0504, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.459, "query_norm": 1.7672, "queue_k_norm": 1.8302, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3118, "sent_len_1": 66.8167, "sent_len_max_0": 128.0, "sent_len_max_1": 210.2875, "stdk": 0.0495, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 86400 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.5642, "doc_norm": 1.8356, "encoder_q-embeddings": 2803.7219, "encoder_q-layer.0": 1808.0303, "encoder_q-layer.1": 2019.0958, "encoder_q-layer.10": 3628.6741, "encoder_q-layer.11": 7588.5269, "encoder_q-layer.2": 2387.2734, "encoder_q-layer.3": 2547.1863, "encoder_q-layer.4": 2712.688, "encoder_q-layer.5": 2698.7288, "encoder_q-layer.6": 3040.2024, "encoder_q-layer.7": 3311.5417, "encoder_q-layer.8": 3817.5898, "encoder_q-layer.9": 3435.1309, "epoch": 0.84, "inbatch_neg_score": 1.4598, "inbatch_pos_score": 2.2148, "learning_rate": 7.5e-06, "loss": 2.5642, "norm_diff": 0.0628, "norm_loss": 0.0, "num_token_doc": 66.7559, "num_token_overlap": 17.8188, "num_token_query": 52.3557, "num_token_union": 73.7246, "num_word_context": 202.4459, "num_word_doc": 49.8077, "num_word_query": 39.9214, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5243.2126, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.46, "query_norm": 1.7728, "queue_k_norm": 1.8306, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3557, "sent_len_1": 66.7559, "sent_len_max_0": 128.0, "sent_len_max_1": 210.15, "stdk": 0.0499, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 86500 }, { "accuracy": 63.5254, "active_queue_size": 16384.0, "cl_loss": 2.5628, "doc_norm": 1.8284, "encoder_q-embeddings": 2598.2986, "encoder_q-layer.0": 1688.4346, "encoder_q-layer.1": 1828.5872, "encoder_q-layer.10": 3534.4219, "encoder_q-layer.11": 7439.8662, "encoder_q-layer.2": 2117.813, "encoder_q-layer.3": 2359.3855, "encoder_q-layer.4": 2644.9707, "encoder_q-layer.5": 2600.5608, "encoder_q-layer.6": 2904.0356, "encoder_q-layer.7": 3067.2302, "encoder_q-layer.8": 3682.5586, "encoder_q-layer.9": 3398.5615, "epoch": 0.85, "inbatch_neg_score": 1.4546, "inbatch_pos_score": 2.2285, "learning_rate": 7.444444444444444e-06, "loss": 2.5628, "norm_diff": 0.0516, "norm_loss": 0.0, "num_token_doc": 66.9473, "num_token_overlap": 17.8782, "num_token_query": 52.3553, "num_token_union": 73.775, "num_word_context": 202.4478, "num_word_doc": 49.9343, "num_word_query": 39.9365, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4962.276, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4561, "query_norm": 1.7768, "queue_k_norm": 1.8306, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3553, "sent_len_1": 66.9473, "sent_len_max_0": 128.0, "sent_len_max_1": 211.1062, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 86600 }, { "accuracy": 61.1816, "active_queue_size": 16384.0, "cl_loss": 2.566, "doc_norm": 1.8314, "encoder_q-embeddings": 2365.4502, "encoder_q-layer.0": 1536.1573, "encoder_q-layer.1": 1708.5627, "encoder_q-layer.10": 3649.9919, "encoder_q-layer.11": 7583.9224, "encoder_q-layer.2": 1950.7939, "encoder_q-layer.3": 2032.5112, "encoder_q-layer.4": 2182.1321, "encoder_q-layer.5": 2453.6792, "encoder_q-layer.6": 2733.1125, "encoder_q-layer.7": 2989.4895, "encoder_q-layer.8": 3881.7126, "encoder_q-layer.9": 3440.218, "epoch": 0.85, "inbatch_neg_score": 1.457, "inbatch_pos_score": 2.2207, "learning_rate": 7.38888888888889e-06, "loss": 2.566, "norm_diff": 0.0513, "norm_loss": 0.0, "num_token_doc": 67.0404, "num_token_overlap": 17.8472, "num_token_query": 52.3051, "num_token_union": 73.8359, "num_word_context": 202.4757, "num_word_doc": 49.9619, "num_word_query": 39.881, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4967.2207, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.457, "query_norm": 1.7801, "queue_k_norm": 1.8305, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3051, "sent_len_1": 67.0404, "sent_len_max_0": 128.0, "sent_len_max_1": 212.76, "stdk": 0.0497, "stdq": 0.0471, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 86700 }, { "accuracy": 61.5234, "active_queue_size": 16384.0, "cl_loss": 2.5855, "doc_norm": 1.8271, "encoder_q-embeddings": 2372.7251, "encoder_q-layer.0": 1576.1969, "encoder_q-layer.1": 1724.3452, "encoder_q-layer.10": 3414.1333, "encoder_q-layer.11": 7174.4629, "encoder_q-layer.2": 1978.3605, "encoder_q-layer.3": 2026.1958, "encoder_q-layer.4": 2178.5112, "encoder_q-layer.5": 2280.3665, "encoder_q-layer.6": 2628.04, "encoder_q-layer.7": 3082.6809, "encoder_q-layer.8": 3658.5186, "encoder_q-layer.9": 3251.4219, "epoch": 0.85, "inbatch_neg_score": 1.4564, "inbatch_pos_score": 2.2188, "learning_rate": 7.333333333333334e-06, "loss": 2.5855, "norm_diff": 0.0597, "norm_loss": 0.0, "num_token_doc": 66.617, "num_token_overlap": 17.7546, "num_token_query": 52.1554, "num_token_union": 73.6052, "num_word_context": 202.0886, "num_word_doc": 49.7137, "num_word_query": 39.7973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4809.5562, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4561, "query_norm": 1.7674, "queue_k_norm": 1.8309, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1554, "sent_len_1": 66.617, "sent_len_max_0": 128.0, "sent_len_max_1": 206.355, "stdk": 0.0494, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 86800 }, { "accuracy": 61.2305, "active_queue_size": 16384.0, "cl_loss": 2.559, "doc_norm": 1.8265, "encoder_q-embeddings": 2357.5388, "encoder_q-layer.0": 1539.5756, "encoder_q-layer.1": 1669.4806, "encoder_q-layer.10": 3380.1863, "encoder_q-layer.11": 7171.1074, "encoder_q-layer.2": 1920.4813, "encoder_q-layer.3": 2029.6847, "encoder_q-layer.4": 2236.9868, "encoder_q-layer.5": 2267.8623, "encoder_q-layer.6": 2602.8774, "encoder_q-layer.7": 2863.4426, "encoder_q-layer.8": 3517.7559, "encoder_q-layer.9": 3237.6399, "epoch": 0.85, "inbatch_neg_score": 1.4587, "inbatch_pos_score": 2.2031, "learning_rate": 7.277777777777778e-06, "loss": 2.559, "norm_diff": 0.056, "norm_loss": 0.0, "num_token_doc": 66.9813, "num_token_overlap": 17.8183, "num_token_query": 52.2605, "num_token_union": 73.8127, "num_word_context": 202.3679, "num_word_doc": 49.9784, "num_word_query": 39.8619, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4703.3105, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.459, "query_norm": 1.7705, "queue_k_norm": 1.8317, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2605, "sent_len_1": 66.9813, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9075, "stdk": 0.0494, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 86900 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.5854, "doc_norm": 1.8295, "encoder_q-embeddings": 3121.7888, "encoder_q-layer.0": 2051.48, "encoder_q-layer.1": 2219.0679, "encoder_q-layer.10": 3557.8279, "encoder_q-layer.11": 7471.2002, "encoder_q-layer.2": 2500.5442, "encoder_q-layer.3": 2693.9456, "encoder_q-layer.4": 2909.741, "encoder_q-layer.5": 2900.0552, "encoder_q-layer.6": 3184.813, "encoder_q-layer.7": 3498.731, "encoder_q-layer.8": 3760.7852, "encoder_q-layer.9": 3415.4639, "epoch": 0.85, "inbatch_neg_score": 1.458, "inbatch_pos_score": 2.2246, "learning_rate": 7.222222222222222e-06, "loss": 2.5854, "norm_diff": 0.0455, "norm_loss": 0.0, "num_token_doc": 66.8022, "num_token_overlap": 17.8101, "num_token_query": 52.3477, "num_token_union": 73.7448, "num_word_context": 202.2363, "num_word_doc": 49.8197, "num_word_query": 39.8965, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5338.779, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.459, "query_norm": 1.784, "queue_k_norm": 1.8304, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3477, "sent_len_1": 66.8022, "sent_len_max_0": 128.0, "sent_len_max_1": 210.6163, "stdk": 0.0496, "stdq": 0.0472, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 87000 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.581, "doc_norm": 1.8334, "encoder_q-embeddings": 2578.3635, "encoder_q-layer.0": 1615.8818, "encoder_q-layer.1": 1833.8365, "encoder_q-layer.10": 3589.7725, "encoder_q-layer.11": 7754.4971, "encoder_q-layer.2": 2065.991, "encoder_q-layer.3": 2214.2087, "encoder_q-layer.4": 2336.3022, "encoder_q-layer.5": 2480.9934, "encoder_q-layer.6": 2892.4836, "encoder_q-layer.7": 3115.2168, "encoder_q-layer.8": 3837.2004, "encoder_q-layer.9": 3477.4866, "epoch": 0.85, "inbatch_neg_score": 1.4603, "inbatch_pos_score": 2.2148, "learning_rate": 7.166666666666667e-06, "loss": 2.581, "norm_diff": 0.0656, "norm_loss": 0.0, "num_token_doc": 66.6375, "num_token_overlap": 17.7502, "num_token_query": 52.1099, "num_token_union": 73.5551, "num_word_context": 202.1578, "num_word_doc": 49.7404, "num_word_query": 39.762, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5074.6608, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.46, "query_norm": 1.7678, "queue_k_norm": 1.8294, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1099, "sent_len_1": 66.6375, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5213, "stdk": 0.0497, "stdq": 0.0464, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 87100 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.5672, "doc_norm": 1.8273, "encoder_q-embeddings": 2336.2598, "encoder_q-layer.0": 1561.6123, "encoder_q-layer.1": 1667.5955, "encoder_q-layer.10": 3461.1304, "encoder_q-layer.11": 7612.0244, "encoder_q-layer.2": 1898.7894, "encoder_q-layer.3": 1966.7717, "encoder_q-layer.4": 2126.344, "encoder_q-layer.5": 2292.5408, "encoder_q-layer.6": 2650.738, "encoder_q-layer.7": 2895.4724, "encoder_q-layer.8": 3606.5532, "encoder_q-layer.9": 3296.7466, "epoch": 0.85, "inbatch_neg_score": 1.4621, "inbatch_pos_score": 2.2031, "learning_rate": 7.111111111111112e-06, "loss": 2.5672, "norm_diff": 0.055, "norm_loss": 0.0, "num_token_doc": 66.6918, "num_token_overlap": 17.8504, "num_token_query": 52.4036, "num_token_union": 73.7174, "num_word_context": 202.2717, "num_word_doc": 49.7724, "num_word_query": 39.9489, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4826.0284, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4619, "query_norm": 1.7724, "queue_k_norm": 1.8298, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4036, "sent_len_1": 66.6918, "sent_len_max_0": 128.0, "sent_len_max_1": 206.435, "stdk": 0.0495, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 87200 }, { "accuracy": 61.7188, "active_queue_size": 16384.0, "cl_loss": 2.5667, "doc_norm": 1.8327, "encoder_q-embeddings": 2706.0808, "encoder_q-layer.0": 1796.0278, "encoder_q-layer.1": 2044.8525, "encoder_q-layer.10": 3313.2485, "encoder_q-layer.11": 7191.5645, "encoder_q-layer.2": 2363.9858, "encoder_q-layer.3": 2560.4109, "encoder_q-layer.4": 2891.5542, "encoder_q-layer.5": 3159.4829, "encoder_q-layer.6": 3492.8652, "encoder_q-layer.7": 3703.9763, "encoder_q-layer.8": 3763.6743, "encoder_q-layer.9": 3223.4565, "epoch": 0.85, "inbatch_neg_score": 1.4588, "inbatch_pos_score": 2.2246, "learning_rate": 7.055555555555556e-06, "loss": 2.5667, "norm_diff": 0.0652, "norm_loss": 0.0, "num_token_doc": 66.8192, "num_token_overlap": 17.8512, "num_token_query": 52.285, "num_token_union": 73.7113, "num_word_context": 202.3234, "num_word_doc": 49.8629, "num_word_query": 39.8746, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5178.0368, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.459, "query_norm": 1.7675, "queue_k_norm": 1.8316, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.285, "sent_len_1": 66.8192, "sent_len_max_0": 128.0, "sent_len_max_1": 207.7738, "stdk": 0.0497, "stdq": 0.0463, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 87300 }, { "accuracy": 62.7441, "active_queue_size": 16384.0, "cl_loss": 2.5845, "doc_norm": 1.8299, "encoder_q-embeddings": 2309.2361, "encoder_q-layer.0": 1481.5812, "encoder_q-layer.1": 1612.7462, "encoder_q-layer.10": 3559.2844, "encoder_q-layer.11": 7253.6514, "encoder_q-layer.2": 1821.1676, "encoder_q-layer.3": 1906.5056, "encoder_q-layer.4": 2009.6906, "encoder_q-layer.5": 2120.4126, "encoder_q-layer.6": 2545.6965, "encoder_q-layer.7": 2901.4844, "encoder_q-layer.8": 3497.8127, "encoder_q-layer.9": 3389.8286, "epoch": 0.85, "inbatch_neg_score": 1.4619, "inbatch_pos_score": 2.2285, "learning_rate": 7.000000000000001e-06, "loss": 2.5845, "norm_diff": 0.0506, "norm_loss": 0.0, "num_token_doc": 66.5338, "num_token_overlap": 17.743, "num_token_query": 52.1705, "num_token_union": 73.5977, "num_word_context": 202.2024, "num_word_doc": 49.6955, "num_word_query": 39.7949, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4733.9356, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4619, "query_norm": 1.7793, "queue_k_norm": 1.8306, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1705, "sent_len_1": 66.5338, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5137, "stdk": 0.0496, "stdq": 0.0469, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 87400 }, { "accuracy": 63.5254, "active_queue_size": 16384.0, "cl_loss": 2.5753, "doc_norm": 1.833, "encoder_q-embeddings": 1204.0458, "encoder_q-layer.0": 774.0482, "encoder_q-layer.1": 853.4981, "encoder_q-layer.10": 1727.4104, "encoder_q-layer.11": 3739.1267, "encoder_q-layer.2": 951.9747, "encoder_q-layer.3": 1029.535, "encoder_q-layer.4": 1109.0542, "encoder_q-layer.5": 1211.7253, "encoder_q-layer.6": 1267.9863, "encoder_q-layer.7": 1480.0824, "encoder_q-layer.8": 1847.3612, "encoder_q-layer.9": 1657.5111, "epoch": 0.85, "inbatch_neg_score": 1.4659, "inbatch_pos_score": 2.25, "learning_rate": 6.944444444444445e-06, "loss": 2.5753, "norm_diff": 0.0512, "norm_loss": 0.0, "num_token_doc": 66.8259, "num_token_overlap": 17.8082, "num_token_query": 52.2897, "num_token_union": 73.7523, "num_word_context": 202.5353, "num_word_doc": 49.8516, "num_word_query": 39.8696, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2453.4456, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4648, "query_norm": 1.7818, "queue_k_norm": 1.8316, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2897, "sent_len_1": 66.8259, "sent_len_max_0": 128.0, "sent_len_max_1": 210.1775, "stdk": 0.0497, "stdq": 0.0469, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 87500 }, { "accuracy": 61.4746, "active_queue_size": 16384.0, "cl_loss": 2.5672, "doc_norm": 1.8289, "encoder_q-embeddings": 1313.0209, "encoder_q-layer.0": 862.4175, "encoder_q-layer.1": 946.551, "encoder_q-layer.10": 1811.2861, "encoder_q-layer.11": 3604.8127, "encoder_q-layer.2": 1094.0304, "encoder_q-layer.3": 1176.045, "encoder_q-layer.4": 1271.239, "encoder_q-layer.5": 1321.8153, "encoder_q-layer.6": 1472.6678, "encoder_q-layer.7": 1602.1982, "encoder_q-layer.8": 1934.4808, "encoder_q-layer.9": 1739.4163, "epoch": 0.86, "inbatch_neg_score": 1.4658, "inbatch_pos_score": 2.2266, "learning_rate": 6.888888888888889e-06, "loss": 2.5672, "norm_diff": 0.0463, "norm_loss": 0.0, "num_token_doc": 66.6828, "num_token_overlap": 17.8065, "num_token_query": 52.2746, "num_token_union": 73.6367, "num_word_context": 202.1499, "num_word_doc": 49.7571, "num_word_query": 39.8694, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2490.7027, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4658, "query_norm": 1.7826, "queue_k_norm": 1.8306, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2746, "sent_len_1": 66.6828, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5975, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 87600 }, { "accuracy": 62.5977, "active_queue_size": 16384.0, "cl_loss": 2.5754, "doc_norm": 1.8303, "encoder_q-embeddings": 1298.1084, "encoder_q-layer.0": 842.9285, "encoder_q-layer.1": 960.6976, "encoder_q-layer.10": 1706.4771, "encoder_q-layer.11": 3573.2231, "encoder_q-layer.2": 1070.6903, "encoder_q-layer.3": 1140.4609, "encoder_q-layer.4": 1249.1415, "encoder_q-layer.5": 1351.5609, "encoder_q-layer.6": 1432.4406, "encoder_q-layer.7": 1530.5212, "encoder_q-layer.8": 1890.8916, "encoder_q-layer.9": 1678.61, "epoch": 0.86, "inbatch_neg_score": 1.468, "inbatch_pos_score": 2.2188, "learning_rate": 6.833333333333333e-06, "loss": 2.5754, "norm_diff": 0.0532, "norm_loss": 0.0, "num_token_doc": 66.5462, "num_token_overlap": 17.7538, "num_token_query": 52.1877, "num_token_union": 73.5622, "num_word_context": 201.9165, "num_word_doc": 49.6444, "num_word_query": 39.8158, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2448.7926, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4678, "query_norm": 1.7771, "queue_k_norm": 1.8318, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1877, "sent_len_1": 66.5462, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2363, "stdk": 0.0496, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 87700 }, { "accuracy": 60.8887, "active_queue_size": 16384.0, "cl_loss": 2.5862, "doc_norm": 1.8303, "encoder_q-embeddings": 1170.3269, "encoder_q-layer.0": 778.6873, "encoder_q-layer.1": 863.0977, "encoder_q-layer.10": 1747.2421, "encoder_q-layer.11": 3637.7834, "encoder_q-layer.2": 973.5343, "encoder_q-layer.3": 1021.9805, "encoder_q-layer.4": 1094.714, "encoder_q-layer.5": 1125.3877, "encoder_q-layer.6": 1377.2911, "encoder_q-layer.7": 1614.4032, "encoder_q-layer.8": 1796.6559, "encoder_q-layer.9": 1630.0082, "epoch": 0.86, "inbatch_neg_score": 1.4698, "inbatch_pos_score": 2.2227, "learning_rate": 6.777777777777779e-06, "loss": 2.5862, "norm_diff": 0.0549, "norm_loss": 0.0, "num_token_doc": 66.6793, "num_token_overlap": 17.7672, "num_token_query": 52.2509, "num_token_union": 73.651, "num_word_context": 202.097, "num_word_doc": 49.7505, "num_word_query": 39.8438, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2393.6047, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4697, "query_norm": 1.7754, "queue_k_norm": 1.8317, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2509, "sent_len_1": 66.6793, "sent_len_max_0": 128.0, "sent_len_max_1": 207.3525, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 87800 }, { "accuracy": 62.3047, "active_queue_size": 16384.0, "cl_loss": 2.5676, "doc_norm": 1.8304, "encoder_q-embeddings": 1174.1912, "encoder_q-layer.0": 749.4403, "encoder_q-layer.1": 808.9438, "encoder_q-layer.10": 1689.2214, "encoder_q-layer.11": 3484.2383, "encoder_q-layer.2": 944.0598, "encoder_q-layer.3": 985.7153, "encoder_q-layer.4": 1059.5137, "encoder_q-layer.5": 1133.3622, "encoder_q-layer.6": 1293.6777, "encoder_q-layer.7": 1477.4791, "encoder_q-layer.8": 1768.9209, "encoder_q-layer.9": 1601.5536, "epoch": 0.86, "inbatch_neg_score": 1.4708, "inbatch_pos_score": 2.2344, "learning_rate": 6.722222222222223e-06, "loss": 2.5676, "norm_diff": 0.0502, "norm_loss": 0.0, "num_token_doc": 66.8555, "num_token_overlap": 17.805, "num_token_query": 52.2639, "num_token_union": 73.7418, "num_word_context": 202.1171, "num_word_doc": 49.8523, "num_word_query": 39.8388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2327.0493, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4697, "query_norm": 1.7802, "queue_k_norm": 1.8309, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2639, "sent_len_1": 66.8555, "sent_len_max_0": 128.0, "sent_len_max_1": 209.7912, "stdk": 0.0495, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 87900 }, { "accuracy": 62.0605, "active_queue_size": 16384.0, "cl_loss": 2.5594, "doc_norm": 1.8299, "encoder_q-embeddings": 1594.2935, "encoder_q-layer.0": 1167.1198, "encoder_q-layer.1": 1342.0354, "encoder_q-layer.10": 1674.4006, "encoder_q-layer.11": 3528.6655, "encoder_q-layer.2": 1553.2067, "encoder_q-layer.3": 1522.1908, "encoder_q-layer.4": 1684.756, "encoder_q-layer.5": 1481.2031, "encoder_q-layer.6": 1690.6709, "encoder_q-layer.7": 1811.2617, "encoder_q-layer.8": 1936.0176, "encoder_q-layer.9": 1611.614, "epoch": 0.86, "inbatch_neg_score": 1.4704, "inbatch_pos_score": 2.2324, "learning_rate": 6.666666666666667e-06, "loss": 2.5594, "norm_diff": 0.0539, "norm_loss": 0.0, "num_token_doc": 66.9075, "num_token_overlap": 17.8772, "num_token_query": 52.4077, "num_token_union": 73.8041, "num_word_context": 202.3154, "num_word_doc": 49.9176, "num_word_query": 39.9914, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2713.2404, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4717, "query_norm": 1.776, "queue_k_norm": 1.8334, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4077, "sent_len_1": 66.9075, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0888, "stdk": 0.0495, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 88000 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.5718, "doc_norm": 1.8277, "encoder_q-embeddings": 1582.8722, "encoder_q-layer.0": 1118.5789, "encoder_q-layer.1": 1249.1881, "encoder_q-layer.10": 1767.6356, "encoder_q-layer.11": 3675.5117, "encoder_q-layer.2": 1513.6501, "encoder_q-layer.3": 1571.5105, "encoder_q-layer.4": 1663.9828, "encoder_q-layer.5": 1824.2202, "encoder_q-layer.6": 2039.7058, "encoder_q-layer.7": 2186.9146, "encoder_q-layer.8": 2302.8245, "encoder_q-layer.9": 1756.9838, "epoch": 0.86, "inbatch_neg_score": 1.4745, "inbatch_pos_score": 2.2129, "learning_rate": 6.611111111111111e-06, "loss": 2.5718, "norm_diff": 0.0515, "norm_loss": 0.0, "num_token_doc": 66.8937, "num_token_overlap": 17.7973, "num_token_query": 52.2058, "num_token_union": 73.7247, "num_word_context": 202.317, "num_word_doc": 49.8908, "num_word_query": 39.816, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2917.4313, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4736, "query_norm": 1.7762, "queue_k_norm": 1.832, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2058, "sent_len_1": 66.8937, "sent_len_max_0": 128.0, "sent_len_max_1": 209.005, "stdk": 0.0494, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 88100 }, { "accuracy": 63.7207, "active_queue_size": 16384.0, "cl_loss": 2.5686, "doc_norm": 1.8316, "encoder_q-embeddings": 1106.8792, "encoder_q-layer.0": 747.8464, "encoder_q-layer.1": 857.1509, "encoder_q-layer.10": 1541.1696, "encoder_q-layer.11": 3535.8757, "encoder_q-layer.2": 941.5279, "encoder_q-layer.3": 973.1199, "encoder_q-layer.4": 1057.3032, "encoder_q-layer.5": 1109.4348, "encoder_q-layer.6": 1328.7217, "encoder_q-layer.7": 1474.4131, "encoder_q-layer.8": 1696.5165, "encoder_q-layer.9": 1528.8359, "epoch": 0.86, "inbatch_neg_score": 1.4731, "inbatch_pos_score": 2.2422, "learning_rate": 6.555555555555556e-06, "loss": 2.5686, "norm_diff": 0.054, "norm_loss": 0.0, "num_token_doc": 66.7024, "num_token_overlap": 17.7866, "num_token_query": 52.2504, "num_token_union": 73.7008, "num_word_context": 202.2948, "num_word_doc": 49.7778, "num_word_query": 39.853, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2286.9, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4736, "query_norm": 1.7776, "queue_k_norm": 1.8325, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2504, "sent_len_1": 66.7024, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9863, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 88200 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.5812, "doc_norm": 1.8328, "encoder_q-embeddings": 1467.936, "encoder_q-layer.0": 947.0936, "encoder_q-layer.1": 1103.4364, "encoder_q-layer.10": 1784.8541, "encoder_q-layer.11": 3946.4087, "encoder_q-layer.2": 1319.12, "encoder_q-layer.3": 1496.1896, "encoder_q-layer.4": 1619.4893, "encoder_q-layer.5": 1708.2261, "encoder_q-layer.6": 1825.7894, "encoder_q-layer.7": 1853.9993, "encoder_q-layer.8": 1942.5742, "encoder_q-layer.9": 1726.2698, "epoch": 0.86, "inbatch_neg_score": 1.4794, "inbatch_pos_score": 2.2207, "learning_rate": 6.5000000000000004e-06, "loss": 2.5812, "norm_diff": 0.0593, "norm_loss": 0.0, "num_token_doc": 66.7339, "num_token_overlap": 17.763, "num_token_query": 52.2459, "num_token_union": 73.6958, "num_word_context": 202.3399, "num_word_doc": 49.7733, "num_word_query": 39.8333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2810.1061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4795, "query_norm": 1.7735, "queue_k_norm": 1.833, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2459, "sent_len_1": 66.7339, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9812, "stdk": 0.0496, "stdq": 0.0462, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 88300 }, { "accuracy": 61.1816, "active_queue_size": 16384.0, "cl_loss": 2.572, "doc_norm": 1.8333, "encoder_q-embeddings": 1185.7533, "encoder_q-layer.0": 773.716, "encoder_q-layer.1": 846.3182, "encoder_q-layer.10": 1766.5239, "encoder_q-layer.11": 3728.6001, "encoder_q-layer.2": 950.1249, "encoder_q-layer.3": 995.4568, "encoder_q-layer.4": 1079.6896, "encoder_q-layer.5": 1142.568, "encoder_q-layer.6": 1292.3177, "encoder_q-layer.7": 1506.547, "encoder_q-layer.8": 1855.8656, "encoder_q-layer.9": 1711.6619, "epoch": 0.86, "inbatch_neg_score": 1.4816, "inbatch_pos_score": 2.2383, "learning_rate": 6.4444444444444445e-06, "loss": 2.572, "norm_diff": 0.0489, "norm_loss": 0.0, "num_token_doc": 66.852, "num_token_overlap": 17.8392, "num_token_query": 52.3255, "num_token_union": 73.7782, "num_word_context": 202.3264, "num_word_doc": 49.8866, "num_word_query": 39.9078, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2415.9672, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4814, "query_norm": 1.7844, "queue_k_norm": 1.8332, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3255, "sent_len_1": 66.852, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5075, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 88400 }, { "accuracy": 62.6465, "active_queue_size": 16384.0, "cl_loss": 2.5719, "doc_norm": 1.8321, "encoder_q-embeddings": 1363.4305, "encoder_q-layer.0": 970.9315, "encoder_q-layer.1": 1046.7203, "encoder_q-layer.10": 1706.7864, "encoder_q-layer.11": 3625.2517, "encoder_q-layer.2": 1224.5825, "encoder_q-layer.3": 1265.5908, "encoder_q-layer.4": 1339.657, "encoder_q-layer.5": 1342.5338, "encoder_q-layer.6": 1436.9338, "encoder_q-layer.7": 1649.3282, "encoder_q-layer.8": 1863.8245, "encoder_q-layer.9": 1673.6089, "epoch": 0.86, "inbatch_neg_score": 1.4849, "inbatch_pos_score": 2.2422, "learning_rate": 6.3888888888888885e-06, "loss": 2.5719, "norm_diff": 0.0464, "norm_loss": 0.0, "num_token_doc": 66.7829, "num_token_overlap": 17.8145, "num_token_query": 52.2586, "num_token_union": 73.6738, "num_word_context": 202.3672, "num_word_doc": 49.8056, "num_word_query": 39.8535, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2551.6785, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4844, "query_norm": 1.7857, "queue_k_norm": 1.8317, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2586, "sent_len_1": 66.7829, "sent_len_max_0": 128.0, "sent_len_max_1": 209.9263, "stdk": 0.0495, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 88500 }, { "accuracy": 62.8906, "active_queue_size": 16384.0, "cl_loss": 2.5594, "doc_norm": 1.836, "encoder_q-embeddings": 3646.3892, "encoder_q-layer.0": 2525.0369, "encoder_q-layer.1": 2868.8987, "encoder_q-layer.10": 1749.614, "encoder_q-layer.11": 3692.6169, "encoder_q-layer.2": 3681.1045, "encoder_q-layer.3": 4007.8958, "encoder_q-layer.4": 3915.4299, "encoder_q-layer.5": 4214.5767, "encoder_q-layer.6": 3909.6929, "encoder_q-layer.7": 2914.1204, "encoder_q-layer.8": 2519.4331, "encoder_q-layer.9": 1967.0967, "epoch": 0.87, "inbatch_neg_score": 1.4834, "inbatch_pos_score": 2.2559, "learning_rate": 6.333333333333334e-06, "loss": 2.5594, "norm_diff": 0.0447, "norm_loss": 0.0, "num_token_doc": 66.6986, "num_token_overlap": 17.788, "num_token_query": 52.2905, "num_token_union": 73.735, "num_word_context": 201.9524, "num_word_doc": 49.7934, "num_word_query": 39.8545, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4928.9642, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4854, "query_norm": 1.7914, "queue_k_norm": 1.8333, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2905, "sent_len_1": 66.6986, "sent_len_max_0": 128.0, "sent_len_max_1": 206.9487, "stdk": 0.0498, "stdq": 0.047, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 88600 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.5772, "doc_norm": 1.8294, "encoder_q-embeddings": 1159.6552, "encoder_q-layer.0": 739.2399, "encoder_q-layer.1": 795.7017, "encoder_q-layer.10": 2070.4944, "encoder_q-layer.11": 3987.0835, "encoder_q-layer.2": 902.5877, "encoder_q-layer.3": 996.4936, "encoder_q-layer.4": 1069.7102, "encoder_q-layer.5": 1128.1736, "encoder_q-layer.6": 1342.6366, "encoder_q-layer.7": 1535.1541, "encoder_q-layer.8": 1940.5587, "encoder_q-layer.9": 1778.59, "epoch": 0.87, "inbatch_neg_score": 1.4847, "inbatch_pos_score": 2.2441, "learning_rate": 6.277777777777778e-06, "loss": 2.5772, "norm_diff": 0.0446, "norm_loss": 0.0, "num_token_doc": 66.751, "num_token_overlap": 17.7785, "num_token_query": 52.298, "num_token_union": 73.7359, "num_word_context": 202.6421, "num_word_doc": 49.8177, "num_word_query": 39.8842, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2491.8541, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4844, "query_norm": 1.7848, "queue_k_norm": 1.8334, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.298, "sent_len_1": 66.751, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3738, "stdk": 0.0494, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 88700 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 2.5562, "doc_norm": 1.8362, "encoder_q-embeddings": 1252.7764, "encoder_q-layer.0": 814.6114, "encoder_q-layer.1": 910.8813, "encoder_q-layer.10": 1702.2833, "encoder_q-layer.11": 3552.4587, "encoder_q-layer.2": 1060.8933, "encoder_q-layer.3": 1160.196, "encoder_q-layer.4": 1252.6227, "encoder_q-layer.5": 1313.9456, "encoder_q-layer.6": 1498.483, "encoder_q-layer.7": 1635.9418, "encoder_q-layer.8": 1839.3534, "encoder_q-layer.9": 1623.0736, "epoch": 0.87, "inbatch_neg_score": 1.4854, "inbatch_pos_score": 2.2559, "learning_rate": 6.222222222222222e-06, "loss": 2.5562, "norm_diff": 0.0494, "norm_loss": 0.0, "num_token_doc": 66.7186, "num_token_overlap": 17.8006, "num_token_query": 52.2237, "num_token_union": 73.6645, "num_word_context": 202.1906, "num_word_doc": 49.8234, "num_word_query": 39.8459, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2446.3941, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4854, "query_norm": 1.7868, "queue_k_norm": 1.835, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2237, "sent_len_1": 66.7186, "sent_len_max_0": 128.0, "sent_len_max_1": 207.8038, "stdk": 0.0497, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 88800 }, { "accuracy": 61.5234, "active_queue_size": 16384.0, "cl_loss": 2.5679, "doc_norm": 1.8365, "encoder_q-embeddings": 1471.078, "encoder_q-layer.0": 993.8126, "encoder_q-layer.1": 1160.1161, "encoder_q-layer.10": 1871.917, "encoder_q-layer.11": 3969.2803, "encoder_q-layer.2": 1334.5107, "encoder_q-layer.3": 1502.0265, "encoder_q-layer.4": 1544.3529, "encoder_q-layer.5": 1757.6647, "encoder_q-layer.6": 1919.6621, "encoder_q-layer.7": 1796.2214, "encoder_q-layer.8": 1979.0093, "encoder_q-layer.9": 1817.6833, "epoch": 0.87, "inbatch_neg_score": 1.4867, "inbatch_pos_score": 2.252, "learning_rate": 6.166666666666667e-06, "loss": 2.5679, "norm_diff": 0.0501, "norm_loss": 0.0, "num_token_doc": 66.8338, "num_token_overlap": 17.8254, "num_token_query": 52.3484, "num_token_union": 73.7966, "num_word_context": 202.3858, "num_word_doc": 49.9034, "num_word_query": 39.9429, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2856.3217, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4873, "query_norm": 1.7864, "queue_k_norm": 1.8362, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3484, "sent_len_1": 66.8338, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6687, "stdk": 0.0497, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 88900 }, { "accuracy": 62.3047, "active_queue_size": 16384.0, "cl_loss": 2.5585, "doc_norm": 1.8387, "encoder_q-embeddings": 2060.5286, "encoder_q-layer.0": 1509.7351, "encoder_q-layer.1": 1752.9395, "encoder_q-layer.10": 1951.0566, "encoder_q-layer.11": 3841.5671, "encoder_q-layer.2": 1965.7357, "encoder_q-layer.3": 2170.6301, "encoder_q-layer.4": 2456.1294, "encoder_q-layer.5": 2310.4146, "encoder_q-layer.6": 2521.3784, "encoder_q-layer.7": 2514.3015, "encoder_q-layer.8": 2466.1533, "encoder_q-layer.9": 1830.288, "epoch": 0.87, "inbatch_neg_score": 1.4846, "inbatch_pos_score": 2.2578, "learning_rate": 6.111111111111111e-06, "loss": 2.5585, "norm_diff": 0.0499, "norm_loss": 0.0, "num_token_doc": 66.879, "num_token_overlap": 17.809, "num_token_query": 52.2978, "num_token_union": 73.7698, "num_word_context": 202.5601, "num_word_doc": 49.907, "num_word_query": 39.8725, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3453.1641, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4863, "query_norm": 1.7888, "queue_k_norm": 1.8367, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2978, "sent_len_1": 66.879, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0263, "stdk": 0.0498, "stdq": 0.047, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 89000 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.5657, "doc_norm": 1.8335, "encoder_q-embeddings": 1352.4464, "encoder_q-layer.0": 868.212, "encoder_q-layer.1": 989.803, "encoder_q-layer.10": 1932.0908, "encoder_q-layer.11": 3800.7573, "encoder_q-layer.2": 1134.7422, "encoder_q-layer.3": 1180.0381, "encoder_q-layer.4": 1265.4504, "encoder_q-layer.5": 1317.2445, "encoder_q-layer.6": 1427.9327, "encoder_q-layer.7": 1624.489, "encoder_q-layer.8": 1964.7032, "encoder_q-layer.9": 1765.6418, "epoch": 0.87, "inbatch_neg_score": 1.4907, "inbatch_pos_score": 2.2461, "learning_rate": 6.055555555555556e-06, "loss": 2.5657, "norm_diff": 0.0444, "norm_loss": 0.0, "num_token_doc": 66.7656, "num_token_overlap": 17.8113, "num_token_query": 52.2687, "num_token_union": 73.6914, "num_word_context": 202.2173, "num_word_doc": 49.8161, "num_word_query": 39.8571, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2564.3491, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4893, "query_norm": 1.7891, "queue_k_norm": 1.8356, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2687, "sent_len_1": 66.7656, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2575, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 89100 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.5724, "doc_norm": 1.836, "encoder_q-embeddings": 2001.4324, "encoder_q-layer.0": 1400.7244, "encoder_q-layer.1": 1586.8446, "encoder_q-layer.10": 1759.7333, "encoder_q-layer.11": 3821.2341, "encoder_q-layer.2": 1888.549, "encoder_q-layer.3": 2125.0337, "encoder_q-layer.4": 2427.001, "encoder_q-layer.5": 2741.949, "encoder_q-layer.6": 2920.9263, "encoder_q-layer.7": 2548.4839, "encoder_q-layer.8": 2396.6799, "encoder_q-layer.9": 1805.2153, "epoch": 0.87, "inbatch_neg_score": 1.4915, "inbatch_pos_score": 2.2402, "learning_rate": 6e-06, "loss": 2.5724, "norm_diff": 0.0586, "norm_loss": 0.0, "num_token_doc": 66.6706, "num_token_overlap": 17.7632, "num_token_query": 52.1535, "num_token_union": 73.6362, "num_word_context": 202.2237, "num_word_doc": 49.7611, "num_word_query": 39.7754, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3499.404, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4922, "query_norm": 1.7774, "queue_k_norm": 1.8354, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1535, "sent_len_1": 66.6706, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3787, "stdk": 0.0496, "stdq": 0.0463, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 89200 }, { "accuracy": 61.6699, "active_queue_size": 16384.0, "cl_loss": 2.555, "doc_norm": 1.8357, "encoder_q-embeddings": 1257.1641, "encoder_q-layer.0": 789.3657, "encoder_q-layer.1": 890.7291, "encoder_q-layer.10": 1807.191, "encoder_q-layer.11": 3797.189, "encoder_q-layer.2": 1028.2976, "encoder_q-layer.3": 1126.0958, "encoder_q-layer.4": 1218.8978, "encoder_q-layer.5": 1373.908, "encoder_q-layer.6": 1538.7067, "encoder_q-layer.7": 1703.8257, "encoder_q-layer.8": 1862.704, "encoder_q-layer.9": 1670.8077, "epoch": 0.87, "inbatch_neg_score": 1.4903, "inbatch_pos_score": 2.25, "learning_rate": 5.944444444444445e-06, "loss": 2.555, "norm_diff": 0.0546, "norm_loss": 0.0, "num_token_doc": 66.9103, "num_token_overlap": 17.822, "num_token_query": 52.356, "num_token_union": 73.8242, "num_word_context": 202.6609, "num_word_doc": 49.9098, "num_word_query": 39.9358, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2536.8051, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4912, "query_norm": 1.7811, "queue_k_norm": 1.8358, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.356, "sent_len_1": 66.9103, "sent_len_max_0": 128.0, "sent_len_max_1": 211.8225, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 89300 }, { "accuracy": 62.3535, "active_queue_size": 16384.0, "cl_loss": 2.5719, "doc_norm": 1.8398, "encoder_q-embeddings": 1280.9979, "encoder_q-layer.0": 826.1464, "encoder_q-layer.1": 927.9602, "encoder_q-layer.10": 2034.0859, "encoder_q-layer.11": 3930.0398, "encoder_q-layer.2": 1078.1746, "encoder_q-layer.3": 1186.4495, "encoder_q-layer.4": 1290.8494, "encoder_q-layer.5": 1355.6467, "encoder_q-layer.6": 1583.3821, "encoder_q-layer.7": 1684.5159, "encoder_q-layer.8": 1912.7163, "encoder_q-layer.9": 1837.6096, "epoch": 0.87, "inbatch_neg_score": 1.4894, "inbatch_pos_score": 2.2539, "learning_rate": 5.888888888888889e-06, "loss": 2.5719, "norm_diff": 0.0618, "norm_loss": 0.0, "num_token_doc": 66.8493, "num_token_overlap": 17.8121, "num_token_query": 52.2186, "num_token_union": 73.7288, "num_word_context": 202.2518, "num_word_doc": 49.9147, "num_word_query": 39.8319, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2593.2599, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4902, "query_norm": 1.778, "queue_k_norm": 1.8376, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2186, "sent_len_1": 66.8493, "sent_len_max_0": 128.0, "sent_len_max_1": 207.7325, "stdk": 0.0498, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 89400 }, { "accuracy": 62.207, "active_queue_size": 16384.0, "cl_loss": 2.5597, "doc_norm": 1.8384, "encoder_q-embeddings": 2395.8103, "encoder_q-layer.0": 1577.0299, "encoder_q-layer.1": 1726.0402, "encoder_q-layer.10": 3388.5762, "encoder_q-layer.11": 7078.2788, "encoder_q-layer.2": 2022.0929, "encoder_q-layer.3": 2168.8821, "encoder_q-layer.4": 2360.7366, "encoder_q-layer.5": 2448.4231, "encoder_q-layer.6": 2731.2097, "encoder_q-layer.7": 3177.1697, "encoder_q-layer.8": 3677.4465, "encoder_q-layer.9": 3227.8364, "epoch": 0.87, "inbatch_neg_score": 1.4895, "inbatch_pos_score": 2.2578, "learning_rate": 5.833333333333334e-06, "loss": 2.5597, "norm_diff": 0.0484, "norm_loss": 0.0, "num_token_doc": 66.8438, "num_token_overlap": 17.8003, "num_token_query": 52.2557, "num_token_union": 73.7635, "num_word_context": 202.4087, "num_word_doc": 49.8834, "num_word_query": 39.8646, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4765.1419, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4912, "query_norm": 1.79, "queue_k_norm": 1.8373, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2557, "sent_len_1": 66.8438, "sent_len_max_0": 128.0, "sent_len_max_1": 207.985, "stdk": 0.0497, "stdq": 0.047, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 89500 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.571, "doc_norm": 1.8357, "encoder_q-embeddings": 2388.0879, "encoder_q-layer.0": 1554.5079, "encoder_q-layer.1": 1746.4391, "encoder_q-layer.10": 3577.063, "encoder_q-layer.11": 7418.7222, "encoder_q-layer.2": 1976.8663, "encoder_q-layer.3": 2087.5635, "encoder_q-layer.4": 2275.8735, "encoder_q-layer.5": 2359.9846, "encoder_q-layer.6": 2656.5029, "encoder_q-layer.7": 2971.9431, "encoder_q-layer.8": 3558.988, "encoder_q-layer.9": 3180.6836, "epoch": 0.87, "inbatch_neg_score": 1.495, "inbatch_pos_score": 2.2422, "learning_rate": 5.777777777777778e-06, "loss": 2.571, "norm_diff": 0.0549, "norm_loss": 0.0, "num_token_doc": 66.9598, "num_token_overlap": 17.7758, "num_token_query": 52.2589, "num_token_union": 73.8568, "num_word_context": 202.6189, "num_word_doc": 49.9821, "num_word_query": 39.8648, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4875.5824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4941, "query_norm": 1.7809, "queue_k_norm": 1.8377, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2589, "sent_len_1": 66.9598, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9212, "stdk": 0.0495, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 89600 }, { "accuracy": 63.0371, "active_queue_size": 16384.0, "cl_loss": 2.5731, "doc_norm": 1.8375, "encoder_q-embeddings": 2353.7588, "encoder_q-layer.0": 1486.8398, "encoder_q-layer.1": 1652.7808, "encoder_q-layer.10": 3655.717, "encoder_q-layer.11": 7585.1387, "encoder_q-layer.2": 1924.0948, "encoder_q-layer.3": 2042.1299, "encoder_q-layer.4": 2217.1016, "encoder_q-layer.5": 2343.3376, "encoder_q-layer.6": 2643.4988, "encoder_q-layer.7": 2970.0496, "encoder_q-layer.8": 3749.4878, "encoder_q-layer.9": 3555.6489, "epoch": 0.88, "inbatch_neg_score": 1.4933, "inbatch_pos_score": 2.2598, "learning_rate": 5.722222222222223e-06, "loss": 2.5731, "norm_diff": 0.053, "norm_loss": 0.0, "num_token_doc": 66.8083, "num_token_overlap": 17.8214, "num_token_query": 52.4147, "num_token_union": 73.8018, "num_word_context": 202.2929, "num_word_doc": 49.8515, "num_word_query": 39.9824, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4920.054, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4951, "query_norm": 1.7845, "queue_k_norm": 1.837, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4147, "sent_len_1": 66.8083, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9762, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 89700 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.5592, "doc_norm": 1.8366, "encoder_q-embeddings": 2457.2583, "encoder_q-layer.0": 1593.0996, "encoder_q-layer.1": 1735.2794, "encoder_q-layer.10": 3560.7222, "encoder_q-layer.11": 7827.3105, "encoder_q-layer.2": 1968.8994, "encoder_q-layer.3": 2055.686, "encoder_q-layer.4": 2211.9482, "encoder_q-layer.5": 2285.2468, "encoder_q-layer.6": 2595.0205, "encoder_q-layer.7": 3087.0828, "encoder_q-layer.8": 3769.5117, "encoder_q-layer.9": 3534.9282, "epoch": 0.88, "inbatch_neg_score": 1.497, "inbatch_pos_score": 2.2363, "learning_rate": 5.666666666666667e-06, "loss": 2.5592, "norm_diff": 0.0584, "norm_loss": 0.0, "num_token_doc": 66.9056, "num_token_overlap": 17.8843, "num_token_query": 52.4209, "num_token_union": 73.7722, "num_word_context": 202.4848, "num_word_doc": 49.9194, "num_word_query": 39.9993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5027.1774, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4961, "query_norm": 1.7781, "queue_k_norm": 1.8383, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4209, "sent_len_1": 66.9056, "sent_len_max_0": 128.0, "sent_len_max_1": 207.2713, "stdk": 0.0495, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 89800 }, { "accuracy": 62.1582, "active_queue_size": 16384.0, "cl_loss": 2.5653, "doc_norm": 1.84, "encoder_q-embeddings": 4441.4282, "encoder_q-layer.0": 2936.0127, "encoder_q-layer.1": 3224.3562, "encoder_q-layer.10": 3331.0435, "encoder_q-layer.11": 7378.9478, "encoder_q-layer.2": 3578.1289, "encoder_q-layer.3": 3698.7373, "encoder_q-layer.4": 3544.5352, "encoder_q-layer.5": 3900.666, "encoder_q-layer.6": 4174.917, "encoder_q-layer.7": 4769.2124, "encoder_q-layer.8": 4439.2915, "encoder_q-layer.9": 3337.7075, "epoch": 0.88, "inbatch_neg_score": 1.4939, "inbatch_pos_score": 2.2617, "learning_rate": 5.611111111111112e-06, "loss": 2.5653, "norm_diff": 0.0596, "norm_loss": 0.0, "num_token_doc": 66.7974, "num_token_overlap": 17.8024, "num_token_query": 52.2986, "num_token_union": 73.7209, "num_word_context": 202.2609, "num_word_doc": 49.8246, "num_word_query": 39.8714, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6307.4881, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4941, "query_norm": 1.7804, "queue_k_norm": 1.8382, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2986, "sent_len_1": 66.7974, "sent_len_max_0": 128.0, "sent_len_max_1": 211.4325, "stdk": 0.0497, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 89900 }, { "accuracy": 61.4746, "active_queue_size": 16384.0, "cl_loss": 2.565, "doc_norm": 1.8413, "encoder_q-embeddings": 2491.1226, "encoder_q-layer.0": 1551.4688, "encoder_q-layer.1": 1731.2145, "encoder_q-layer.10": 3644.3267, "encoder_q-layer.11": 7485.9058, "encoder_q-layer.2": 2018.7271, "encoder_q-layer.3": 2093.2954, "encoder_q-layer.4": 2292.228, "encoder_q-layer.5": 2371.7815, "encoder_q-layer.6": 2837.3765, "encoder_q-layer.7": 3095.5308, "encoder_q-layer.8": 3725.5264, "encoder_q-layer.9": 3344.2361, "epoch": 0.88, "inbatch_neg_score": 1.4946, "inbatch_pos_score": 2.2539, "learning_rate": 5.555555555555556e-06, "loss": 2.565, "norm_diff": 0.056, "norm_loss": 0.0, "num_token_doc": 66.6682, "num_token_overlap": 17.8319, "num_token_query": 52.3593, "num_token_union": 73.6586, "num_word_context": 202.2249, "num_word_doc": 49.7359, "num_word_query": 39.9284, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4972.2263, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4941, "query_norm": 1.7853, "queue_k_norm": 1.8392, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3593, "sent_len_1": 66.6682, "sent_len_max_0": 128.0, "sent_len_max_1": 209.795, "stdk": 0.0498, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 90000 }, { "dev_runtime": 26.1531, "dev_samples_per_second": 1.224, "dev_steps_per_second": 0.038, "epoch": 0.88, "step": 90000, "test_accuracy": 94.4580078125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3103713393211365, "test_doc_norm": 1.811658263206482, "test_inbatch_neg_score": 1.7921929359436035, "test_inbatch_pos_score": 2.8145999908447266, "test_loss": 0.3103713393211365, "test_loss_align": 0.9648447036743164, "test_loss_unif": -0.2542109191417694, "test_loss_unif_q@queue": -0.2542109191417694, "test_norm_diff": 0.005566880106925964, "test_norm_loss": 0.0, "test_q@queue_neg_score": 1.487644076347351, "test_query_norm": 1.8149901628494263, "test_queue_k_norm": 1.8391661643981934, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04397153854370117, "test_stdq": 0.04380284994840622, "test_stdqueue_k": 0.04971005767583847, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.1531, "dev_samples_per_second": 1.224, "dev_steps_per_second": 0.038, "epoch": 0.88, "eval_beir-arguana_ndcg@10": 0.3941, "eval_beir-arguana_recall@10": 0.67354, "eval_beir-arguana_recall@100": 0.93741, "eval_beir-arguana_recall@20": 0.79801, "eval_beir-avg_ndcg@10": 0.38828108333333333, "eval_beir-avg_recall@10": 0.4623798333333333, "eval_beir-avg_recall@100": 0.640869, "eval_beir-avg_recall@20": 0.52339325, "eval_beir-cqadupstack_ndcg@10": 0.2928908333333333, "eval_beir-cqadupstack_recall@10": 0.39178833333333324, "eval_beir-cqadupstack_recall@100": 0.62368, "eval_beir-cqadupstack_recall@20": 0.4607825000000001, "eval_beir-fiqa_ndcg@10": 0.26853, "eval_beir-fiqa_recall@10": 0.33306, "eval_beir-fiqa_recall@100": 0.61227, "eval_beir-fiqa_recall@20": 0.4139, "eval_beir-nfcorpus_ndcg@10": 0.30108, "eval_beir-nfcorpus_recall@10": 0.15249, "eval_beir-nfcorpus_recall@100": 0.28273, "eval_beir-nfcorpus_recall@20": 0.18089, "eval_beir-nq_ndcg@10": 0.29428, "eval_beir-nq_recall@10": 0.47907, "eval_beir-nq_recall@100": 0.81499, "eval_beir-nq_recall@20": 0.59639, "eval_beir-quora_ndcg@10": 0.78077, "eval_beir-quora_recall@10": 0.88924, "eval_beir-quora_recall@100": 0.97906, "eval_beir-quora_recall@20": 0.92999, "eval_beir-scidocs_ndcg@10": 0.16666, "eval_beir-scidocs_recall@10": 0.17358, "eval_beir-scidocs_recall@100": 0.38227, "eval_beir-scidocs_recall@20": 0.23243, "eval_beir-scifact_ndcg@10": 0.65152, "eval_beir-scifact_recall@10": 0.81744, "eval_beir-scifact_recall@100": 0.90656, "eval_beir-scifact_recall@20": 0.85644, "eval_beir-trec-covid_ndcg@10": 0.53232, "eval_beir-trec-covid_recall@10": 0.568, "eval_beir-trec-covid_recall@100": 0.437, "eval_beir-trec-covid_recall@20": 0.556, "eval_beir-webis-touche2020_ndcg@10": 0.20066, "eval_beir-webis-touche2020_recall@10": 0.14559, "eval_beir-webis-touche2020_recall@100": 0.43272, "eval_beir-webis-touche2020_recall@20": 0.2091, "eval_senteval-avg_sts": 0.7459579262387156, "eval_senteval-sickr_spearman": 0.7318643019429539, "eval_senteval-stsb_spearman": 0.7600515505344773, "step": 90000, "test_accuracy": 94.4580078125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3103713393211365, "test_doc_norm": 1.811658263206482, "test_inbatch_neg_score": 1.7921929359436035, "test_inbatch_pos_score": 2.8145999908447266, "test_loss": 0.3103713393211365, "test_loss_align": 0.9648447036743164, "test_loss_unif": -0.2542109191417694, "test_loss_unif_q@queue": -0.2542109191417694, "test_norm_diff": 0.005566880106925964, "test_norm_loss": 0.0, "test_q@queue_neg_score": 1.487644076347351, "test_query_norm": 1.8149901628494263, "test_queue_k_norm": 1.8391661643981934, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04397153854370117, "test_stdq": 0.04380284994840622, "test_stdqueue_k": 0.04971005767583847, "test_stdqueue_q": 0.0 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.5564, "doc_norm": 1.8376, "encoder_q-embeddings": 2866.9675, "encoder_q-layer.0": 1901.0919, "encoder_q-layer.1": 2177.8672, "encoder_q-layer.10": 3430.6638, "encoder_q-layer.11": 7472.6689, "encoder_q-layer.2": 2501.9551, "encoder_q-layer.3": 2613.782, "encoder_q-layer.4": 2873.7786, "encoder_q-layer.5": 2968.2046, "encoder_q-layer.6": 3329.3296, "encoder_q-layer.7": 3474.6641, "encoder_q-layer.8": 3967.5498, "encoder_q-layer.9": 3484.8447, "epoch": 0.88, "inbatch_neg_score": 1.4968, "inbatch_pos_score": 2.2402, "learning_rate": 5.500000000000001e-06, "loss": 2.5564, "norm_diff": 0.0572, "norm_loss": 0.0, "num_token_doc": 66.9122, "num_token_overlap": 17.8317, "num_token_query": 52.37, "num_token_union": 73.8483, "num_word_context": 202.5206, "num_word_doc": 49.9438, "num_word_query": 39.9558, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5360.3135, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4971, "query_norm": 1.7805, "queue_k_norm": 1.8377, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.37, "sent_len_1": 66.9122, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9938, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 90100 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.5653, "doc_norm": 1.8385, "encoder_q-embeddings": 1571.5183, "encoder_q-layer.0": 1082.792, "encoder_q-layer.1": 1222.9496, "encoder_q-layer.10": 1744.173, "encoder_q-layer.11": 3696.7173, "encoder_q-layer.2": 1406.6953, "encoder_q-layer.3": 1465.5267, "encoder_q-layer.4": 1623.7886, "encoder_q-layer.5": 1730.449, "encoder_q-layer.6": 1943.8116, "encoder_q-layer.7": 2019.5947, "encoder_q-layer.8": 2182.0759, "encoder_q-layer.9": 1771.832, "epoch": 0.88, "inbatch_neg_score": 1.4944, "inbatch_pos_score": 2.2578, "learning_rate": 5.444444444444445e-06, "loss": 2.5653, "norm_diff": 0.0554, "norm_loss": 0.0, "num_token_doc": 66.7205, "num_token_overlap": 17.7557, "num_token_query": 52.2637, "num_token_union": 73.7422, "num_word_context": 202.2243, "num_word_doc": 49.8017, "num_word_query": 39.8734, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2852.237, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4951, "query_norm": 1.783, "queue_k_norm": 1.8387, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2637, "sent_len_1": 66.7205, "sent_len_max_0": 128.0, "sent_len_max_1": 206.8587, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 90200 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.5778, "doc_norm": 1.839, "encoder_q-embeddings": 1229.1562, "encoder_q-layer.0": 804.061, "encoder_q-layer.1": 888.6647, "encoder_q-layer.10": 1702.3846, "encoder_q-layer.11": 3691.2886, "encoder_q-layer.2": 1013.8018, "encoder_q-layer.3": 1042.2415, "encoder_q-layer.4": 1118.0243, "encoder_q-layer.5": 1139.1498, "encoder_q-layer.6": 1315.4659, "encoder_q-layer.7": 1466.5518, "encoder_q-layer.8": 1685.2697, "encoder_q-layer.9": 1620.7178, "epoch": 0.88, "inbatch_neg_score": 1.4971, "inbatch_pos_score": 2.2461, "learning_rate": 5.388888888888889e-06, "loss": 2.5778, "norm_diff": 0.0605, "norm_loss": 0.0, "num_token_doc": 66.8701, "num_token_overlap": 17.7863, "num_token_query": 52.2415, "num_token_union": 73.7791, "num_word_context": 202.4225, "num_word_doc": 49.8887, "num_word_query": 39.8511, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2386.8347, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4961, "query_norm": 1.7784, "queue_k_norm": 1.8396, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2415, "sent_len_1": 66.8701, "sent_len_max_0": 128.0, "sent_len_max_1": 208.0037, "stdk": 0.0496, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 90300 }, { "accuracy": 62.207, "active_queue_size": 16384.0, "cl_loss": 2.5826, "doc_norm": 1.8392, "encoder_q-embeddings": 1348.0115, "encoder_q-layer.0": 847.2619, "encoder_q-layer.1": 936.52, "encoder_q-layer.10": 1708.4736, "encoder_q-layer.11": 3617.4033, "encoder_q-layer.2": 1079.6935, "encoder_q-layer.3": 1149.5791, "encoder_q-layer.4": 1313.0189, "encoder_q-layer.5": 1339.2517, "encoder_q-layer.6": 1443.8894, "encoder_q-layer.7": 1681.1887, "encoder_q-layer.8": 1905.6521, "encoder_q-layer.9": 1773.3827, "epoch": 0.88, "inbatch_neg_score": 1.4963, "inbatch_pos_score": 2.2559, "learning_rate": 5.333333333333334e-06, "loss": 2.5826, "norm_diff": 0.0547, "norm_loss": 0.0, "num_token_doc": 66.7374, "num_token_overlap": 17.7537, "num_token_query": 52.1026, "num_token_union": 73.6046, "num_word_context": 202.2238, "num_word_doc": 49.7736, "num_word_query": 39.7122, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2514.0157, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4961, "query_norm": 1.7846, "queue_k_norm": 1.8402, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1026, "sent_len_1": 66.7374, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0975, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 90400 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.5836, "doc_norm": 1.8363, "encoder_q-embeddings": 1260.5248, "encoder_q-layer.0": 805.2536, "encoder_q-layer.1": 918.0944, "encoder_q-layer.10": 1959.2969, "encoder_q-layer.11": 4032.0859, "encoder_q-layer.2": 1053.9613, "encoder_q-layer.3": 1144.3433, "encoder_q-layer.4": 1241.4274, "encoder_q-layer.5": 1302.1486, "encoder_q-layer.6": 1502.4819, "encoder_q-layer.7": 1701.321, "encoder_q-layer.8": 2039.6198, "encoder_q-layer.9": 1818.2817, "epoch": 0.88, "inbatch_neg_score": 1.4968, "inbatch_pos_score": 2.252, "learning_rate": 5.277777777777778e-06, "loss": 2.5836, "norm_diff": 0.0544, "norm_loss": 0.0, "num_token_doc": 66.7684, "num_token_overlap": 17.7677, "num_token_query": 52.185, "num_token_union": 73.7039, "num_word_context": 202.4618, "num_word_doc": 49.8168, "num_word_query": 39.7886, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2586.9384, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4971, "query_norm": 1.7819, "queue_k_norm": 1.8409, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.185, "sent_len_1": 66.7684, "sent_len_max_0": 128.0, "sent_len_max_1": 207.2, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0498, "stdqueue_q": 0.0, "step": 90500 }, { "accuracy": 63.2324, "active_queue_size": 16384.0, "cl_loss": 2.5532, "doc_norm": 1.8422, "encoder_q-embeddings": 1333.3657, "encoder_q-layer.0": 899.0897, "encoder_q-layer.1": 995.8982, "encoder_q-layer.10": 1763.9589, "encoder_q-layer.11": 3618.053, "encoder_q-layer.2": 1156.5903, "encoder_q-layer.3": 1267.3484, "encoder_q-layer.4": 1412.3979, "encoder_q-layer.5": 1529.7502, "encoder_q-layer.6": 1718.6826, "encoder_q-layer.7": 1891.4775, "encoder_q-layer.8": 2093.1438, "encoder_q-layer.9": 1737.5645, "epoch": 0.88, "inbatch_neg_score": 1.4984, "inbatch_pos_score": 2.2734, "learning_rate": 5.2222222222222226e-06, "loss": 2.5532, "norm_diff": 0.0498, "norm_loss": 0.0, "num_token_doc": 66.8147, "num_token_overlap": 17.8199, "num_token_query": 52.3375, "num_token_union": 73.7637, "num_word_context": 202.1224, "num_word_doc": 49.8397, "num_word_query": 39.9177, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2617.2651, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.498, "query_norm": 1.7923, "queue_k_norm": 1.8392, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3375, "sent_len_1": 66.8147, "sent_len_max_0": 128.0, "sent_len_max_1": 206.7837, "stdk": 0.0497, "stdq": 0.047, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 90600 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.5585, "doc_norm": 1.8385, "encoder_q-embeddings": 1166.2319, "encoder_q-layer.0": 753.1073, "encoder_q-layer.1": 814.8842, "encoder_q-layer.10": 1828.3693, "encoder_q-layer.11": 3718.6426, "encoder_q-layer.2": 930.1621, "encoder_q-layer.3": 988.6345, "encoder_q-layer.4": 1095.8654, "encoder_q-layer.5": 1142.2874, "encoder_q-layer.6": 1337.7167, "encoder_q-layer.7": 1590.6111, "encoder_q-layer.8": 1838.8108, "encoder_q-layer.9": 1712.8422, "epoch": 0.89, "inbatch_neg_score": 1.5003, "inbatch_pos_score": 2.2656, "learning_rate": 5.166666666666667e-06, "loss": 2.5585, "norm_diff": 0.0508, "norm_loss": 0.0, "num_token_doc": 66.8075, "num_token_overlap": 17.8482, "num_token_query": 52.3494, "num_token_union": 73.7349, "num_word_context": 202.2659, "num_word_doc": 49.8503, "num_word_query": 39.8884, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2431.0754, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5, "query_norm": 1.7877, "queue_k_norm": 1.8383, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3494, "sent_len_1": 66.8075, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9588, "stdk": 0.0495, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 90700 }, { "accuracy": 62.3535, "active_queue_size": 16384.0, "cl_loss": 2.5641, "doc_norm": 1.841, "encoder_q-embeddings": 1193.5519, "encoder_q-layer.0": 776.6205, "encoder_q-layer.1": 851.442, "encoder_q-layer.10": 1798.4813, "encoder_q-layer.11": 3762.4741, "encoder_q-layer.2": 999.3283, "encoder_q-layer.3": 1043.4758, "encoder_q-layer.4": 1162.09, "encoder_q-layer.5": 1208.967, "encoder_q-layer.6": 1384.3322, "encoder_q-layer.7": 1581.1373, "encoder_q-layer.8": 1873.8198, "encoder_q-layer.9": 1736.4045, "epoch": 0.89, "inbatch_neg_score": 1.5021, "inbatch_pos_score": 2.252, "learning_rate": 5.1111111111111115e-06, "loss": 2.5641, "norm_diff": 0.0606, "norm_loss": 0.0, "num_token_doc": 66.7783, "num_token_overlap": 17.8439, "num_token_query": 52.3711, "num_token_union": 73.7722, "num_word_context": 202.45, "num_word_doc": 49.8439, "num_word_query": 39.9369, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2483.5084, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.502, "query_norm": 1.7805, "queue_k_norm": 1.8405, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3711, "sent_len_1": 66.7783, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7887, "stdk": 0.0496, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 90800 }, { "accuracy": 63.3789, "active_queue_size": 16384.0, "cl_loss": 2.5712, "doc_norm": 1.8417, "encoder_q-embeddings": 21141.6621, "encoder_q-layer.0": 16568.5215, "encoder_q-layer.1": 19263.4746, "encoder_q-layer.10": 2010.9197, "encoder_q-layer.11": 3965.2268, "encoder_q-layer.2": 26824.0723, "encoder_q-layer.3": 30245.4375, "encoder_q-layer.4": 30279.8691, "encoder_q-layer.5": 33203.5156, "encoder_q-layer.6": 29621.3574, "encoder_q-layer.7": 29123.7559, "encoder_q-layer.8": 18363.707, "encoder_q-layer.9": 4790.5576, "epoch": 0.89, "inbatch_neg_score": 1.5063, "inbatch_pos_score": 2.2734, "learning_rate": 5.0555555555555555e-06, "loss": 2.5712, "norm_diff": 0.0544, "norm_loss": 0.0, "num_token_doc": 66.7026, "num_token_overlap": 17.8261, "num_token_query": 52.3753, "num_token_union": 73.699, "num_word_context": 202.1823, "num_word_doc": 49.752, "num_word_query": 39.9447, "postclip_grad_norm": 1.0, "preclip_grad_norm": 34065.542, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 1.5059, "query_norm": 1.7873, "queue_k_norm": 1.8399, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3753, "sent_len_1": 66.7026, "sent_len_max_0": 128.0, "sent_len_max_1": 210.785, "stdk": 0.0497, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 90900 }, { "accuracy": 62.5, "active_queue_size": 16384.0, "cl_loss": 2.5806, "doc_norm": 1.835, "encoder_q-embeddings": 1198.233, "encoder_q-layer.0": 751.2139, "encoder_q-layer.1": 804.1293, "encoder_q-layer.10": 1710.1508, "encoder_q-layer.11": 3683.2991, "encoder_q-layer.2": 909.5605, "encoder_q-layer.3": 948.124, "encoder_q-layer.4": 1052.6862, "encoder_q-layer.5": 1077.0231, "encoder_q-layer.6": 1261.1239, "encoder_q-layer.7": 1479.73, "encoder_q-layer.8": 1797.3448, "encoder_q-layer.9": 1603.0946, "epoch": 0.89, "inbatch_neg_score": 1.5067, "inbatch_pos_score": 2.2773, "learning_rate": 5e-06, "loss": 2.5806, "norm_diff": 0.0423, "norm_loss": 0.0, "num_token_doc": 66.726, "num_token_overlap": 17.7848, "num_token_query": 52.2805, "num_token_union": 73.7152, "num_word_context": 202.3143, "num_word_doc": 49.7933, "num_word_query": 39.8572, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2355.1396, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5059, "query_norm": 1.7926, "queue_k_norm": 1.8418, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2805, "sent_len_1": 66.726, "sent_len_max_0": 128.0, "sent_len_max_1": 207.12, "stdk": 0.0493, "stdq": 0.0469, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 91000 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.5585, "doc_norm": 1.8367, "encoder_q-embeddings": 1397.136, "encoder_q-layer.0": 941.3348, "encoder_q-layer.1": 1079.1892, "encoder_q-layer.10": 1781.5114, "encoder_q-layer.11": 3996.4707, "encoder_q-layer.2": 1253.6929, "encoder_q-layer.3": 1383.4127, "encoder_q-layer.4": 1485.974, "encoder_q-layer.5": 1602.5005, "encoder_q-layer.6": 1817.8265, "encoder_q-layer.7": 1913.823, "encoder_q-layer.8": 2111.1621, "encoder_q-layer.9": 1782.3914, "epoch": 0.89, "inbatch_neg_score": 1.5087, "inbatch_pos_score": 2.2695, "learning_rate": 4.9444444444444444e-06, "loss": 2.5585, "norm_diff": 0.0447, "norm_loss": 0.0, "num_token_doc": 66.6385, "num_token_overlap": 17.8217, "num_token_query": 52.2241, "num_token_union": 73.6153, "num_word_context": 202.1933, "num_word_doc": 49.7332, "num_word_query": 39.8256, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2784.5583, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5088, "query_norm": 1.792, "queue_k_norm": 1.8419, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2241, "sent_len_1": 66.6385, "sent_len_max_0": 128.0, "sent_len_max_1": 209.0762, "stdk": 0.0494, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 91100 }, { "accuracy": 59.1309, "active_queue_size": 16384.0, "cl_loss": 2.5601, "doc_norm": 1.8384, "encoder_q-embeddings": 1268.2292, "encoder_q-layer.0": 806.3911, "encoder_q-layer.1": 884.1838, "encoder_q-layer.10": 1775.7006, "encoder_q-layer.11": 3807.0066, "encoder_q-layer.2": 1012.1784, "encoder_q-layer.3": 1082.9485, "encoder_q-layer.4": 1167.1019, "encoder_q-layer.5": 1274.255, "encoder_q-layer.6": 1474.7596, "encoder_q-layer.7": 1589.7698, "encoder_q-layer.8": 1995.9231, "encoder_q-layer.9": 1769.8479, "epoch": 0.89, "inbatch_neg_score": 1.5078, "inbatch_pos_score": 2.2598, "learning_rate": 4.888888888888889e-06, "loss": 2.5601, "norm_diff": 0.0452, "norm_loss": 0.0, "num_token_doc": 66.6706, "num_token_overlap": 17.814, "num_token_query": 52.2426, "num_token_union": 73.6227, "num_word_context": 202.0941, "num_word_doc": 49.7386, "num_word_query": 39.8418, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2513.9572, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5078, "query_norm": 1.7932, "queue_k_norm": 1.8413, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2426, "sent_len_1": 66.6706, "sent_len_max_0": 128.0, "sent_len_max_1": 208.08, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 91200 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.562, "doc_norm": 1.8437, "encoder_q-embeddings": 1238.6932, "encoder_q-layer.0": 777.9088, "encoder_q-layer.1": 847.8614, "encoder_q-layer.10": 1706.6812, "encoder_q-layer.11": 3597.2378, "encoder_q-layer.2": 972.2148, "encoder_q-layer.3": 1043.3392, "encoder_q-layer.4": 1151.2388, "encoder_q-layer.5": 1202.1555, "encoder_q-layer.6": 1395.7367, "encoder_q-layer.7": 1547.3383, "encoder_q-layer.8": 1830.2452, "encoder_q-layer.9": 1672.5083, "epoch": 0.89, "inbatch_neg_score": 1.5096, "inbatch_pos_score": 2.2676, "learning_rate": 4.833333333333333e-06, "loss": 2.562, "norm_diff": 0.0549, "norm_loss": 0.0, "num_token_doc": 66.9098, "num_token_overlap": 17.8435, "num_token_query": 52.3267, "num_token_union": 73.816, "num_word_context": 202.5037, "num_word_doc": 49.9246, "num_word_query": 39.9306, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2439.5381, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5098, "query_norm": 1.7888, "queue_k_norm": 1.8414, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3267, "sent_len_1": 66.9098, "sent_len_max_0": 128.0, "sent_len_max_1": 209.855, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 91300 }, { "accuracy": 59.7168, "active_queue_size": 16384.0, "cl_loss": 2.5452, "doc_norm": 1.8365, "encoder_q-embeddings": 1344.2478, "encoder_q-layer.0": 864.7904, "encoder_q-layer.1": 919.5682, "encoder_q-layer.10": 1826.6705, "encoder_q-layer.11": 3677.042, "encoder_q-layer.2": 1065.2175, "encoder_q-layer.3": 1156.6785, "encoder_q-layer.4": 1240.0001, "encoder_q-layer.5": 1254.6866, "encoder_q-layer.6": 1449.8162, "encoder_q-layer.7": 1591.3828, "encoder_q-layer.8": 1938.8242, "encoder_q-layer.9": 1678.0269, "epoch": 0.89, "inbatch_neg_score": 1.5112, "inbatch_pos_score": 2.2461, "learning_rate": 4.777777777777778e-06, "loss": 2.5452, "norm_diff": 0.0518, "norm_loss": 0.0, "num_token_doc": 66.8178, "num_token_overlap": 17.8586, "num_token_query": 52.2661, "num_token_union": 73.6935, "num_word_context": 202.2498, "num_word_doc": 49.8763, "num_word_query": 39.8707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2512.3785, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5107, "query_norm": 1.7848, "queue_k_norm": 1.8437, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2661, "sent_len_1": 66.8178, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5925, "stdk": 0.0493, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 91400 }, { "accuracy": 61.5723, "active_queue_size": 16384.0, "cl_loss": 2.5709, "doc_norm": 1.8427, "encoder_q-embeddings": 1163.8842, "encoder_q-layer.0": 764.4816, "encoder_q-layer.1": 836.0015, "encoder_q-layer.10": 1697.2758, "encoder_q-layer.11": 3740.6165, "encoder_q-layer.2": 957.7662, "encoder_q-layer.3": 1028.8118, "encoder_q-layer.4": 1175.9705, "encoder_q-layer.5": 1171.0353, "encoder_q-layer.6": 1339.4131, "encoder_q-layer.7": 1493.6555, "encoder_q-layer.8": 1753.7712, "encoder_q-layer.9": 1622.8186, "epoch": 0.89, "inbatch_neg_score": 1.514, "inbatch_pos_score": 2.2656, "learning_rate": 4.722222222222222e-06, "loss": 2.5709, "norm_diff": 0.0556, "norm_loss": 0.0, "num_token_doc": 66.6295, "num_token_overlap": 17.7988, "num_token_query": 52.3931, "num_token_union": 73.6664, "num_word_context": 202.2912, "num_word_doc": 49.6815, "num_word_query": 39.9455, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2416.4508, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5137, "query_norm": 1.787, "queue_k_norm": 1.8422, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3931, "sent_len_1": 66.6295, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9288, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 91500 }, { "accuracy": 61.5723, "active_queue_size": 16384.0, "cl_loss": 2.556, "doc_norm": 1.8418, "encoder_q-embeddings": 1140.136, "encoder_q-layer.0": 775.7639, "encoder_q-layer.1": 881.9519, "encoder_q-layer.10": 1861.3889, "encoder_q-layer.11": 3834.3943, "encoder_q-layer.2": 1022.7861, "encoder_q-layer.3": 1064.1194, "encoder_q-layer.4": 1145.0387, "encoder_q-layer.5": 1193.7822, "encoder_q-layer.6": 1359.7339, "encoder_q-layer.7": 1604.8333, "encoder_q-layer.8": 1796.4495, "encoder_q-layer.9": 1643.3099, "epoch": 0.89, "inbatch_neg_score": 1.5164, "inbatch_pos_score": 2.2617, "learning_rate": 4.666666666666667e-06, "loss": 2.556, "norm_diff": 0.0553, "norm_loss": 0.0, "num_token_doc": 66.7752, "num_token_overlap": 17.8401, "num_token_query": 52.3149, "num_token_union": 73.739, "num_word_context": 202.366, "num_word_doc": 49.8425, "num_word_query": 39.9025, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2475.766, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5156, "query_norm": 1.7865, "queue_k_norm": 1.8432, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3149, "sent_len_1": 66.7752, "sent_len_max_0": 128.0, "sent_len_max_1": 205.5425, "stdk": 0.0496, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 91600 }, { "accuracy": 63.0859, "active_queue_size": 16384.0, "cl_loss": 2.5612, "doc_norm": 1.8445, "encoder_q-embeddings": 1332.8831, "encoder_q-layer.0": 800.6703, "encoder_q-layer.1": 872.9044, "encoder_q-layer.10": 1831.5951, "encoder_q-layer.11": 3806.5498, "encoder_q-layer.2": 980.3519, "encoder_q-layer.3": 1058.203, "encoder_q-layer.4": 1165.0027, "encoder_q-layer.5": 1190.2007, "encoder_q-layer.6": 1400.2571, "encoder_q-layer.7": 1606.1227, "encoder_q-layer.8": 1870.6493, "encoder_q-layer.9": 1850.1312, "epoch": 0.9, "inbatch_neg_score": 1.5146, "inbatch_pos_score": 2.2793, "learning_rate": 4.611111111111111e-06, "loss": 2.5612, "norm_diff": 0.0529, "norm_loss": 0.0, "num_token_doc": 66.7301, "num_token_overlap": 17.8068, "num_token_query": 52.3344, "num_token_union": 73.7165, "num_word_context": 202.3733, "num_word_doc": 49.7626, "num_word_query": 39.8994, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2540.4791, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5146, "query_norm": 1.7916, "queue_k_norm": 1.8431, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3344, "sent_len_1": 66.7301, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5425, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 91700 }, { "accuracy": 61.1816, "active_queue_size": 16384.0, "cl_loss": 2.5664, "doc_norm": 1.8442, "encoder_q-embeddings": 1567.3093, "encoder_q-layer.0": 1115.7466, "encoder_q-layer.1": 1265.6287, "encoder_q-layer.10": 1764.1567, "encoder_q-layer.11": 3669.2517, "encoder_q-layer.2": 1613.6129, "encoder_q-layer.3": 1768.5345, "encoder_q-layer.4": 1772.475, "encoder_q-layer.5": 2074.0896, "encoder_q-layer.6": 2115.3623, "encoder_q-layer.7": 2304.7935, "encoder_q-layer.8": 2140.6624, "encoder_q-layer.9": 1733.2723, "epoch": 0.9, "inbatch_neg_score": 1.5202, "inbatch_pos_score": 2.2676, "learning_rate": 4.555555555555556e-06, "loss": 2.5664, "norm_diff": 0.0567, "norm_loss": 0.0, "num_token_doc": 66.7012, "num_token_overlap": 17.7827, "num_token_query": 52.2599, "num_token_union": 73.7009, "num_word_context": 202.1828, "num_word_doc": 49.7662, "num_word_query": 39.8592, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2950.8489, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5195, "query_norm": 1.7874, "queue_k_norm": 1.8429, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2599, "sent_len_1": 66.7012, "sent_len_max_0": 128.0, "sent_len_max_1": 208.895, "stdk": 0.0496, "stdq": 0.0464, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 91800 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.5673, "doc_norm": 1.845, "encoder_q-embeddings": 1324.3126, "encoder_q-layer.0": 871.6242, "encoder_q-layer.1": 985.0103, "encoder_q-layer.10": 1663.7329, "encoder_q-layer.11": 3824.8645, "encoder_q-layer.2": 1134.4236, "encoder_q-layer.3": 1205.1926, "encoder_q-layer.4": 1286.8149, "encoder_q-layer.5": 1351.7816, "encoder_q-layer.6": 1474.2518, "encoder_q-layer.7": 1664.0822, "encoder_q-layer.8": 1888.0211, "encoder_q-layer.9": 1681.2285, "epoch": 0.9, "inbatch_neg_score": 1.5186, "inbatch_pos_score": 2.2734, "learning_rate": 4.5e-06, "loss": 2.5673, "norm_diff": 0.0549, "norm_loss": 0.0, "num_token_doc": 66.7437, "num_token_overlap": 17.7914, "num_token_query": 52.2766, "num_token_union": 73.7116, "num_word_context": 202.2289, "num_word_doc": 49.7818, "num_word_query": 39.8842, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2605.6434, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5195, "query_norm": 1.7901, "queue_k_norm": 1.8448, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2766, "sent_len_1": 66.7437, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3775, "stdk": 0.0497, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 91900 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.5561, "doc_norm": 1.8451, "encoder_q-embeddings": 1153.6835, "encoder_q-layer.0": 757.7577, "encoder_q-layer.1": 820.139, "encoder_q-layer.10": 1889.8535, "encoder_q-layer.11": 3832.5774, "encoder_q-layer.2": 910.197, "encoder_q-layer.3": 954.7761, "encoder_q-layer.4": 1029.9089, "encoder_q-layer.5": 1077.0355, "encoder_q-layer.6": 1340.2516, "encoder_q-layer.7": 1567.6698, "encoder_q-layer.8": 1781.889, "encoder_q-layer.9": 1672.8284, "epoch": 0.9, "inbatch_neg_score": 1.5216, "inbatch_pos_score": 2.2695, "learning_rate": 4.444444444444445e-06, "loss": 2.5561, "norm_diff": 0.0567, "norm_loss": 0.0, "num_token_doc": 66.707, "num_token_overlap": 17.7895, "num_token_query": 52.1966, "num_token_union": 73.6625, "num_word_context": 202.2111, "num_word_doc": 49.7845, "num_word_query": 39.7976, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2409.0618, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5205, "query_norm": 1.7884, "queue_k_norm": 1.8446, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1966, "sent_len_1": 66.707, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3212, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 92000 }, { "accuracy": 62.0117, "active_queue_size": 16384.0, "cl_loss": 2.558, "doc_norm": 1.8447, "encoder_q-embeddings": 1275.1637, "encoder_q-layer.0": 813.5639, "encoder_q-layer.1": 881.8243, "encoder_q-layer.10": 1702.9025, "encoder_q-layer.11": 3758.1213, "encoder_q-layer.2": 997.4013, "encoder_q-layer.3": 1100.6671, "encoder_q-layer.4": 1147.9037, "encoder_q-layer.5": 1247.3334, "encoder_q-layer.6": 1412.022, "encoder_q-layer.7": 1495.0282, "encoder_q-layer.8": 1803.1239, "encoder_q-layer.9": 1588.3575, "epoch": 0.9, "inbatch_neg_score": 1.5209, "inbatch_pos_score": 2.2852, "learning_rate": 4.388888888888889e-06, "loss": 2.558, "norm_diff": 0.0476, "norm_loss": 0.0, "num_token_doc": 66.6606, "num_token_overlap": 17.8037, "num_token_query": 52.3383, "num_token_union": 73.6502, "num_word_context": 202.1612, "num_word_doc": 49.7135, "num_word_query": 39.917, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2467.228, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5205, "query_norm": 1.7972, "queue_k_norm": 1.8442, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3383, "sent_len_1": 66.6606, "sent_len_max_0": 128.0, "sent_len_max_1": 209.975, "stdk": 0.0496, "stdq": 0.0469, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 92100 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.5591, "doc_norm": 1.8474, "encoder_q-embeddings": 3761.7192, "encoder_q-layer.0": 2745.9824, "encoder_q-layer.1": 3279.3059, "encoder_q-layer.10": 3870.6194, "encoder_q-layer.11": 7870.5762, "encoder_q-layer.2": 4071.3076, "encoder_q-layer.3": 4166.8408, "encoder_q-layer.4": 4321.8267, "encoder_q-layer.5": 4587.6362, "encoder_q-layer.6": 4619.1055, "encoder_q-layer.7": 3735.0249, "encoder_q-layer.8": 3887.2605, "encoder_q-layer.9": 3586.2922, "epoch": 0.9, "inbatch_neg_score": 1.5219, "inbatch_pos_score": 2.2598, "learning_rate": 4.333333333333334e-06, "loss": 2.5591, "norm_diff": 0.058, "norm_loss": 0.0, "num_token_doc": 66.84, "num_token_overlap": 17.816, "num_token_query": 52.2562, "num_token_union": 73.729, "num_word_context": 202.567, "num_word_doc": 49.8837, "num_word_query": 39.8502, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6471.6743, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5215, "query_norm": 1.7893, "queue_k_norm": 1.8462, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2562, "sent_len_1": 66.84, "sent_len_max_0": 128.0, "sent_len_max_1": 209.29, "stdk": 0.0497, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 92200 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.5704, "doc_norm": 1.8432, "encoder_q-embeddings": 2427.2686, "encoder_q-layer.0": 1582.327, "encoder_q-layer.1": 1722.6509, "encoder_q-layer.10": 3967.8438, "encoder_q-layer.11": 8188.2617, "encoder_q-layer.2": 1920.5925, "encoder_q-layer.3": 2056.2842, "encoder_q-layer.4": 2238.2388, "encoder_q-layer.5": 2520.0386, "encoder_q-layer.6": 2722.1938, "encoder_q-layer.7": 3083.7151, "encoder_q-layer.8": 3740.9382, "encoder_q-layer.9": 3554.9651, "epoch": 0.9, "inbatch_neg_score": 1.5223, "inbatch_pos_score": 2.2676, "learning_rate": 4.277777777777778e-06, "loss": 2.5704, "norm_diff": 0.0498, "norm_loss": 0.0, "num_token_doc": 66.6311, "num_token_overlap": 17.7989, "num_token_query": 52.2, "num_token_union": 73.56, "num_word_context": 201.9861, "num_word_doc": 49.7299, "num_word_query": 39.8146, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5170.3267, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5225, "query_norm": 1.7933, "queue_k_norm": 1.8463, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2, "sent_len_1": 66.6311, "sent_len_max_0": 128.0, "sent_len_max_1": 208.685, "stdk": 0.0495, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 92300 }, { "accuracy": 61.2305, "active_queue_size": 16384.0, "cl_loss": 2.557, "doc_norm": 1.8419, "encoder_q-embeddings": 2380.9084, "encoder_q-layer.0": 1602.5325, "encoder_q-layer.1": 1783.8767, "encoder_q-layer.10": 3489.9509, "encoder_q-layer.11": 7331.2158, "encoder_q-layer.2": 2055.625, "encoder_q-layer.3": 2118.1248, "encoder_q-layer.4": 2307.1543, "encoder_q-layer.5": 2472.0068, "encoder_q-layer.6": 2709.157, "encoder_q-layer.7": 3010.2202, "encoder_q-layer.8": 3817.3833, "encoder_q-layer.9": 3405.4751, "epoch": 0.9, "inbatch_neg_score": 1.5229, "inbatch_pos_score": 2.2695, "learning_rate": 4.222222222222223e-06, "loss": 2.557, "norm_diff": 0.0543, "norm_loss": 0.0, "num_token_doc": 66.8176, "num_token_overlap": 17.88, "num_token_query": 52.4019, "num_token_union": 73.7814, "num_word_context": 202.4293, "num_word_doc": 49.886, "num_word_query": 39.9817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4901.8878, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5225, "query_norm": 1.7876, "queue_k_norm": 1.8456, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4019, "sent_len_1": 66.8176, "sent_len_max_0": 128.0, "sent_len_max_1": 207.3938, "stdk": 0.0494, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 92400 }, { "accuracy": 61.377, "active_queue_size": 16384.0, "cl_loss": 2.5689, "doc_norm": 1.8405, "encoder_q-embeddings": 4869.4595, "encoder_q-layer.0": 3442.0144, "encoder_q-layer.1": 4080.384, "encoder_q-layer.10": 3512.354, "encoder_q-layer.11": 7257.9395, "encoder_q-layer.2": 4555.9043, "encoder_q-layer.3": 4648.8242, "encoder_q-layer.4": 5173.7778, "encoder_q-layer.5": 5505.835, "encoder_q-layer.6": 5596.5522, "encoder_q-layer.7": 5187.8052, "encoder_q-layer.8": 4584.9355, "encoder_q-layer.9": 3626.7305, "epoch": 0.9, "inbatch_neg_score": 1.5251, "inbatch_pos_score": 2.2734, "learning_rate": 4.166666666666667e-06, "loss": 2.5689, "norm_diff": 0.0478, "norm_loss": 0.0, "num_token_doc": 66.6636, "num_token_overlap": 17.8097, "num_token_query": 52.3245, "num_token_union": 73.6745, "num_word_context": 202.1662, "num_word_doc": 49.7519, "num_word_query": 39.8922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7215.8932, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5244, "query_norm": 1.7926, "queue_k_norm": 1.8451, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3245, "sent_len_1": 66.6636, "sent_len_max_0": 128.0, "sent_len_max_1": 209.445, "stdk": 0.0494, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 92500 }, { "accuracy": 61.2305, "active_queue_size": 16384.0, "cl_loss": 2.5625, "doc_norm": 1.8457, "encoder_q-embeddings": 2305.4866, "encoder_q-layer.0": 1510.7965, "encoder_q-layer.1": 1622.5969, "encoder_q-layer.10": 3393.6313, "encoder_q-layer.11": 7301.1641, "encoder_q-layer.2": 1903.5491, "encoder_q-layer.3": 1970.7327, "encoder_q-layer.4": 2078.863, "encoder_q-layer.5": 2164.0032, "encoder_q-layer.6": 2535.2246, "encoder_q-layer.7": 2913.062, "encoder_q-layer.8": 3356.821, "encoder_q-layer.9": 3229.3999, "epoch": 0.9, "inbatch_neg_score": 1.5266, "inbatch_pos_score": 2.2773, "learning_rate": 4.111111111111112e-06, "loss": 2.5625, "norm_diff": 0.0567, "norm_loss": 0.0, "num_token_doc": 66.7627, "num_token_overlap": 17.7968, "num_token_query": 52.3442, "num_token_union": 73.7416, "num_word_context": 202.3152, "num_word_doc": 49.8167, "num_word_query": 39.9173, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4708.9556, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5264, "query_norm": 1.7889, "queue_k_norm": 1.8462, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3442, "sent_len_1": 66.7627, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3913, "stdk": 0.0496, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 92600 }, { "accuracy": 61.6699, "active_queue_size": 16384.0, "cl_loss": 2.5565, "doc_norm": 1.8482, "encoder_q-embeddings": 2896.033, "encoder_q-layer.0": 1919.0168, "encoder_q-layer.1": 2124.2983, "encoder_q-layer.10": 3467.1416, "encoder_q-layer.11": 7626.3208, "encoder_q-layer.2": 2488.9158, "encoder_q-layer.3": 2717.6438, "encoder_q-layer.4": 2982.447, "encoder_q-layer.5": 3246.7734, "encoder_q-layer.6": 3419.4834, "encoder_q-layer.7": 3786.4316, "encoder_q-layer.8": 3909.4248, "encoder_q-layer.9": 3453.8416, "epoch": 0.91, "inbatch_neg_score": 1.5287, "inbatch_pos_score": 2.2969, "learning_rate": 4.055555555555556e-06, "loss": 2.5565, "norm_diff": 0.0481, "norm_loss": 0.0, "num_token_doc": 66.691, "num_token_overlap": 17.7851, "num_token_query": 52.224, "num_token_union": 73.6389, "num_word_context": 202.1945, "num_word_doc": 49.7445, "num_word_query": 39.833, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5438.5245, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5293, "query_norm": 1.8001, "queue_k_norm": 1.8459, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.224, "sent_len_1": 66.691, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4288, "stdk": 0.0497, "stdq": 0.047, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 92700 }, { "accuracy": 62.3047, "active_queue_size": 16384.0, "cl_loss": 2.5728, "doc_norm": 1.8507, "encoder_q-embeddings": 3357.1746, "encoder_q-layer.0": 2213.0959, "encoder_q-layer.1": 2695.2402, "encoder_q-layer.10": 3646.1426, "encoder_q-layer.11": 7398.833, "encoder_q-layer.2": 3141.2188, "encoder_q-layer.3": 3151.6184, "encoder_q-layer.4": 3496.4553, "encoder_q-layer.5": 3633.1741, "encoder_q-layer.6": 3652.7556, "encoder_q-layer.7": 3839.1226, "encoder_q-layer.8": 3907.2656, "encoder_q-layer.9": 3422.7537, "epoch": 0.91, "inbatch_neg_score": 1.5288, "inbatch_pos_score": 2.2949, "learning_rate": 4.000000000000001e-06, "loss": 2.5728, "norm_diff": 0.0541, "norm_loss": 0.0, "num_token_doc": 66.6527, "num_token_overlap": 17.7796, "num_token_query": 52.2733, "num_token_union": 73.6548, "num_word_context": 202.3863, "num_word_doc": 49.7133, "num_word_query": 39.8852, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5740.3509, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5293, "query_norm": 1.7966, "queue_k_norm": 1.8479, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2733, "sent_len_1": 66.6527, "sent_len_max_0": 128.0, "sent_len_max_1": 210.0375, "stdk": 0.0498, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 92800 }, { "accuracy": 62.8906, "active_queue_size": 16384.0, "cl_loss": 2.5417, "doc_norm": 1.8478, "encoder_q-embeddings": 2386.0674, "encoder_q-layer.0": 1537.7081, "encoder_q-layer.1": 1653.1584, "encoder_q-layer.10": 3712.5068, "encoder_q-layer.11": 7521.667, "encoder_q-layer.2": 1868.0043, "encoder_q-layer.3": 1975.4729, "encoder_q-layer.4": 2245.7324, "encoder_q-layer.5": 2401.3093, "encoder_q-layer.6": 2830.4299, "encoder_q-layer.7": 3159.2437, "encoder_q-layer.8": 3843.8547, "encoder_q-layer.9": 3479.1733, "epoch": 0.91, "inbatch_neg_score": 1.5304, "inbatch_pos_score": 2.291, "learning_rate": 3.944444444444445e-06, "loss": 2.5417, "norm_diff": 0.0526, "norm_loss": 0.0, "num_token_doc": 66.7809, "num_token_overlap": 17.8856, "num_token_query": 52.3411, "num_token_union": 73.6477, "num_word_context": 202.2839, "num_word_doc": 49.8211, "num_word_query": 39.9167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4958.2213, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5303, "query_norm": 1.7952, "queue_k_norm": 1.8484, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3411, "sent_len_1": 66.7809, "sent_len_max_0": 128.0, "sent_len_max_1": 210.8812, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 92900 }, { "accuracy": 63.8184, "active_queue_size": 16384.0, "cl_loss": 2.5605, "doc_norm": 1.8481, "encoder_q-embeddings": 2279.2463, "encoder_q-layer.0": 1475.7089, "encoder_q-layer.1": 1647.5959, "encoder_q-layer.10": 3435.0671, "encoder_q-layer.11": 7058.5918, "encoder_q-layer.2": 1854.4805, "encoder_q-layer.3": 2022.0337, "encoder_q-layer.4": 2123.8948, "encoder_q-layer.5": 2224.1472, "encoder_q-layer.6": 2492.6387, "encoder_q-layer.7": 2852.4851, "encoder_q-layer.8": 3434.689, "encoder_q-layer.9": 3207.3687, "epoch": 0.91, "inbatch_neg_score": 1.5307, "inbatch_pos_score": 2.3164, "learning_rate": 3.888888888888889e-06, "loss": 2.5605, "norm_diff": 0.0473, "norm_loss": 0.0, "num_token_doc": 66.781, "num_token_overlap": 17.857, "num_token_query": 52.3389, "num_token_union": 73.7311, "num_word_context": 202.4066, "num_word_doc": 49.8652, "num_word_query": 39.9371, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4591.1011, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5312, "query_norm": 1.8008, "queue_k_norm": 1.8478, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3389, "sent_len_1": 66.781, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8638, "stdk": 0.0497, "stdq": 0.047, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 93000 }, { "accuracy": 61.2793, "active_queue_size": 16384.0, "cl_loss": 2.5572, "doc_norm": 1.8426, "encoder_q-embeddings": 2762.4026, "encoder_q-layer.0": 1793.3, "encoder_q-layer.1": 2030.3351, "encoder_q-layer.10": 3710.6245, "encoder_q-layer.11": 7405.1514, "encoder_q-layer.2": 2401.5901, "encoder_q-layer.3": 2504.9377, "encoder_q-layer.4": 2838.783, "encoder_q-layer.5": 2943.8298, "encoder_q-layer.6": 3279.1899, "encoder_q-layer.7": 3442.7683, "encoder_q-layer.8": 3746.5156, "encoder_q-layer.9": 3347.2019, "epoch": 0.91, "inbatch_neg_score": 1.533, "inbatch_pos_score": 2.2715, "learning_rate": 3.833333333333334e-06, "loss": 2.5572, "norm_diff": 0.0494, "norm_loss": 0.0, "num_token_doc": 66.7304, "num_token_overlap": 17.81, "num_token_query": 52.2947, "num_token_union": 73.7034, "num_word_context": 202.4141, "num_word_doc": 49.8387, "num_word_query": 39.8948, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5250.4681, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5322, "query_norm": 1.7932, "queue_k_norm": 1.8486, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2947, "sent_len_1": 66.7304, "sent_len_max_0": 128.0, "sent_len_max_1": 207.435, "stdk": 0.0493, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 93100 }, { "accuracy": 60.791, "active_queue_size": 16384.0, "cl_loss": 2.5676, "doc_norm": 1.8449, "encoder_q-embeddings": 2438.8936, "encoder_q-layer.0": 1591.1178, "encoder_q-layer.1": 1703.704, "encoder_q-layer.10": 3559.1128, "encoder_q-layer.11": 7473.811, "encoder_q-layer.2": 1894.2903, "encoder_q-layer.3": 2027.2373, "encoder_q-layer.4": 2232.7456, "encoder_q-layer.5": 2254.3848, "encoder_q-layer.6": 2697.395, "encoder_q-layer.7": 3010.4561, "encoder_q-layer.8": 3564.7095, "encoder_q-layer.9": 3374.9902, "epoch": 0.91, "inbatch_neg_score": 1.5359, "inbatch_pos_score": 2.293, "learning_rate": 3.777777777777778e-06, "loss": 2.5676, "norm_diff": 0.0468, "norm_loss": 0.0, "num_token_doc": 66.8185, "num_token_overlap": 17.8028, "num_token_query": 52.2249, "num_token_union": 73.7194, "num_word_context": 202.3041, "num_word_doc": 49.8513, "num_word_query": 39.812, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4866.6984, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5352, "query_norm": 1.7981, "queue_k_norm": 1.8485, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2249, "sent_len_1": 66.8185, "sent_len_max_0": 128.0, "sent_len_max_1": 207.9875, "stdk": 0.0494, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 93200 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.5571, "doc_norm": 1.8505, "encoder_q-embeddings": 2649.9868, "encoder_q-layer.0": 1721.5789, "encoder_q-layer.1": 1872.2939, "encoder_q-layer.10": 3670.3789, "encoder_q-layer.11": 7536.0552, "encoder_q-layer.2": 2128.7493, "encoder_q-layer.3": 2170.5625, "encoder_q-layer.4": 2387.7349, "encoder_q-layer.5": 2523.9387, "encoder_q-layer.6": 2868.2827, "encoder_q-layer.7": 3278.0815, "encoder_q-layer.8": 4005.6992, "encoder_q-layer.9": 3568.887, "epoch": 0.91, "inbatch_neg_score": 1.5368, "inbatch_pos_score": 2.2891, "learning_rate": 3.722222222222222e-06, "loss": 2.5571, "norm_diff": 0.0561, "norm_loss": 0.0, "num_token_doc": 66.8287, "num_token_overlap": 17.823, "num_token_query": 52.3786, "num_token_union": 73.7884, "num_word_context": 202.3933, "num_word_doc": 49.8446, "num_word_query": 39.9649, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5120.9381, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5352, "query_norm": 1.7944, "queue_k_norm": 1.8501, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3786, "sent_len_1": 66.8287, "sent_len_max_0": 128.0, "sent_len_max_1": 210.155, "stdk": 0.0497, "stdq": 0.0466, "stdqueue_k": 0.0498, "stdqueue_q": 0.0, "step": 93300 }, { "accuracy": 61.4746, "active_queue_size": 16384.0, "cl_loss": 2.5619, "doc_norm": 1.8497, "encoder_q-embeddings": 2394.6082, "encoder_q-layer.0": 1595.9088, "encoder_q-layer.1": 1688.8184, "encoder_q-layer.10": 3853.5698, "encoder_q-layer.11": 8122.1929, "encoder_q-layer.2": 1917.1927, "encoder_q-layer.3": 2059.1384, "encoder_q-layer.4": 2244.0559, "encoder_q-layer.5": 2337.4922, "encoder_q-layer.6": 2692.9282, "encoder_q-layer.7": 3121.3645, "encoder_q-layer.8": 3841.1416, "encoder_q-layer.9": 3498.9336, "epoch": 0.91, "inbatch_neg_score": 1.539, "inbatch_pos_score": 2.2832, "learning_rate": 3.666666666666667e-06, "loss": 2.5619, "norm_diff": 0.0577, "norm_loss": 0.0, "num_token_doc": 66.7462, "num_token_overlap": 17.7967, "num_token_query": 52.32, "num_token_union": 73.7169, "num_word_context": 202.2913, "num_word_doc": 49.7899, "num_word_query": 39.8816, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5154.7866, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5381, "query_norm": 1.7919, "queue_k_norm": 1.8487, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.32, "sent_len_1": 66.7462, "sent_len_max_0": 128.0, "sent_len_max_1": 210.54, "stdk": 0.0497, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 93400 }, { "accuracy": 61.8652, "active_queue_size": 16384.0, "cl_loss": 2.5546, "doc_norm": 1.8505, "encoder_q-embeddings": 7285.4731, "encoder_q-layer.0": 5085.4111, "encoder_q-layer.1": 5970.4385, "encoder_q-layer.10": 3603.5415, "encoder_q-layer.11": 7454.646, "encoder_q-layer.2": 7307.5112, "encoder_q-layer.3": 8440.2422, "encoder_q-layer.4": 9280.3506, "encoder_q-layer.5": 10791.4043, "encoder_q-layer.6": 10319.9404, "encoder_q-layer.7": 8796.9033, "encoder_q-layer.8": 6522.7275, "encoder_q-layer.9": 4004.0337, "epoch": 0.91, "inbatch_neg_score": 1.5348, "inbatch_pos_score": 2.2871, "learning_rate": 3.611111111111111e-06, "loss": 2.5546, "norm_diff": 0.0553, "norm_loss": 0.0, "num_token_doc": 66.9073, "num_token_overlap": 17.794, "num_token_query": 52.2194, "num_token_union": 73.7765, "num_word_context": 202.5808, "num_word_doc": 49.9434, "num_word_query": 39.8249, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11321.732, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5352, "query_norm": 1.7952, "queue_k_norm": 1.8507, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2194, "sent_len_1": 66.9073, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4725, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0498, "stdqueue_q": 0.0, "step": 93500 }, { "accuracy": 61.6211, "active_queue_size": 16384.0, "cl_loss": 2.5615, "doc_norm": 1.8497, "encoder_q-embeddings": 3453.6594, "encoder_q-layer.0": 2282.9565, "encoder_q-layer.1": 2607.7148, "encoder_q-layer.10": 3653.3804, "encoder_q-layer.11": 7764.6201, "encoder_q-layer.2": 2589.0413, "encoder_q-layer.3": 2747.915, "encoder_q-layer.4": 2763.3547, "encoder_q-layer.5": 2739.752, "encoder_q-layer.6": 3157.4314, "encoder_q-layer.7": 3252.0552, "encoder_q-layer.8": 3954.8262, "encoder_q-layer.9": 3556.4854, "epoch": 0.91, "inbatch_neg_score": 1.5363, "inbatch_pos_score": 2.2988, "learning_rate": 3.555555555555556e-06, "loss": 2.5615, "norm_diff": 0.0547, "norm_loss": 0.0, "num_token_doc": 66.6559, "num_token_overlap": 17.7867, "num_token_query": 52.2268, "num_token_union": 73.6651, "num_word_context": 202.22, "num_word_doc": 49.7773, "num_word_query": 39.8396, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5526.5315, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5371, "query_norm": 1.795, "queue_k_norm": 1.8498, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2268, "sent_len_1": 66.6559, "sent_len_max_0": 128.0, "sent_len_max_1": 207.5312, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 93600 }, { "accuracy": 63.623, "active_queue_size": 16384.0, "cl_loss": 2.5662, "doc_norm": 1.8483, "encoder_q-embeddings": 2541.7935, "encoder_q-layer.0": 1703.524, "encoder_q-layer.1": 1876.8545, "encoder_q-layer.10": 3565.2773, "encoder_q-layer.11": 7136.564, "encoder_q-layer.2": 2111.7988, "encoder_q-layer.3": 2319.5212, "encoder_q-layer.4": 2604.7808, "encoder_q-layer.5": 2636.5432, "encoder_q-layer.6": 2909.4346, "encoder_q-layer.7": 3348.1455, "encoder_q-layer.8": 3705.1133, "encoder_q-layer.9": 3429.8762, "epoch": 0.91, "inbatch_neg_score": 1.5372, "inbatch_pos_score": 2.3203, "learning_rate": 3.5000000000000004e-06, "loss": 2.5662, "norm_diff": 0.05, "norm_loss": 0.0, "num_token_doc": 66.5873, "num_token_overlap": 17.7714, "num_token_query": 52.2404, "num_token_union": 73.6006, "num_word_context": 202.1925, "num_word_doc": 49.6971, "num_word_query": 39.8383, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4969.0574, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5381, "query_norm": 1.7983, "queue_k_norm": 1.8497, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2404, "sent_len_1": 66.5873, "sent_len_max_0": 128.0, "sent_len_max_1": 210.03, "stdk": 0.0496, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 93700 }, { "accuracy": 61.5723, "active_queue_size": 16384.0, "cl_loss": 2.5486, "doc_norm": 1.8509, "encoder_q-embeddings": 2363.0754, "encoder_q-layer.0": 1558.275, "encoder_q-layer.1": 1745.6401, "encoder_q-layer.10": 3798.6785, "encoder_q-layer.11": 7488.4653, "encoder_q-layer.2": 1997.9606, "encoder_q-layer.3": 2172.356, "encoder_q-layer.4": 2359.844, "encoder_q-layer.5": 2393.4368, "encoder_q-layer.6": 2771.7075, "encoder_q-layer.7": 3056.5005, "encoder_q-layer.8": 3743.1721, "encoder_q-layer.9": 3381.9392, "epoch": 0.92, "inbatch_neg_score": 1.5417, "inbatch_pos_score": 2.293, "learning_rate": 3.4444444444444444e-06, "loss": 2.5486, "norm_diff": 0.0538, "norm_loss": 0.0, "num_token_doc": 66.6369, "num_token_overlap": 17.7688, "num_token_query": 52.1793, "num_token_union": 73.5753, "num_word_context": 202.0398, "num_word_doc": 49.7396, "num_word_query": 39.7976, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5001.4663, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.54, "query_norm": 1.7971, "queue_k_norm": 1.8497, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1793, "sent_len_1": 66.6369, "sent_len_max_0": 128.0, "sent_len_max_1": 207.0525, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 93800 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.5678, "doc_norm": 1.8509, "encoder_q-embeddings": 7072.0962, "encoder_q-layer.0": 5393.5479, "encoder_q-layer.1": 6156.8555, "encoder_q-layer.10": 3676.5771, "encoder_q-layer.11": 7880.1406, "encoder_q-layer.2": 8070.2983, "encoder_q-layer.3": 8825.8076, "encoder_q-layer.4": 10491.0527, "encoder_q-layer.5": 12055.8125, "encoder_q-layer.6": 13206.2412, "encoder_q-layer.7": 11880.6738, "encoder_q-layer.8": 9067.9805, "encoder_q-layer.9": 5136.9102, "epoch": 0.92, "inbatch_neg_score": 1.5398, "inbatch_pos_score": 2.2949, "learning_rate": 3.3888888888888893e-06, "loss": 2.5678, "norm_diff": 0.0614, "norm_loss": 0.0, "num_token_doc": 66.593, "num_token_overlap": 17.7347, "num_token_query": 52.1946, "num_token_union": 73.6267, "num_word_context": 201.9845, "num_word_doc": 49.7018, "num_word_query": 39.8139, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13240.408, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.541, "query_norm": 1.7896, "queue_k_norm": 1.8502, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1946, "sent_len_1": 66.593, "sent_len_max_0": 128.0, "sent_len_max_1": 209.4263, "stdk": 0.0497, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 93900 }, { "accuracy": 61.2793, "active_queue_size": 16384.0, "cl_loss": 2.57, "doc_norm": 1.853, "encoder_q-embeddings": 2682.4775, "encoder_q-layer.0": 1745.9032, "encoder_q-layer.1": 1905.7881, "encoder_q-layer.10": 3546.2651, "encoder_q-layer.11": 7239.3828, "encoder_q-layer.2": 2207.5928, "encoder_q-layer.3": 2495.2307, "encoder_q-layer.4": 2621.7593, "encoder_q-layer.5": 2803.4644, "encoder_q-layer.6": 3124.9248, "encoder_q-layer.7": 3334.6785, "encoder_q-layer.8": 3716.4736, "encoder_q-layer.9": 3239.6355, "epoch": 0.92, "inbatch_neg_score": 1.5408, "inbatch_pos_score": 2.2969, "learning_rate": 3.3333333333333333e-06, "loss": 2.57, "norm_diff": 0.0584, "norm_loss": 0.0, "num_token_doc": 66.8142, "num_token_overlap": 17.8457, "num_token_query": 52.349, "num_token_union": 73.7496, "num_word_context": 202.6131, "num_word_doc": 49.8326, "num_word_query": 39.9242, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5078.8123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.541, "query_norm": 1.7946, "queue_k_norm": 1.8523, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.349, "sent_len_1": 66.8142, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5037, "stdk": 0.0498, "stdq": 0.0466, "stdqueue_k": 0.0498, "stdqueue_q": 0.0, "step": 94000 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.5628, "doc_norm": 1.8478, "encoder_q-embeddings": 5905.6792, "encoder_q-layer.0": 4043.731, "encoder_q-layer.1": 5071.2549, "encoder_q-layer.10": 3521.4993, "encoder_q-layer.11": 7375.9551, "encoder_q-layer.2": 6596.8184, "encoder_q-layer.3": 8019.1426, "encoder_q-layer.4": 9369.8467, "encoder_q-layer.5": 11211.0508, "encoder_q-layer.6": 10861.625, "encoder_q-layer.7": 7923.9683, "encoder_q-layer.8": 4831.9844, "encoder_q-layer.9": 3705.5652, "epoch": 0.92, "inbatch_neg_score": 1.5398, "inbatch_pos_score": 2.3047, "learning_rate": 3.277777777777778e-06, "loss": 2.5628, "norm_diff": 0.057, "norm_loss": 0.0, "num_token_doc": 66.8218, "num_token_overlap": 17.7934, "num_token_query": 52.2733, "num_token_union": 73.7459, "num_word_context": 202.2461, "num_word_doc": 49.8559, "num_word_query": 39.8465, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10592.2508, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.54, "query_norm": 1.7908, "queue_k_norm": 1.8503, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2733, "sent_len_1": 66.8218, "sent_len_max_0": 128.0, "sent_len_max_1": 210.195, "stdk": 0.0495, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 94100 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.5482, "doc_norm": 1.8538, "encoder_q-embeddings": 5436.4087, "encoder_q-layer.0": 3512.5103, "encoder_q-layer.1": 3934.8215, "encoder_q-layer.10": 7030.1821, "encoder_q-layer.11": 14592.2812, "encoder_q-layer.2": 4631.1885, "encoder_q-layer.3": 4801.8062, "encoder_q-layer.4": 5289.9434, "encoder_q-layer.5": 5618.0537, "encoder_q-layer.6": 6048.4902, "encoder_q-layer.7": 6383.4365, "encoder_q-layer.8": 7313.1094, "encoder_q-layer.9": 6751.4785, "epoch": 0.92, "inbatch_neg_score": 1.5414, "inbatch_pos_score": 2.3008, "learning_rate": 3.2222222222222222e-06, "loss": 2.5482, "norm_diff": 0.0598, "norm_loss": 0.0, "num_token_doc": 66.822, "num_token_overlap": 17.8218, "num_token_query": 52.3471, "num_token_union": 73.7621, "num_word_context": 202.3725, "num_word_doc": 49.8342, "num_word_query": 39.9284, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10025.7359, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.541, "query_norm": 1.794, "queue_k_norm": 1.851, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3471, "sent_len_1": 66.822, "sent_len_max_0": 128.0, "sent_len_max_1": 211.31, "stdk": 0.0498, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 94200 }, { "accuracy": 62.5488, "active_queue_size": 16384.0, "cl_loss": 2.5605, "doc_norm": 1.8557, "encoder_q-embeddings": 5390.436, "encoder_q-layer.0": 3566.5928, "encoder_q-layer.1": 4018.0835, "encoder_q-layer.10": 7010.5278, "encoder_q-layer.11": 14567.1465, "encoder_q-layer.2": 4822.9106, "encoder_q-layer.3": 5284.8896, "encoder_q-layer.4": 5913.2842, "encoder_q-layer.5": 6536.2993, "encoder_q-layer.6": 7309.4209, "encoder_q-layer.7": 7638.0659, "encoder_q-layer.8": 8175.3931, "encoder_q-layer.9": 7032.918, "epoch": 0.92, "inbatch_neg_score": 1.5429, "inbatch_pos_score": 2.3066, "learning_rate": 3.166666666666667e-06, "loss": 2.5605, "norm_diff": 0.062, "norm_loss": 0.0, "num_token_doc": 66.6717, "num_token_overlap": 17.8211, "num_token_query": 52.2264, "num_token_union": 73.6166, "num_word_context": 202.1566, "num_word_doc": 49.795, "num_word_query": 39.8392, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10654.162, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.543, "query_norm": 1.7937, "queue_k_norm": 1.851, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2264, "sent_len_1": 66.6717, "sent_len_max_0": 128.0, "sent_len_max_1": 207.4787, "stdk": 0.0498, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 94300 }, { "accuracy": 61.9629, "active_queue_size": 16384.0, "cl_loss": 2.5693, "doc_norm": 1.8539, "encoder_q-embeddings": 8087.1313, "encoder_q-layer.0": 5924.4419, "encoder_q-layer.1": 6564.5747, "encoder_q-layer.10": 7122.0518, "encoder_q-layer.11": 15325.123, "encoder_q-layer.2": 7763.3042, "encoder_q-layer.3": 8379.4131, "encoder_q-layer.4": 9322.8369, "encoder_q-layer.5": 10033.6807, "encoder_q-layer.6": 9752.3184, "encoder_q-layer.7": 9378.5693, "encoder_q-layer.8": 8375.4795, "encoder_q-layer.9": 6778.3101, "epoch": 0.92, "inbatch_neg_score": 1.5411, "inbatch_pos_score": 2.2969, "learning_rate": 3.111111111111111e-06, "loss": 2.5693, "norm_diff": 0.0644, "norm_loss": 0.0, "num_token_doc": 66.7925, "num_token_overlap": 17.7906, "num_token_query": 52.299, "num_token_union": 73.7549, "num_word_context": 202.4245, "num_word_doc": 49.8503, "num_word_query": 39.8889, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13342.417, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.541, "query_norm": 1.7895, "queue_k_norm": 1.8517, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.299, "sent_len_1": 66.7925, "sent_len_max_0": 128.0, "sent_len_max_1": 208.8975, "stdk": 0.0497, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 94400 }, { "accuracy": 62.5, "active_queue_size": 16384.0, "cl_loss": 2.5695, "doc_norm": 1.8525, "encoder_q-embeddings": 8729.0039, "encoder_q-layer.0": 6318.2388, "encoder_q-layer.1": 6399.4028, "encoder_q-layer.10": 7288.7725, "encoder_q-layer.11": 15182.0361, "encoder_q-layer.2": 8211.5996, "encoder_q-layer.3": 7786.6611, "encoder_q-layer.4": 7921.5698, "encoder_q-layer.5": 7933.8823, "encoder_q-layer.6": 8836.415, "encoder_q-layer.7": 8428.6777, "encoder_q-layer.8": 7648.8579, "encoder_q-layer.9": 6653.1499, "epoch": 0.92, "inbatch_neg_score": 1.5405, "inbatch_pos_score": 2.3125, "learning_rate": 3.0555555555555556e-06, "loss": 2.5695, "norm_diff": 0.0588, "norm_loss": 0.0, "num_token_doc": 66.7661, "num_token_overlap": 17.7935, "num_token_query": 52.2079, "num_token_union": 73.7208, "num_word_context": 202.4464, "num_word_doc": 49.8379, "num_word_query": 39.8345, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12756.2938, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.54, "query_norm": 1.7937, "queue_k_norm": 1.8526, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2079, "sent_len_1": 66.7661, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6875, "stdk": 0.0497, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 94500 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.562, "doc_norm": 1.8527, "encoder_q-embeddings": 5098.4941, "encoder_q-layer.0": 3266.5457, "encoder_q-layer.1": 3473.5151, "encoder_q-layer.10": 7153.6582, "encoder_q-layer.11": 15226.9238, "encoder_q-layer.2": 3968.1919, "encoder_q-layer.3": 4176.4702, "encoder_q-layer.4": 4498.75, "encoder_q-layer.5": 4577.0552, "encoder_q-layer.6": 5422.1943, "encoder_q-layer.7": 6259.3389, "encoder_q-layer.8": 7521.0039, "encoder_q-layer.9": 6912.5605, "epoch": 0.92, "inbatch_neg_score": 1.5421, "inbatch_pos_score": 2.3008, "learning_rate": 3e-06, "loss": 2.562, "norm_diff": 0.0589, "norm_loss": 0.0, "num_token_doc": 66.7122, "num_token_overlap": 17.782, "num_token_query": 52.3304, "num_token_union": 73.7415, "num_word_context": 202.6347, "num_word_doc": 49.7758, "num_word_query": 39.9052, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10029.3241, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.541, "query_norm": 1.7938, "queue_k_norm": 1.8527, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3304, "sent_len_1": 66.7122, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2812, "stdk": 0.0497, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 94600 }, { "accuracy": 63.3789, "active_queue_size": 16384.0, "cl_loss": 2.5607, "doc_norm": 1.8491, "encoder_q-embeddings": 4831.4014, "encoder_q-layer.0": 3070.8169, "encoder_q-layer.1": 3409.1487, "encoder_q-layer.10": 7651.7666, "encoder_q-layer.11": 15192.2217, "encoder_q-layer.2": 3779.7517, "encoder_q-layer.3": 4090.4871, "encoder_q-layer.4": 4727.2393, "encoder_q-layer.5": 5042.0239, "encoder_q-layer.6": 5658.4624, "encoder_q-layer.7": 6585.6064, "encoder_q-layer.8": 7675.0933, "encoder_q-layer.9": 6833.5059, "epoch": 0.92, "inbatch_neg_score": 1.5422, "inbatch_pos_score": 2.3125, "learning_rate": 2.9444444444444445e-06, "loss": 2.5607, "norm_diff": 0.0552, "norm_loss": 0.0, "num_token_doc": 66.7799, "num_token_overlap": 17.8013, "num_token_query": 52.3126, "num_token_union": 73.711, "num_word_context": 202.193, "num_word_doc": 49.798, "num_word_query": 39.9054, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10023.5285, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.543, "query_norm": 1.7939, "queue_k_norm": 1.8518, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3126, "sent_len_1": 66.7799, "sent_len_max_0": 128.0, "sent_len_max_1": 211.4775, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 94700 }, { "accuracy": 62.3047, "active_queue_size": 16384.0, "cl_loss": 2.5618, "doc_norm": 1.8503, "encoder_q-embeddings": 5241.9165, "encoder_q-layer.0": 3176.0447, "encoder_q-layer.1": 3579.8853, "encoder_q-layer.10": 7234.4189, "encoder_q-layer.11": 14779.1934, "encoder_q-layer.2": 3989.0435, "encoder_q-layer.3": 4377.8428, "encoder_q-layer.4": 4631.6592, "encoder_q-layer.5": 4900.6606, "encoder_q-layer.6": 5782.52, "encoder_q-layer.7": 6349.8789, "encoder_q-layer.8": 7528.3345, "encoder_q-layer.9": 6914.7026, "epoch": 0.93, "inbatch_neg_score": 1.5432, "inbatch_pos_score": 2.3145, "learning_rate": 2.888888888888889e-06, "loss": 2.5618, "norm_diff": 0.0509, "norm_loss": 0.0, "num_token_doc": 66.9079, "num_token_overlap": 17.8638, "num_token_query": 52.3316, "num_token_union": 73.7901, "num_word_context": 202.3285, "num_word_doc": 49.917, "num_word_query": 39.9091, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10017.2985, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.542, "query_norm": 1.7994, "queue_k_norm": 1.8535, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3316, "sent_len_1": 66.9079, "sent_len_max_0": 128.0, "sent_len_max_1": 208.9112, "stdk": 0.0495, "stdq": 0.0469, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 94800 }, { "accuracy": 63.5254, "active_queue_size": 16384.0, "cl_loss": 2.5601, "doc_norm": 1.8524, "encoder_q-embeddings": 4488.5781, "encoder_q-layer.0": 2925.1912, "encoder_q-layer.1": 3221.3042, "encoder_q-layer.10": 7141.4253, "encoder_q-layer.11": 15571.4922, "encoder_q-layer.2": 3576.8215, "encoder_q-layer.3": 3812.2485, "encoder_q-layer.4": 4069.0603, "encoder_q-layer.5": 4414.7114, "encoder_q-layer.6": 4995.4712, "encoder_q-layer.7": 6097.5674, "encoder_q-layer.8": 7003.9819, "encoder_q-layer.9": 6571.8237, "epoch": 0.93, "inbatch_neg_score": 1.5415, "inbatch_pos_score": 2.3125, "learning_rate": 2.8333333333333335e-06, "loss": 2.5601, "norm_diff": 0.0591, "norm_loss": 0.0, "num_token_doc": 66.9227, "num_token_overlap": 17.8037, "num_token_query": 52.2395, "num_token_union": 73.7546, "num_word_context": 202.672, "num_word_doc": 49.962, "num_word_query": 39.8593, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9710.4216, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.542, "query_norm": 1.7933, "queue_k_norm": 1.8531, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2395, "sent_len_1": 66.9227, "sent_len_max_0": 128.0, "sent_len_max_1": 207.08, "stdk": 0.0496, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 94900 }, { "accuracy": 61.9629, "active_queue_size": 16384.0, "cl_loss": 2.553, "doc_norm": 1.8479, "encoder_q-embeddings": 5079.833, "encoder_q-layer.0": 3310.668, "encoder_q-layer.1": 3821.1279, "encoder_q-layer.10": 6920.5278, "encoder_q-layer.11": 14226.9902, "encoder_q-layer.2": 4706.4229, "encoder_q-layer.3": 5055.4258, "encoder_q-layer.4": 5273.7817, "encoder_q-layer.5": 5545.3301, "encoder_q-layer.6": 5959.3003, "encoder_q-layer.7": 6162.479, "encoder_q-layer.8": 7059.2002, "encoder_q-layer.9": 6567.8306, "epoch": 0.93, "inbatch_neg_score": 1.5452, "inbatch_pos_score": 2.3066, "learning_rate": 2.777777777777778e-06, "loss": 2.553, "norm_diff": 0.0528, "norm_loss": 0.0, "num_token_doc": 66.729, "num_token_overlap": 17.8644, "num_token_query": 52.4185, "num_token_union": 73.7008, "num_word_context": 202.4404, "num_word_doc": 49.8108, "num_word_query": 40.008, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9986.9019, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5449, "query_norm": 1.7951, "queue_k_norm": 1.8528, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4185, "sent_len_1": 66.729, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5938, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 95000 }, { "accuracy": 60.791, "active_queue_size": 16384.0, "cl_loss": 2.5606, "doc_norm": 1.8524, "encoder_q-embeddings": 5351.0508, "encoder_q-layer.0": 3425.1685, "encoder_q-layer.1": 3932.2356, "encoder_q-layer.10": 7667.9326, "encoder_q-layer.11": 16139.4863, "encoder_q-layer.2": 4433.938, "encoder_q-layer.3": 4715.3833, "encoder_q-layer.4": 5299.0586, "encoder_q-layer.5": 5919.4995, "encoder_q-layer.6": 6458.3965, "encoder_q-layer.7": 7640.2822, "encoder_q-layer.8": 8366.2295, "encoder_q-layer.9": 7356.6865, "epoch": 0.93, "inbatch_neg_score": 1.545, "inbatch_pos_score": 2.2852, "learning_rate": 2.7222222222222224e-06, "loss": 2.5606, "norm_diff": 0.0561, "norm_loss": 0.0, "num_token_doc": 66.9283, "num_token_overlap": 17.8054, "num_token_query": 52.3468, "num_token_union": 73.849, "num_word_context": 202.532, "num_word_doc": 49.9185, "num_word_query": 39.9262, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10915.1171, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5439, "query_norm": 1.7963, "queue_k_norm": 1.8535, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3468, "sent_len_1": 66.9283, "sent_len_max_0": 128.0, "sent_len_max_1": 209.605, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 95100 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.558, "doc_norm": 1.854, "encoder_q-embeddings": 4821.2065, "encoder_q-layer.0": 3175.7554, "encoder_q-layer.1": 3378.7063, "encoder_q-layer.10": 7239.5586, "encoder_q-layer.11": 15264.4414, "encoder_q-layer.2": 3825.8271, "encoder_q-layer.3": 4136.2202, "encoder_q-layer.4": 4400.2715, "encoder_q-layer.5": 4637.8628, "encoder_q-layer.6": 5401.7681, "encoder_q-layer.7": 6279.5786, "encoder_q-layer.8": 7459.9883, "encoder_q-layer.9": 6684.229, "epoch": 0.93, "inbatch_neg_score": 1.5455, "inbatch_pos_score": 2.2969, "learning_rate": 2.666666666666667e-06, "loss": 2.558, "norm_diff": 0.0575, "norm_loss": 0.0, "num_token_doc": 66.8771, "num_token_overlap": 17.8776, "num_token_query": 52.4274, "num_token_union": 73.81, "num_word_context": 202.4752, "num_word_doc": 49.8898, "num_word_query": 39.9949, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9971.5164, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5449, "query_norm": 1.7965, "queue_k_norm": 1.8537, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4274, "sent_len_1": 66.8771, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6775, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 95200 }, { "accuracy": 62.8906, "active_queue_size": 16384.0, "cl_loss": 2.5688, "doc_norm": 1.8529, "encoder_q-embeddings": 2502.2991, "encoder_q-layer.0": 1645.5364, "encoder_q-layer.1": 1835.8594, "encoder_q-layer.10": 3452.0771, "encoder_q-layer.11": 7411.2407, "encoder_q-layer.2": 2097.0942, "encoder_q-layer.3": 2292.3232, "encoder_q-layer.4": 2448.0, "encoder_q-layer.5": 2589.863, "encoder_q-layer.6": 2866.9075, "encoder_q-layer.7": 3190.2676, "encoder_q-layer.8": 3614.8237, "encoder_q-layer.9": 3302.7664, "epoch": 0.93, "inbatch_neg_score": 1.5479, "inbatch_pos_score": 2.3203, "learning_rate": 2.6111111111111113e-06, "loss": 2.5688, "norm_diff": 0.054, "norm_loss": 0.0, "num_token_doc": 66.6447, "num_token_overlap": 17.7916, "num_token_query": 52.2407, "num_token_union": 73.6327, "num_word_context": 202.2928, "num_word_doc": 49.7527, "num_word_query": 39.8655, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5013.5357, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5479, "query_norm": 1.7989, "queue_k_norm": 1.8525, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2407, "sent_len_1": 66.6447, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1962, "stdk": 0.0496, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 95300 }, { "accuracy": 62.2559, "active_queue_size": 16384.0, "cl_loss": 2.5713, "doc_norm": 1.8555, "encoder_q-embeddings": 2540.167, "encoder_q-layer.0": 1611.0035, "encoder_q-layer.1": 1795.8342, "encoder_q-layer.10": 3615.4766, "encoder_q-layer.11": 7411.5742, "encoder_q-layer.2": 2061.5803, "encoder_q-layer.3": 2281.5811, "encoder_q-layer.4": 2385.9241, "encoder_q-layer.5": 2478.2173, "encoder_q-layer.6": 2858.0757, "encoder_q-layer.7": 3104.0171, "encoder_q-layer.8": 3773.5132, "encoder_q-layer.9": 3349.7693, "epoch": 0.93, "inbatch_neg_score": 1.5479, "inbatch_pos_score": 2.3105, "learning_rate": 2.5555555555555557e-06, "loss": 2.5713, "norm_diff": 0.0609, "norm_loss": 0.0, "num_token_doc": 66.6253, "num_token_overlap": 17.795, "num_token_query": 52.2666, "num_token_union": 73.6384, "num_word_context": 202.3355, "num_word_doc": 49.6992, "num_word_query": 39.8548, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4994.7548, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5469, "query_norm": 1.7946, "queue_k_norm": 1.8535, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2666, "sent_len_1": 66.6253, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2012, "stdk": 0.0497, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 95400 }, { "accuracy": 63.2812, "active_queue_size": 16384.0, "cl_loss": 2.5645, "doc_norm": 1.8557, "encoder_q-embeddings": 2230.2661, "encoder_q-layer.0": 1496.2349, "encoder_q-layer.1": 1642.2607, "encoder_q-layer.10": 3570.293, "encoder_q-layer.11": 7537.5273, "encoder_q-layer.2": 1892.6508, "encoder_q-layer.3": 2022.6702, "encoder_q-layer.4": 2247.053, "encoder_q-layer.5": 2322.9011, "encoder_q-layer.6": 2773.4141, "encoder_q-layer.7": 3213.5908, "encoder_q-layer.8": 4013.5015, "encoder_q-layer.9": 3422.1973, "epoch": 0.93, "inbatch_neg_score": 1.5492, "inbatch_pos_score": 2.3184, "learning_rate": 2.5e-06, "loss": 2.5645, "norm_diff": 0.0621, "norm_loss": 0.0, "num_token_doc": 66.7322, "num_token_overlap": 17.7933, "num_token_query": 52.179, "num_token_union": 73.6622, "num_word_context": 202.1898, "num_word_doc": 49.8076, "num_word_query": 39.797, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4942.4656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5479, "query_norm": 1.7936, "queue_k_norm": 1.8545, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.179, "sent_len_1": 66.7322, "sent_len_max_0": 128.0, "sent_len_max_1": 208.3175, "stdk": 0.0497, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 95500 }, { "accuracy": 60.498, "active_queue_size": 16384.0, "cl_loss": 2.5677, "doc_norm": 1.8551, "encoder_q-embeddings": 2539.6663, "encoder_q-layer.0": 1621.4766, "encoder_q-layer.1": 1803.3478, "encoder_q-layer.10": 3567.5615, "encoder_q-layer.11": 7499.2456, "encoder_q-layer.2": 2041.2042, "encoder_q-layer.3": 2160.1494, "encoder_q-layer.4": 2293.3115, "encoder_q-layer.5": 2296.071, "encoder_q-layer.6": 2636.0554, "encoder_q-layer.7": 2958.9873, "encoder_q-layer.8": 3640.333, "encoder_q-layer.9": 3454.929, "epoch": 0.93, "inbatch_neg_score": 1.547, "inbatch_pos_score": 2.3047, "learning_rate": 2.4444444444444447e-06, "loss": 2.5677, "norm_diff": 0.0561, "norm_loss": 0.0, "num_token_doc": 66.6738, "num_token_overlap": 17.7928, "num_token_query": 52.227, "num_token_union": 73.6501, "num_word_context": 202.1281, "num_word_doc": 49.7813, "num_word_query": 39.8395, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4940.2931, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5459, "query_norm": 1.799, "queue_k_norm": 1.853, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.227, "sent_len_1": 66.6738, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4087, "stdk": 0.0497, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 95600 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 2.5613, "doc_norm": 1.8549, "encoder_q-embeddings": 2581.0725, "encoder_q-layer.0": 1680.5717, "encoder_q-layer.1": 1911.0884, "encoder_q-layer.10": 3684.5728, "encoder_q-layer.11": 7759.0815, "encoder_q-layer.2": 2232.3879, "encoder_q-layer.3": 2359.8938, "encoder_q-layer.4": 2590.0679, "encoder_q-layer.5": 2918.374, "encoder_q-layer.6": 3028.7363, "encoder_q-layer.7": 3220.9766, "encoder_q-layer.8": 3744.9316, "encoder_q-layer.9": 3438.0962, "epoch": 0.93, "inbatch_neg_score": 1.5486, "inbatch_pos_score": 2.2871, "learning_rate": 2.388888888888889e-06, "loss": 2.5613, "norm_diff": 0.0611, "norm_loss": 0.0, "num_token_doc": 67.06, "num_token_overlap": 17.8453, "num_token_query": 52.3556, "num_token_union": 73.8503, "num_word_context": 202.4888, "num_word_doc": 49.9958, "num_word_query": 39.9239, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5263.6473, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5488, "query_norm": 1.7938, "queue_k_norm": 1.8539, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3556, "sent_len_1": 67.06, "sent_len_max_0": 128.0, "sent_len_max_1": 211.7287, "stdk": 0.0497, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 95700 }, { "accuracy": 62.3535, "active_queue_size": 16384.0, "cl_loss": 2.5578, "doc_norm": 1.8506, "encoder_q-embeddings": 2505.877, "encoder_q-layer.0": 1623.6852, "encoder_q-layer.1": 1772.6334, "encoder_q-layer.10": 3751.8132, "encoder_q-layer.11": 7597.5439, "encoder_q-layer.2": 1975.1023, "encoder_q-layer.3": 2114.009, "encoder_q-layer.4": 2339.5137, "encoder_q-layer.5": 2422.554, "encoder_q-layer.6": 2876.7874, "encoder_q-layer.7": 3076.0906, "encoder_q-layer.8": 3687.7627, "encoder_q-layer.9": 3432.8386, "epoch": 0.94, "inbatch_neg_score": 1.5494, "inbatch_pos_score": 2.3184, "learning_rate": 2.3333333333333336e-06, "loss": 2.5578, "norm_diff": 0.0558, "norm_loss": 0.0, "num_token_doc": 66.7287, "num_token_overlap": 17.8059, "num_token_query": 52.1769, "num_token_union": 73.6211, "num_word_context": 202.2579, "num_word_doc": 49.8089, "num_word_query": 39.8073, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5107.0289, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5488, "query_norm": 1.7948, "queue_k_norm": 1.8534, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1769, "sent_len_1": 66.7287, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1962, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 95800 }, { "accuracy": 60.791, "active_queue_size": 16384.0, "cl_loss": 2.546, "doc_norm": 1.8513, "encoder_q-embeddings": 2440.416, "encoder_q-layer.0": 1553.7657, "encoder_q-layer.1": 1730.3619, "encoder_q-layer.10": 3556.9529, "encoder_q-layer.11": 7642.0859, "encoder_q-layer.2": 1971.5601, "encoder_q-layer.3": 2076.3843, "encoder_q-layer.4": 2244.1309, "encoder_q-layer.5": 2405.4692, "encoder_q-layer.6": 2784.1501, "encoder_q-layer.7": 3085.8542, "encoder_q-layer.8": 3684.0542, "encoder_q-layer.9": 3445.5234, "epoch": 0.94, "inbatch_neg_score": 1.5521, "inbatch_pos_score": 2.3105, "learning_rate": 2.277777777777778e-06, "loss": 2.546, "norm_diff": 0.0556, "norm_loss": 0.0, "num_token_doc": 66.764, "num_token_overlap": 17.8214, "num_token_query": 52.3083, "num_token_union": 73.694, "num_word_context": 202.4047, "num_word_doc": 49.8351, "num_word_query": 39.8816, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5036.5163, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5518, "query_norm": 1.7957, "queue_k_norm": 1.8547, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3083, "sent_len_1": 66.764, "sent_len_max_0": 128.0, "sent_len_max_1": 208.405, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 95900 }, { "accuracy": 63.1348, "active_queue_size": 16384.0, "cl_loss": 2.5437, "doc_norm": 1.8551, "encoder_q-embeddings": 2435.1453, "encoder_q-layer.0": 1521.3724, "encoder_q-layer.1": 1636.618, "encoder_q-layer.10": 3623.3179, "encoder_q-layer.11": 7598.834, "encoder_q-layer.2": 1851.5754, "encoder_q-layer.3": 1989.9366, "encoder_q-layer.4": 2189.5608, "encoder_q-layer.5": 2215.0046, "encoder_q-layer.6": 2552.3523, "encoder_q-layer.7": 3013.4209, "encoder_q-layer.8": 3680.4922, "encoder_q-layer.9": 3391.3657, "epoch": 0.94, "inbatch_neg_score": 1.5515, "inbatch_pos_score": 2.3164, "learning_rate": 2.2222222222222225e-06, "loss": 2.5437, "norm_diff": 0.0557, "norm_loss": 0.0, "num_token_doc": 66.8887, "num_token_overlap": 17.8535, "num_token_query": 52.4613, "num_token_union": 73.8738, "num_word_context": 202.4698, "num_word_doc": 49.9147, "num_word_query": 40.0164, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4860.2975, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5508, "query_norm": 1.7994, "queue_k_norm": 1.8561, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4613, "sent_len_1": 66.8887, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5563, "stdk": 0.0496, "stdq": 0.0468, "stdqueue_k": 0.0498, "stdqueue_q": 0.0, "step": 96000 }, { "accuracy": 62.2559, "active_queue_size": 16384.0, "cl_loss": 2.5558, "doc_norm": 1.8547, "encoder_q-embeddings": 2467.1685, "encoder_q-layer.0": 1652.4675, "encoder_q-layer.1": 1841.5049, "encoder_q-layer.10": 3601.1743, "encoder_q-layer.11": 7341.6987, "encoder_q-layer.2": 2151.8318, "encoder_q-layer.3": 2284.6965, "encoder_q-layer.4": 2507.429, "encoder_q-layer.5": 2523.3201, "encoder_q-layer.6": 2900.6096, "encoder_q-layer.7": 3272.1477, "encoder_q-layer.8": 3946.6694, "encoder_q-layer.9": 3518.4226, "epoch": 0.94, "inbatch_neg_score": 1.5516, "inbatch_pos_score": 2.3125, "learning_rate": 2.166666666666667e-06, "loss": 2.5558, "norm_diff": 0.0549, "norm_loss": 0.0, "num_token_doc": 66.5661, "num_token_overlap": 17.8241, "num_token_query": 52.3502, "num_token_union": 73.6529, "num_word_context": 202.0591, "num_word_doc": 49.6922, "num_word_query": 39.9441, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5024.4484, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5518, "query_norm": 1.7997, "queue_k_norm": 1.8538, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3502, "sent_len_1": 66.5661, "sent_len_max_0": 128.0, "sent_len_max_1": 208.105, "stdk": 0.0496, "stdq": 0.0468, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 96100 }, { "accuracy": 63.0859, "active_queue_size": 16384.0, "cl_loss": 2.5663, "doc_norm": 1.8589, "encoder_q-embeddings": 2628.3115, "encoder_q-layer.0": 1704.526, "encoder_q-layer.1": 1817.3896, "encoder_q-layer.10": 3549.0085, "encoder_q-layer.11": 7499.9697, "encoder_q-layer.2": 2040.8553, "encoder_q-layer.3": 2124.8208, "encoder_q-layer.4": 2248.9639, "encoder_q-layer.5": 2317.7009, "encoder_q-layer.6": 2613.6365, "encoder_q-layer.7": 2911.7522, "encoder_q-layer.8": 3731.5566, "encoder_q-layer.9": 3357.0247, "epoch": 0.94, "inbatch_neg_score": 1.5523, "inbatch_pos_score": 2.3262, "learning_rate": 2.1111111111111114e-06, "loss": 2.5663, "norm_diff": 0.0582, "norm_loss": 0.0, "num_token_doc": 66.7486, "num_token_overlap": 17.786, "num_token_query": 52.211, "num_token_union": 73.6678, "num_word_context": 202.2997, "num_word_doc": 49.8101, "num_word_query": 39.821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5038.5667, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5527, "query_norm": 1.8007, "queue_k_norm": 1.8546, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.211, "sent_len_1": 66.7486, "sent_len_max_0": 128.0, "sent_len_max_1": 208.6538, "stdk": 0.0498, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 96200 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.5677, "doc_norm": 1.8546, "encoder_q-embeddings": 3132.3477, "encoder_q-layer.0": 2157.1697, "encoder_q-layer.1": 2402.2961, "encoder_q-layer.10": 3640.6665, "encoder_q-layer.11": 7583.459, "encoder_q-layer.2": 2728.1499, "encoder_q-layer.3": 2923.1116, "encoder_q-layer.4": 3194.1011, "encoder_q-layer.5": 3217.3247, "encoder_q-layer.6": 3461.5684, "encoder_q-layer.7": 3561.6753, "encoder_q-layer.8": 4005.3472, "encoder_q-layer.9": 3479.8164, "epoch": 0.94, "inbatch_neg_score": 1.5542, "inbatch_pos_score": 2.3066, "learning_rate": 2.055555555555556e-06, "loss": 2.5677, "norm_diff": 0.0553, "norm_loss": 0.0, "num_token_doc": 66.7916, "num_token_overlap": 17.8369, "num_token_query": 52.45, "num_token_union": 73.79, "num_word_context": 202.341, "num_word_doc": 49.8202, "num_word_query": 40.0172, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5618.8, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5537, "query_norm": 1.7993, "queue_k_norm": 1.8561, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.45, "sent_len_1": 66.7916, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2438, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 96300 }, { "accuracy": 61.1816, "active_queue_size": 16384.0, "cl_loss": 2.5709, "doc_norm": 1.8541, "encoder_q-embeddings": 4687.7861, "encoder_q-layer.0": 3122.4473, "encoder_q-layer.1": 3866.5266, "encoder_q-layer.10": 3497.4553, "encoder_q-layer.11": 7350.9126, "encoder_q-layer.2": 4396.5146, "encoder_q-layer.3": 4859.9316, "encoder_q-layer.4": 5217.2632, "encoder_q-layer.5": 6010.6558, "encoder_q-layer.6": 6727.0732, "encoder_q-layer.7": 5086.4351, "encoder_q-layer.8": 3937.2134, "encoder_q-layer.9": 3314.803, "epoch": 0.94, "inbatch_neg_score": 1.5542, "inbatch_pos_score": 2.3125, "learning_rate": 2.0000000000000003e-06, "loss": 2.5709, "norm_diff": 0.0574, "norm_loss": 0.0, "num_token_doc": 66.9476, "num_token_overlap": 17.7888, "num_token_query": 52.1466, "num_token_union": 73.7409, "num_word_context": 202.4439, "num_word_doc": 49.9111, "num_word_query": 39.767, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7294.8652, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5527, "query_norm": 1.7967, "queue_k_norm": 1.8559, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1466, "sent_len_1": 66.9476, "sent_len_max_0": 128.0, "sent_len_max_1": 209.6863, "stdk": 0.0496, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 96400 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.5634, "doc_norm": 1.8529, "encoder_q-embeddings": 2609.6411, "encoder_q-layer.0": 1677.2096, "encoder_q-layer.1": 1829.934, "encoder_q-layer.10": 3626.7046, "encoder_q-layer.11": 7593.9331, "encoder_q-layer.2": 2084.3625, "encoder_q-layer.3": 2168.7815, "encoder_q-layer.4": 2309.7832, "encoder_q-layer.5": 2447.9712, "encoder_q-layer.6": 2831.936, "encoder_q-layer.7": 3123.1262, "encoder_q-layer.8": 3765.5198, "encoder_q-layer.9": 3472.6631, "epoch": 0.94, "inbatch_neg_score": 1.5541, "inbatch_pos_score": 2.2852, "learning_rate": 1.9444444444444444e-06, "loss": 2.5634, "norm_diff": 0.0628, "norm_loss": 0.0, "num_token_doc": 66.7857, "num_token_overlap": 17.8623, "num_token_query": 52.273, "num_token_union": 73.6546, "num_word_context": 202.1647, "num_word_doc": 49.8108, "num_word_query": 39.8677, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5103.9454, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5537, "query_norm": 1.7901, "queue_k_norm": 1.8545, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.273, "sent_len_1": 66.7857, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3462, "stdk": 0.0495, "stdq": 0.0463, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 96500 }, { "accuracy": 62.4023, "active_queue_size": 16384.0, "cl_loss": 2.5612, "doc_norm": 1.858, "encoder_q-embeddings": 2401.0417, "encoder_q-layer.0": 1541.7645, "encoder_q-layer.1": 1677.8595, "encoder_q-layer.10": 3623.7705, "encoder_q-layer.11": 7355.0205, "encoder_q-layer.2": 1929.8383, "encoder_q-layer.3": 2109.7964, "encoder_q-layer.4": 2429.0393, "encoder_q-layer.5": 2510.9634, "encoder_q-layer.6": 2933.8582, "encoder_q-layer.7": 3261.4409, "encoder_q-layer.8": 3729.8372, "encoder_q-layer.9": 3369.6094, "epoch": 0.94, "inbatch_neg_score": 1.5531, "inbatch_pos_score": 2.334, "learning_rate": 1.888888888888889e-06, "loss": 2.5612, "norm_diff": 0.0581, "norm_loss": 0.0, "num_token_doc": 66.7094, "num_token_overlap": 17.7968, "num_token_query": 52.2472, "num_token_union": 73.6895, "num_word_context": 202.4701, "num_word_doc": 49.8013, "num_word_query": 39.8477, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4933.9736, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5527, "query_norm": 1.7999, "queue_k_norm": 1.8556, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2472, "sent_len_1": 66.7094, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1887, "stdk": 0.0498, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 96600 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 2.5619, "doc_norm": 1.8575, "encoder_q-embeddings": 7475.708, "encoder_q-layer.0": 5033.7642, "encoder_q-layer.1": 6412.125, "encoder_q-layer.10": 3536.3931, "encoder_q-layer.11": 7351.8516, "encoder_q-layer.2": 7750.9961, "encoder_q-layer.3": 7447.4146, "encoder_q-layer.4": 6820.915, "encoder_q-layer.5": 6062.2012, "encoder_q-layer.6": 5146.501, "encoder_q-layer.7": 4859.9053, "encoder_q-layer.8": 3886.4502, "encoder_q-layer.9": 3403.6516, "epoch": 0.94, "inbatch_neg_score": 1.5556, "inbatch_pos_score": 2.3164, "learning_rate": 1.8333333333333335e-06, "loss": 2.5619, "norm_diff": 0.0609, "norm_loss": 0.0, "num_token_doc": 66.5817, "num_token_overlap": 17.8235, "num_token_query": 52.2217, "num_token_union": 73.5451, "num_word_context": 202.0141, "num_word_doc": 49.7033, "num_word_query": 39.8335, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9002.2748, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5547, "query_norm": 1.7966, "queue_k_norm": 1.8558, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2217, "sent_len_1": 66.5817, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1375, "stdk": 0.0497, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 96700 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.5713, "doc_norm": 1.8569, "encoder_q-embeddings": 2913.6167, "encoder_q-layer.0": 1907.0062, "encoder_q-layer.1": 2189.0195, "encoder_q-layer.10": 3755.7129, "encoder_q-layer.11": 7830.0269, "encoder_q-layer.2": 2539.886, "encoder_q-layer.3": 2704.6616, "encoder_q-layer.4": 2907.2563, "encoder_q-layer.5": 2997.6169, "encoder_q-layer.6": 3390.3071, "encoder_q-layer.7": 3537.7876, "encoder_q-layer.8": 4036.5381, "encoder_q-layer.9": 3625.8301, "epoch": 0.95, "inbatch_neg_score": 1.5542, "inbatch_pos_score": 2.3047, "learning_rate": 1.777777777777778e-06, "loss": 2.5713, "norm_diff": 0.0645, "norm_loss": 0.0, "num_token_doc": 66.6333, "num_token_overlap": 17.7727, "num_token_query": 52.2501, "num_token_union": 73.6173, "num_word_context": 201.9774, "num_word_doc": 49.7223, "num_word_query": 39.8544, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5495.9361, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5537, "query_norm": 1.7924, "queue_k_norm": 1.8557, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2501, "sent_len_1": 66.6333, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7962, "stdk": 0.0497, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 96800 }, { "accuracy": 59.4238, "active_queue_size": 16384.0, "cl_loss": 2.5715, "doc_norm": 1.8527, "encoder_q-embeddings": 3171.666, "encoder_q-layer.0": 2311.4595, "encoder_q-layer.1": 2494.9805, "encoder_q-layer.10": 3863.8259, "encoder_q-layer.11": 8046.6348, "encoder_q-layer.2": 2676.8508, "encoder_q-layer.3": 2781.3621, "encoder_q-layer.4": 2891.031, "encoder_q-layer.5": 3045.572, "encoder_q-layer.6": 3510.6411, "encoder_q-layer.7": 3632.9243, "encoder_q-layer.8": 4010.363, "encoder_q-layer.9": 3570.113, "epoch": 0.95, "inbatch_neg_score": 1.5551, "inbatch_pos_score": 2.2969, "learning_rate": 1.7222222222222222e-06, "loss": 2.5715, "norm_diff": 0.0557, "norm_loss": 0.0, "num_token_doc": 66.8428, "num_token_overlap": 17.8275, "num_token_query": 52.3779, "num_token_union": 73.7876, "num_word_context": 202.4323, "num_word_doc": 49.874, "num_word_query": 39.9552, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5647.2728, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5547, "query_norm": 1.7971, "queue_k_norm": 1.8554, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3779, "sent_len_1": 66.8428, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2375, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 96900 }, { "accuracy": 62.0605, "active_queue_size": 16384.0, "cl_loss": 2.5631, "doc_norm": 1.8577, "encoder_q-embeddings": 2890.5276, "encoder_q-layer.0": 1853.3782, "encoder_q-layer.1": 2094.2334, "encoder_q-layer.10": 3910.4648, "encoder_q-layer.11": 7767.0811, "encoder_q-layer.2": 2379.5076, "encoder_q-layer.3": 2602.9375, "encoder_q-layer.4": 2844.0129, "encoder_q-layer.5": 2870.3315, "encoder_q-layer.6": 3247.0879, "encoder_q-layer.7": 3417.4111, "encoder_q-layer.8": 3764.8169, "encoder_q-layer.9": 3453.8523, "epoch": 0.95, "inbatch_neg_score": 1.554, "inbatch_pos_score": 2.3086, "learning_rate": 1.6666666666666667e-06, "loss": 2.5631, "norm_diff": 0.0681, "norm_loss": 0.0, "num_token_doc": 66.8382, "num_token_overlap": 17.8472, "num_token_query": 52.3054, "num_token_union": 73.7276, "num_word_context": 202.3193, "num_word_doc": 49.8902, "num_word_query": 39.9033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5448.0379, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5547, "query_norm": 1.7896, "queue_k_norm": 1.8565, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3054, "sent_len_1": 66.8382, "sent_len_max_0": 128.0, "sent_len_max_1": 208.4187, "stdk": 0.0497, "stdq": 0.0462, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 97000 }, { "accuracy": 60.2051, "active_queue_size": 16384.0, "cl_loss": 2.5603, "doc_norm": 1.8553, "encoder_q-embeddings": 2940.6113, "encoder_q-layer.0": 1909.6261, "encoder_q-layer.1": 2145.4927, "encoder_q-layer.10": 3545.479, "encoder_q-layer.11": 7471.3433, "encoder_q-layer.2": 2503.2505, "encoder_q-layer.3": 2739.4185, "encoder_q-layer.4": 2955.5977, "encoder_q-layer.5": 2957.1699, "encoder_q-layer.6": 3079.6968, "encoder_q-layer.7": 3224.6418, "encoder_q-layer.8": 3816.9204, "encoder_q-layer.9": 3388.7715, "epoch": 0.95, "inbatch_neg_score": 1.556, "inbatch_pos_score": 2.3008, "learning_rate": 1.6111111111111111e-06, "loss": 2.5603, "norm_diff": 0.0643, "norm_loss": 0.0, "num_token_doc": 66.7405, "num_token_overlap": 17.8009, "num_token_query": 52.287, "num_token_union": 73.6891, "num_word_context": 202.2723, "num_word_doc": 49.7983, "num_word_query": 39.8679, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5294.3185, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5557, "query_norm": 1.791, "queue_k_norm": 1.8555, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.287, "sent_len_1": 66.7405, "sent_len_max_0": 128.0, "sent_len_max_1": 208.525, "stdk": 0.0496, "stdq": 0.0463, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 97100 }, { "accuracy": 62.5488, "active_queue_size": 16384.0, "cl_loss": 2.5579, "doc_norm": 1.8579, "encoder_q-embeddings": 2744.3923, "encoder_q-layer.0": 1786.1414, "encoder_q-layer.1": 2004.6431, "encoder_q-layer.10": 3610.0034, "encoder_q-layer.11": 7530.7183, "encoder_q-layer.2": 2274.7834, "encoder_q-layer.3": 2465.6941, "encoder_q-layer.4": 2626.3015, "encoder_q-layer.5": 2591.6943, "encoder_q-layer.6": 3032.3762, "encoder_q-layer.7": 3342.5798, "encoder_q-layer.8": 3784.7312, "encoder_q-layer.9": 3381.8999, "epoch": 0.95, "inbatch_neg_score": 1.556, "inbatch_pos_score": 2.3164, "learning_rate": 1.5555555555555556e-06, "loss": 2.5579, "norm_diff": 0.0594, "norm_loss": 0.0, "num_token_doc": 66.8596, "num_token_overlap": 17.8893, "num_token_query": 52.347, "num_token_union": 73.7183, "num_word_context": 202.0272, "num_word_doc": 49.8865, "num_word_query": 39.9387, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5213.1327, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5557, "query_norm": 1.7984, "queue_k_norm": 1.8551, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.347, "sent_len_1": 66.8596, "sent_len_max_0": 128.0, "sent_len_max_1": 210.6625, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 97200 }, { "accuracy": 62.9395, "active_queue_size": 16384.0, "cl_loss": 2.5488, "doc_norm": 1.8595, "encoder_q-embeddings": 4917.1079, "encoder_q-layer.0": 3329.3501, "encoder_q-layer.1": 3623.9355, "encoder_q-layer.10": 6937.3657, "encoder_q-layer.11": 14783.8545, "encoder_q-layer.2": 4270.2646, "encoder_q-layer.3": 4434.5103, "encoder_q-layer.4": 4792.0181, "encoder_q-layer.5": 4996.1377, "encoder_q-layer.6": 5331.3867, "encoder_q-layer.7": 6015.3359, "encoder_q-layer.8": 7246.0146, "encoder_q-layer.9": 6507.8672, "epoch": 0.95, "inbatch_neg_score": 1.5568, "inbatch_pos_score": 2.3262, "learning_rate": 1.5e-06, "loss": 2.5488, "norm_diff": 0.0573, "norm_loss": 0.0, "num_token_doc": 66.7237, "num_token_overlap": 17.8062, "num_token_query": 52.2284, "num_token_union": 73.6671, "num_word_context": 202.0323, "num_word_doc": 49.7508, "num_word_query": 39.8303, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9980.4771, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5566, "query_norm": 1.8021, "queue_k_norm": 1.8563, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2284, "sent_len_1": 66.7237, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7175, "stdk": 0.0498, "stdq": 0.0469, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 97300 }, { "accuracy": 61.6699, "active_queue_size": 16384.0, "cl_loss": 2.5768, "doc_norm": 1.856, "encoder_q-embeddings": 5019.0981, "encoder_q-layer.0": 3362.0339, "encoder_q-layer.1": 3806.7559, "encoder_q-layer.10": 7039.0444, "encoder_q-layer.11": 15240.3125, "encoder_q-layer.2": 4426.7358, "encoder_q-layer.3": 4695.188, "encoder_q-layer.4": 5098.5601, "encoder_q-layer.5": 5173.1626, "encoder_q-layer.6": 6104.1094, "encoder_q-layer.7": 6326.2744, "encoder_q-layer.8": 7271.2993, "encoder_q-layer.9": 6630.7158, "epoch": 0.95, "inbatch_neg_score": 1.5583, "inbatch_pos_score": 2.3125, "learning_rate": 1.4444444444444445e-06, "loss": 2.5768, "norm_diff": 0.0589, "norm_loss": 0.0, "num_token_doc": 66.6769, "num_token_overlap": 17.7308, "num_token_query": 52.2447, "num_token_union": 73.7083, "num_word_context": 202.3922, "num_word_doc": 49.7422, "num_word_query": 39.8323, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10243.6872, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5576, "query_norm": 1.7971, "queue_k_norm": 1.8555, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2447, "sent_len_1": 66.6769, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3162, "stdk": 0.0496, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 97400 }, { "accuracy": 62.5, "active_queue_size": 16384.0, "cl_loss": 2.559, "doc_norm": 1.8586, "encoder_q-embeddings": 5436.5049, "encoder_q-layer.0": 3466.2061, "encoder_q-layer.1": 3928.312, "encoder_q-layer.10": 7319.4395, "encoder_q-layer.11": 15465.0166, "encoder_q-layer.2": 4660.3442, "encoder_q-layer.3": 4997.4546, "encoder_q-layer.4": 5523.7061, "encoder_q-layer.5": 5698.8525, "encoder_q-layer.6": 6379.0669, "encoder_q-layer.7": 6627.0508, "encoder_q-layer.8": 7360.7744, "encoder_q-layer.9": 6900.6841, "epoch": 0.95, "inbatch_neg_score": 1.5571, "inbatch_pos_score": 2.3262, "learning_rate": 1.388888888888889e-06, "loss": 2.559, "norm_diff": 0.0578, "norm_loss": 0.0, "num_token_doc": 66.8428, "num_token_overlap": 17.8403, "num_token_query": 52.346, "num_token_union": 73.7787, "num_word_context": 202.5135, "num_word_doc": 49.8781, "num_word_query": 39.9307, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10569.3392, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5576, "query_norm": 1.8008, "queue_k_norm": 1.8563, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.346, "sent_len_1": 66.8428, "sent_len_max_0": 128.0, "sent_len_max_1": 211.1475, "stdk": 0.0497, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 97500 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.5609, "doc_norm": 1.8559, "encoder_q-embeddings": 10781.1455, "encoder_q-layer.0": 7598.7007, "encoder_q-layer.1": 8100.7998, "encoder_q-layer.10": 7112.168, "encoder_q-layer.11": 14604.8486, "encoder_q-layer.2": 9420.2441, "encoder_q-layer.3": 10054.8916, "encoder_q-layer.4": 10562.291, "encoder_q-layer.5": 13157.6367, "encoder_q-layer.6": 14448.4697, "encoder_q-layer.7": 13146.9248, "encoder_q-layer.8": 10666.6221, "encoder_q-layer.9": 7315.5308, "epoch": 0.95, "inbatch_neg_score": 1.5574, "inbatch_pos_score": 2.3125, "learning_rate": 1.3333333333333334e-06, "loss": 2.5609, "norm_diff": 0.0617, "norm_loss": 0.0, "num_token_doc": 66.8761, "num_token_overlap": 17.8134, "num_token_query": 52.2854, "num_token_union": 73.773, "num_word_context": 202.3383, "num_word_doc": 49.9057, "num_word_query": 39.8792, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16326.5448, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5576, "query_norm": 1.7941, "queue_k_norm": 1.8558, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2854, "sent_len_1": 66.8761, "sent_len_max_0": 128.0, "sent_len_max_1": 207.1687, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 97600 }, { "accuracy": 63.4766, "active_queue_size": 16384.0, "cl_loss": 2.5604, "doc_norm": 1.8562, "encoder_q-embeddings": 4395.4546, "encoder_q-layer.0": 2838.6335, "encoder_q-layer.1": 3075.897, "encoder_q-layer.10": 7340.0273, "encoder_q-layer.11": 15336.1201, "encoder_q-layer.2": 3454.9407, "encoder_q-layer.3": 3673.8096, "encoder_q-layer.4": 3999.1797, "encoder_q-layer.5": 4288.4438, "encoder_q-layer.6": 5035.8442, "encoder_q-layer.7": 6046.5391, "encoder_q-layer.8": 7121.6191, "encoder_q-layer.9": 6673.8726, "epoch": 0.95, "inbatch_neg_score": 1.5585, "inbatch_pos_score": 2.3262, "learning_rate": 1.2777777777777779e-06, "loss": 2.5604, "norm_diff": 0.0612, "norm_loss": 0.0, "num_token_doc": 66.6714, "num_token_overlap": 17.8052, "num_token_query": 52.3091, "num_token_union": 73.6848, "num_word_context": 202.1791, "num_word_doc": 49.7583, "num_word_query": 39.8845, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9669.0018, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5576, "query_norm": 1.795, "queue_k_norm": 1.8549, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3091, "sent_len_1": 66.6714, "sent_len_max_0": 128.0, "sent_len_max_1": 208.51, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 97700 }, { "accuracy": 61.5234, "active_queue_size": 16384.0, "cl_loss": 2.5684, "doc_norm": 1.8576, "encoder_q-embeddings": 4604.2441, "encoder_q-layer.0": 2949.4487, "encoder_q-layer.1": 3220.6604, "encoder_q-layer.10": 7098.0225, "encoder_q-layer.11": 15408.5146, "encoder_q-layer.2": 3829.814, "encoder_q-layer.3": 4020.1956, "encoder_q-layer.4": 4541.3765, "encoder_q-layer.5": 4649.5908, "encoder_q-layer.6": 5430.0376, "encoder_q-layer.7": 6059.8823, "encoder_q-layer.8": 7259.4434, "encoder_q-layer.9": 6828.8599, "epoch": 0.95, "inbatch_neg_score": 1.5587, "inbatch_pos_score": 2.3242, "learning_rate": 1.2222222222222223e-06, "loss": 2.5684, "norm_diff": 0.0577, "norm_loss": 0.0, "num_token_doc": 66.674, "num_token_overlap": 17.7676, "num_token_query": 52.2371, "num_token_union": 73.6488, "num_word_context": 202.2938, "num_word_doc": 49.7951, "num_word_query": 39.8524, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9704.1891, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5576, "query_norm": 1.7998, "queue_k_norm": 1.8569, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2371, "sent_len_1": 66.674, "sent_len_max_0": 128.0, "sent_len_max_1": 208.7738, "stdk": 0.0497, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 97800 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.555, "doc_norm": 1.8553, "encoder_q-embeddings": 6640.7427, "encoder_q-layer.0": 4557.3789, "encoder_q-layer.1": 5229.0508, "encoder_q-layer.10": 7264.2041, "encoder_q-layer.11": 15131.2959, "encoder_q-layer.2": 6030.2607, "encoder_q-layer.3": 6589.2686, "encoder_q-layer.4": 7162.5791, "encoder_q-layer.5": 7472.3296, "encoder_q-layer.6": 8083.9873, "encoder_q-layer.7": 7779.3193, "encoder_q-layer.8": 8312.1387, "encoder_q-layer.9": 6771.6553, "epoch": 0.96, "inbatch_neg_score": 1.5599, "inbatch_pos_score": 2.2969, "learning_rate": 1.1666666666666668e-06, "loss": 2.555, "norm_diff": 0.057, "norm_loss": 0.0, "num_token_doc": 66.8058, "num_token_overlap": 17.8473, "num_token_query": 52.2591, "num_token_union": 73.6747, "num_word_context": 202.2938, "num_word_doc": 49.8679, "num_word_query": 39.8447, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11619.1105, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5596, "query_norm": 1.7983, "queue_k_norm": 1.8557, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2591, "sent_len_1": 66.8058, "sent_len_max_0": 128.0, "sent_len_max_1": 208.2725, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 97900 }, { "accuracy": 63.8184, "active_queue_size": 16384.0, "cl_loss": 2.5594, "doc_norm": 1.8613, "encoder_q-embeddings": 5097.6675, "encoder_q-layer.0": 3385.696, "encoder_q-layer.1": 3883.2866, "encoder_q-layer.10": 7178.2397, "encoder_q-layer.11": 15112.4336, "encoder_q-layer.2": 4540.0093, "encoder_q-layer.3": 4954.6475, "encoder_q-layer.4": 6033.291, "encoder_q-layer.5": 6009.4561, "encoder_q-layer.6": 6560.3457, "encoder_q-layer.7": 7044.3633, "encoder_q-layer.8": 7645.2729, "encoder_q-layer.9": 6911.3975, "epoch": 0.96, "inbatch_neg_score": 1.5594, "inbatch_pos_score": 2.3359, "learning_rate": 1.1111111111111112e-06, "loss": 2.5594, "norm_diff": 0.0591, "norm_loss": 0.0, "num_token_doc": 66.5443, "num_token_overlap": 17.7802, "num_token_query": 52.172, "num_token_union": 73.5776, "num_word_context": 201.9463, "num_word_doc": 49.6871, "num_word_query": 39.7773, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10566.801, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5596, "query_norm": 1.8021, "queue_k_norm": 1.8586, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.172, "sent_len_1": 66.5443, "sent_len_max_0": 128.0, "sent_len_max_1": 208.1925, "stdk": 0.0498, "stdq": 0.0468, "stdqueue_k": 0.0498, "stdqueue_q": 0.0, "step": 98000 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.5581, "doc_norm": 1.8513, "encoder_q-embeddings": 7909.665, "encoder_q-layer.0": 5211.6504, "encoder_q-layer.1": 6047.3867, "encoder_q-layer.10": 7593.2085, "encoder_q-layer.11": 15997.3711, "encoder_q-layer.2": 7643.4067, "encoder_q-layer.3": 8480.2373, "encoder_q-layer.4": 9250.6035, "encoder_q-layer.5": 10350.1748, "encoder_q-layer.6": 8972.1064, "encoder_q-layer.7": 9064.2568, "encoder_q-layer.8": 8404.0264, "encoder_q-layer.9": 7398.1479, "epoch": 0.96, "inbatch_neg_score": 1.559, "inbatch_pos_score": 2.2969, "learning_rate": 1.0555555555555557e-06, "loss": 2.5581, "norm_diff": 0.0538, "norm_loss": 0.0, "num_token_doc": 66.8265, "num_token_overlap": 17.7987, "num_token_query": 52.2344, "num_token_union": 73.7229, "num_word_context": 202.2718, "num_word_doc": 49.8506, "num_word_query": 39.8231, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13335.879, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5596, "query_norm": 1.7975, "queue_k_norm": 1.8559, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2344, "sent_len_1": 66.8265, "sent_len_max_0": 128.0, "sent_len_max_1": 210.7488, "stdk": 0.0494, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 98100 }, { "accuracy": 61.2793, "active_queue_size": 16384.0, "cl_loss": 2.5571, "doc_norm": 1.8596, "encoder_q-embeddings": 5240.2837, "encoder_q-layer.0": 3354.8049, "encoder_q-layer.1": 3776.6921, "encoder_q-layer.10": 6827.5703, "encoder_q-layer.11": 15094.9434, "encoder_q-layer.2": 4235.6748, "encoder_q-layer.3": 4484.5391, "encoder_q-layer.4": 4801.4297, "encoder_q-layer.5": 5084.8989, "encoder_q-layer.6": 5757.2139, "encoder_q-layer.7": 6226.8726, "encoder_q-layer.8": 7758.3599, "encoder_q-layer.9": 6958.4038, "epoch": 0.96, "inbatch_neg_score": 1.559, "inbatch_pos_score": 2.3203, "learning_rate": 1.0000000000000002e-06, "loss": 2.5571, "norm_diff": 0.0605, "norm_loss": 0.0, "num_token_doc": 66.8566, "num_token_overlap": 17.828, "num_token_query": 52.3133, "num_token_union": 73.7702, "num_word_context": 202.4469, "num_word_doc": 49.8982, "num_word_query": 39.9133, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10222.1976, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5605, "query_norm": 1.7992, "queue_k_norm": 1.8574, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3133, "sent_len_1": 66.8566, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1625, "stdk": 0.0498, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 98200 }, { "accuracy": 60.4004, "active_queue_size": 16384.0, "cl_loss": 2.5556, "doc_norm": 1.8594, "encoder_q-embeddings": 4727.7842, "encoder_q-layer.0": 3003.3291, "encoder_q-layer.1": 3298.4978, "encoder_q-layer.10": 7230.2119, "encoder_q-layer.11": 15421.2354, "encoder_q-layer.2": 3763.7039, "encoder_q-layer.3": 4153.1675, "encoder_q-layer.4": 4345.3086, "encoder_q-layer.5": 4626.3516, "encoder_q-layer.6": 5182.2192, "encoder_q-layer.7": 5944.7686, "encoder_q-layer.8": 7672.7451, "encoder_q-layer.9": 7015.8979, "epoch": 0.96, "inbatch_neg_score": 1.5628, "inbatch_pos_score": 2.3066, "learning_rate": 9.444444444444445e-07, "loss": 2.5556, "norm_diff": 0.0599, "norm_loss": 0.0, "num_token_doc": 66.7369, "num_token_overlap": 17.7908, "num_token_query": 52.2531, "num_token_union": 73.6504, "num_word_context": 202.2552, "num_word_doc": 49.8265, "num_word_query": 39.8642, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10083.7732, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5615, "query_norm": 1.7995, "queue_k_norm": 1.8562, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2531, "sent_len_1": 66.7369, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5375, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 98300 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.5632, "doc_norm": 1.8571, "encoder_q-embeddings": 2909.6294, "encoder_q-layer.0": 2023.3711, "encoder_q-layer.1": 2362.0056, "encoder_q-layer.10": 3464.6748, "encoder_q-layer.11": 7572.0493, "encoder_q-layer.2": 2709.2656, "encoder_q-layer.3": 2925.1313, "encoder_q-layer.4": 3154.4819, "encoder_q-layer.5": 3390.0232, "encoder_q-layer.6": 3613.7988, "encoder_q-layer.7": 3773.8425, "encoder_q-layer.8": 4011.8511, "encoder_q-layer.9": 3427.4587, "epoch": 0.96, "inbatch_neg_score": 1.5638, "inbatch_pos_score": 2.2988, "learning_rate": 8.88888888888889e-07, "loss": 2.5632, "norm_diff": 0.0611, "norm_loss": 0.0, "num_token_doc": 66.7327, "num_token_overlap": 17.7988, "num_token_query": 52.3038, "num_token_union": 73.6951, "num_word_context": 202.2861, "num_word_doc": 49.7796, "num_word_query": 39.8934, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5564.9699, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5625, "query_norm": 1.796, "queue_k_norm": 1.8576, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3038, "sent_len_1": 66.7327, "sent_len_max_0": 128.0, "sent_len_max_1": 209.7375, "stdk": 0.0496, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 98400 }, { "accuracy": 63.0859, "active_queue_size": 16384.0, "cl_loss": 2.549, "doc_norm": 1.8572, "encoder_q-embeddings": 3099.6558, "encoder_q-layer.0": 2192.5173, "encoder_q-layer.1": 2477.3286, "encoder_q-layer.10": 3637.6475, "encoder_q-layer.11": 7628.7129, "encoder_q-layer.2": 2886.3704, "encoder_q-layer.3": 2974.5898, "encoder_q-layer.4": 3125.0305, "encoder_q-layer.5": 3113.9165, "encoder_q-layer.6": 3345.3362, "encoder_q-layer.7": 3646.5344, "encoder_q-layer.8": 4137.1235, "encoder_q-layer.9": 3477.9766, "epoch": 0.96, "inbatch_neg_score": 1.5624, "inbatch_pos_score": 2.332, "learning_rate": 8.333333333333333e-07, "loss": 2.549, "norm_diff": 0.0561, "norm_loss": 0.0, "num_token_doc": 66.7639, "num_token_overlap": 17.8614, "num_token_query": 52.3516, "num_token_union": 73.7006, "num_word_context": 202.326, "num_word_doc": 49.8328, "num_word_query": 39.9289, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5636.021, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5615, "query_norm": 1.8011, "queue_k_norm": 1.8574, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3516, "sent_len_1": 66.7639, "sent_len_max_0": 128.0, "sent_len_max_1": 209.7512, "stdk": 0.0496, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 98500 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.5601, "doc_norm": 1.8541, "encoder_q-embeddings": 2488.1003, "encoder_q-layer.0": 1568.204, "encoder_q-layer.1": 1739.9865, "encoder_q-layer.10": 3518.4309, "encoder_q-layer.11": 7633.9087, "encoder_q-layer.2": 2038.5649, "encoder_q-layer.3": 2108.8296, "encoder_q-layer.4": 2323.2185, "encoder_q-layer.5": 2460.928, "encoder_q-layer.6": 2863.9634, "encoder_q-layer.7": 3210.4785, "encoder_q-layer.8": 3794.9897, "encoder_q-layer.9": 3414.1814, "epoch": 0.96, "inbatch_neg_score": 1.5617, "inbatch_pos_score": 2.3105, "learning_rate": 7.777777777777778e-07, "loss": 2.5601, "norm_diff": 0.0586, "norm_loss": 0.0, "num_token_doc": 66.5847, "num_token_overlap": 17.8145, "num_token_query": 52.35, "num_token_union": 73.6433, "num_word_context": 202.2055, "num_word_doc": 49.6902, "num_word_query": 39.9331, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5077.4933, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5625, "query_norm": 1.7955, "queue_k_norm": 1.8565, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.35, "sent_len_1": 66.5847, "sent_len_max_0": 128.0, "sent_len_max_1": 210.1325, "stdk": 0.0495, "stdq": 0.0465, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 98600 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.549, "doc_norm": 1.8564, "encoder_q-embeddings": 2691.8999, "encoder_q-layer.0": 1764.662, "encoder_q-layer.1": 1928.6482, "encoder_q-layer.10": 3835.4902, "encoder_q-layer.11": 7823.3398, "encoder_q-layer.2": 2222.6853, "encoder_q-layer.3": 2314.811, "encoder_q-layer.4": 2429.6584, "encoder_q-layer.5": 2520.7603, "encoder_q-layer.6": 3003.0527, "encoder_q-layer.7": 3489.0281, "encoder_q-layer.8": 3772.1953, "encoder_q-layer.9": 3388.3865, "epoch": 0.96, "inbatch_neg_score": 1.5643, "inbatch_pos_score": 2.3164, "learning_rate": 7.222222222222222e-07, "loss": 2.549, "norm_diff": 0.0586, "norm_loss": 0.0, "num_token_doc": 66.9115, "num_token_overlap": 17.9011, "num_token_query": 52.3389, "num_token_union": 73.7161, "num_word_context": 202.4302, "num_word_doc": 49.8964, "num_word_query": 39.9129, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5307.1817, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5635, "query_norm": 1.7978, "queue_k_norm": 1.8574, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3389, "sent_len_1": 66.9115, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5563, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 98700 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.5644, "doc_norm": 1.8567, "encoder_q-embeddings": 2532.2239, "encoder_q-layer.0": 1624.8113, "encoder_q-layer.1": 1741.8708, "encoder_q-layer.10": 3559.5564, "encoder_q-layer.11": 7513.4004, "encoder_q-layer.2": 1984.1594, "encoder_q-layer.3": 2050.2922, "encoder_q-layer.4": 2208.2803, "encoder_q-layer.5": 2294.9644, "encoder_q-layer.6": 2626.6301, "encoder_q-layer.7": 2949.8184, "encoder_q-layer.8": 3531.6653, "encoder_q-layer.9": 3344.3501, "epoch": 0.96, "inbatch_neg_score": 1.5641, "inbatch_pos_score": 2.3145, "learning_rate": 6.666666666666667e-07, "loss": 2.5644, "norm_diff": 0.0576, "norm_loss": 0.0, "num_token_doc": 66.7303, "num_token_overlap": 17.7639, "num_token_query": 52.2207, "num_token_union": 73.7232, "num_word_context": 202.1208, "num_word_doc": 49.8401, "num_word_query": 39.8355, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4961.7372, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5635, "query_norm": 1.7991, "queue_k_norm": 1.8577, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2207, "sent_len_1": 66.7303, "sent_len_max_0": 128.0, "sent_len_max_1": 206.9425, "stdk": 0.0496, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 98800 }, { "accuracy": 63.9648, "active_queue_size": 16384.0, "cl_loss": 2.5527, "doc_norm": 1.8608, "encoder_q-embeddings": 2494.3962, "encoder_q-layer.0": 1556.3573, "encoder_q-layer.1": 1712.9814, "encoder_q-layer.10": 3680.9041, "encoder_q-layer.11": 7748.5352, "encoder_q-layer.2": 1938.9281, "encoder_q-layer.3": 2053.8757, "encoder_q-layer.4": 2266.4553, "encoder_q-layer.5": 2368.1501, "encoder_q-layer.6": 2774.0208, "encoder_q-layer.7": 3079.647, "encoder_q-layer.8": 3747.5835, "encoder_q-layer.9": 3533.9597, "epoch": 0.97, "inbatch_neg_score": 1.5616, "inbatch_pos_score": 2.3379, "learning_rate": 6.111111111111112e-07, "loss": 2.5527, "norm_diff": 0.0538, "norm_loss": 0.0, "num_token_doc": 66.8675, "num_token_overlap": 17.8062, "num_token_query": 52.2568, "num_token_union": 73.7795, "num_word_context": 202.3637, "num_word_doc": 49.8797, "num_word_query": 39.8541, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5095.1398, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5625, "query_norm": 1.8071, "queue_k_norm": 1.8579, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2568, "sent_len_1": 66.8675, "sent_len_max_0": 128.0, "sent_len_max_1": 208.025, "stdk": 0.0498, "stdq": 0.0471, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 98900 }, { "accuracy": 61.4746, "active_queue_size": 16384.0, "cl_loss": 2.5627, "doc_norm": 1.8632, "encoder_q-embeddings": 2864.925, "encoder_q-layer.0": 1868.5735, "encoder_q-layer.1": 2060.1143, "encoder_q-layer.10": 3540.0481, "encoder_q-layer.11": 7725.4146, "encoder_q-layer.2": 2338.2773, "encoder_q-layer.3": 2456.6523, "encoder_q-layer.4": 2528.0044, "encoder_q-layer.5": 2665.3982, "encoder_q-layer.6": 2916.2383, "encoder_q-layer.7": 3168.8423, "encoder_q-layer.8": 3682.0837, "encoder_q-layer.9": 3328.4561, "epoch": 0.97, "inbatch_neg_score": 1.5631, "inbatch_pos_score": 2.3281, "learning_rate": 5.555555555555556e-07, "loss": 2.5627, "norm_diff": 0.0633, "norm_loss": 0.0, "num_token_doc": 66.7256, "num_token_overlap": 17.8232, "num_token_query": 52.362, "num_token_union": 73.732, "num_word_context": 202.3244, "num_word_doc": 49.7862, "num_word_query": 39.9602, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5247.3656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5645, "query_norm": 1.7999, "queue_k_norm": 1.8586, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.362, "sent_len_1": 66.7256, "sent_len_max_0": 128.0, "sent_len_max_1": 209.9812, "stdk": 0.0499, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 99000 }, { "accuracy": 63.1836, "active_queue_size": 16384.0, "cl_loss": 2.5461, "doc_norm": 1.8606, "encoder_q-embeddings": 6371.0024, "encoder_q-layer.0": 4177.8892, "encoder_q-layer.1": 4689.5903, "encoder_q-layer.10": 3627.5591, "encoder_q-layer.11": 7704.8828, "encoder_q-layer.2": 5483.2129, "encoder_q-layer.3": 6341.5791, "encoder_q-layer.4": 7112.9287, "encoder_q-layer.5": 7951.2437, "encoder_q-layer.6": 7448.0547, "encoder_q-layer.7": 5734.46, "encoder_q-layer.8": 4682.564, "encoder_q-layer.9": 3566.21, "epoch": 0.97, "inbatch_neg_score": 1.5636, "inbatch_pos_score": 2.3359, "learning_rate": 5.000000000000001e-07, "loss": 2.5461, "norm_diff": 0.0611, "norm_loss": 0.0, "num_token_doc": 66.8658, "num_token_overlap": 17.8897, "num_token_query": 52.4349, "num_token_union": 73.7985, "num_word_context": 202.355, "num_word_doc": 49.8961, "num_word_query": 39.9844, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8839.7827, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5645, "query_norm": 1.7994, "queue_k_norm": 1.8591, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4349, "sent_len_1": 66.8658, "sent_len_max_0": 128.0, "sent_len_max_1": 206.8787, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 99100 }, { "accuracy": 60.0098, "active_queue_size": 16384.0, "cl_loss": 2.5521, "doc_norm": 1.8603, "encoder_q-embeddings": 5778.7705, "encoder_q-layer.0": 4496.4561, "encoder_q-layer.1": 4816.332, "encoder_q-layer.10": 3758.0779, "encoder_q-layer.11": 7760.7632, "encoder_q-layer.2": 5681.873, "encoder_q-layer.3": 5699.9912, "encoder_q-layer.4": 6216.5449, "encoder_q-layer.5": 6310.4297, "encoder_q-layer.6": 7140.6646, "encoder_q-layer.7": 6535.2271, "encoder_q-layer.8": 5621.356, "encoder_q-layer.9": 3880.8357, "epoch": 0.97, "inbatch_neg_score": 1.5646, "inbatch_pos_score": 2.3223, "learning_rate": 4.444444444444445e-07, "loss": 2.5521, "norm_diff": 0.0651, "norm_loss": 0.0, "num_token_doc": 66.7377, "num_token_overlap": 17.853, "num_token_query": 52.3434, "num_token_union": 73.724, "num_word_context": 202.3541, "num_word_doc": 49.8629, "num_word_query": 39.9516, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8615.1083, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5645, "query_norm": 1.7952, "queue_k_norm": 1.8589, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3434, "sent_len_1": 66.7377, "sent_len_max_0": 128.0, "sent_len_max_1": 206.5475, "stdk": 0.0497, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 99200 }, { "accuracy": 63.0371, "active_queue_size": 16384.0, "cl_loss": 2.5613, "doc_norm": 1.8558, "encoder_q-embeddings": 2524.4541, "encoder_q-layer.0": 1746.0321, "encoder_q-layer.1": 1884.3525, "encoder_q-layer.10": 3734.2834, "encoder_q-layer.11": 7528.4189, "encoder_q-layer.2": 2157.6777, "encoder_q-layer.3": 2331.7324, "encoder_q-layer.4": 2452.7288, "encoder_q-layer.5": 2568.4653, "encoder_q-layer.6": 2917.083, "encoder_q-layer.7": 3449.0959, "encoder_q-layer.8": 3696.8789, "encoder_q-layer.9": 3260.0906, "epoch": 0.97, "inbatch_neg_score": 1.5644, "inbatch_pos_score": 2.332, "learning_rate": 3.888888888888889e-07, "loss": 2.5613, "norm_diff": 0.0566, "norm_loss": 0.0, "num_token_doc": 66.8147, "num_token_overlap": 17.7995, "num_token_query": 52.3352, "num_token_union": 73.761, "num_word_context": 202.2594, "num_word_doc": 49.8428, "num_word_query": 39.914, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5058.866, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5645, "query_norm": 1.7992, "queue_k_norm": 1.8576, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3352, "sent_len_1": 66.8147, "sent_len_max_0": 128.0, "sent_len_max_1": 209.1337, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 99300 }, { "accuracy": 62.5488, "active_queue_size": 16384.0, "cl_loss": 2.5636, "doc_norm": 1.8594, "encoder_q-embeddings": 2329.3328, "encoder_q-layer.0": 1557.9626, "encoder_q-layer.1": 1687.666, "encoder_q-layer.10": 3489.8625, "encoder_q-layer.11": 7568.6167, "encoder_q-layer.2": 1983.5862, "encoder_q-layer.3": 2069.1953, "encoder_q-layer.4": 2278.5642, "encoder_q-layer.5": 2453.845, "encoder_q-layer.6": 2847.676, "encoder_q-layer.7": 3130.1404, "encoder_q-layer.8": 3637.0737, "encoder_q-layer.9": 3317.1628, "epoch": 0.97, "inbatch_neg_score": 1.5656, "inbatch_pos_score": 2.3262, "learning_rate": 3.3333333333333335e-07, "loss": 2.5636, "norm_diff": 0.063, "norm_loss": 0.0, "num_token_doc": 66.6877, "num_token_overlap": 17.794, "num_token_query": 52.3294, "num_token_union": 73.6833, "num_word_context": 202.5036, "num_word_doc": 49.7229, "num_word_query": 39.883, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4986.1082, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5654, "query_norm": 1.7964, "queue_k_norm": 1.8594, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3294, "sent_len_1": 66.6877, "sent_len_max_0": 128.0, "sent_len_max_1": 209.5675, "stdk": 0.0497, "stdq": 0.0465, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 99400 }, { "accuracy": 62.0605, "active_queue_size": 16384.0, "cl_loss": 2.5593, "doc_norm": 1.8602, "encoder_q-embeddings": 2454.0713, "encoder_q-layer.0": 1583.2598, "encoder_q-layer.1": 1784.994, "encoder_q-layer.10": 3437.3071, "encoder_q-layer.11": 7482.3369, "encoder_q-layer.2": 2054.6509, "encoder_q-layer.3": 2053.2983, "encoder_q-layer.4": 2240.2419, "encoder_q-layer.5": 2353.4668, "encoder_q-layer.6": 2661.6743, "encoder_q-layer.7": 3037.4727, "encoder_q-layer.8": 3563.771, "encoder_q-layer.9": 3167.6741, "epoch": 0.97, "inbatch_neg_score": 1.5641, "inbatch_pos_score": 2.3301, "learning_rate": 2.777777777777778e-07, "loss": 2.5593, "norm_diff": 0.0606, "norm_loss": 0.0, "num_token_doc": 66.7861, "num_token_overlap": 17.8569, "num_token_query": 52.4361, "num_token_union": 73.786, "num_word_context": 202.5251, "num_word_doc": 49.822, "num_word_query": 40.0063, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4824.6065, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5645, "query_norm": 1.7996, "queue_k_norm": 1.8592, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4361, "sent_len_1": 66.7861, "sent_len_max_0": 128.0, "sent_len_max_1": 209.2388, "stdk": 0.0497, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 99500 }, { "accuracy": 62.207, "active_queue_size": 16384.0, "cl_loss": 2.551, "doc_norm": 1.8585, "encoder_q-embeddings": 2652.7527, "encoder_q-layer.0": 1727.0977, "encoder_q-layer.1": 1962.4696, "encoder_q-layer.10": 3546.4871, "encoder_q-layer.11": 7480.2485, "encoder_q-layer.2": 2259.8506, "encoder_q-layer.3": 2358.4849, "encoder_q-layer.4": 2495.8992, "encoder_q-layer.5": 2564.8325, "encoder_q-layer.6": 2898.0027, "encoder_q-layer.7": 3442.175, "encoder_q-layer.8": 3826.9133, "encoder_q-layer.9": 3493.2659, "epoch": 0.97, "inbatch_neg_score": 1.5656, "inbatch_pos_score": 2.3301, "learning_rate": 2.2222222222222224e-07, "loss": 2.551, "norm_diff": 0.0565, "norm_loss": 0.0, "num_token_doc": 66.7452, "num_token_overlap": 17.8193, "num_token_query": 52.3905, "num_token_union": 73.7267, "num_word_context": 202.4334, "num_word_doc": 49.8124, "num_word_query": 39.9611, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5138.7219, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5654, "query_norm": 1.8021, "queue_k_norm": 1.8601, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3905, "sent_len_1": 66.7452, "sent_len_max_0": 128.0, "sent_len_max_1": 207.855, "stdk": 0.0496, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 99600 }, { "accuracy": 62.4512, "active_queue_size": 16384.0, "cl_loss": 2.5414, "doc_norm": 1.862, "encoder_q-embeddings": 1631.6458, "encoder_q-layer.0": 1052.3467, "encoder_q-layer.1": 1212.7378, "encoder_q-layer.10": 1807.6614, "encoder_q-layer.11": 3909.9287, "encoder_q-layer.2": 1442.1097, "encoder_q-layer.3": 1617.8635, "encoder_q-layer.4": 1839.3182, "encoder_q-layer.5": 1993.7156, "encoder_q-layer.6": 2299.7139, "encoder_q-layer.7": 2118.1172, "encoder_q-layer.8": 2061.8267, "encoder_q-layer.9": 1808.1671, "epoch": 0.97, "inbatch_neg_score": 1.5662, "inbatch_pos_score": 2.3281, "learning_rate": 1.6666666666666668e-07, "loss": 2.5414, "norm_diff": 0.0606, "norm_loss": 0.0, "num_token_doc": 66.7687, "num_token_overlap": 17.8668, "num_token_query": 52.3553, "num_token_union": 73.6987, "num_word_context": 202.217, "num_word_doc": 49.807, "num_word_query": 39.9264, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2963.3924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5664, "query_norm": 1.8014, "queue_k_norm": 1.8602, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3553, "sent_len_1": 66.7687, "sent_len_max_0": 128.0, "sent_len_max_1": 208.38, "stdk": 0.0498, "stdq": 0.0468, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 99700 }, { "accuracy": 60.791, "active_queue_size": 16384.0, "cl_loss": 2.5608, "doc_norm": 1.8564, "encoder_q-embeddings": 1213.9801, "encoder_q-layer.0": 786.1255, "encoder_q-layer.1": 859.9163, "encoder_q-layer.10": 1932.7712, "encoder_q-layer.11": 4011.8093, "encoder_q-layer.2": 1008.1918, "encoder_q-layer.3": 1042.0793, "encoder_q-layer.4": 1123.687, "encoder_q-layer.5": 1159.8856, "encoder_q-layer.6": 1364.2487, "encoder_q-layer.7": 1531.7386, "encoder_q-layer.8": 1974.0603, "encoder_q-layer.9": 1823.1777, "epoch": 0.97, "inbatch_neg_score": 1.5661, "inbatch_pos_score": 2.3203, "learning_rate": 1.1111111111111112e-07, "loss": 2.5608, "norm_diff": 0.0573, "norm_loss": 0.0, "num_token_doc": 66.7115, "num_token_overlap": 17.7917, "num_token_query": 52.2258, "num_token_union": 73.6419, "num_word_context": 202.1092, "num_word_doc": 49.7683, "num_word_query": 39.8169, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2577.7335, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5664, "query_norm": 1.7991, "queue_k_norm": 1.8595, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2258, "sent_len_1": 66.7115, "sent_len_max_0": 128.0, "sent_len_max_1": 208.5538, "stdk": 0.0495, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 99800 }, { "accuracy": 60.498, "active_queue_size": 16384.0, "cl_loss": 2.5671, "doc_norm": 1.8597, "encoder_q-embeddings": 1214.704, "encoder_q-layer.0": 771.5571, "encoder_q-layer.1": 855.0276, "encoder_q-layer.10": 1720.9512, "encoder_q-layer.11": 3657.7446, "encoder_q-layer.2": 982.8806, "encoder_q-layer.3": 1044.5927, "encoder_q-layer.4": 1132.2941, "encoder_q-layer.5": 1171.0245, "encoder_q-layer.6": 1315.0724, "encoder_q-layer.7": 1493.7014, "encoder_q-layer.8": 1779.5968, "encoder_q-layer.9": 1686.0283, "epoch": 0.98, "inbatch_neg_score": 1.5637, "inbatch_pos_score": 2.3203, "learning_rate": 5.555555555555556e-08, "loss": 2.5671, "norm_diff": 0.0611, "norm_loss": 0.0, "num_token_doc": 66.8303, "num_token_overlap": 17.7998, "num_token_query": 52.2729, "num_token_union": 73.763, "num_word_context": 202.3521, "num_word_doc": 49.8635, "num_word_query": 39.8602, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2434.2409, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5654, "query_norm": 1.7986, "queue_k_norm": 1.86, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2729, "sent_len_1": 66.8303, "sent_len_max_0": 128.0, "sent_len_max_1": 209.805, "stdk": 0.0497, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 99900 }, { "accuracy": 61.377, "active_queue_size": 16384.0, "cl_loss": 2.5542, "doc_norm": 1.8586, "encoder_q-embeddings": 1179.2413, "encoder_q-layer.0": 760.3253, "encoder_q-layer.1": 814.2128, "encoder_q-layer.10": 1722.4292, "encoder_q-layer.11": 3812.1431, "encoder_q-layer.2": 921.0991, "encoder_q-layer.3": 978.7668, "encoder_q-layer.4": 1085.7847, "encoder_q-layer.5": 1144.7561, "encoder_q-layer.6": 1335.6871, "encoder_q-layer.7": 1561.6517, "encoder_q-layer.8": 1770.4291, "encoder_q-layer.9": 1665.6146, "epoch": 0.98, "inbatch_neg_score": 1.5669, "inbatch_pos_score": 2.3184, "learning_rate": 0.0, "loss": 2.5542, "norm_diff": 0.0586, "norm_loss": 0.0, "num_token_doc": 66.6887, "num_token_overlap": 17.801, "num_token_query": 52.3084, "num_token_union": 73.6783, "num_word_context": 202.3725, "num_word_doc": 49.7716, "num_word_query": 39.8683, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2437.8258, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5664, "query_norm": 1.8, "queue_k_norm": 1.859, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3084, "sent_len_1": 66.6887, "sent_len_max_0": 128.0, "sent_len_max_1": 209.3663, "stdk": 0.0496, "stdq": 0.0467, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 100000 }, { "dev_runtime": 26.1511, "dev_samples_per_second": 1.224, "dev_steps_per_second": 0.038, "epoch": 0.98, "step": 100000, "test_accuracy": 94.37255859375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3090860843658447, "test_doc_norm": 1.8337382078170776, "test_inbatch_neg_score": 1.868882417678833, "test_inbatch_pos_score": 2.894646406173706, "test_loss": 0.3090860843658447, "test_loss_align": 0.9604696035385132, "test_loss_unif": -0.6693239808082581, "test_loss_unif_q@queue": -0.6693239212036133, "test_norm_diff": 0.00494399294257164, "test_norm_loss": 0.0, "test_q@queue_neg_score": 1.5592586994171143, "test_query_norm": 1.8354812860488892, "test_queue_k_norm": 1.8589478731155396, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04398665204644203, "test_stdq": 0.043952979147434235, "test_stdqueue_k": 0.04966842383146286, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.1511, "dev_samples_per_second": 1.224, "dev_steps_per_second": 0.038, "epoch": 0.98, "eval_beir-arguana_ndcg@10": 0.38653, "eval_beir-arguana_recall@10": 0.66999, "eval_beir-arguana_recall@100": 0.94097, "eval_beir-arguana_recall@20": 0.7973, "eval_beir-avg_ndcg@10": 0.3876593333333333, "eval_beir-avg_recall@10": 0.46351183333333335, "eval_beir-avg_recall@100": 0.6419569166666668, "eval_beir-avg_recall@20": 0.5241325, "eval_beir-cqadupstack_ndcg@10": 0.2928333333333334, "eval_beir-cqadupstack_recall@10": 0.3925783333333334, "eval_beir-cqadupstack_recall@100": 0.6243891666666667, "eval_beir-cqadupstack_recall@20": 0.4612349999999999, "eval_beir-fiqa_ndcg@10": 0.26763, "eval_beir-fiqa_recall@10": 0.33485, "eval_beir-fiqa_recall@100": 0.6111, "eval_beir-fiqa_recall@20": 0.41649, "eval_beir-nfcorpus_ndcg@10": 0.29881, "eval_beir-nfcorpus_recall@10": 0.15184, "eval_beir-nfcorpus_recall@100": 0.28269, "eval_beir-nfcorpus_recall@20": 0.17956, "eval_beir-nq_ndcg@10": 0.29837, "eval_beir-nq_recall@10": 0.48047, "eval_beir-nq_recall@100": 0.81825, "eval_beir-nq_recall@20": 0.60291, "eval_beir-quora_ndcg@10": 0.78119, "eval_beir-quora_recall@10": 0.88917, "eval_beir-quora_recall@100": 0.97876, "eval_beir-quora_recall@20": 0.93078, "eval_beir-scidocs_ndcg@10": 0.16627, "eval_beir-scidocs_recall@10": 0.17298, "eval_beir-scidocs_recall@100": 0.38372, "eval_beir-scidocs_recall@20": 0.23343, "eval_beir-scifact_ndcg@10": 0.65554, "eval_beir-scifact_recall@10": 0.82744, "eval_beir-scifact_recall@100": 0.91489, "eval_beir-scifact_recall@20": 0.85478, "eval_beir-trec-covid_ndcg@10": 0.52738, "eval_beir-trec-covid_recall@10": 0.574, "eval_beir-trec-covid_recall@100": 0.4358, "eval_beir-trec-covid_recall@20": 0.552, "eval_beir-webis-touche2020_ndcg@10": 0.20204, "eval_beir-webis-touche2020_recall@10": 0.1418, "eval_beir-webis-touche2020_recall@100": 0.429, "eval_beir-webis-touche2020_recall@20": 0.21284, "eval_senteval-avg_sts": 0.7468018568294252, "eval_senteval-sickr_spearman": 0.7337127308506983, "eval_senteval-stsb_spearman": 0.759890982808152, "step": 100000, "test_accuracy": 94.37255859375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3090860843658447, "test_doc_norm": 1.8337382078170776, "test_inbatch_neg_score": 1.868882417678833, "test_inbatch_pos_score": 2.894646406173706, "test_loss": 0.3090860843658447, "test_loss_align": 0.9604696035385132, "test_loss_unif": -0.6693239808082581, "test_loss_unif_q@queue": -0.6693239212036133, "test_norm_diff": 0.00494399294257164, "test_norm_loss": 0.0, "test_q@queue_neg_score": 1.5592586994171143, "test_query_norm": 1.8354812860488892, "test_queue_k_norm": 1.8589478731155396, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04398665204644203, "test_stdq": 0.043952979147434235, "test_stdqueue_k": 0.04966842383146286, "test_stdqueue_q": 0.0 }, { "epoch": 0.98, "step": 100000, "total_flos": 0, "train_runtime": 106961.6151, "train_samples_per_second": 0.935 } ], "max_steps": 100000, "num_train_epochs": 1, "total_flos": 0, "trial_name": null, "trial_params": null }