{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9763241396143519, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "accuracy": 18.3594, "active_queue_size": 16384.0, "cl_loss": 182.938, "doc_norm": 8.4124, "encoder_q-embeddings": 24573.6152, "encoder_q-layer.0": 25979.3066, "encoder_q-layer.1": 23904.0684, "encoder_q-layer.10": 63381.2656, "encoder_q-layer.11": 42098.9453, "encoder_q-layer.2": 28790.5918, "encoder_q-layer.3": 30542.459, "encoder_q-layer.4": 34970.4297, "encoder_q-layer.5": 36208.0508, "encoder_q-layer.6": 45262.1641, "encoder_q-layer.7": 53899.0117, "encoder_q-layer.8": 65739.6094, "encoder_q-layer.9": 52087.6562, "epoch": 0.0, "inbatch_neg_score": 39.4635, "inbatch_pos_score": 47.6562, "learning_rate": 5.000000000000001e-07, "loss": 182.938, "norm_diff": 0.4329, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 57869.9702, "preclip_grad_norm_avg": 0.0005, "q@queue_neg_score": 39.5, "query_norm": 7.9795, "queue_k_norm": 8.4151, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7247, "sent_len_1": 66.8312, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0563, "stdk": 0.1803, "stdq": 0.1847, "stdqueue_k": 0.18, "stdqueue_q": 0.0, "step": 100 }, { "accuracy": 20.0195, "active_queue_size": 16384.0, "cl_loss": 118.672, "doc_norm": 8.3649, "encoder_q-embeddings": 8124.6973, "encoder_q-layer.0": 7364.1929, "encoder_q-layer.1": 8810.082, "encoder_q-layer.10": 18876.7578, "encoder_q-layer.11": 20576.9004, "encoder_q-layer.2": 10200.043, "encoder_q-layer.3": 10309.6553, "encoder_q-layer.4": 11181.8525, "encoder_q-layer.5": 11458.5527, "encoder_q-layer.6": 13057.4219, "encoder_q-layer.7": 14262.1475, "encoder_q-layer.8": 17808.6641, "encoder_q-layer.9": 13811.7383, "epoch": 0.0, "inbatch_neg_score": 36.7339, "inbatch_pos_score": 41.9688, "learning_rate": 1.0000000000000002e-06, "loss": 118.672, "norm_diff": 1.0669, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 18601.2929, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 36.75, "query_norm": 7.2979, "queue_k_norm": 8.3537, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6174, "sent_len_1": 66.6111, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5675, "stdk": 0.1791, "stdq": 0.14, "stdqueue_k": 0.1784, "stdqueue_q": 0.0, "step": 200 }, { "accuracy": 18.8477, "active_queue_size": 16384.0, "cl_loss": 71.0841, "doc_norm": 8.2193, "encoder_q-embeddings": 4509.9502, "encoder_q-layer.0": 4637.7891, "encoder_q-layer.1": 5886.1167, "encoder_q-layer.10": 9992.3057, "encoder_q-layer.11": 16425.5332, "encoder_q-layer.2": 6114.2339, "encoder_q-layer.3": 5360.708, "encoder_q-layer.4": 5191.3135, "encoder_q-layer.5": 5212.5435, "encoder_q-layer.6": 6334.7324, "encoder_q-layer.7": 6803.6929, "encoder_q-layer.8": 7595.5562, "encoder_q-layer.9": 6464.8267, "epoch": 0.0, "inbatch_neg_score": 34.9121, "inbatch_pos_score": 38.3438, "learning_rate": 1.5e-06, "loss": 71.0841, "norm_diff": 1.218, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11580.2068, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 34.9375, "query_norm": 7.0012, "queue_k_norm": 8.2249, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4995, "sent_len_1": 66.989, "sent_max_len_0": 127.9925, "sent_max_len_1": 189.6662, "stdk": 0.1734, "stdq": 0.1135, "stdqueue_k": 0.1732, "stdqueue_q": 0.0, "step": 300 }, { "accuracy": 21.2891, "active_queue_size": 16384.0, "cl_loss": 49.8672, "doc_norm": 8.0505, "encoder_q-embeddings": 2818.0444, "encoder_q-layer.0": 2357.908, "encoder_q-layer.1": 2530.3979, "encoder_q-layer.10": 7126.5664, "encoder_q-layer.11": 12531.1064, "encoder_q-layer.2": 2825.7939, "encoder_q-layer.3": 3021.2476, "encoder_q-layer.4": 3384.1167, "encoder_q-layer.5": 3384.2224, "encoder_q-layer.6": 3876.3809, "encoder_q-layer.7": 4144.3066, "encoder_q-layer.8": 5015.0059, "encoder_q-layer.9": 4327.5298, "epoch": 0.0, "inbatch_neg_score": 32.5944, "inbatch_pos_score": 35.0625, "learning_rate": 2.0000000000000003e-06, "loss": 49.8672, "norm_diff": 1.1175, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8217.1219, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 32.5312, "query_norm": 6.933, "queue_k_norm": 8.0816, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7229, "sent_len_1": 66.8325, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7887, "stdk": 0.1669, "stdq": 0.1023, "stdqueue_k": 0.1686, "stdqueue_q": 0.0, "step": 400 }, { "accuracy": 20.1172, "active_queue_size": 16384.0, "cl_loss": 37.0515, "doc_norm": 7.9026, "encoder_q-embeddings": 2959.4326, "encoder_q-layer.0": 2726.6792, "encoder_q-layer.1": 3160.0867, "encoder_q-layer.10": 6881.7163, "encoder_q-layer.11": 9943.9883, "encoder_q-layer.2": 3478.1113, "encoder_q-layer.3": 3628.8708, "encoder_q-layer.4": 4031.145, "encoder_q-layer.5": 3653.0347, "encoder_q-layer.6": 4150.7798, "encoder_q-layer.7": 4035.1287, "encoder_q-layer.8": 4639.9404, "encoder_q-layer.9": 3857.6885, "epoch": 0.0, "inbatch_neg_score": 29.8234, "inbatch_pos_score": 31.8125, "learning_rate": 2.5e-06, "loss": 37.0515, "norm_diff": 0.9892, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7094.4748, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 29.7969, "query_norm": 6.9135, "queue_k_norm": 7.9178, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.2677, "sent_len_1": 66.6534, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.455, "stdk": 0.1626, "stdq": 0.0971, "stdqueue_k": 0.1633, "stdqueue_q": 0.0, "step": 500 }, { "accuracy": 20.9961, "active_queue_size": 16384.0, "cl_loss": 29.7595, "doc_norm": 7.7204, "encoder_q-embeddings": 3280.1541, "encoder_q-layer.0": 2687.1641, "encoder_q-layer.1": 2932.9026, "encoder_q-layer.10": 5435.9941, "encoder_q-layer.11": 8595.9951, "encoder_q-layer.2": 3128.7063, "encoder_q-layer.3": 3220.3384, "encoder_q-layer.4": 3384.9558, "encoder_q-layer.5": 3556.8882, "encoder_q-layer.6": 3815.9468, "encoder_q-layer.7": 3679.969, "encoder_q-layer.8": 3934.1973, "encoder_q-layer.9": 3275.6052, "epoch": 0.01, "inbatch_neg_score": 26.6289, "inbatch_pos_score": 28.3125, "learning_rate": 3e-06, "loss": 29.7595, "norm_diff": 1.1711, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6165.9291, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 26.6094, "query_norm": 6.5493, "queue_k_norm": 7.738, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.768, "sent_len_1": 66.8635, "sent_max_len_0": 128.0, "sent_max_len_1": 189.065, "stdk": 0.1575, "stdq": 0.0912, "stdqueue_k": 0.1572, "stdqueue_q": 0.0, "step": 600 }, { "accuracy": 22.7539, "active_queue_size": 16384.0, "cl_loss": 25.7, "doc_norm": 7.5623, "encoder_q-embeddings": 3943.7354, "encoder_q-layer.0": 3415.8108, "encoder_q-layer.1": 4084.3179, "encoder_q-layer.10": 5068.0566, "encoder_q-layer.11": 7527.9912, "encoder_q-layer.2": 4722.4941, "encoder_q-layer.3": 5070.3105, "encoder_q-layer.4": 5317.1025, "encoder_q-layer.5": 5142.1079, "encoder_q-layer.6": 4789.2197, "encoder_q-layer.7": 4430.3447, "encoder_q-layer.8": 4244.854, "encoder_q-layer.9": 2835.8984, "epoch": 0.01, "inbatch_neg_score": 23.449, "inbatch_pos_score": 24.875, "learning_rate": 3.5000000000000004e-06, "loss": 25.7, "norm_diff": 1.575, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6924.8815, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 23.4062, "query_norm": 5.9872, "queue_k_norm": 7.566, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7825, "sent_len_1": 66.8611, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9313, "stdk": 0.1515, "stdq": 0.0863, "stdqueue_k": 0.1519, "stdqueue_q": 0.0, "step": 700 }, { "accuracy": 22.4609, "active_queue_size": 16384.0, "cl_loss": 22.1833, "doc_norm": 7.3779, "encoder_q-embeddings": 3932.7754, "encoder_q-layer.0": 3207.5852, "encoder_q-layer.1": 4245.9473, "encoder_q-layer.10": 6121.6309, "encoder_q-layer.11": 9102.4863, "encoder_q-layer.2": 5293.0181, "encoder_q-layer.3": 5312.0068, "encoder_q-layer.4": 5810.0464, "encoder_q-layer.5": 5936.6729, "encoder_q-layer.6": 5564.4839, "encoder_q-layer.7": 4700.2944, "encoder_q-layer.8": 4696.3457, "encoder_q-layer.9": 2962.698, "epoch": 0.01, "inbatch_neg_score": 19.8464, "inbatch_pos_score": 21.0781, "learning_rate": 4.000000000000001e-06, "loss": 22.1833, "norm_diff": 2.0147, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7667.7987, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 19.8438, "query_norm": 5.3633, "queue_k_norm": 7.3952, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5224, "sent_len_1": 67.0432, "sent_max_len_0": 127.9925, "sent_max_len_1": 190.3963, "stdk": 0.1457, "stdq": 0.0811, "stdqueue_k": 0.1459, "stdqueue_q": 0.0, "step": 800 }, { "accuracy": 22.8516, "active_queue_size": 16384.0, "cl_loss": 19.0382, "doc_norm": 7.2539, "encoder_q-embeddings": 6947.4736, "encoder_q-layer.0": 6123.3633, "encoder_q-layer.1": 6928.6445, "encoder_q-layer.10": 5241.9316, "encoder_q-layer.11": 8382.4932, "encoder_q-layer.2": 8080.8618, "encoder_q-layer.3": 8234.8203, "encoder_q-layer.4": 8453.1094, "encoder_q-layer.5": 8824.3213, "encoder_q-layer.6": 8119.9629, "encoder_q-layer.7": 6333.5737, "encoder_q-layer.8": 5604.8052, "encoder_q-layer.9": 2680.6394, "epoch": 0.01, "inbatch_neg_score": 14.7706, "inbatch_pos_score": 15.8203, "learning_rate": 4.5e-06, "loss": 19.0382, "norm_diff": 2.7909, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10459.0161, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 14.7578, "query_norm": 4.463, "queue_k_norm": 7.2443, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6512, "sent_len_1": 66.6026, "sent_max_len_0": 128.0, "sent_max_len_1": 186.6238, "stdk": 0.1418, "stdq": 0.0769, "stdqueue_k": 0.141, "stdqueue_q": 0.0, "step": 900 }, { "accuracy": 21.875, "active_queue_size": 16384.0, "cl_loss": 16.4345, "doc_norm": 7.089, "encoder_q-embeddings": 7619.0303, "encoder_q-layer.0": 6738.4121, "encoder_q-layer.1": 8096.0415, "encoder_q-layer.10": 4738.9473, "encoder_q-layer.11": 7764.9653, "encoder_q-layer.2": 9982.8008, "encoder_q-layer.3": 10481.375, "encoder_q-layer.4": 11939.3438, "encoder_q-layer.5": 13348.2646, "encoder_q-layer.6": 12799.6816, "encoder_q-layer.7": 9509.3418, "encoder_q-layer.8": 7635.9224, "encoder_q-layer.9": 2623.3301, "epoch": 0.01, "inbatch_neg_score": 10.8282, "inbatch_pos_score": 11.6562, "learning_rate": 5e-06, "loss": 16.4345, "norm_diff": 3.4643, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13399.1686, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 10.8125, "query_norm": 3.6247, "queue_k_norm": 7.0966, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6195, "sent_len_1": 66.7363, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4075, "stdk": 0.1348, "stdq": 0.0728, "stdqueue_k": 0.1355, "stdqueue_q": 0.0, "step": 1000 }, { "accuracy": 24.0234, "active_queue_size": 16384.0, "cl_loss": 14.7846, "doc_norm": 6.9627, "encoder_q-embeddings": 6318.0488, "encoder_q-layer.0": 5651.48, "encoder_q-layer.1": 6329.0352, "encoder_q-layer.10": 4820.7935, "encoder_q-layer.11": 7119.9531, "encoder_q-layer.2": 7527.811, "encoder_q-layer.3": 8097.2354, "encoder_q-layer.4": 8161.707, "encoder_q-layer.5": 8106.6514, "encoder_q-layer.6": 7817.6475, "encoder_q-layer.7": 7057.2227, "encoder_q-layer.8": 4766.6802, "encoder_q-layer.9": 1956.3457, "epoch": 0.01, "inbatch_neg_score": 8.1871, "inbatch_pos_score": 8.9375, "learning_rate": 5.500000000000001e-06, "loss": 14.7846, "norm_diff": 3.9116, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9792.2311, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 8.1953, "query_norm": 3.0511, "queue_k_norm": 6.9554, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5559, "sent_len_1": 66.8963, "sent_max_len_0": 127.995, "sent_max_len_1": 190.3925, "stdk": 0.1314, "stdq": 0.0686, "stdqueue_k": 0.1299, "stdqueue_q": 0.0, "step": 1100 }, { "accuracy": 26.4648, "active_queue_size": 16384.0, "cl_loss": 13.4219, "doc_norm": 6.8135, "encoder_q-embeddings": 6299.6631, "encoder_q-layer.0": 5145.9111, "encoder_q-layer.1": 5786.6631, "encoder_q-layer.10": 3710.6848, "encoder_q-layer.11": 6292.9424, "encoder_q-layer.2": 6485.5459, "encoder_q-layer.3": 7358.1211, "encoder_q-layer.4": 7954.8608, "encoder_q-layer.5": 8130.5972, "encoder_q-layer.6": 6556.9971, "encoder_q-layer.7": 4475.3809, "encoder_q-layer.8": 3077.2095, "encoder_q-layer.9": 1588.5592, "epoch": 0.01, "inbatch_neg_score": 4.229, "inbatch_pos_score": 4.9219, "learning_rate": 6e-06, "loss": 13.4219, "norm_diff": 4.3726, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8626.9828, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 4.2109, "query_norm": 2.4408, "queue_k_norm": 6.8313, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6132, "sent_len_1": 66.9068, "sent_max_len_0": 127.9975, "sent_max_len_1": 188.7738, "stdk": 0.1231, "stdq": 0.062, "stdqueue_k": 0.1252, "stdqueue_q": 0.0, "step": 1200 }, { "accuracy": 25.0977, "active_queue_size": 16384.0, "cl_loss": 12.4149, "doc_norm": 6.7111, "encoder_q-embeddings": 6609.5674, "encoder_q-layer.0": 6072.5083, "encoder_q-layer.1": 6319.5332, "encoder_q-layer.10": 3281.6792, "encoder_q-layer.11": 5998.2388, "encoder_q-layer.2": 7492.6392, "encoder_q-layer.3": 7802.3125, "encoder_q-layer.4": 8949.1689, "encoder_q-layer.5": 9964.2793, "encoder_q-layer.6": 9427.125, "encoder_q-layer.7": 7105.6929, "encoder_q-layer.8": 6311.1265, "encoder_q-layer.9": 1690.4689, "epoch": 0.01, "inbatch_neg_score": 3.4533, "inbatch_pos_score": 4.0703, "learning_rate": 6.5000000000000004e-06, "loss": 12.4149, "norm_diff": 4.4456, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10301.2419, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 3.4453, "query_norm": 2.2656, "queue_k_norm": 6.7173, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.3076, "sent_len_1": 66.7254, "sent_max_len_0": 127.9825, "sent_max_len_1": 189.8913, "stdk": 0.1196, "stdq": 0.0627, "stdqueue_k": 0.1204, "stdqueue_q": 0.0, "step": 1300 }, { "accuracy": 26.1719, "active_queue_size": 16384.0, "cl_loss": 11.5766, "doc_norm": 6.6125, "encoder_q-embeddings": 11791.7861, "encoder_q-layer.0": 9980.1143, "encoder_q-layer.1": 11396.7568, "encoder_q-layer.10": 4365.3472, "encoder_q-layer.11": 7692.8281, "encoder_q-layer.2": 11665.9561, "encoder_q-layer.3": 11913.6807, "encoder_q-layer.4": 12926.8535, "encoder_q-layer.5": 13567.5225, "encoder_q-layer.6": 10605.9424, "encoder_q-layer.7": 6602.4512, "encoder_q-layer.8": 5472.0371, "encoder_q-layer.9": 2170.6125, "epoch": 0.01, "inbatch_neg_score": 2.5477, "inbatch_pos_score": 3.1133, "learning_rate": 7.000000000000001e-06, "loss": 11.5766, "norm_diff": 4.4816, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14670.8265, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 2.5469, "query_norm": 2.1309, "queue_k_norm": 6.6103, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5567, "sent_len_1": 66.7983, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8887, "stdk": 0.1154, "stdq": 0.0609, "stdqueue_k": 0.1154, "stdqueue_q": 0.0, "step": 1400 }, { "accuracy": 26.9531, "active_queue_size": 16384.0, "cl_loss": 11.2713, "doc_norm": 6.5089, "encoder_q-embeddings": 4914.479, "encoder_q-layer.0": 4555.0654, "encoder_q-layer.1": 4610.147, "encoder_q-layer.10": 2696.6257, "encoder_q-layer.11": 5117.0581, "encoder_q-layer.2": 4936.5923, "encoder_q-layer.3": 4668.2554, "encoder_q-layer.4": 4728.1572, "encoder_q-layer.5": 4059.2068, "encoder_q-layer.6": 2995.6899, "encoder_q-layer.7": 2784.249, "encoder_q-layer.8": 2698.4382, "encoder_q-layer.9": 1421.9454, "epoch": 0.01, "inbatch_neg_score": 2.8626, "inbatch_pos_score": 3.4434, "learning_rate": 7.5e-06, "loss": 11.2713, "norm_diff": 4.4544, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5987.8671, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 2.8555, "query_norm": 2.0545, "queue_k_norm": 6.5093, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5238, "sent_len_1": 66.7101, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1912, "stdk": 0.1105, "stdq": 0.057, "stdqueue_k": 0.1111, "stdqueue_q": 0.0, "step": 1500 }, { "accuracy": 22.8516, "active_queue_size": 16384.0, "cl_loss": 11.0411, "doc_norm": 6.4145, "encoder_q-embeddings": 9431.2773, "encoder_q-layer.0": 7905.2036, "encoder_q-layer.1": 9269.7607, "encoder_q-layer.10": 3731.3535, "encoder_q-layer.11": 6197.8486, "encoder_q-layer.2": 10978.333, "encoder_q-layer.3": 10553.7188, "encoder_q-layer.4": 11554.6416, "encoder_q-layer.5": 11925.1348, "encoder_q-layer.6": 10693.4688, "encoder_q-layer.7": 8280.0352, "encoder_q-layer.8": 8004.8926, "encoder_q-layer.9": 2311.2715, "epoch": 0.02, "inbatch_neg_score": 2.1624, "inbatch_pos_score": 2.6699, "learning_rate": 8.000000000000001e-06, "loss": 11.0411, "norm_diff": 4.2767, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13066.0754, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 2.1523, "query_norm": 2.1379, "queue_k_norm": 6.4108, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6315, "sent_len_1": 67.0566, "sent_max_len_0": 128.0, "sent_max_len_1": 192.5325, "stdk": 0.1063, "stdq": 0.0619, "stdqueue_k": 0.1068, "stdqueue_q": 0.0, "step": 1600 }, { "accuracy": 24.9023, "active_queue_size": 16384.0, "cl_loss": 10.8426, "doc_norm": 6.3121, "encoder_q-embeddings": 8259.4004, "encoder_q-layer.0": 7353.3257, "encoder_q-layer.1": 7669.6562, "encoder_q-layer.10": 5799.4238, "encoder_q-layer.11": 8546.9512, "encoder_q-layer.2": 8720.8164, "encoder_q-layer.3": 8573.1357, "encoder_q-layer.4": 7836.1401, "encoder_q-layer.5": 7362.0508, "encoder_q-layer.6": 6196.8828, "encoder_q-layer.7": 5094.0322, "encoder_q-layer.8": 4345.6445, "encoder_q-layer.9": 2847.728, "epoch": 0.02, "inbatch_neg_score": 1.9436, "inbatch_pos_score": 2.4863, "learning_rate": 8.500000000000002e-06, "loss": 10.8426, "norm_diff": 4.205, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10313.6582, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.9375, "query_norm": 2.1071, "queue_k_norm": 6.3137, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6247, "sent_len_1": 66.8253, "sent_max_len_0": 128.0, "sent_max_len_1": 190.95, "stdk": 0.102, "stdq": 0.0612, "stdqueue_k": 0.1026, "stdqueue_q": 0.0, "step": 1700 }, { "accuracy": 26.2695, "active_queue_size": 16384.0, "cl_loss": 10.4975, "doc_norm": 6.2095, "encoder_q-embeddings": 5736.251, "encoder_q-layer.0": 5065.4678, "encoder_q-layer.1": 6030.4531, "encoder_q-layer.10": 3266.9131, "encoder_q-layer.11": 6169.8105, "encoder_q-layer.2": 6676.6494, "encoder_q-layer.3": 7000.5029, "encoder_q-layer.4": 7925.5723, "encoder_q-layer.5": 6610.9932, "encoder_q-layer.6": 5688.811, "encoder_q-layer.7": 5401.0938, "encoder_q-layer.8": 5072.7349, "encoder_q-layer.9": 2241.385, "epoch": 0.02, "inbatch_neg_score": 1.455, "inbatch_pos_score": 1.9795, "learning_rate": 9e-06, "loss": 10.4975, "norm_diff": 4.0826, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8518.0423, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.458, "query_norm": 2.1269, "queue_k_norm": 6.207, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4698, "sent_len_1": 66.5723, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5513, "stdk": 0.099, "stdq": 0.0619, "stdqueue_k": 0.0983, "stdqueue_q": 0.0, "step": 1800 }, { "accuracy": 23.3398, "active_queue_size": 16384.0, "cl_loss": 10.6016, "doc_norm": 6.0785, "encoder_q-embeddings": 11756.3037, "encoder_q-layer.0": 11483.4072, "encoder_q-layer.1": 13135.2324, "encoder_q-layer.10": 2718.928, "encoder_q-layer.11": 4702.5957, "encoder_q-layer.2": 12315.0586, "encoder_q-layer.3": 11215.5596, "encoder_q-layer.4": 10584.1104, "encoder_q-layer.5": 8692.7383, "encoder_q-layer.6": 6410.4287, "encoder_q-layer.7": 4555.1704, "encoder_q-layer.8": 3171.28, "encoder_q-layer.9": 1880.4562, "epoch": 0.02, "inbatch_neg_score": 1.5248, "inbatch_pos_score": 2.0312, "learning_rate": 9.5e-06, "loss": 10.6016, "norm_diff": 3.8732, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13465.1184, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5322, "query_norm": 2.2052, "queue_k_norm": 6.0935, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5243, "sent_len_1": 66.5426, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1738, "stdk": 0.0949, "stdq": 0.0642, "stdqueue_k": 0.0947, "stdqueue_q": 0.0, "step": 1900 }, { "accuracy": 24.4141, "active_queue_size": 16384.0, "cl_loss": 10.4007, "doc_norm": 5.9431, "encoder_q-embeddings": 4785.2983, "encoder_q-layer.0": 4100.334, "encoder_q-layer.1": 4657.2256, "encoder_q-layer.10": 2522.053, "encoder_q-layer.11": 4985.8887, "encoder_q-layer.2": 5300.0918, "encoder_q-layer.3": 5569.3584, "encoder_q-layer.4": 5619.064, "encoder_q-layer.5": 5157.7344, "encoder_q-layer.6": 4113.7783, "encoder_q-layer.7": 3431.3809, "encoder_q-layer.8": 3367.5449, "encoder_q-layer.9": 2040.9747, "epoch": 0.02, "inbatch_neg_score": 2.6561, "inbatch_pos_score": 3.1719, "learning_rate": 1e-05, "loss": 10.4007, "norm_diff": 3.6479, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6494.9427, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 2.6445, "query_norm": 2.2952, "queue_k_norm": 5.9571, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7189, "sent_len_1": 66.7818, "sent_max_len_0": 127.9988, "sent_max_len_1": 186.4212, "stdk": 0.0901, "stdq": 0.0647, "stdqueue_k": 0.091, "stdqueue_q": 0.0, "step": 2000 }, { "accuracy": 25.4883, "active_queue_size": 16384.0, "cl_loss": 10.2248, "doc_norm": 5.7798, "encoder_q-embeddings": 7409.9883, "encoder_q-layer.0": 5479.6025, "encoder_q-layer.1": 6072.6709, "encoder_q-layer.10": 5521.2754, "encoder_q-layer.11": 9293.6895, "encoder_q-layer.2": 7189.4053, "encoder_q-layer.3": 8025.9937, "encoder_q-layer.4": 8796.0918, "encoder_q-layer.5": 8735.1475, "encoder_q-layer.6": 8122.2749, "encoder_q-layer.7": 8442.0254, "encoder_q-layer.8": 7748.9937, "encoder_q-layer.9": 3815.5149, "epoch": 0.02, "inbatch_neg_score": 0.9947, "inbatch_pos_score": 1.5029, "learning_rate": 1.05e-05, "loss": 10.2248, "norm_diff": 3.5965, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10687.2111, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.9961, "query_norm": 2.1833, "queue_k_norm": 5.8061, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6593, "sent_len_1": 66.6894, "sent_max_len_0": 127.9887, "sent_max_len_1": 188.1612, "stdk": 0.0879, "stdq": 0.0634, "stdqueue_k": 0.0877, "stdqueue_q": 0.0, "step": 2100 }, { "accuracy": 25.0, "active_queue_size": 16384.0, "cl_loss": 9.6439, "doc_norm": 5.6167, "encoder_q-embeddings": 5238.3901, "encoder_q-layer.0": 4453.1357, "encoder_q-layer.1": 4957.6401, "encoder_q-layer.10": 12873.9014, "encoder_q-layer.11": 13736.6797, "encoder_q-layer.2": 5681.4219, "encoder_q-layer.3": 5967.0605, "encoder_q-layer.4": 6070.4258, "encoder_q-layer.5": 6173.4648, "encoder_q-layer.6": 7132.1587, "encoder_q-layer.7": 7605.897, "encoder_q-layer.8": 9153.2568, "encoder_q-layer.9": 8982.8779, "epoch": 0.02, "inbatch_neg_score": 1.4164, "inbatch_pos_score": 1.8975, "learning_rate": 1.1000000000000001e-05, "loss": 9.6439, "norm_diff": 3.3646, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10640.7682, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4082, "query_norm": 2.2521, "queue_k_norm": 5.6269, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6609, "sent_len_1": 66.744, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6813, "stdk": 0.0843, "stdq": 0.0674, "stdqueue_k": 0.0847, "stdqueue_q": 0.0, "step": 2200 }, { "accuracy": 23.6328, "active_queue_size": 16384.0, "cl_loss": 9.0949, "doc_norm": 5.4125, "encoder_q-embeddings": 12018.4062, "encoder_q-layer.0": 11758.4199, "encoder_q-layer.1": 11499.2363, "encoder_q-layer.10": 33755.3672, "encoder_q-layer.11": 33176.8594, "encoder_q-layer.2": 11447.4951, "encoder_q-layer.3": 13340.6885, "encoder_q-layer.4": 14733.6123, "encoder_q-layer.5": 17288.8652, "encoder_q-layer.6": 18120.5156, "encoder_q-layer.7": 21387.7305, "encoder_q-layer.8": 22265.6582, "encoder_q-layer.9": 22102.8477, "epoch": 0.02, "inbatch_neg_score": 0.9329, "inbatch_pos_score": 1.4365, "learning_rate": 1.1500000000000002e-05, "loss": 9.0949, "norm_diff": 3.209, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26755.1923, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.9233, "query_norm": 2.2035, "queue_k_norm": 5.4274, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5426, "sent_len_1": 66.6831, "sent_max_len_0": 127.9912, "sent_max_len_1": 188.76, "stdk": 0.0808, "stdq": 0.0676, "stdqueue_k": 0.0814, "stdqueue_q": 0.0, "step": 2300 }, { "accuracy": 27.2461, "active_queue_size": 16384.0, "cl_loss": 8.8267, "doc_norm": 5.2087, "encoder_q-embeddings": 3931.8386, "encoder_q-layer.0": 3130.0547, "encoder_q-layer.1": 3597.344, "encoder_q-layer.10": 13166.7939, "encoder_q-layer.11": 13303.8184, "encoder_q-layer.2": 4091.6462, "encoder_q-layer.3": 4680.4585, "encoder_q-layer.4": 6166.6309, "encoder_q-layer.5": 7830.2661, "encoder_q-layer.6": 9932.8662, "encoder_q-layer.7": 11423.4365, "encoder_q-layer.8": 12727.4609, "encoder_q-layer.9": 10952.8633, "epoch": 0.02, "inbatch_neg_score": 1.1719, "inbatch_pos_score": 1.6699, "learning_rate": 1.2e-05, "loss": 8.8267, "norm_diff": 3.0509, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11738.9621, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1631, "query_norm": 2.1578, "queue_k_norm": 5.2092, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6191, "sent_len_1": 66.7998, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.5012, "stdk": 0.0785, "stdq": 0.0629, "stdqueue_k": 0.079, "stdqueue_q": 0.0, "step": 2400 }, { "accuracy": 29.3945, "active_queue_size": 16384.0, "cl_loss": 8.5251, "doc_norm": 4.9667, "encoder_q-embeddings": 4294.2393, "encoder_q-layer.0": 3456.7397, "encoder_q-layer.1": 3544.0701, "encoder_q-layer.10": 12882.1621, "encoder_q-layer.11": 12337.6055, "encoder_q-layer.2": 3681.4619, "encoder_q-layer.3": 3620.8809, "encoder_q-layer.4": 3615.7639, "encoder_q-layer.5": 3726.7776, "encoder_q-layer.6": 4142.1587, "encoder_q-layer.7": 4860.1177, "encoder_q-layer.8": 6150.4707, "encoder_q-layer.9": 7923.6528, "epoch": 0.02, "inbatch_neg_score": 0.7755, "inbatch_pos_score": 1.2734, "learning_rate": 1.25e-05, "loss": 8.5251, "norm_diff": 2.8351, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8515.0725, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7749, "query_norm": 2.1317, "queue_k_norm": 4.9719, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5861, "sent_len_1": 66.7764, "sent_max_len_0": 127.9938, "sent_max_len_1": 188.29, "stdk": 0.0765, "stdq": 0.0646, "stdqueue_k": 0.0765, "stdqueue_q": 0.0, "step": 2500 }, { "accuracy": 27.9297, "active_queue_size": 16384.0, "cl_loss": 8.192, "doc_norm": 4.737, "encoder_q-embeddings": 4381.2651, "encoder_q-layer.0": 3952.8071, "encoder_q-layer.1": 5103.6626, "encoder_q-layer.10": 7125.4375, "encoder_q-layer.11": 7335.2188, "encoder_q-layer.2": 5852.2427, "encoder_q-layer.3": 5364.5356, "encoder_q-layer.4": 5635.8564, "encoder_q-layer.5": 6243.3618, "encoder_q-layer.6": 7905.6343, "encoder_q-layer.7": 9408.1777, "encoder_q-layer.8": 10660.4287, "encoder_q-layer.9": 8611.333, "epoch": 0.03, "inbatch_neg_score": 1.1226, "inbatch_pos_score": 1.6094, "learning_rate": 1.3000000000000001e-05, "loss": 8.192, "norm_diff": 2.5218, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9653.0325, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1113, "query_norm": 2.2151, "queue_k_norm": 4.7431, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6749, "sent_len_1": 66.9092, "sent_max_len_0": 128.0, "sent_max_len_1": 191.11, "stdk": 0.074, "stdq": 0.064, "stdqueue_k": 0.0742, "stdqueue_q": 0.0, "step": 2600 }, { "accuracy": 29.0039, "active_queue_size": 16384.0, "cl_loss": 7.8426, "doc_norm": 4.4925, "encoder_q-embeddings": 3499.7139, "encoder_q-layer.0": 2895.0242, "encoder_q-layer.1": 3221.282, "encoder_q-layer.10": 7652.1172, "encoder_q-layer.11": 7557.9946, "encoder_q-layer.2": 3356.1514, "encoder_q-layer.3": 3427.8298, "encoder_q-layer.4": 3889.1887, "encoder_q-layer.5": 4322.5283, "encoder_q-layer.6": 5815.5269, "encoder_q-layer.7": 7698.751, "encoder_q-layer.8": 9354.168, "encoder_q-layer.9": 8454.4297, "epoch": 0.03, "inbatch_neg_score": 0.5388, "inbatch_pos_score": 1.0195, "learning_rate": 1.3500000000000001e-05, "loss": 7.8426, "norm_diff": 2.4338, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8007.7735, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5308, "query_norm": 2.0587, "queue_k_norm": 4.5104, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4436, "sent_len_1": 66.7317, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.0762, "stdk": 0.0717, "stdq": 0.0626, "stdqueue_k": 0.072, "stdqueue_q": 0.0, "step": 2700 }, { "accuracy": 30.6641, "active_queue_size": 16384.0, "cl_loss": 7.5497, "doc_norm": 4.276, "encoder_q-embeddings": 3168.9067, "encoder_q-layer.0": 2613.2512, "encoder_q-layer.1": 2913.5955, "encoder_q-layer.10": 24741.9688, "encoder_q-layer.11": 20705.7637, "encoder_q-layer.2": 3470.562, "encoder_q-layer.3": 4091.8005, "encoder_q-layer.4": 5565.9912, "encoder_q-layer.5": 7133.2646, "encoder_q-layer.6": 10424.2646, "encoder_q-layer.7": 13655.9287, "encoder_q-layer.8": 16491.5859, "encoder_q-layer.9": 19380.1816, "epoch": 0.03, "inbatch_neg_score": 0.6724, "inbatch_pos_score": 1.1631, "learning_rate": 1.4000000000000001e-05, "loss": 7.5497, "norm_diff": 2.2667, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16288.8393, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.667, "query_norm": 2.0093, "queue_k_norm": 4.2859, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6645, "sent_len_1": 66.6771, "sent_max_len_0": 127.995, "sent_max_len_1": 188.9725, "stdk": 0.0696, "stdq": 0.0607, "stdqueue_k": 0.0701, "stdqueue_q": 0.0, "step": 2800 }, { "accuracy": 30.2734, "active_queue_size": 16384.0, "cl_loss": 7.3433, "doc_norm": 4.0706, "encoder_q-embeddings": 2884.4387, "encoder_q-layer.0": 2572.9062, "encoder_q-layer.1": 2512.3818, "encoder_q-layer.10": 3601.2151, "encoder_q-layer.11": 4952.9468, "encoder_q-layer.2": 2560.2542, "encoder_q-layer.3": 2572.9482, "encoder_q-layer.4": 2431.8191, "encoder_q-layer.5": 2217.1436, "encoder_q-layer.6": 2326.8765, "encoder_q-layer.7": 2500.4307, "encoder_q-layer.8": 2651.2012, "encoder_q-layer.9": 2664.9097, "epoch": 0.03, "inbatch_neg_score": 0.5614, "inbatch_pos_score": 1.0684, "learning_rate": 1.45e-05, "loss": 7.3433, "norm_diff": 2.0818, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4078.4845, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5552, "query_norm": 1.9888, "queue_k_norm": 4.085, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6121, "sent_len_1": 66.8515, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0538, "stdk": 0.0681, "stdq": 0.0591, "stdqueue_k": 0.0685, "stdqueue_q": 0.0, "step": 2900 }, { "accuracy": 29.8828, "active_queue_size": 16384.0, "cl_loss": 7.3034, "doc_norm": 3.8656, "encoder_q-embeddings": 2121.5, "encoder_q-layer.0": 1862.0106, "encoder_q-layer.1": 1988.6655, "encoder_q-layer.10": 6840.7651, "encoder_q-layer.11": 6706.438, "encoder_q-layer.2": 2388.4695, "encoder_q-layer.3": 2616.6074, "encoder_q-layer.4": 2988.1047, "encoder_q-layer.5": 3445.4377, "encoder_q-layer.6": 4654.1484, "encoder_q-layer.7": 5899.0537, "encoder_q-layer.8": 6539.4565, "encoder_q-layer.9": 6375.0679, "epoch": 0.03, "inbatch_neg_score": 0.5895, "inbatch_pos_score": 1.0703, "learning_rate": 1.5e-05, "loss": 7.3034, "norm_diff": 1.8584, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6114.1737, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5806, "query_norm": 2.0072, "queue_k_norm": 3.8935, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4734, "sent_len_1": 66.8163, "sent_max_len_0": 128.0, "sent_max_len_1": 191.4787, "stdk": 0.0666, "stdq": 0.0594, "stdqueue_k": 0.067, "stdqueue_q": 0.0, "step": 3000 }, { "accuracy": 29.6875, "active_queue_size": 16384.0, "cl_loss": 7.0522, "doc_norm": 3.6896, "encoder_q-embeddings": 3017.1467, "encoder_q-layer.0": 2712.9302, "encoder_q-layer.1": 3072.8906, "encoder_q-layer.10": 11958.7529, "encoder_q-layer.11": 9254.0996, "encoder_q-layer.2": 3540.8032, "encoder_q-layer.3": 3223.6558, "encoder_q-layer.4": 2898.2351, "encoder_q-layer.5": 2941.783, "encoder_q-layer.6": 3534.179, "encoder_q-layer.7": 5014.4346, "encoder_q-layer.8": 7497.0347, "encoder_q-layer.9": 9714.3145, "epoch": 0.03, "inbatch_neg_score": 0.5702, "inbatch_pos_score": 1.0508, "learning_rate": 1.55e-05, "loss": 7.0522, "norm_diff": 1.6971, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7877.9576, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5605, "query_norm": 1.9925, "queue_k_norm": 3.7159, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5203, "sent_len_1": 66.7757, "sent_max_len_0": 128.0, "sent_max_len_1": 191.04, "stdk": 0.0654, "stdq": 0.0585, "stdqueue_k": 0.0656, "stdqueue_q": 0.0, "step": 3100 }, { "accuracy": 33.3008, "active_queue_size": 16384.0, "cl_loss": 7.0336, "doc_norm": 3.5471, "encoder_q-embeddings": 3795.8928, "encoder_q-layer.0": 3428.428, "encoder_q-layer.1": 3334.5359, "encoder_q-layer.10": 15607.749, "encoder_q-layer.11": 11672.1211, "encoder_q-layer.2": 3379.2324, "encoder_q-layer.3": 3371.5742, "encoder_q-layer.4": 3513.0681, "encoder_q-layer.5": 4121.417, "encoder_q-layer.6": 5214.3145, "encoder_q-layer.7": 7101.521, "encoder_q-layer.8": 9290.5703, "encoder_q-layer.9": 11945.7744, "epoch": 0.03, "inbatch_neg_score": 0.3817, "inbatch_pos_score": 0.8838, "learning_rate": 1.6000000000000003e-05, "loss": 7.0336, "norm_diff": 1.5796, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10046.5552, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.376, "query_norm": 1.9675, "queue_k_norm": 3.5656, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.543, "sent_len_1": 66.7699, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.9825, "stdk": 0.0643, "stdq": 0.0581, "stdqueue_k": 0.0645, "stdqueue_q": 0.0, "step": 3200 }, { "accuracy": 31.7383, "active_queue_size": 16384.0, "cl_loss": 6.6793, "doc_norm": 3.3917, "encoder_q-embeddings": 2178.3689, "encoder_q-layer.0": 1847.4492, "encoder_q-layer.1": 1914.5157, "encoder_q-layer.10": 18386.7461, "encoder_q-layer.11": 14061.9795, "encoder_q-layer.2": 2293.25, "encoder_q-layer.3": 2483.6562, "encoder_q-layer.4": 3056.7969, "encoder_q-layer.5": 4019.5356, "encoder_q-layer.6": 6136.3252, "encoder_q-layer.7": 9032.4277, "encoder_q-layer.8": 11937.8213, "encoder_q-layer.9": 14513.2578, "epoch": 0.03, "inbatch_neg_score": 0.7754, "inbatch_pos_score": 1.2559, "learning_rate": 1.65e-05, "loss": 6.6793, "norm_diff": 1.4981, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11493.7663, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7695, "query_norm": 1.8937, "queue_k_norm": 3.4102, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6929, "sent_len_1": 66.819, "sent_max_len_0": 127.9963, "sent_max_len_1": 191.735, "stdk": 0.0629, "stdq": 0.0556, "stdqueue_k": 0.0636, "stdqueue_q": 0.0, "step": 3300 }, { "accuracy": 31.7383, "active_queue_size": 16384.0, "cl_loss": 6.4674, "doc_norm": 3.2652, "encoder_q-embeddings": 2820.7227, "encoder_q-layer.0": 2549.5754, "encoder_q-layer.1": 2519.4182, "encoder_q-layer.10": 12570.2822, "encoder_q-layer.11": 10633.4609, "encoder_q-layer.2": 2647.6196, "encoder_q-layer.3": 2552.782, "encoder_q-layer.4": 3242.1782, "encoder_q-layer.5": 4089.8655, "encoder_q-layer.6": 5952.3726, "encoder_q-layer.7": 8223.1953, "encoder_q-layer.8": 9818.3115, "encoder_q-layer.9": 10639.6973, "epoch": 0.03, "inbatch_neg_score": 0.6953, "inbatch_pos_score": 1.1738, "learning_rate": 1.7000000000000003e-05, "loss": 6.4674, "norm_diff": 1.366, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9193.4321, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6885, "query_norm": 1.8992, "queue_k_norm": 3.272, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.48, "sent_len_1": 66.5915, "sent_max_len_0": 128.0, "sent_max_len_1": 191.255, "stdk": 0.0622, "stdq": 0.0559, "stdqueue_k": 0.0626, "stdqueue_q": 0.0, "step": 3400 }, { "accuracy": 33.5938, "active_queue_size": 16384.0, "cl_loss": 6.2855, "doc_norm": 3.1206, "encoder_q-embeddings": 1780.5391, "encoder_q-layer.0": 1540.3899, "encoder_q-layer.1": 1590.3577, "encoder_q-layer.10": 5007.5649, "encoder_q-layer.11": 4292.104, "encoder_q-layer.2": 1717.668, "encoder_q-layer.3": 1495.3522, "encoder_q-layer.4": 1608.1863, "encoder_q-layer.5": 1907.2172, "encoder_q-layer.6": 2629.7065, "encoder_q-layer.7": 3688.7856, "encoder_q-layer.8": 4479.7412, "encoder_q-layer.9": 4585.1938, "epoch": 0.03, "inbatch_neg_score": 0.4617, "inbatch_pos_score": 0.9424, "learning_rate": 1.75e-05, "loss": 6.2855, "norm_diff": 1.309, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4084.8463, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4556, "query_norm": 1.8116, "queue_k_norm": 3.1472, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7358, "sent_len_1": 66.8931, "sent_max_len_0": 128.0, "sent_max_len_1": 189.84, "stdk": 0.0613, "stdq": 0.0536, "stdqueue_k": 0.0616, "stdqueue_q": 0.0, "step": 3500 }, { "accuracy": 34.2773, "active_queue_size": 16384.0, "cl_loss": 6.0455, "doc_norm": 3.0059, "encoder_q-embeddings": 2466.2085, "encoder_q-layer.0": 2400.5425, "encoder_q-layer.1": 2571.9932, "encoder_q-layer.10": 2751.645, "encoder_q-layer.11": 4138.084, "encoder_q-layer.2": 2850.636, "encoder_q-layer.3": 2749.2432, "encoder_q-layer.4": 2735.0474, "encoder_q-layer.5": 2543.9834, "encoder_q-layer.6": 2390.8958, "encoder_q-layer.7": 2227.5291, "encoder_q-layer.8": 2418.9824, "encoder_q-layer.9": 2055.0027, "epoch": 0.04, "inbatch_neg_score": 0.5912, "inbatch_pos_score": 1.0801, "learning_rate": 1.8e-05, "loss": 6.0455, "norm_diff": 1.2116, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3873.3081, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5835, "query_norm": 1.7942, "queue_k_norm": 3.0336, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7955, "sent_len_1": 66.9949, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6813, "stdk": 0.0605, "stdq": 0.0529, "stdqueue_k": 0.061, "stdqueue_q": 0.0, "step": 3600 }, { "accuracy": 35.8398, "active_queue_size": 16384.0, "cl_loss": 5.9057, "doc_norm": 2.8961, "encoder_q-embeddings": 1680.0707, "encoder_q-layer.0": 1403.3943, "encoder_q-layer.1": 1642.4163, "encoder_q-layer.10": 7813.2773, "encoder_q-layer.11": 7151.8979, "encoder_q-layer.2": 1715.6718, "encoder_q-layer.3": 1655.1018, "encoder_q-layer.4": 1802.5042, "encoder_q-layer.5": 2308.4316, "encoder_q-layer.6": 3300.356, "encoder_q-layer.7": 4779.561, "encoder_q-layer.8": 6108.7915, "encoder_q-layer.9": 6172.1289, "epoch": 0.04, "inbatch_neg_score": 0.4803, "inbatch_pos_score": 0.9629, "learning_rate": 1.85e-05, "loss": 5.9057, "norm_diff": 1.1081, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5678.5119, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4756, "query_norm": 1.788, "queue_k_norm": 2.9118, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4999, "sent_len_1": 66.6965, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7225, "stdk": 0.0601, "stdq": 0.0523, "stdqueue_k": 0.06, "stdqueue_q": 0.0, "step": 3700 }, { "accuracy": 37.0117, "active_queue_size": 16384.0, "cl_loss": 5.7682, "doc_norm": 2.7969, "encoder_q-embeddings": 1755.5713, "encoder_q-layer.0": 1521.0176, "encoder_q-layer.1": 1535.8611, "encoder_q-layer.10": 5253.2124, "encoder_q-layer.11": 5546.4644, "encoder_q-layer.2": 1545.6274, "encoder_q-layer.3": 1479.2799, "encoder_q-layer.4": 1348.678, "encoder_q-layer.5": 1208.4032, "encoder_q-layer.6": 1271.6772, "encoder_q-layer.7": 1787.4344, "encoder_q-layer.8": 2678.2371, "encoder_q-layer.9": 3652.03, "epoch": 0.04, "inbatch_neg_score": 0.3882, "inbatch_pos_score": 0.8774, "learning_rate": 1.9e-05, "loss": 5.7682, "norm_diff": 1.0598, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3745.762, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3792, "query_norm": 1.737, "queue_k_norm": 2.7985, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7385, "sent_len_1": 66.9946, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.5563, "stdk": 0.0588, "stdq": 0.05, "stdqueue_k": 0.0592, "stdqueue_q": 0.0, "step": 3800 }, { "accuracy": 34.8633, "active_queue_size": 16384.0, "cl_loss": 5.6614, "doc_norm": 2.6945, "encoder_q-embeddings": 2883.0723, "encoder_q-layer.0": 2406.6013, "encoder_q-layer.1": 2318.0491, "encoder_q-layer.10": 7483.3818, "encoder_q-layer.11": 6878.645, "encoder_q-layer.2": 2562.9951, "encoder_q-layer.3": 2447.2234, "encoder_q-layer.4": 2699.6907, "encoder_q-layer.5": 3225.7871, "encoder_q-layer.6": 4057.6375, "encoder_q-layer.7": 5232.8755, "encoder_q-layer.8": 6119.3735, "encoder_q-layer.9": 6687.123, "epoch": 0.04, "inbatch_neg_score": 0.4438, "inbatch_pos_score": 0.9272, "learning_rate": 1.9500000000000003e-05, "loss": 5.6614, "norm_diff": 0.9703, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6229.0199, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4434, "query_norm": 1.7242, "queue_k_norm": 2.6952, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5905, "sent_len_1": 66.6688, "sent_max_len_0": 127.9875, "sent_max_len_1": 188.6725, "stdk": 0.0584, "stdq": 0.0505, "stdqueue_k": 0.0585, "stdqueue_q": 0.0, "step": 3900 }, { "accuracy": 34.668, "active_queue_size": 16384.0, "cl_loss": 5.5163, "doc_norm": 2.5829, "encoder_q-embeddings": 3227.4265, "encoder_q-layer.0": 2982.3767, "encoder_q-layer.1": 3367.2661, "encoder_q-layer.10": 1790.1979, "encoder_q-layer.11": 2883.7458, "encoder_q-layer.2": 4219.1519, "encoder_q-layer.3": 3984.1824, "encoder_q-layer.4": 3129.9001, "encoder_q-layer.5": 2069.4695, "encoder_q-layer.6": 1638.7479, "encoder_q-layer.7": 1408.7084, "encoder_q-layer.8": 1522.048, "encoder_q-layer.9": 1228.0052, "epoch": 0.04, "inbatch_neg_score": 0.4239, "inbatch_pos_score": 0.9058, "learning_rate": 2e-05, "loss": 5.5163, "norm_diff": 0.8579, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4187.275, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4202, "query_norm": 1.725, "queue_k_norm": 2.5893, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7695, "sent_len_1": 66.9262, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3688, "stdk": 0.0578, "stdq": 0.0503, "stdqueue_k": 0.0578, "stdqueue_q": 0.0, "step": 4000 }, { "accuracy": 33.3008, "active_queue_size": 16384.0, "cl_loss": 5.4608, "doc_norm": 2.4886, "encoder_q-embeddings": 3219.2051, "encoder_q-layer.0": 2669.3284, "encoder_q-layer.1": 2908.7742, "encoder_q-layer.10": 4279.5542, "encoder_q-layer.11": 7032.1982, "encoder_q-layer.2": 3341.1992, "encoder_q-layer.3": 3042.302, "encoder_q-layer.4": 2782.1531, "encoder_q-layer.5": 2638.8689, "encoder_q-layer.6": 2387.8882, "encoder_q-layer.7": 2297.6438, "encoder_q-layer.8": 2614.6042, "encoder_q-layer.9": 2647.0266, "epoch": 0.04, "inbatch_neg_score": 0.3714, "inbatch_pos_score": 0.8486, "learning_rate": 2.05e-05, "loss": 5.4608, "norm_diff": 0.7883, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4964.9154, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3687, "query_norm": 1.7003, "queue_k_norm": 2.4949, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4826, "sent_len_1": 66.7287, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0488, "stdk": 0.0568, "stdq": 0.0495, "stdqueue_k": 0.0572, "stdqueue_q": 0.0, "step": 4100 }, { "accuracy": 36.7188, "active_queue_size": 16384.0, "cl_loss": 5.3866, "doc_norm": 2.3947, "encoder_q-embeddings": 3599.0911, "encoder_q-layer.0": 3038.2166, "encoder_q-layer.1": 3100.6431, "encoder_q-layer.10": 10333.3535, "encoder_q-layer.11": 12137.9062, "encoder_q-layer.2": 3201.4216, "encoder_q-layer.3": 3050.5518, "encoder_q-layer.4": 2831.6963, "encoder_q-layer.5": 3237.3098, "encoder_q-layer.6": 4502.7124, "encoder_q-layer.7": 6596.4819, "encoder_q-layer.8": 8684.2334, "encoder_q-layer.9": 9523.8828, "epoch": 0.04, "inbatch_neg_score": 0.3886, "inbatch_pos_score": 0.8818, "learning_rate": 2.1e-05, "loss": 5.3866, "norm_diff": 0.7152, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8782.9116, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3865, "query_norm": 1.6794, "queue_k_norm": 2.3998, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6225, "sent_len_1": 66.6852, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.6275, "stdk": 0.0568, "stdq": 0.0484, "stdqueue_k": 0.0564, "stdqueue_q": 0.0, "step": 4200 }, { "accuracy": 35.1562, "active_queue_size": 16384.0, "cl_loss": 5.2915, "doc_norm": 2.3026, "encoder_q-embeddings": 2294.6255, "encoder_q-layer.0": 1844.7961, "encoder_q-layer.1": 1999.0333, "encoder_q-layer.10": 4057.0986, "encoder_q-layer.11": 5978.9556, "encoder_q-layer.2": 2286.0813, "encoder_q-layer.3": 2242.2183, "encoder_q-layer.4": 2233.3748, "encoder_q-layer.5": 2126.3953, "encoder_q-layer.6": 2075.1084, "encoder_q-layer.7": 2123.5625, "encoder_q-layer.8": 2820.9849, "encoder_q-layer.9": 3239.0254, "epoch": 0.04, "inbatch_neg_score": 0.3738, "inbatch_pos_score": 0.8555, "learning_rate": 2.15e-05, "loss": 5.2915, "norm_diff": 0.5995, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4126.5581, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3711, "query_norm": 1.7031, "queue_k_norm": 2.3077, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7364, "sent_len_1": 66.9593, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1325, "stdk": 0.0556, "stdq": 0.0488, "stdqueue_k": 0.0557, "stdqueue_q": 0.0, "step": 4300 }, { "accuracy": 39.0625, "active_queue_size": 16384.0, "cl_loss": 5.2719, "doc_norm": 2.2241, "encoder_q-embeddings": 4493.3486, "encoder_q-layer.0": 4186.2671, "encoder_q-layer.1": 3366.3308, "encoder_q-layer.10": 4853.8687, "encoder_q-layer.11": 6284.3921, "encoder_q-layer.2": 3131.3445, "encoder_q-layer.3": 2542.3359, "encoder_q-layer.4": 2189.0251, "encoder_q-layer.5": 1887.4631, "encoder_q-layer.6": 1867.3265, "encoder_q-layer.7": 2008.5134, "encoder_q-layer.8": 2908.3813, "encoder_q-layer.9": 3366.6182, "epoch": 0.04, "inbatch_neg_score": 0.3988, "inbatch_pos_score": 0.9146, "learning_rate": 2.2000000000000003e-05, "loss": 5.2719, "norm_diff": 0.5575, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5226.2323, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.395, "query_norm": 1.6666, "queue_k_norm": 2.2294, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5943, "sent_len_1": 66.8541, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.5775, "stdk": 0.0548, "stdq": 0.0475, "stdqueue_k": 0.0552, "stdqueue_q": 0.0, "step": 4400 }, { "accuracy": 37.207, "active_queue_size": 16384.0, "cl_loss": 5.1772, "doc_norm": 2.1514, "encoder_q-embeddings": 6781.9419, "encoder_q-layer.0": 5287.2319, "encoder_q-layer.1": 4881.6816, "encoder_q-layer.10": 4723.4023, "encoder_q-layer.11": 5716.1504, "encoder_q-layer.2": 4682.2812, "encoder_q-layer.3": 3898.6104, "encoder_q-layer.4": 3531.6455, "encoder_q-layer.5": 3703.5269, "encoder_q-layer.6": 3797.5691, "encoder_q-layer.7": 3705.9985, "encoder_q-layer.8": 3626.5474, "encoder_q-layer.9": 3604.6626, "epoch": 0.04, "inbatch_neg_score": 0.4354, "inbatch_pos_score": 0.9526, "learning_rate": 2.25e-05, "loss": 5.1772, "norm_diff": 0.4529, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6856.6395, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4324, "query_norm": 1.6985, "queue_k_norm": 2.1542, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4677, "sent_len_1": 66.8111, "sent_max_len_0": 128.0, "sent_max_len_1": 192.33, "stdk": 0.0543, "stdq": 0.0488, "stdqueue_k": 0.0544, "stdqueue_q": 0.0, "step": 4500 }, { "accuracy": 41.0156, "active_queue_size": 16384.0, "cl_loss": 5.1216, "doc_norm": 2.0821, "encoder_q-embeddings": 2007.9473, "encoder_q-layer.0": 1645.603, "encoder_q-layer.1": 1685.8197, "encoder_q-layer.10": 3120.1572, "encoder_q-layer.11": 5227.3647, "encoder_q-layer.2": 1813.1951, "encoder_q-layer.3": 1772.5637, "encoder_q-layer.4": 1766.2794, "encoder_q-layer.5": 1557.1678, "encoder_q-layer.6": 1411.9102, "encoder_q-layer.7": 1403.6086, "encoder_q-layer.8": 1699.3674, "encoder_q-layer.9": 1857.9069, "epoch": 0.04, "inbatch_neg_score": 0.4919, "inbatch_pos_score": 1.0098, "learning_rate": 2.3000000000000003e-05, "loss": 5.1216, "norm_diff": 0.4091, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3312.8097, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4905, "query_norm": 1.6731, "queue_k_norm": 2.0883, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7261, "sent_len_1": 66.8733, "sent_max_len_0": 128.0, "sent_max_len_1": 191.9013, "stdk": 0.0536, "stdq": 0.048, "stdqueue_k": 0.0539, "stdqueue_q": 0.0, "step": 4600 }, { "accuracy": 39.3555, "active_queue_size": 16384.0, "cl_loss": 5.0745, "doc_norm": 2.0174, "encoder_q-embeddings": 2592.313, "encoder_q-layer.0": 1922.8169, "encoder_q-layer.1": 1857.7175, "encoder_q-layer.10": 3373.6958, "encoder_q-layer.11": 4723.0068, "encoder_q-layer.2": 1990.3442, "encoder_q-layer.3": 1849.002, "encoder_q-layer.4": 1769.1403, "encoder_q-layer.5": 1576.6014, "encoder_q-layer.6": 1644.3331, "encoder_q-layer.7": 1571.4076, "encoder_q-layer.8": 1775.1705, "encoder_q-layer.9": 1977.6998, "epoch": 0.05, "inbatch_neg_score": 0.4635, "inbatch_pos_score": 0.9946, "learning_rate": 2.35e-05, "loss": 5.0745, "norm_diff": 0.3098, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3437.1992, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4604, "query_norm": 1.7076, "queue_k_norm": 2.0294, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5063, "sent_len_1": 66.7307, "sent_max_len_0": 127.995, "sent_max_len_1": 189.025, "stdk": 0.0531, "stdq": 0.0489, "stdqueue_k": 0.0534, "stdqueue_q": 0.0, "step": 4700 }, { "accuracy": 37.3047, "active_queue_size": 16384.0, "cl_loss": 5.0215, "doc_norm": 1.9641, "encoder_q-embeddings": 1970.7554, "encoder_q-layer.0": 1619.0726, "encoder_q-layer.1": 1746.9089, "encoder_q-layer.10": 4395.2036, "encoder_q-layer.11": 5708.2754, "encoder_q-layer.2": 1865.8929, "encoder_q-layer.3": 1745.8848, "encoder_q-layer.4": 1649.9213, "encoder_q-layer.5": 1612.5787, "encoder_q-layer.6": 1903.5604, "encoder_q-layer.7": 2676.5906, "encoder_q-layer.8": 3709.7029, "encoder_q-layer.9": 4035.4373, "epoch": 0.05, "inbatch_neg_score": 0.496, "inbatch_pos_score": 1.0098, "learning_rate": 2.4e-05, "loss": 5.0215, "norm_diff": 0.2594, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4097.0241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4893, "query_norm": 1.7048, "queue_k_norm": 1.9777, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7972, "sent_len_1": 66.8867, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9162, "stdk": 0.0524, "stdq": 0.049, "stdqueue_k": 0.0529, "stdqueue_q": 0.0, "step": 4800 }, { "accuracy": 40.1367, "active_queue_size": 16384.0, "cl_loss": 4.98, "doc_norm": 1.9227, "encoder_q-embeddings": 1889.3928, "encoder_q-layer.0": 1518.458, "encoder_q-layer.1": 1504.9557, "encoder_q-layer.10": 5919.8418, "encoder_q-layer.11": 9694.1406, "encoder_q-layer.2": 1582.5001, "encoder_q-layer.3": 1557.9734, "encoder_q-layer.4": 1693.0052, "encoder_q-layer.5": 1689.8562, "encoder_q-layer.6": 1908.0654, "encoder_q-layer.7": 2373.3721, "encoder_q-layer.8": 3119.4131, "encoder_q-layer.9": 3431.9084, "epoch": 0.05, "inbatch_neg_score": 0.55, "inbatch_pos_score": 1.0635, "learning_rate": 2.45e-05, "loss": 4.98, "norm_diff": 0.2354, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5300.9398, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5474, "query_norm": 1.6873, "queue_k_norm": 1.931, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4892, "sent_len_1": 66.8287, "sent_max_len_0": 128.0, "sent_max_len_1": 191.595, "stdk": 0.052, "stdq": 0.0483, "stdqueue_k": 0.0525, "stdqueue_q": 0.0, "step": 4900 }, { "accuracy": 39.7461, "active_queue_size": 16384.0, "cl_loss": 4.9497, "doc_norm": 1.8877, "encoder_q-embeddings": 9419.1904, "encoder_q-layer.0": 7929.4917, "encoder_q-layer.1": 7491.1357, "encoder_q-layer.10": 2668.9258, "encoder_q-layer.11": 4527.3521, "encoder_q-layer.2": 6702.8853, "encoder_q-layer.3": 5465.6895, "encoder_q-layer.4": 3938.3171, "encoder_q-layer.5": 3013.012, "encoder_q-layer.6": 2660.8225, "encoder_q-layer.7": 2141.896, "encoder_q-layer.8": 1794.5238, "encoder_q-layer.9": 1649.7963, "epoch": 0.05, "inbatch_neg_score": 0.5745, "inbatch_pos_score": 1.0859, "learning_rate": 2.5e-05, "loss": 4.9497, "norm_diff": 0.1859, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8360.3204, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5703, "query_norm": 1.7018, "queue_k_norm": 1.8923, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6246, "sent_len_1": 66.9073, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9563, "stdk": 0.0518, "stdq": 0.0485, "stdqueue_k": 0.052, "stdqueue_q": 0.0, "step": 5000 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.8995, "doc_norm": 1.8569, "encoder_q-embeddings": 2295.2324, "encoder_q-layer.0": 2042.0952, "encoder_q-layer.1": 1767.8647, "encoder_q-layer.10": 3051.5608, "encoder_q-layer.11": 4504.166, "encoder_q-layer.2": 1738.4922, "encoder_q-layer.3": 1659.3031, "encoder_q-layer.4": 1495.7397, "encoder_q-layer.5": 1376.677, "encoder_q-layer.6": 1425.326, "encoder_q-layer.7": 1524.7819, "encoder_q-layer.8": 2080.3369, "encoder_q-layer.9": 2218.0447, "epoch": 0.05, "inbatch_neg_score": 0.5772, "inbatch_pos_score": 1.1162, "learning_rate": 2.5500000000000003e-05, "loss": 4.8995, "norm_diff": 0.1537, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3249.4012, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5752, "query_norm": 1.7033, "queue_k_norm": 1.8582, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.637, "sent_len_1": 66.6402, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.9062, "stdk": 0.0515, "stdq": 0.0482, "stdqueue_k": 0.0516, "stdqueue_q": 0.0, "step": 5100 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.8973, "doc_norm": 1.8243, "encoder_q-embeddings": 17052.1191, "encoder_q-layer.0": 14259.6055, "encoder_q-layer.1": 12713.4893, "encoder_q-layer.10": 5334.7847, "encoder_q-layer.11": 8207.2832, "encoder_q-layer.2": 12013.1621, "encoder_q-layer.3": 10388.9209, "encoder_q-layer.4": 6704.6919, "encoder_q-layer.5": 3980.7886, "encoder_q-layer.6": 2870.3218, "encoder_q-layer.7": 2898.9216, "encoder_q-layer.8": 3510.4524, "encoder_q-layer.9": 3155.7795, "epoch": 0.05, "inbatch_neg_score": 0.6235, "inbatch_pos_score": 1.1289, "learning_rate": 2.6000000000000002e-05, "loss": 4.8973, "norm_diff": 0.1144, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14697.246, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6191, "query_norm": 1.7099, "queue_k_norm": 1.8298, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4679, "sent_len_1": 66.8242, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.655, "stdk": 0.0509, "stdq": 0.048, "stdqueue_k": 0.0512, "stdqueue_q": 0.0, "step": 5200 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.8416, "doc_norm": 1.8013, "encoder_q-embeddings": 4121.8354, "encoder_q-layer.0": 3662.761, "encoder_q-layer.1": 3566.9705, "encoder_q-layer.10": 3242.3955, "encoder_q-layer.11": 4875.1846, "encoder_q-layer.2": 4162.646, "encoder_q-layer.3": 4160.9912, "encoder_q-layer.4": 3281.2305, "encoder_q-layer.5": 2617.2925, "encoder_q-layer.6": 2330.4824, "encoder_q-layer.7": 2494.4529, "encoder_q-layer.8": 3101.2693, "encoder_q-layer.9": 2587.1201, "epoch": 0.05, "inbatch_neg_score": 0.6467, "inbatch_pos_score": 1.1709, "learning_rate": 2.6500000000000004e-05, "loss": 4.8416, "norm_diff": 0.0874, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5247.3291, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6416, "query_norm": 1.7139, "queue_k_norm": 1.8046, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6538, "sent_len_1": 66.9439, "sent_max_len_0": 127.9875, "sent_max_len_1": 190.6763, "stdk": 0.0506, "stdq": 0.0482, "stdqueue_k": 0.0508, "stdqueue_q": 0.0, "step": 5300 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.8253, "doc_norm": 1.7856, "encoder_q-embeddings": 4469.7485, "encoder_q-layer.0": 3853.7676, "encoder_q-layer.1": 3414.4919, "encoder_q-layer.10": 2270.9878, "encoder_q-layer.11": 3977.3105, "encoder_q-layer.2": 3348.3862, "encoder_q-layer.3": 2932.1885, "encoder_q-layer.4": 2493.8262, "encoder_q-layer.5": 1914.538, "encoder_q-layer.6": 1808.0981, "encoder_q-layer.7": 1720.5675, "encoder_q-layer.8": 2072.3662, "encoder_q-layer.9": 1856.8464, "epoch": 0.05, "inbatch_neg_score": 0.6768, "inbatch_pos_score": 1.2148, "learning_rate": 2.7000000000000002e-05, "loss": 4.8253, "norm_diff": 0.0395, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4489.8849, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6733, "query_norm": 1.7478, "queue_k_norm": 1.7862, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5083, "sent_len_1": 66.7701, "sent_max_len_0": 128.0, "sent_max_len_1": 187.0637, "stdk": 0.0505, "stdq": 0.0493, "stdqueue_k": 0.0506, "stdqueue_q": 0.0, "step": 5400 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.7899, "doc_norm": 1.7641, "encoder_q-embeddings": 2787.6543, "encoder_q-layer.0": 2250.2034, "encoder_q-layer.1": 2349.2419, "encoder_q-layer.10": 2326.1541, "encoder_q-layer.11": 3878.8628, "encoder_q-layer.2": 2575.1536, "encoder_q-layer.3": 2247.4827, "encoder_q-layer.4": 2026.6145, "encoder_q-layer.5": 2239.3818, "encoder_q-layer.6": 2109.5889, "encoder_q-layer.7": 1834.312, "encoder_q-layer.8": 1958.7511, "encoder_q-layer.9": 1975.3362, "epoch": 0.05, "inbatch_neg_score": 0.6713, "inbatch_pos_score": 1.1855, "learning_rate": 2.7500000000000004e-05, "loss": 4.7899, "norm_diff": 0.03, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3548.5556, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.667, "query_norm": 1.734, "queue_k_norm": 1.7704, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6587, "sent_len_1": 66.8722, "sent_max_len_0": 127.9975, "sent_max_len_1": 188.9125, "stdk": 0.0499, "stdq": 0.0485, "stdqueue_k": 0.0503, "stdqueue_q": 0.0, "step": 5500 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.7729, "doc_norm": 1.762, "encoder_q-embeddings": 4204.6411, "encoder_q-layer.0": 3079.668, "encoder_q-layer.1": 2863.4578, "encoder_q-layer.10": 2470.4451, "encoder_q-layer.11": 3607.4512, "encoder_q-layer.2": 2532.2986, "encoder_q-layer.3": 2214.7725, "encoder_q-layer.4": 1703.4606, "encoder_q-layer.5": 1443.1531, "encoder_q-layer.6": 1482.1296, "encoder_q-layer.7": 1935.4692, "encoder_q-layer.8": 2524.6389, "encoder_q-layer.9": 2417.2078, "epoch": 0.05, "inbatch_neg_score": 0.6826, "inbatch_pos_score": 1.2373, "learning_rate": 2.8000000000000003e-05, "loss": 4.7729, "norm_diff": 0.0139, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4003.425, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6807, "query_norm": 1.7691, "queue_k_norm": 1.756, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4418, "sent_len_1": 66.8057, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6425, "stdk": 0.0502, "stdq": 0.0494, "stdqueue_k": 0.05, "stdqueue_q": 0.0, "step": 5600 }, { "accuracy": 39.4531, "active_queue_size": 16384.0, "cl_loss": 4.7189, "doc_norm": 1.7446, "encoder_q-embeddings": 2233.6069, "encoder_q-layer.0": 1823.9745, "encoder_q-layer.1": 1737.2661, "encoder_q-layer.10": 2853.8533, "encoder_q-layer.11": 4244.48, "encoder_q-layer.2": 1827.3226, "encoder_q-layer.3": 1645.573, "encoder_q-layer.4": 1450.8884, "encoder_q-layer.5": 1329.4531, "encoder_q-layer.6": 1507.2753, "encoder_q-layer.7": 1819.5398, "encoder_q-layer.8": 2333.5559, "encoder_q-layer.9": 2546.4136, "epoch": 0.06, "inbatch_neg_score": 0.6776, "inbatch_pos_score": 1.2246, "learning_rate": 2.8499999999999998e-05, "loss": 4.7189, "norm_diff": 0.0307, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3162.5031, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6763, "query_norm": 1.7754, "queue_k_norm": 1.7452, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6681, "sent_len_1": 66.8779, "sent_max_len_0": 127.99, "sent_max_len_1": 189.5037, "stdk": 0.0496, "stdq": 0.0494, "stdqueue_k": 0.0498, "stdqueue_q": 0.0, "step": 5700 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.7039, "doc_norm": 1.7418, "encoder_q-embeddings": 2247.7288, "encoder_q-layer.0": 1980.1896, "encoder_q-layer.1": 1993.1979, "encoder_q-layer.10": 2629.2759, "encoder_q-layer.11": 3994.124, "encoder_q-layer.2": 1898.1484, "encoder_q-layer.3": 1843.923, "encoder_q-layer.4": 1733.7544, "encoder_q-layer.5": 1664.0702, "encoder_q-layer.6": 1673.1606, "encoder_q-layer.7": 1602.4644, "encoder_q-layer.8": 1875.3827, "encoder_q-layer.9": 2140.9614, "epoch": 0.06, "inbatch_neg_score": 0.702, "inbatch_pos_score": 1.249, "learning_rate": 2.9e-05, "loss": 4.7039, "norm_diff": 0.0158, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3187.4491, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6987, "query_norm": 1.7426, "queue_k_norm": 1.7378, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.696, "sent_len_1": 66.9083, "sent_max_len_0": 128.0, "sent_max_len_1": 187.0675, "stdk": 0.0496, "stdq": 0.0478, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 5800 }, { "accuracy": 40.4297, "active_queue_size": 16384.0, "cl_loss": 4.6701, "doc_norm": 1.7326, "encoder_q-embeddings": 2793.3298, "encoder_q-layer.0": 2390.4851, "encoder_q-layer.1": 2431.7329, "encoder_q-layer.10": 3806.4448, "encoder_q-layer.11": 6262.9795, "encoder_q-layer.2": 2473.7285, "encoder_q-layer.3": 2149.6296, "encoder_q-layer.4": 1861.4341, "encoder_q-layer.5": 1773.1456, "encoder_q-layer.6": 1634.4415, "encoder_q-layer.7": 1766.4125, "encoder_q-layer.8": 2472.3135, "encoder_q-layer.9": 2363.27, "epoch": 0.06, "inbatch_neg_score": 0.69, "inbatch_pos_score": 1.2295, "learning_rate": 2.95e-05, "loss": 4.6701, "norm_diff": 0.0365, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4147.1318, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.689, "query_norm": 1.7691, "queue_k_norm": 1.7319, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6874, "sent_len_1": 66.9467, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.9275, "stdk": 0.0494, "stdq": 0.0491, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 5900 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.6608, "doc_norm": 1.7246, "encoder_q-embeddings": 2160.7446, "encoder_q-layer.0": 1738.396, "encoder_q-layer.1": 1663.3828, "encoder_q-layer.10": 2073.1396, "encoder_q-layer.11": 3552.731, "encoder_q-layer.2": 1654.2417, "encoder_q-layer.3": 1551.8844, "encoder_q-layer.4": 1365.4958, "encoder_q-layer.5": 1251.9512, "encoder_q-layer.6": 1232.1012, "encoder_q-layer.7": 1303.3862, "encoder_q-layer.8": 1622.2695, "encoder_q-layer.9": 1649.2571, "epoch": 0.06, "inbatch_neg_score": 0.6706, "inbatch_pos_score": 1.1992, "learning_rate": 3e-05, "loss": 4.6608, "norm_diff": 0.039, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2769.5298, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.668, "query_norm": 1.7612, "queue_k_norm": 1.7243, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5656, "sent_len_1": 66.9799, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3175, "stdk": 0.0491, "stdq": 0.0482, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 6000 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.6447, "doc_norm": 1.7187, "encoder_q-embeddings": 31703.9238, "encoder_q-layer.0": 25398.1016, "encoder_q-layer.1": 23587.4043, "encoder_q-layer.10": 8608.499, "encoder_q-layer.11": 14536.7725, "encoder_q-layer.2": 17045.0371, "encoder_q-layer.3": 13419.7432, "encoder_q-layer.4": 11003.0615, "encoder_q-layer.5": 10627.7656, "encoder_q-layer.6": 10422.5225, "encoder_q-layer.7": 6501.4277, "encoder_q-layer.8": 4223.8696, "encoder_q-layer.9": 4151.6753, "epoch": 0.06, "inbatch_neg_score": 0.7095, "inbatch_pos_score": 1.2559, "learning_rate": 3.05e-05, "loss": 4.6447, "norm_diff": 0.0622, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25908.1849, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.709, "query_norm": 1.7809, "queue_k_norm": 1.7195, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6741, "sent_len_1": 67.0802, "sent_max_len_0": 127.9912, "sent_max_len_1": 189.81, "stdk": 0.0491, "stdq": 0.0475, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 6100 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.6206, "doc_norm": 1.7184, "encoder_q-embeddings": 2523.7898, "encoder_q-layer.0": 1807.1292, "encoder_q-layer.1": 1867.9629, "encoder_q-layer.10": 5882.4712, "encoder_q-layer.11": 9796.8604, "encoder_q-layer.2": 2190.582, "encoder_q-layer.3": 2287.0466, "encoder_q-layer.4": 2329.3132, "encoder_q-layer.5": 2297.2842, "encoder_q-layer.6": 2647.834, "encoder_q-layer.7": 2796.1167, "encoder_q-layer.8": 3037.7373, "encoder_q-layer.9": 3279.5964, "epoch": 0.06, "inbatch_neg_score": 0.6508, "inbatch_pos_score": 1.1914, "learning_rate": 3.1e-05, "loss": 4.6206, "norm_diff": 0.0662, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5589.017, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6475, "query_norm": 1.7846, "queue_k_norm": 1.7151, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5719, "sent_len_1": 66.6983, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.555, "stdk": 0.0491, "stdq": 0.0477, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 6200 }, { "accuracy": 41.9922, "active_queue_size": 16384.0, "cl_loss": 4.6093, "doc_norm": 1.7041, "encoder_q-embeddings": 19522.5293, "encoder_q-layer.0": 18005.9141, "encoder_q-layer.1": 15612.7109, "encoder_q-layer.10": 3555.3992, "encoder_q-layer.11": 7036.5752, "encoder_q-layer.2": 15047.5674, "encoder_q-layer.3": 12324.8486, "encoder_q-layer.4": 9226.8672, "encoder_q-layer.5": 6607.2271, "encoder_q-layer.6": 6807.6108, "encoder_q-layer.7": 4520.7666, "encoder_q-layer.8": 3013.7122, "encoder_q-layer.9": 2646.8088, "epoch": 0.06, "inbatch_neg_score": 0.6719, "inbatch_pos_score": 1.1992, "learning_rate": 3.15e-05, "loss": 4.6093, "norm_diff": 0.1294, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17406.9785, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.6646, "query_norm": 1.8335, "queue_k_norm": 1.7101, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5966, "sent_len_1": 66.8532, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.5987, "stdk": 0.0485, "stdq": 0.0486, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 6300 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.5664, "doc_norm": 1.7076, "encoder_q-embeddings": 2609.522, "encoder_q-layer.0": 1948.2803, "encoder_q-layer.1": 1925.7748, "encoder_q-layer.10": 5281.7998, "encoder_q-layer.11": 9684.6162, "encoder_q-layer.2": 2302.3779, "encoder_q-layer.3": 2371.9729, "encoder_q-layer.4": 2285.3821, "encoder_q-layer.5": 2365.0728, "encoder_q-layer.6": 2754.5291, "encoder_q-layer.7": 3536.1907, "encoder_q-layer.8": 4431.3027, "encoder_q-layer.9": 4102.6274, "epoch": 0.06, "inbatch_neg_score": 0.6493, "inbatch_pos_score": 1.1689, "learning_rate": 3.2000000000000005e-05, "loss": 4.5664, "norm_diff": 0.1468, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5782.3027, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6431, "query_norm": 1.8544, "queue_k_norm": 1.7054, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7153, "sent_len_1": 66.6172, "sent_max_len_0": 128.0, "sent_max_len_1": 188.875, "stdk": 0.049, "stdq": 0.048, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 6400 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.5584, "doc_norm": 1.7045, "encoder_q-embeddings": 16279.085, "encoder_q-layer.0": 14601.0762, "encoder_q-layer.1": 16104.041, "encoder_q-layer.10": 6886.9629, "encoder_q-layer.11": 10871.2539, "encoder_q-layer.2": 16222.6602, "encoder_q-layer.3": 13168.8418, "encoder_q-layer.4": 10629.0518, "encoder_q-layer.5": 7406.4199, "encoder_q-layer.6": 6359.5474, "encoder_q-layer.7": 6183.4873, "encoder_q-layer.8": 6028.0044, "encoder_q-layer.9": 5398.7046, "epoch": 0.06, "inbatch_neg_score": 0.6592, "inbatch_pos_score": 1.2217, "learning_rate": 3.2500000000000004e-05, "loss": 4.5584, "norm_diff": 0.1667, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17291.3335, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.6533, "query_norm": 1.8712, "queue_k_norm": 1.7003, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6242, "sent_len_1": 66.7769, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7413, "stdk": 0.0487, "stdq": 0.0485, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 6500 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.5392, "doc_norm": 1.6978, "encoder_q-embeddings": 6943.8433, "encoder_q-layer.0": 5546.8052, "encoder_q-layer.1": 3961.9131, "encoder_q-layer.10": 3939.387, "encoder_q-layer.11": 7266.792, "encoder_q-layer.2": 3926.9622, "encoder_q-layer.3": 3624.3809, "encoder_q-layer.4": 3164.812, "encoder_q-layer.5": 2885.24, "encoder_q-layer.6": 2477.7432, "encoder_q-layer.7": 2484.623, "encoder_q-layer.8": 2802.1072, "encoder_q-layer.9": 2642.7063, "epoch": 0.06, "inbatch_neg_score": 0.6944, "inbatch_pos_score": 1.2285, "learning_rate": 3.3e-05, "loss": 4.5392, "norm_diff": 0.1816, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6586.351, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6895, "query_norm": 1.8793, "queue_k_norm": 1.6965, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6485, "sent_len_1": 66.953, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0, "stdk": 0.0486, "stdq": 0.0478, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 6600 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 4.5372, "doc_norm": 1.7, "encoder_q-embeddings": 1637.2737, "encoder_q-layer.0": 1267.2552, "encoder_q-layer.1": 1140.1689, "encoder_q-layer.10": 1808.3328, "encoder_q-layer.11": 3624.1604, "encoder_q-layer.2": 1307.2075, "encoder_q-layer.3": 1351.8483, "encoder_q-layer.4": 1335.545, "encoder_q-layer.5": 1287.8599, "encoder_q-layer.6": 1368.3625, "encoder_q-layer.7": 1402.4396, "encoder_q-layer.8": 1546.0304, "encoder_q-layer.9": 1504.6959, "epoch": 0.07, "inbatch_neg_score": 0.6706, "inbatch_pos_score": 1.2129, "learning_rate": 3.35e-05, "loss": 4.5372, "norm_diff": 0.2145, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2507.6103, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6631, "query_norm": 1.9144, "queue_k_norm": 1.6922, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6487, "sent_len_1": 66.7937, "sent_max_len_0": 127.995, "sent_max_len_1": 188.7962, "stdk": 0.0488, "stdq": 0.0473, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 6700 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.4983, "doc_norm": 1.681, "encoder_q-embeddings": 3301.6572, "encoder_q-layer.0": 2738.4238, "encoder_q-layer.1": 2818.6458, "encoder_q-layer.10": 2494.2686, "encoder_q-layer.11": 4713.4746, "encoder_q-layer.2": 3171.762, "encoder_q-layer.3": 2829.4316, "encoder_q-layer.4": 2647.4451, "encoder_q-layer.5": 2432.8591, "encoder_q-layer.6": 2176.5552, "encoder_q-layer.7": 2056.0237, "encoder_q-layer.8": 2005.8933, "encoder_q-layer.9": 1943.4744, "epoch": 0.07, "inbatch_neg_score": 0.6463, "inbatch_pos_score": 1.2041, "learning_rate": 3.4000000000000007e-05, "loss": 4.4983, "norm_diff": 0.2122, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4160.8561, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6406, "query_norm": 1.8932, "queue_k_norm": 1.6903, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7775, "sent_len_1": 66.497, "sent_max_len_0": 128.0, "sent_max_len_1": 187.615, "stdk": 0.0481, "stdq": 0.0496, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 6800 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.5096, "doc_norm": 1.6847, "encoder_q-embeddings": 2992.7554, "encoder_q-layer.0": 2560.8623, "encoder_q-layer.1": 2376.8784, "encoder_q-layer.10": 1684.7683, "encoder_q-layer.11": 3318.2085, "encoder_q-layer.2": 2368.7231, "encoder_q-layer.3": 2275.4104, "encoder_q-layer.4": 2169.3916, "encoder_q-layer.5": 2374.6924, "encoder_q-layer.6": 3363.1328, "encoder_q-layer.7": 3217.3127, "encoder_q-layer.8": 1773.3606, "encoder_q-layer.9": 1367.5931, "epoch": 0.07, "inbatch_neg_score": 0.6459, "inbatch_pos_score": 1.1787, "learning_rate": 3.45e-05, "loss": 4.5096, "norm_diff": 0.1913, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3763.1255, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6392, "query_norm": 1.876, "queue_k_norm": 1.6851, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4948, "sent_len_1": 66.846, "sent_max_len_0": 127.9813, "sent_max_len_1": 188.67, "stdk": 0.0483, "stdq": 0.0485, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 6900 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 4.5055, "doc_norm": 1.6734, "encoder_q-embeddings": 3020.0967, "encoder_q-layer.0": 2328.0823, "encoder_q-layer.1": 2334.0225, "encoder_q-layer.10": 1496.3284, "encoder_q-layer.11": 2995.291, "encoder_q-layer.2": 2438.1545, "encoder_q-layer.3": 2197.0369, "encoder_q-layer.4": 2055.083, "encoder_q-layer.5": 1885.0323, "encoder_q-layer.6": 1502.8472, "encoder_q-layer.7": 1311.5782, "encoder_q-layer.8": 1426.0101, "encoder_q-layer.9": 1255.8892, "epoch": 0.07, "inbatch_neg_score": 0.6098, "inbatch_pos_score": 1.167, "learning_rate": 3.5e-05, "loss": 4.5055, "norm_diff": 0.1979, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3219.0468, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6055, "query_norm": 1.8712, "queue_k_norm": 1.6806, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5347, "sent_len_1": 66.6701, "sent_max_len_0": 127.9975, "sent_max_len_1": 188.6075, "stdk": 0.0481, "stdq": 0.0482, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 7000 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 4.5038, "doc_norm": 1.6762, "encoder_q-embeddings": 11132.8994, "encoder_q-layer.0": 8573.9941, "encoder_q-layer.1": 8381.8428, "encoder_q-layer.10": 1679.8735, "encoder_q-layer.11": 3316.3311, "encoder_q-layer.2": 8252.6318, "encoder_q-layer.3": 6581.0845, "encoder_q-layer.4": 5667.4644, "encoder_q-layer.5": 4689.4092, "encoder_q-layer.6": 4179.5234, "encoder_q-layer.7": 2650.1565, "encoder_q-layer.8": 1635.5649, "encoder_q-layer.9": 1266.4197, "epoch": 0.07, "inbatch_neg_score": 0.5794, "inbatch_pos_score": 1.1299, "learning_rate": 3.55e-05, "loss": 4.5038, "norm_diff": 0.1913, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9809.4027, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5732, "query_norm": 1.8675, "queue_k_norm": 1.6733, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6796, "sent_len_1": 66.6826, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4162, "stdk": 0.0482, "stdq": 0.0477, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 7100 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.4867, "doc_norm": 1.6659, "encoder_q-embeddings": 1730.0222, "encoder_q-layer.0": 1302.5208, "encoder_q-layer.1": 1404.3313, "encoder_q-layer.10": 1679.3398, "encoder_q-layer.11": 3174.2827, "encoder_q-layer.2": 1484.556, "encoder_q-layer.3": 1488.3912, "encoder_q-layer.4": 1419.9142, "encoder_q-layer.5": 1283.314, "encoder_q-layer.6": 1152.8278, "encoder_q-layer.7": 1191.8724, "encoder_q-layer.8": 1434.1866, "encoder_q-layer.9": 1282.5541, "epoch": 0.07, "inbatch_neg_score": 0.5724, "inbatch_pos_score": 1.126, "learning_rate": 3.6e-05, "loss": 4.4867, "norm_diff": 0.186, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2367.383, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5674, "query_norm": 1.8519, "queue_k_norm": 1.6683, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6388, "sent_len_1": 66.6271, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5425, "stdk": 0.0481, "stdq": 0.0466, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 7200 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 4.4692, "doc_norm": 1.6607, "encoder_q-embeddings": 1540.605, "encoder_q-layer.0": 1195.324, "encoder_q-layer.1": 1346.2542, "encoder_q-layer.10": 1973.7396, "encoder_q-layer.11": 3601.7058, "encoder_q-layer.2": 1444.645, "encoder_q-layer.3": 1474.0696, "encoder_q-layer.4": 1492.0544, "encoder_q-layer.5": 1334.8214, "encoder_q-layer.6": 1377.2375, "encoder_q-layer.7": 1445.91, "encoder_q-layer.8": 1574.6002, "encoder_q-layer.9": 1405.4325, "epoch": 0.07, "inbatch_neg_score": 0.5469, "inbatch_pos_score": 1.1133, "learning_rate": 3.65e-05, "loss": 4.4692, "norm_diff": 0.2261, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2497.8061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.541, "query_norm": 1.8868, "queue_k_norm": 1.6643, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4916, "sent_len_1": 66.6895, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4325, "stdk": 0.048, "stdq": 0.0471, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 7300 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.4521, "doc_norm": 1.6603, "encoder_q-embeddings": 3251.0632, "encoder_q-layer.0": 2492.0513, "encoder_q-layer.1": 2426.385, "encoder_q-layer.10": 1847.8998, "encoder_q-layer.11": 3458.3682, "encoder_q-layer.2": 2369.8228, "encoder_q-layer.3": 2168.0278, "encoder_q-layer.4": 1969.6324, "encoder_q-layer.5": 1607.84, "encoder_q-layer.6": 1433.9122, "encoder_q-layer.7": 1177.8136, "encoder_q-layer.8": 1277.0326, "encoder_q-layer.9": 1213.9926, "epoch": 0.07, "inbatch_neg_score": 0.5876, "inbatch_pos_score": 1.1064, "learning_rate": 3.7e-05, "loss": 4.4521, "norm_diff": 0.2941, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3313.2479, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5776, "query_norm": 1.9543, "queue_k_norm": 1.6592, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5024, "sent_len_1": 66.9711, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.7125, "stdk": 0.048, "stdq": 0.0475, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 7400 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.4407, "doc_norm": 1.6571, "encoder_q-embeddings": 4944.2358, "encoder_q-layer.0": 3804.2004, "encoder_q-layer.1": 2937.7454, "encoder_q-layer.10": 1773.5364, "encoder_q-layer.11": 3479.5981, "encoder_q-layer.2": 2776.0735, "encoder_q-layer.3": 2370.2449, "encoder_q-layer.4": 1850.0682, "encoder_q-layer.5": 1301.431, "encoder_q-layer.6": 1198.5037, "encoder_q-layer.7": 1007.7867, "encoder_q-layer.8": 1094.8214, "encoder_q-layer.9": 1165.368, "epoch": 0.07, "inbatch_neg_score": 0.5533, "inbatch_pos_score": 1.084, "learning_rate": 3.7500000000000003e-05, "loss": 4.4407, "norm_diff": 0.2528, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4128.1284, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5469, "query_norm": 1.9099, "queue_k_norm": 1.6558, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6683, "sent_len_1": 66.6518, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.4462, "stdk": 0.0481, "stdq": 0.0465, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 7500 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.4127, "doc_norm": 1.6548, "encoder_q-embeddings": 3112.938, "encoder_q-layer.0": 2265.0247, "encoder_q-layer.1": 2364.1941, "encoder_q-layer.10": 1693.4141, "encoder_q-layer.11": 3454.2217, "encoder_q-layer.2": 2541.5332, "encoder_q-layer.3": 2457.5298, "encoder_q-layer.4": 2176.7798, "encoder_q-layer.5": 1691.4523, "encoder_q-layer.6": 1596.0844, "encoder_q-layer.7": 1344.0222, "encoder_q-layer.8": 1217.8251, "encoder_q-layer.9": 1057.8986, "epoch": 0.07, "inbatch_neg_score": 0.6007, "inbatch_pos_score": 1.1436, "learning_rate": 3.8e-05, "loss": 4.4127, "norm_diff": 0.275, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3321.0246, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5923, "query_norm": 1.9298, "queue_k_norm": 1.6547, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5894, "sent_len_1": 66.8729, "sent_max_len_0": 128.0, "sent_max_len_1": 193.2825, "stdk": 0.0481, "stdq": 0.047, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 7600 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.4057, "doc_norm": 1.6509, "encoder_q-embeddings": 10141.6299, "encoder_q-layer.0": 7242.5732, "encoder_q-layer.1": 5828.853, "encoder_q-layer.10": 1462.1422, "encoder_q-layer.11": 2921.2002, "encoder_q-layer.2": 5277.0601, "encoder_q-layer.3": 4240.3071, "encoder_q-layer.4": 3597.2156, "encoder_q-layer.5": 2952.2026, "encoder_q-layer.6": 2419.7109, "encoder_q-layer.7": 1981.8281, "encoder_q-layer.8": 1490.7096, "encoder_q-layer.9": 1118.869, "epoch": 0.08, "inbatch_neg_score": 0.5925, "inbatch_pos_score": 1.1387, "learning_rate": 3.85e-05, "loss": 4.4057, "norm_diff": 0.2498, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7681.5469, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5859, "query_norm": 1.9007, "queue_k_norm": 1.6489, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.852, "sent_len_1": 66.9242, "sent_max_len_0": 127.99, "sent_max_len_1": 191.045, "stdk": 0.048, "stdq": 0.048, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 7700 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.4384, "doc_norm": 1.6474, "encoder_q-embeddings": 3069.0933, "encoder_q-layer.0": 2421.3484, "encoder_q-layer.1": 2372.406, "encoder_q-layer.10": 4289.7959, "encoder_q-layer.11": 8649.792, "encoder_q-layer.2": 2375.9595, "encoder_q-layer.3": 2244.9653, "encoder_q-layer.4": 2097.4727, "encoder_q-layer.5": 1964.3298, "encoder_q-layer.6": 1816.8014, "encoder_q-layer.7": 2145.4646, "encoder_q-layer.8": 2197.6746, "encoder_q-layer.9": 2086.5586, "epoch": 0.08, "inbatch_neg_score": 0.6065, "inbatch_pos_score": 1.1484, "learning_rate": 3.9000000000000006e-05, "loss": 4.4384, "norm_diff": 0.1988, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5014.0165, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6011, "query_norm": 1.8462, "queue_k_norm": 1.6477, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5621, "sent_len_1": 66.883, "sent_max_len_0": 127.9875, "sent_max_len_1": 189.1225, "stdk": 0.0481, "stdq": 0.0463, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 7800 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.4007, "doc_norm": 1.6419, "encoder_q-embeddings": 2397.3413, "encoder_q-layer.0": 1799.1223, "encoder_q-layer.1": 1766.4229, "encoder_q-layer.10": 698.8602, "encoder_q-layer.11": 1598.526, "encoder_q-layer.2": 1619.1793, "encoder_q-layer.3": 1686.3501, "encoder_q-layer.4": 1335.0551, "encoder_q-layer.5": 1134.0258, "encoder_q-layer.6": 918.6378, "encoder_q-layer.7": 754.3168, "encoder_q-layer.8": 724.702, "encoder_q-layer.9": 535.8472, "epoch": 0.08, "inbatch_neg_score": 0.5307, "inbatch_pos_score": 1.0781, "learning_rate": 3.9500000000000005e-05, "loss": 4.4007, "norm_diff": 0.1888, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2229.0235, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5264, "query_norm": 1.8307, "queue_k_norm": 1.6446, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6422, "sent_len_1": 66.9699, "sent_max_len_0": 127.9975, "sent_max_len_1": 191.6687, "stdk": 0.0481, "stdq": 0.0465, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 7900 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.3669, "doc_norm": 1.643, "encoder_q-embeddings": 4462.644, "encoder_q-layer.0": 4220.7832, "encoder_q-layer.1": 4253.6143, "encoder_q-layer.10": 822.0338, "encoder_q-layer.11": 1428.8724, "encoder_q-layer.2": 4383.3491, "encoder_q-layer.3": 5122.0708, "encoder_q-layer.4": 6088.6333, "encoder_q-layer.5": 7193.3174, "encoder_q-layer.6": 9763.6152, "encoder_q-layer.7": 6215.5659, "encoder_q-layer.8": 2382.7832, "encoder_q-layer.9": 632.2007, "epoch": 0.08, "inbatch_neg_score": 0.5175, "inbatch_pos_score": 1.0518, "learning_rate": 4e-05, "loss": 4.3669, "norm_diff": 0.1766, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7555.0456, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5107, "query_norm": 1.8196, "queue_k_norm": 1.6379, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.8121, "sent_len_1": 66.8007, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9825, "stdk": 0.0483, "stdq": 0.0469, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 8000 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.3536, "doc_norm": 1.6428, "encoder_q-embeddings": 25717.3926, "encoder_q-layer.0": 21454.8789, "encoder_q-layer.1": 21534.5879, "encoder_q-layer.10": 1014.9626, "encoder_q-layer.11": 1641.1044, "encoder_q-layer.2": 22723.5508, "encoder_q-layer.3": 18501.9961, "encoder_q-layer.4": 13444.2461, "encoder_q-layer.5": 10753.2002, "encoder_q-layer.6": 7664.3892, "encoder_q-layer.7": 4555.2588, "encoder_q-layer.8": 1973.6086, "encoder_q-layer.9": 840.5275, "epoch": 0.08, "inbatch_neg_score": 0.5092, "inbatch_pos_score": 1.0605, "learning_rate": 4.05e-05, "loss": 4.3536, "norm_diff": 0.2304, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 23448.1965, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.5024, "query_norm": 1.8732, "queue_k_norm": 1.636, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6733, "sent_len_1": 67.0477, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.6887, "stdk": 0.0483, "stdq": 0.0474, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 8100 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 4.3646, "doc_norm": 1.6261, "encoder_q-embeddings": 607.1784, "encoder_q-layer.0": 459.8595, "encoder_q-layer.1": 439.6087, "encoder_q-layer.10": 802.2569, "encoder_q-layer.11": 1549.027, "encoder_q-layer.2": 444.998, "encoder_q-layer.3": 456.5317, "encoder_q-layer.4": 454.3563, "encoder_q-layer.5": 436.3199, "encoder_q-layer.6": 501.8625, "encoder_q-layer.7": 512.3929, "encoder_q-layer.8": 632.3259, "encoder_q-layer.9": 578.2252, "epoch": 0.08, "inbatch_neg_score": 0.5171, "inbatch_pos_score": 1.0664, "learning_rate": 4.1e-05, "loss": 4.3646, "norm_diff": 0.2142, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 972.7959, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5142, "query_norm": 1.8403, "queue_k_norm": 1.6292, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7164, "sent_len_1": 66.6963, "sent_max_len_0": 127.9912, "sent_max_len_1": 189.4512, "stdk": 0.0481, "stdq": 0.0461, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 8200 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 4.3341, "doc_norm": 1.6224, "encoder_q-embeddings": 1941.0107, "encoder_q-layer.0": 1537.866, "encoder_q-layer.1": 1294.5465, "encoder_q-layer.10": 810.3134, "encoder_q-layer.11": 1794.2417, "encoder_q-layer.2": 1334.8246, "encoder_q-layer.3": 1342.0403, "encoder_q-layer.4": 1048.4379, "encoder_q-layer.5": 772.6133, "encoder_q-layer.6": 696.1389, "encoder_q-layer.7": 569.6844, "encoder_q-layer.8": 569.3687, "encoder_q-layer.9": 497.3762, "epoch": 0.08, "inbatch_neg_score": 0.5024, "inbatch_pos_score": 1.0801, "learning_rate": 4.15e-05, "loss": 4.3341, "norm_diff": 0.186, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1857.5648, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4976, "query_norm": 1.8084, "queue_k_norm": 1.625, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5925, "sent_len_1": 66.8008, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6362, "stdk": 0.048, "stdq": 0.0461, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 8300 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 4.3469, "doc_norm": 1.6155, "encoder_q-embeddings": 1216.0938, "encoder_q-layer.0": 1042.1858, "encoder_q-layer.1": 1147.1848, "encoder_q-layer.10": 663.3225, "encoder_q-layer.11": 1380.3187, "encoder_q-layer.2": 1060.4854, "encoder_q-layer.3": 1044.3481, "encoder_q-layer.4": 930.7347, "encoder_q-layer.5": 811.4622, "encoder_q-layer.6": 940.9795, "encoder_q-layer.7": 851.4821, "encoder_q-layer.8": 743.7413, "encoder_q-layer.9": 593.2667, "epoch": 0.08, "inbatch_neg_score": 0.5055, "inbatch_pos_score": 1.0791, "learning_rate": 4.2e-05, "loss": 4.3469, "norm_diff": 0.1593, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1483.7346, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5005, "query_norm": 1.7748, "queue_k_norm": 1.6196, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5615, "sent_len_1": 66.6644, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.415, "stdk": 0.048, "stdq": 0.0467, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 8400 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 4.314, "doc_norm": 1.6215, "encoder_q-embeddings": 1383.0879, "encoder_q-layer.0": 1333.5497, "encoder_q-layer.1": 1248.028, "encoder_q-layer.10": 787.0162, "encoder_q-layer.11": 1540.881, "encoder_q-layer.2": 1319.6702, "encoder_q-layer.3": 1390.4956, "encoder_q-layer.4": 1369.5006, "encoder_q-layer.5": 1313.6355, "encoder_q-layer.6": 1314.5282, "encoder_q-layer.7": 999.9505, "encoder_q-layer.8": 642.4548, "encoder_q-layer.9": 572.7162, "epoch": 0.08, "inbatch_neg_score": 0.4954, "inbatch_pos_score": 1.0547, "learning_rate": 4.25e-05, "loss": 4.314, "norm_diff": 0.1379, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1802.201, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4912, "query_norm": 1.7594, "queue_k_norm": 1.6151, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6948, "sent_len_1": 66.7306, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1113, "stdk": 0.0483, "stdq": 0.0468, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 8500 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 4.3171, "doc_norm": 1.6082, "encoder_q-embeddings": 10828.7256, "encoder_q-layer.0": 8629.7637, "encoder_q-layer.1": 7270.0474, "encoder_q-layer.10": 755.7032, "encoder_q-layer.11": 1434.5792, "encoder_q-layer.2": 6351.0298, "encoder_q-layer.3": 5587.4839, "encoder_q-layer.4": 3571.5173, "encoder_q-layer.5": 2639.0605, "encoder_q-layer.6": 1951.5265, "encoder_q-layer.7": 1285.5845, "encoder_q-layer.8": 817.2206, "encoder_q-layer.9": 541.8496, "epoch": 0.08, "inbatch_neg_score": 0.4806, "inbatch_pos_score": 1.0244, "learning_rate": 4.3e-05, "loss": 4.3171, "norm_diff": 0.1191, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8556.6815, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4768, "query_norm": 1.7273, "queue_k_norm": 1.6105, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7246, "sent_len_1": 66.819, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3525, "stdk": 0.048, "stdq": 0.0459, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 8600 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.2951, "doc_norm": 1.6117, "encoder_q-embeddings": 590.1497, "encoder_q-layer.0": 419.1748, "encoder_q-layer.1": 433.0621, "encoder_q-layer.10": 707.4109, "encoder_q-layer.11": 1535.5437, "encoder_q-layer.2": 491.7354, "encoder_q-layer.3": 502.3202, "encoder_q-layer.4": 495.61, "encoder_q-layer.5": 468.1476, "encoder_q-layer.6": 540.837, "encoder_q-layer.7": 563.8289, "encoder_q-layer.8": 617.6689, "encoder_q-layer.9": 568.3716, "epoch": 0.08, "inbatch_neg_score": 0.445, "inbatch_pos_score": 1.002, "learning_rate": 4.35e-05, "loss": 4.2951, "norm_diff": 0.1184, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 964.7315, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4399, "query_norm": 1.7301, "queue_k_norm": 1.6016, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7393, "sent_len_1": 66.7505, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.265, "stdk": 0.0483, "stdq": 0.0462, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 8700 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 4.2719, "doc_norm": 1.5986, "encoder_q-embeddings": 1360.2281, "encoder_q-layer.0": 1044.1281, "encoder_q-layer.1": 917.89, "encoder_q-layer.10": 775.8815, "encoder_q-layer.11": 1710.1274, "encoder_q-layer.2": 819.1315, "encoder_q-layer.3": 704.2605, "encoder_q-layer.4": 582.478, "encoder_q-layer.5": 502.9425, "encoder_q-layer.6": 504.6039, "encoder_q-layer.7": 511.0261, "encoder_q-layer.8": 614.457, "encoder_q-layer.9": 565.9466, "epoch": 0.09, "inbatch_neg_score": 0.4192, "inbatch_pos_score": 0.9985, "learning_rate": 4.4000000000000006e-05, "loss": 4.2719, "norm_diff": 0.1692, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1358.6915, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.415, "query_norm": 1.7678, "queue_k_norm": 1.5977, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6727, "sent_len_1": 67.0049, "sent_max_len_0": 127.9862, "sent_max_len_1": 190.5163, "stdk": 0.048, "stdq": 0.0473, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 8800 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.2839, "doc_norm": 1.5849, "encoder_q-embeddings": 1089.1626, "encoder_q-layer.0": 820.8914, "encoder_q-layer.1": 755.2834, "encoder_q-layer.10": 659.7272, "encoder_q-layer.11": 1374.2563, "encoder_q-layer.2": 888.2657, "encoder_q-layer.3": 906.4954, "encoder_q-layer.4": 946.1681, "encoder_q-layer.5": 968.2234, "encoder_q-layer.6": 809.597, "encoder_q-layer.7": 704.881, "encoder_q-layer.8": 654.6403, "encoder_q-layer.9": 587.6082, "epoch": 0.09, "inbatch_neg_score": 0.4261, "inbatch_pos_score": 0.9468, "learning_rate": 4.4500000000000004e-05, "loss": 4.2839, "norm_diff": 0.1242, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1304.9215, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4199, "query_norm": 1.7091, "queue_k_norm": 1.5881, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.491, "sent_len_1": 66.9397, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1962, "stdk": 0.0479, "stdq": 0.0442, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 8900 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 4.2641, "doc_norm": 1.5863, "encoder_q-embeddings": 626.6901, "encoder_q-layer.0": 470.3658, "encoder_q-layer.1": 430.098, "encoder_q-layer.10": 585.2725, "encoder_q-layer.11": 1212.2841, "encoder_q-layer.2": 459.6656, "encoder_q-layer.3": 453.9423, "encoder_q-layer.4": 452.3259, "encoder_q-layer.5": 452.0439, "encoder_q-layer.6": 491.8336, "encoder_q-layer.7": 552.1317, "encoder_q-layer.8": 591.2455, "encoder_q-layer.9": 490.7165, "epoch": 0.09, "inbatch_neg_score": 0.459, "inbatch_pos_score": 1.0127, "learning_rate": 4.5e-05, "loss": 4.2641, "norm_diff": 0.1445, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 890.0936, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4534, "query_norm": 1.7308, "queue_k_norm": 1.5832, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5655, "sent_len_1": 66.6679, "sent_max_len_0": 127.9988, "sent_max_len_1": 188.8162, "stdk": 0.048, "stdq": 0.0458, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9000 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 4.2787, "doc_norm": 1.5825, "encoder_q-embeddings": 520.2496, "encoder_q-layer.0": 350.5874, "encoder_q-layer.1": 369.8788, "encoder_q-layer.10": 575.1912, "encoder_q-layer.11": 1327.8735, "encoder_q-layer.2": 421.1877, "encoder_q-layer.3": 471.6623, "encoder_q-layer.4": 475.6396, "encoder_q-layer.5": 456.0974, "encoder_q-layer.6": 457.0869, "encoder_q-layer.7": 481.3801, "encoder_q-layer.8": 491.2348, "encoder_q-layer.9": 450.4988, "epoch": 0.09, "inbatch_neg_score": 0.4627, "inbatch_pos_score": 1.0381, "learning_rate": 4.55e-05, "loss": 4.2787, "norm_diff": 0.1045, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 841.7201, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.459, "query_norm": 1.6871, "queue_k_norm": 1.5789, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5681, "sent_len_1": 66.7836, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6062, "stdk": 0.0482, "stdq": 0.0462, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9100 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.2718, "doc_norm": 1.568, "encoder_q-embeddings": 5428.0737, "encoder_q-layer.0": 4302.147, "encoder_q-layer.1": 3691.1809, "encoder_q-layer.10": 931.5087, "encoder_q-layer.11": 1938.2416, "encoder_q-layer.2": 3809.7175, "encoder_q-layer.3": 3458.3354, "encoder_q-layer.4": 3288.3311, "encoder_q-layer.5": 3458.1245, "encoder_q-layer.6": 3679.6577, "encoder_q-layer.7": 2469.5291, "encoder_q-layer.8": 966.1225, "encoder_q-layer.9": 588.2387, "epoch": 0.09, "inbatch_neg_score": 0.4561, "inbatch_pos_score": 0.999, "learning_rate": 4.600000000000001e-05, "loss": 4.2718, "norm_diff": 0.0916, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5018.4413, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4534, "query_norm": 1.6596, "queue_k_norm": 1.5738, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4933, "sent_len_1": 66.9883, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4087, "stdk": 0.0476, "stdq": 0.0468, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9200 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 4.2481, "doc_norm": 1.569, "encoder_q-embeddings": 640.6901, "encoder_q-layer.0": 477.6535, "encoder_q-layer.1": 524.3274, "encoder_q-layer.10": 564.482, "encoder_q-layer.11": 1253.0978, "encoder_q-layer.2": 629.7975, "encoder_q-layer.3": 611.4416, "encoder_q-layer.4": 584.3022, "encoder_q-layer.5": 607.1081, "encoder_q-layer.6": 540.0031, "encoder_q-layer.7": 533.5709, "encoder_q-layer.8": 518.7196, "encoder_q-layer.9": 474.6032, "epoch": 0.09, "inbatch_neg_score": 0.409, "inbatch_pos_score": 0.9844, "learning_rate": 4.6500000000000005e-05, "loss": 4.2481, "norm_diff": 0.0907, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 945.7017, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4053, "query_norm": 1.6597, "queue_k_norm": 1.568, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5361, "sent_len_1": 66.5353, "sent_max_len_0": 128.0, "sent_max_len_1": 188.995, "stdk": 0.048, "stdq": 0.0464, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9300 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.2264, "doc_norm": 1.5546, "encoder_q-embeddings": 787.6659, "encoder_q-layer.0": 620.3303, "encoder_q-layer.1": 579.701, "encoder_q-layer.10": 699.1459, "encoder_q-layer.11": 1459.0614, "encoder_q-layer.2": 605.5234, "encoder_q-layer.3": 540.2785, "encoder_q-layer.4": 540.414, "encoder_q-layer.5": 493.0037, "encoder_q-layer.6": 521.9612, "encoder_q-layer.7": 538.3928, "encoder_q-layer.8": 656.7009, "encoder_q-layer.9": 570.2309, "epoch": 0.09, "inbatch_neg_score": 0.3811, "inbatch_pos_score": 0.9287, "learning_rate": 4.7e-05, "loss": 4.2264, "norm_diff": 0.0945, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1048.956, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3772, "query_norm": 1.6491, "queue_k_norm": 1.5615, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5418, "sent_len_1": 66.7551, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.8363, "stdk": 0.0477, "stdq": 0.0454, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9400 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.2057, "doc_norm": 1.5569, "encoder_q-embeddings": 525.0574, "encoder_q-layer.0": 363.3755, "encoder_q-layer.1": 366.5184, "encoder_q-layer.10": 811.9566, "encoder_q-layer.11": 1993.5321, "encoder_q-layer.2": 401.9593, "encoder_q-layer.3": 435.1902, "encoder_q-layer.4": 450.916, "encoder_q-layer.5": 433.7385, "encoder_q-layer.6": 447.1725, "encoder_q-layer.7": 466.4157, "encoder_q-layer.8": 519.1483, "encoder_q-layer.9": 516.7704, "epoch": 0.09, "inbatch_neg_score": 0.3809, "inbatch_pos_score": 0.9087, "learning_rate": 4.75e-05, "loss": 4.2057, "norm_diff": 0.1406, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1071.8468, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3779, "query_norm": 1.6975, "queue_k_norm": 1.5553, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4278, "sent_len_1": 66.6935, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9212, "stdk": 0.048, "stdq": 0.0453, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 9500 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 4.1628, "doc_norm": 1.5539, "encoder_q-embeddings": 669.6625, "encoder_q-layer.0": 519.4456, "encoder_q-layer.1": 502.9407, "encoder_q-layer.10": 525.9736, "encoder_q-layer.11": 1188.63, "encoder_q-layer.2": 579.9673, "encoder_q-layer.3": 616.2158, "encoder_q-layer.4": 587.853, "encoder_q-layer.5": 574.8867, "encoder_q-layer.6": 571.2247, "encoder_q-layer.7": 521.166, "encoder_q-layer.8": 496.4248, "encoder_q-layer.9": 424.4366, "epoch": 0.09, "inbatch_neg_score": 0.3674, "inbatch_pos_score": 0.9609, "learning_rate": 4.8e-05, "loss": 4.1628, "norm_diff": 0.1848, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 919.5265, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3613, "query_norm": 1.7388, "queue_k_norm": 1.5503, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7188, "sent_len_1": 66.8452, "sent_max_len_0": 127.9963, "sent_max_len_1": 191.6488, "stdk": 0.0481, "stdq": 0.046, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9600 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 4.1617, "doc_norm": 1.5447, "encoder_q-embeddings": 1039.8633, "encoder_q-layer.0": 824.9395, "encoder_q-layer.1": 834.1817, "encoder_q-layer.10": 623.0634, "encoder_q-layer.11": 1390.597, "encoder_q-layer.2": 908.7852, "encoder_q-layer.3": 1039.8843, "encoder_q-layer.4": 915.5507, "encoder_q-layer.5": 807.553, "encoder_q-layer.6": 586.4208, "encoder_q-layer.7": 514.4703, "encoder_q-layer.8": 493.8262, "encoder_q-layer.9": 432.4262, "epoch": 0.09, "inbatch_neg_score": 0.383, "inbatch_pos_score": 0.9556, "learning_rate": 4.85e-05, "loss": 4.1617, "norm_diff": 0.1602, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1258.0177, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3794, "query_norm": 1.7049, "queue_k_norm": 1.5431, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6643, "sent_len_1": 67.0655, "sent_max_len_0": 128.0, "sent_max_len_1": 192.0762, "stdk": 0.0479, "stdq": 0.0452, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 9700 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 4.1599, "doc_norm": 1.5417, "encoder_q-embeddings": 695.0004, "encoder_q-layer.0": 509.7966, "encoder_q-layer.1": 489.4838, "encoder_q-layer.10": 506.0833, "encoder_q-layer.11": 1227.2151, "encoder_q-layer.2": 469.5974, "encoder_q-layer.3": 496.0407, "encoder_q-layer.4": 507.0879, "encoder_q-layer.5": 491.3771, "encoder_q-layer.6": 458.3533, "encoder_q-layer.7": 483.3652, "encoder_q-layer.8": 510.9316, "encoder_q-layer.9": 435.9326, "epoch": 0.1, "inbatch_neg_score": 0.3948, "inbatch_pos_score": 0.9482, "learning_rate": 4.9e-05, "loss": 4.1599, "norm_diff": 0.0863, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 887.8212, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3914, "query_norm": 1.628, "queue_k_norm": 1.5402, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5396, "sent_len_1": 66.968, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9638, "stdk": 0.048, "stdq": 0.0445, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9800 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 4.1476, "doc_norm": 1.5365, "encoder_q-embeddings": 2832.9487, "encoder_q-layer.0": 2199.062, "encoder_q-layer.1": 1729.7677, "encoder_q-layer.10": 925.1445, "encoder_q-layer.11": 2218.1118, "encoder_q-layer.2": 1679.2469, "encoder_q-layer.3": 1521.6201, "encoder_q-layer.4": 1483.9786, "encoder_q-layer.5": 1421.0198, "encoder_q-layer.6": 1168.7893, "encoder_q-layer.7": 1125.6886, "encoder_q-layer.8": 993.3325, "encoder_q-layer.9": 796.5136, "epoch": 0.1, "inbatch_neg_score": 0.3779, "inbatch_pos_score": 0.9351, "learning_rate": 4.9500000000000004e-05, "loss": 4.1476, "norm_diff": 0.0137, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2585.4425, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3748, "query_norm": 1.5482, "queue_k_norm": 1.5347, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6854, "sent_len_1": 66.6417, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.8313, "stdk": 0.048, "stdq": 0.043, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 9900 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 4.1463, "doc_norm": 1.5297, "encoder_q-embeddings": 1334.9727, "encoder_q-layer.0": 1020.5611, "encoder_q-layer.1": 1057.9719, "encoder_q-layer.10": 983.6282, "encoder_q-layer.11": 2422.7173, "encoder_q-layer.2": 1215.7311, "encoder_q-layer.3": 1255.921, "encoder_q-layer.4": 1131.1027, "encoder_q-layer.5": 1064.9705, "encoder_q-layer.6": 960.1463, "encoder_q-layer.7": 993.3258, "encoder_q-layer.8": 968.444, "encoder_q-layer.9": 811.381, "epoch": 0.1, "inbatch_neg_score": 0.3656, "inbatch_pos_score": 0.9268, "learning_rate": 5e-05, "loss": 4.1463, "norm_diff": 0.0148, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1844.2572, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.363, "query_norm": 1.5348, "queue_k_norm": 1.5291, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.8131, "sent_len_1": 66.999, "sent_max_len_0": 127.9838, "sent_max_len_1": 193.7862, "stdk": 0.0478, "stdq": 0.0434, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 10000 }, { "dev_runtime": 26.5371, "dev_samples_per_second": 2.412, "dev_steps_per_second": 0.038, "epoch": 0.1, "step": 10000, "test_accuracy": 92.10205078125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4749242663383484, "test_doc_norm": 1.455350637435913, "test_inbatch_neg_score": 0.6903941631317139, "test_inbatch_pos_score": 1.4322099685668945, "test_loss": 0.4749242663383484, "test_loss_align": 1.5912666320800781, "test_loss_unif": 3.7258567810058594, "test_loss_unif_q@queue": 3.725856304168701, "test_norm_diff": 0.06794100254774094, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.36293625831604004, "test_query_norm": 1.5232917070388794, "test_queue_k_norm": 1.5290088653564453, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.039343688637018204, "test_stdq": 0.03748031705617905, "test_stdqueue_k": 0.047906409949064255, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.5371, "dev_samples_per_second": 2.412, "dev_steps_per_second": 0.038, "epoch": 0.1, "eval_beir-arguana_ndcg@10": 0.32299, "eval_beir-arguana_recall@10": 0.55761, "eval_beir-arguana_recall@100": 0.87553, "eval_beir-arguana_recall@20": 0.69061, "eval_beir-avg_ndcg@10": 0.29379816666666664, "eval_beir-avg_recall@10": 0.3571779166666666, "eval_beir-avg_recall@100": 0.5481685833333334, "eval_beir-avg_recall@20": 0.41503016666666664, "eval_beir-cqadupstack_ndcg@10": 0.19119166666666668, "eval_beir-cqadupstack_recall@10": 0.26672916666666663, "eval_beir-cqadupstack_recall@100": 0.47492583333333344, "eval_beir-cqadupstack_recall@20": 0.32132166666666667, "eval_beir-fiqa_ndcg@10": 0.17395, "eval_beir-fiqa_recall@10": 0.22642, "eval_beir-fiqa_recall@100": 0.46789, "eval_beir-fiqa_recall@20": 0.27949, "eval_beir-nfcorpus_ndcg@10": 0.22581, "eval_beir-nfcorpus_recall@10": 0.10057, "eval_beir-nfcorpus_recall@100": 0.22087, "eval_beir-nfcorpus_recall@20": 0.12986, "eval_beir-nq_ndcg@10": 0.17445, "eval_beir-nq_recall@10": 0.29799, "eval_beir-nq_recall@100": 0.62744, "eval_beir-nq_recall@20": 0.39986, "eval_beir-quora_ndcg@10": 0.60532, "eval_beir-quora_recall@10": 0.73722, "eval_beir-quora_recall@100": 0.91359, "eval_beir-quora_recall@20": 0.80359, "eval_beir-scidocs_ndcg@10": 0.11746, "eval_beir-scidocs_recall@10": 0.12738, "eval_beir-scidocs_recall@100": 0.30908, "eval_beir-scidocs_recall@20": 0.17383, "eval_beir-scifact_ndcg@10": 0.5331, "eval_beir-scifact_recall@10": 0.68117, "eval_beir-scifact_recall@100": 0.86622, "eval_beir-scifact_recall@20": 0.76128, "eval_beir-trec-covid_ndcg@10": 0.42353, "eval_beir-trec-covid_recall@10": 0.458, "eval_beir-trec-covid_recall@100": 0.3346, "eval_beir-trec-covid_recall@20": 0.42, "eval_beir-webis-touche2020_ndcg@10": 0.17018, "eval_beir-webis-touche2020_recall@10": 0.11869, "eval_beir-webis-touche2020_recall@100": 0.39154, "eval_beir-webis-touche2020_recall@20": 0.17046, "eval_senteval-avg_sts": 0.7372250531525408, "eval_senteval-sickr_spearman": 0.7077120066781033, "eval_senteval-stsb_spearman": 0.7667380996269783, "step": 10000, "test_accuracy": 92.10205078125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4749242663383484, "test_doc_norm": 1.455350637435913, "test_inbatch_neg_score": 0.6903941631317139, "test_inbatch_pos_score": 1.4322099685668945, "test_loss": 0.4749242663383484, "test_loss_align": 1.5912666320800781, "test_loss_unif": 3.7258567810058594, "test_loss_unif_q@queue": 3.725856304168701, "test_norm_diff": 0.06794100254774094, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.36293625831604004, "test_query_norm": 1.5232917070388794, "test_queue_k_norm": 1.5290088653564453, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.039343688637018204, "test_stdq": 0.03748031705617905, "test_stdqueue_k": 0.047906409949064255, "test_stdqueue_q": 0.0 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 4.1232, "doc_norm": 1.5242, "encoder_q-embeddings": 1448.6219, "encoder_q-layer.0": 1057.8486, "encoder_q-layer.1": 907.1691, "encoder_q-layer.10": 956.0738, "encoder_q-layer.11": 2449.9661, "encoder_q-layer.2": 998.15, "encoder_q-layer.3": 1017.4432, "encoder_q-layer.4": 972.1656, "encoder_q-layer.5": 896.0901, "encoder_q-layer.6": 923.0245, "encoder_q-layer.7": 879.6662, "encoder_q-layer.8": 923.41, "encoder_q-layer.9": 809.8011, "epoch": 0.1, "inbatch_neg_score": 0.3545, "inbatch_pos_score": 0.9023, "learning_rate": 4.994444444444445e-05, "loss": 4.1232, "norm_diff": 0.0294, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1760.5811, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3508, "query_norm": 1.5481, "queue_k_norm": 1.5215, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5843, "sent_len_1": 66.8939, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.555, "stdk": 0.0478, "stdq": 0.0433, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 10100 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 4.1035, "doc_norm": 1.517, "encoder_q-embeddings": 1465.5112, "encoder_q-layer.0": 1101.0411, "encoder_q-layer.1": 1216.679, "encoder_q-layer.10": 1181.6202, "encoder_q-layer.11": 3352.5471, "encoder_q-layer.2": 1489.4796, "encoder_q-layer.3": 1700.3457, "encoder_q-layer.4": 1854.9624, "encoder_q-layer.5": 1326.7089, "encoder_q-layer.6": 1454.3921, "encoder_q-layer.7": 1386.4214, "encoder_q-layer.8": 1545.1841, "encoder_q-layer.9": 1167.6659, "epoch": 0.1, "inbatch_neg_score": 0.3639, "inbatch_pos_score": 0.9004, "learning_rate": 4.9888888888888894e-05, "loss": 4.1035, "norm_diff": 0.0174, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2416.2822, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3604, "query_norm": 1.5103, "queue_k_norm": 1.5171, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.8457, "sent_len_1": 66.958, "sent_max_len_0": 127.9988, "sent_max_len_1": 192.1387, "stdk": 0.0477, "stdq": 0.0421, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 10200 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 4.1216, "doc_norm": 1.5083, "encoder_q-embeddings": 4060.6438, "encoder_q-layer.0": 3663.6775, "encoder_q-layer.1": 3929.0264, "encoder_q-layer.10": 979.9901, "encoder_q-layer.11": 2301.4797, "encoder_q-layer.2": 3900.3865, "encoder_q-layer.3": 3572.3472, "encoder_q-layer.4": 3456.988, "encoder_q-layer.5": 3282.427, "encoder_q-layer.6": 3067.7095, "encoder_q-layer.7": 2623.2017, "encoder_q-layer.8": 1217.5815, "encoder_q-layer.9": 886.4047, "epoch": 0.1, "inbatch_neg_score": 0.3697, "inbatch_pos_score": 0.9355, "learning_rate": 4.9833333333333336e-05, "loss": 4.1216, "norm_diff": 0.0209, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4623.9438, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.365, "query_norm": 1.5231, "queue_k_norm": 1.5113, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6929, "sent_len_1": 66.8969, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.1012, "stdk": 0.0475, "stdq": 0.043, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 10300 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 4.088, "doc_norm": 1.5045, "encoder_q-embeddings": 1342.5358, "encoder_q-layer.0": 1048.5533, "encoder_q-layer.1": 1082.2039, "encoder_q-layer.10": 991.5097, "encoder_q-layer.11": 2445.8215, "encoder_q-layer.2": 1161.4873, "encoder_q-layer.3": 1155.7552, "encoder_q-layer.4": 1163.2036, "encoder_q-layer.5": 1083.4946, "encoder_q-layer.6": 1284.0621, "encoder_q-layer.7": 1222.6984, "encoder_q-layer.8": 1075.6233, "encoder_q-layer.9": 802.6752, "epoch": 0.1, "inbatch_neg_score": 0.373, "inbatch_pos_score": 0.9126, "learning_rate": 4.977777777777778e-05, "loss": 4.088, "norm_diff": 0.0228, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1883.5007, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3704, "query_norm": 1.4816, "queue_k_norm": 1.5109, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.615, "sent_len_1": 66.8435, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4275, "stdk": 0.0475, "stdq": 0.0417, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 10400 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 4.0845, "doc_norm": 1.502, "encoder_q-embeddings": 3570.2195, "encoder_q-layer.0": 2616.9275, "encoder_q-layer.1": 2755.8596, "encoder_q-layer.10": 916.3362, "encoder_q-layer.11": 2503.7976, "encoder_q-layer.2": 2606.4436, "encoder_q-layer.3": 3120.5632, "encoder_q-layer.4": 2674.1753, "encoder_q-layer.5": 2353.906, "encoder_q-layer.6": 2579.9351, "encoder_q-layer.7": 2095.0886, "encoder_q-layer.8": 1489.7363, "encoder_q-layer.9": 942.8386, "epoch": 0.1, "inbatch_neg_score": 0.36, "inbatch_pos_score": 0.9375, "learning_rate": 4.972222222222223e-05, "loss": 4.0845, "norm_diff": 0.0218, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3721.1271, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3591, "query_norm": 1.5208, "queue_k_norm": 1.5047, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.463, "sent_len_1": 66.6337, "sent_max_len_0": 127.985, "sent_max_len_1": 189.9112, "stdk": 0.0475, "stdq": 0.0439, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 10500 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 4.106, "doc_norm": 1.5022, "encoder_q-embeddings": 187511.7031, "encoder_q-layer.0": 151757.2812, "encoder_q-layer.1": 118608.2422, "encoder_q-layer.10": 1063.5543, "encoder_q-layer.11": 2429.6487, "encoder_q-layer.2": 118204.2344, "encoder_q-layer.3": 98121.9922, "encoder_q-layer.4": 66158.5469, "encoder_q-layer.5": 51081.5117, "encoder_q-layer.6": 56306.3789, "encoder_q-layer.7": 44408.6797, "encoder_q-layer.8": 16155.0684, "encoder_q-layer.9": 4178.2417, "epoch": 0.1, "inbatch_neg_score": 0.3446, "inbatch_pos_score": 0.9248, "learning_rate": 4.966666666666667e-05, "loss": 4.106, "norm_diff": 0.0236, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 149206.6448, "preclip_grad_norm_avg": 0.0014, "q@queue_neg_score": 0.3433, "query_norm": 1.5257, "queue_k_norm": 1.5033, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.3991, "sent_len_1": 66.4898, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1238, "stdk": 0.0475, "stdq": 0.0442, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 10600 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 4.1047, "doc_norm": 1.5031, "encoder_q-embeddings": 1014.2232, "encoder_q-layer.0": 739.649, "encoder_q-layer.1": 763.5262, "encoder_q-layer.10": 933.4952, "encoder_q-layer.11": 2341.1086, "encoder_q-layer.2": 882.92, "encoder_q-layer.3": 898.0259, "encoder_q-layer.4": 995.6754, "encoder_q-layer.5": 1016.4094, "encoder_q-layer.6": 1075.6371, "encoder_q-layer.7": 1109.2477, "encoder_q-layer.8": 960.1476, "encoder_q-layer.9": 773.7275, "epoch": 0.1, "inbatch_neg_score": 0.3618, "inbatch_pos_score": 0.9048, "learning_rate": 4.961111111111111e-05, "loss": 4.1047, "norm_diff": 0.0198, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1632.3818, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3601, "query_norm": 1.4912, "queue_k_norm": 1.5045, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6321, "sent_len_1": 66.6452, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2262, "stdk": 0.0476, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 10700 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 4.0709, "doc_norm": 1.507, "encoder_q-embeddings": 1750.1372, "encoder_q-layer.0": 1315.0245, "encoder_q-layer.1": 1497.1692, "encoder_q-layer.10": 960.6241, "encoder_q-layer.11": 2375.9946, "encoder_q-layer.2": 1779.9139, "encoder_q-layer.3": 1963.3195, "encoder_q-layer.4": 2226.3806, "encoder_q-layer.5": 2452.6548, "encoder_q-layer.6": 2769.4651, "encoder_q-layer.7": 2260.2739, "encoder_q-layer.8": 1495.4889, "encoder_q-layer.9": 921.791, "epoch": 0.11, "inbatch_neg_score": 0.3341, "inbatch_pos_score": 0.9087, "learning_rate": 4.955555555555556e-05, "loss": 4.0709, "norm_diff": 0.0141, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2723.7382, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.332, "query_norm": 1.5194, "queue_k_norm": 1.5024, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7322, "sent_len_1": 66.8316, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5037, "stdk": 0.0478, "stdq": 0.0429, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 10800 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 4.0591, "doc_norm": 1.5046, "encoder_q-embeddings": 1090.3921, "encoder_q-layer.0": 764.0958, "encoder_q-layer.1": 846.8281, "encoder_q-layer.10": 451.404, "encoder_q-layer.11": 1574.7854, "encoder_q-layer.2": 1043.4315, "encoder_q-layer.3": 1104.7054, "encoder_q-layer.4": 1159.7896, "encoder_q-layer.5": 1285.5601, "encoder_q-layer.6": 1176.7213, "encoder_q-layer.7": 930.2723, "encoder_q-layer.8": 699.2979, "encoder_q-layer.9": 436.2315, "epoch": 0.11, "inbatch_neg_score": 0.2675, "inbatch_pos_score": 0.8408, "learning_rate": 4.9500000000000004e-05, "loss": 4.0591, "norm_diff": 0.0498, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1557.6519, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2651, "query_norm": 1.4549, "queue_k_norm": 1.5031, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5651, "sent_len_1": 66.6271, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9575, "stdk": 0.0477, "stdq": 0.0425, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 10900 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.0512, "doc_norm": 1.5056, "encoder_q-embeddings": 1045.3315, "encoder_q-layer.0": 768.9818, "encoder_q-layer.1": 771.2023, "encoder_q-layer.10": 468.3041, "encoder_q-layer.11": 1132.8435, "encoder_q-layer.2": 876.0078, "encoder_q-layer.3": 924.3677, "encoder_q-layer.4": 994.562, "encoder_q-layer.5": 1084.5082, "encoder_q-layer.6": 1298.8358, "encoder_q-layer.7": 1205.6101, "encoder_q-layer.8": 1030.2903, "encoder_q-layer.9": 435.7359, "epoch": 0.11, "inbatch_neg_score": 0.3033, "inbatch_pos_score": 0.8525, "learning_rate": 4.9444444444444446e-05, "loss": 4.0512, "norm_diff": 0.0178, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1407.07, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3027, "query_norm": 1.488, "queue_k_norm": 1.5039, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4838, "sent_len_1": 66.7538, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6962, "stdk": 0.0478, "stdq": 0.0436, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 11000 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 4.0384, "doc_norm": 1.5049, "encoder_q-embeddings": 1006.3923, "encoder_q-layer.0": 704.3137, "encoder_q-layer.1": 808.3285, "encoder_q-layer.10": 484.9539, "encoder_q-layer.11": 1134.8771, "encoder_q-layer.2": 939.6899, "encoder_q-layer.3": 1007.7847, "encoder_q-layer.4": 1085.712, "encoder_q-layer.5": 1097.9065, "encoder_q-layer.6": 1321.4631, "encoder_q-layer.7": 1319.3065, "encoder_q-layer.8": 992.7642, "encoder_q-layer.9": 452.0336, "epoch": 0.11, "inbatch_neg_score": 0.3231, "inbatch_pos_score": 0.8779, "learning_rate": 4.938888888888889e-05, "loss": 4.0384, "norm_diff": 0.0156, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1453.3017, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3218, "query_norm": 1.4934, "queue_k_norm": 1.5017, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7371, "sent_len_1": 66.7437, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8638, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 11100 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.0397, "doc_norm": 1.5064, "encoder_q-embeddings": 1012.1171, "encoder_q-layer.0": 728.0106, "encoder_q-layer.1": 828.1684, "encoder_q-layer.10": 449.1099, "encoder_q-layer.11": 1101.2108, "encoder_q-layer.2": 937.0818, "encoder_q-layer.3": 948.9102, "encoder_q-layer.4": 918.5349, "encoder_q-layer.5": 989.2603, "encoder_q-layer.6": 853.625, "encoder_q-layer.7": 794.043, "encoder_q-layer.8": 540.7559, "encoder_q-layer.9": 361.571, "epoch": 0.11, "inbatch_neg_score": 0.287, "inbatch_pos_score": 0.8438, "learning_rate": 4.933333333333334e-05, "loss": 4.0397, "norm_diff": 0.0399, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1250.5895, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2842, "query_norm": 1.4665, "queue_k_norm": 1.5032, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6078, "sent_len_1": 66.7658, "sent_max_len_0": 127.9963, "sent_max_len_1": 192.0888, "stdk": 0.048, "stdq": 0.0428, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 11200 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 4.0461, "doc_norm": 1.5027, "encoder_q-embeddings": 1029.3694, "encoder_q-layer.0": 752.6773, "encoder_q-layer.1": 886.6398, "encoder_q-layer.10": 412.6276, "encoder_q-layer.11": 934.545, "encoder_q-layer.2": 1092.1249, "encoder_q-layer.3": 1246.7792, "encoder_q-layer.4": 1278.7234, "encoder_q-layer.5": 1266.5922, "encoder_q-layer.6": 1416.1179, "encoder_q-layer.7": 1327.7383, "encoder_q-layer.8": 1005.059, "encoder_q-layer.9": 444.2445, "epoch": 0.11, "inbatch_neg_score": 0.2476, "inbatch_pos_score": 0.8359, "learning_rate": 4.927777777777778e-05, "loss": 4.0461, "norm_diff": 0.037, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1530.0007, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2444, "query_norm": 1.4685, "queue_k_norm": 1.5033, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.525, "sent_len_1": 66.7541, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1538, "stdk": 0.0479, "stdq": 0.044, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 11300 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 4.0348, "doc_norm": 1.5066, "encoder_q-embeddings": 2606.7839, "encoder_q-layer.0": 2006.7711, "encoder_q-layer.1": 2066.5149, "encoder_q-layer.10": 422.2552, "encoder_q-layer.11": 1069.499, "encoder_q-layer.2": 2240.208, "encoder_q-layer.3": 2292.5117, "encoder_q-layer.4": 2212.0054, "encoder_q-layer.5": 2138.0845, "encoder_q-layer.6": 2252.054, "encoder_q-layer.7": 2182.6211, "encoder_q-layer.8": 1314.6482, "encoder_q-layer.9": 526.4974, "epoch": 0.11, "inbatch_neg_score": 0.2728, "inbatch_pos_score": 0.8657, "learning_rate": 4.922222222222222e-05, "loss": 4.0348, "norm_diff": 0.0142, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2913.886, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2727, "query_norm": 1.4927, "queue_k_norm": 1.5037, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6386, "sent_len_1": 66.8792, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3325, "stdk": 0.0481, "stdq": 0.0434, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 11400 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 4.0312, "doc_norm": 1.4998, "encoder_q-embeddings": 1069.3881, "encoder_q-layer.0": 726.3233, "encoder_q-layer.1": 792.9965, "encoder_q-layer.10": 594.219, "encoder_q-layer.11": 1380.2258, "encoder_q-layer.2": 983.8962, "encoder_q-layer.3": 1054.9308, "encoder_q-layer.4": 1132.5931, "encoder_q-layer.5": 1156.3755, "encoder_q-layer.6": 1121.5953, "encoder_q-layer.7": 1043.3979, "encoder_q-layer.8": 838.561, "encoder_q-layer.9": 488.6925, "epoch": 0.11, "inbatch_neg_score": 0.2625, "inbatch_pos_score": 0.8501, "learning_rate": 4.9166666666666665e-05, "loss": 4.0312, "norm_diff": 0.0297, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1415.3553, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2625, "query_norm": 1.5295, "queue_k_norm": 1.5024, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6382, "sent_len_1": 66.6068, "sent_max_len_0": 128.0, "sent_max_len_1": 187.295, "stdk": 0.0479, "stdq": 0.0447, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 11500 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 4.1056, "doc_norm": 1.5024, "encoder_q-embeddings": 2051.3428, "encoder_q-layer.0": 1474.5814, "encoder_q-layer.1": 1655.0903, "encoder_q-layer.10": 407.5242, "encoder_q-layer.11": 1187.4927, "encoder_q-layer.2": 1921.9691, "encoder_q-layer.3": 2049.3608, "encoder_q-layer.4": 2106.4958, "encoder_q-layer.5": 2339.4492, "encoder_q-layer.6": 1984.9128, "encoder_q-layer.7": 1711.2782, "encoder_q-layer.8": 1137.4091, "encoder_q-layer.9": 462.4265, "epoch": 0.11, "inbatch_neg_score": 0.2268, "inbatch_pos_score": 0.7871, "learning_rate": 4.9111111111111114e-05, "loss": 4.1056, "norm_diff": 0.024, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2527.8002, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2258, "query_norm": 1.4784, "queue_k_norm": 1.5027, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4645, "sent_len_1": 66.601, "sent_max_len_0": 127.9925, "sent_max_len_1": 191.475, "stdk": 0.0482, "stdq": 0.0432, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 11600 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 4.0915, "doc_norm": 1.5046, "encoder_q-embeddings": 548.1115, "encoder_q-layer.0": 413.7278, "encoder_q-layer.1": 398.8999, "encoder_q-layer.10": 504.678, "encoder_q-layer.11": 1200.3452, "encoder_q-layer.2": 447.3956, "encoder_q-layer.3": 464.8933, "encoder_q-layer.4": 521.2776, "encoder_q-layer.5": 558.3212, "encoder_q-layer.6": 587.2059, "encoder_q-layer.7": 536.9484, "encoder_q-layer.8": 545.2123, "encoder_q-layer.9": 401.0293, "epoch": 0.11, "inbatch_neg_score": 0.2307, "inbatch_pos_score": 0.7827, "learning_rate": 4.905555555555556e-05, "loss": 4.0915, "norm_diff": 0.0195, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 843.7791, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2286, "query_norm": 1.523, "queue_k_norm": 1.5073, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4597, "sent_len_1": 66.4979, "sent_max_len_0": 127.9975, "sent_max_len_1": 188.5387, "stdk": 0.0482, "stdq": 0.0441, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 11700 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 4.0486, "doc_norm": 1.5093, "encoder_q-embeddings": 590.2604, "encoder_q-layer.0": 455.0719, "encoder_q-layer.1": 510.2821, "encoder_q-layer.10": 218.1097, "encoder_q-layer.11": 547.544, "encoder_q-layer.2": 608.6299, "encoder_q-layer.3": 639.5643, "encoder_q-layer.4": 590.972, "encoder_q-layer.5": 579.012, "encoder_q-layer.6": 625.2393, "encoder_q-layer.7": 601.3443, "encoder_q-layer.8": 374.3857, "encoder_q-layer.9": 192.5331, "epoch": 0.12, "inbatch_neg_score": 0.2711, "inbatch_pos_score": 0.8569, "learning_rate": 4.9e-05, "loss": 4.0486, "norm_diff": 0.0155, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 785.8823, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2688, "query_norm": 1.5177, "queue_k_norm": 1.5109, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5391, "sent_len_1": 66.7429, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.5987, "stdk": 0.0484, "stdq": 0.0437, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 11800 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 4.0696, "doc_norm": 1.5152, "encoder_q-embeddings": 355.1288, "encoder_q-layer.0": 258.4359, "encoder_q-layer.1": 279.7134, "encoder_q-layer.10": 216.8395, "encoder_q-layer.11": 625.1115, "encoder_q-layer.2": 327.1831, "encoder_q-layer.3": 366.8892, "encoder_q-layer.4": 414.1359, "encoder_q-layer.5": 427.0367, "encoder_q-layer.6": 455.8827, "encoder_q-layer.7": 426.4621, "encoder_q-layer.8": 341.6607, "encoder_q-layer.9": 190.7862, "epoch": 0.12, "inbatch_neg_score": 0.2291, "inbatch_pos_score": 0.8081, "learning_rate": 4.894444444444445e-05, "loss": 4.0696, "norm_diff": 0.0143, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 544.4381, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2266, "query_norm": 1.5136, "queue_k_norm": 1.5138, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5393, "sent_len_1": 66.7562, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7025, "stdk": 0.0487, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 11900 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 4.0252, "doc_norm": 1.5175, "encoder_q-embeddings": 1231.4844, "encoder_q-layer.0": 916.1233, "encoder_q-layer.1": 1073.6591, "encoder_q-layer.10": 266.6816, "encoder_q-layer.11": 571.3257, "encoder_q-layer.2": 1186.0205, "encoder_q-layer.3": 1346.601, "encoder_q-layer.4": 1361.4141, "encoder_q-layer.5": 1560.375, "encoder_q-layer.6": 1145.3721, "encoder_q-layer.7": 957.6866, "encoder_q-layer.8": 534.3804, "encoder_q-layer.9": 254.6789, "epoch": 0.12, "inbatch_neg_score": 0.2261, "inbatch_pos_score": 0.8169, "learning_rate": 4.888888888888889e-05, "loss": 4.0252, "norm_diff": 0.0245, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1543.724, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2241, "query_norm": 1.5331, "queue_k_norm": 1.5144, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6531, "sent_len_1": 66.9527, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2625, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 12000 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 4.0212, "doc_norm": 1.5119, "encoder_q-embeddings": 544.5698, "encoder_q-layer.0": 497.9139, "encoder_q-layer.1": 585.8976, "encoder_q-layer.10": 206.0625, "encoder_q-layer.11": 654.7182, "encoder_q-layer.2": 656.5906, "encoder_q-layer.3": 550.7283, "encoder_q-layer.4": 477.0172, "encoder_q-layer.5": 468.8564, "encoder_q-layer.6": 459.476, "encoder_q-layer.7": 470.0642, "encoder_q-layer.8": 307.8608, "encoder_q-layer.9": 185.8992, "epoch": 0.12, "inbatch_neg_score": 0.2068, "inbatch_pos_score": 0.7861, "learning_rate": 4.883333333333334e-05, "loss": 4.0212, "norm_diff": 0.0642, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 742.2359, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2042, "query_norm": 1.4476, "queue_k_norm": 1.5162, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7082, "sent_len_1": 67.0751, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0825, "stdk": 0.0487, "stdq": 0.0433, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 12100 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 4.0317, "doc_norm": 1.506, "encoder_q-embeddings": 2377.106, "encoder_q-layer.0": 1700.5345, "encoder_q-layer.1": 1674.6139, "encoder_q-layer.10": 197.6092, "encoder_q-layer.11": 520.281, "encoder_q-layer.2": 1859.0636, "encoder_q-layer.3": 1859.1727, "encoder_q-layer.4": 1651.075, "encoder_q-layer.5": 1600.6549, "encoder_q-layer.6": 1730.6539, "encoder_q-layer.7": 1552.8525, "encoder_q-layer.8": 876.1519, "encoder_q-layer.9": 238.2715, "epoch": 0.12, "inbatch_neg_score": 0.2351, "inbatch_pos_score": 0.8135, "learning_rate": 4.8777777777777775e-05, "loss": 4.0317, "norm_diff": 0.0494, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2329.9054, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2312, "query_norm": 1.4567, "queue_k_norm": 1.5132, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.3852, "sent_len_1": 66.9012, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8237, "stdk": 0.0485, "stdq": 0.0429, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 12200 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 4.0176, "doc_norm": 1.5124, "encoder_q-embeddings": 822.8361, "encoder_q-layer.0": 601.0208, "encoder_q-layer.1": 612.6987, "encoder_q-layer.10": 224.7592, "encoder_q-layer.11": 926.0406, "encoder_q-layer.2": 686.2689, "encoder_q-layer.3": 770.9217, "encoder_q-layer.4": 808.9857, "encoder_q-layer.5": 791.8606, "encoder_q-layer.6": 840.7661, "encoder_q-layer.7": 813.7579, "encoder_q-layer.8": 493.4341, "encoder_q-layer.9": 222.1043, "epoch": 0.12, "inbatch_neg_score": 0.2903, "inbatch_pos_score": 0.8423, "learning_rate": 4.8722222222222224e-05, "loss": 4.0176, "norm_diff": 0.0421, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1055.2346, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2881, "query_norm": 1.4703, "queue_k_norm": 1.5127, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5544, "sent_len_1": 66.6338, "sent_max_len_0": 127.9925, "sent_max_len_1": 189.995, "stdk": 0.0488, "stdq": 0.0415, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 12300 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 4.0263, "doc_norm": 1.5002, "encoder_q-embeddings": 1945.4038, "encoder_q-layer.0": 1574.0858, "encoder_q-layer.1": 1729.1653, "encoder_q-layer.10": 222.8371, "encoder_q-layer.11": 551.9618, "encoder_q-layer.2": 2140.1362, "encoder_q-layer.3": 2437.7771, "encoder_q-layer.4": 2358.2485, "encoder_q-layer.5": 2259.3484, "encoder_q-layer.6": 2312.6785, "encoder_q-layer.7": 1654.3422, "encoder_q-layer.8": 746.4417, "encoder_q-layer.9": 256.0833, "epoch": 0.12, "inbatch_neg_score": 0.2668, "inbatch_pos_score": 0.8169, "learning_rate": 4.866666666666667e-05, "loss": 4.0263, "norm_diff": 0.0315, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2606.6894, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2654, "query_norm": 1.4687, "queue_k_norm": 1.5044, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7714, "sent_len_1": 66.7858, "sent_max_len_0": 128.0, "sent_max_len_1": 188.73, "stdk": 0.0485, "stdq": 0.0423, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 12400 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.988, "doc_norm": 1.4962, "encoder_q-embeddings": 754.5173, "encoder_q-layer.0": 561.1927, "encoder_q-layer.1": 570.1388, "encoder_q-layer.10": 208.0388, "encoder_q-layer.11": 618.7166, "encoder_q-layer.2": 617.2849, "encoder_q-layer.3": 636.7538, "encoder_q-layer.4": 646.8017, "encoder_q-layer.5": 620.9261, "encoder_q-layer.6": 679.8842, "encoder_q-layer.7": 611.0474, "encoder_q-layer.8": 423.8813, "encoder_q-layer.9": 204.5651, "epoch": 0.12, "inbatch_neg_score": 0.2398, "inbatch_pos_score": 0.8062, "learning_rate": 4.8611111111111115e-05, "loss": 3.988, "norm_diff": 0.0359, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 864.3019, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2377, "query_norm": 1.4603, "queue_k_norm": 1.4936, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6237, "sent_len_1": 66.6978, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2375, "stdk": 0.0485, "stdq": 0.0416, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 12500 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 4.0002, "doc_norm": 1.4891, "encoder_q-embeddings": 795.8477, "encoder_q-layer.0": 584.8983, "encoder_q-layer.1": 586.4973, "encoder_q-layer.10": 217.2432, "encoder_q-layer.11": 533.3003, "encoder_q-layer.2": 632.0687, "encoder_q-layer.3": 718.1371, "encoder_q-layer.4": 870.2447, "encoder_q-layer.5": 906.3325, "encoder_q-layer.6": 1073.8632, "encoder_q-layer.7": 1105.8287, "encoder_q-layer.8": 577.5829, "encoder_q-layer.9": 206.88, "epoch": 0.12, "inbatch_neg_score": 0.2717, "inbatch_pos_score": 0.8818, "learning_rate": 4.855555555555556e-05, "loss": 4.0002, "norm_diff": 0.0444, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1067.5997, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2708, "query_norm": 1.5335, "queue_k_norm": 1.4889, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7895, "sent_len_1": 66.6443, "sent_max_len_0": 128.0, "sent_max_len_1": 189.62, "stdk": 0.0484, "stdq": 0.0442, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 12600 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 4.0139, "doc_norm": 1.4753, "encoder_q-embeddings": 3569.6147, "encoder_q-layer.0": 2766.7241, "encoder_q-layer.1": 3177.2852, "encoder_q-layer.10": 205.2834, "encoder_q-layer.11": 530.3652, "encoder_q-layer.2": 3727.6851, "encoder_q-layer.3": 3910.3848, "encoder_q-layer.4": 3915.7039, "encoder_q-layer.5": 3704.4194, "encoder_q-layer.6": 3116.019, "encoder_q-layer.7": 3248.938, "encoder_q-layer.8": 2077.9902, "encoder_q-layer.9": 338.6291, "epoch": 0.12, "inbatch_neg_score": 0.2434, "inbatch_pos_score": 0.8257, "learning_rate": 4.85e-05, "loss": 4.0139, "norm_diff": 0.015, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4425.35, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2412, "query_norm": 1.4765, "queue_k_norm": 1.4786, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6397, "sent_len_1": 66.6997, "sent_max_len_0": 127.9887, "sent_max_len_1": 188.4762, "stdk": 0.048, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 12700 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.9959, "doc_norm": 1.4749, "encoder_q-embeddings": 738.3461, "encoder_q-layer.0": 529.9689, "encoder_q-layer.1": 579.7754, "encoder_q-layer.10": 222.5539, "encoder_q-layer.11": 562.9724, "encoder_q-layer.2": 687.142, "encoder_q-layer.3": 715.8489, "encoder_q-layer.4": 745.6083, "encoder_q-layer.5": 703.7142, "encoder_q-layer.6": 683.5349, "encoder_q-layer.7": 638.855, "encoder_q-layer.8": 399.6296, "encoder_q-layer.9": 201.6442, "epoch": 0.12, "inbatch_neg_score": 0.2805, "inbatch_pos_score": 0.8535, "learning_rate": 4.844444444444445e-05, "loss": 3.9959, "norm_diff": 0.0174, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 893.1091, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2786, "query_norm": 1.4906, "queue_k_norm": 1.4708, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.3225, "sent_len_1": 66.4732, "sent_max_len_0": 127.9988, "sent_max_len_1": 188.1188, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 12800 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.999, "doc_norm": 1.4578, "encoder_q-embeddings": 811.5291, "encoder_q-layer.0": 618.3511, "encoder_q-layer.1": 582.4246, "encoder_q-layer.10": 219.0276, "encoder_q-layer.11": 497.8519, "encoder_q-layer.2": 655.3713, "encoder_q-layer.3": 656.1781, "encoder_q-layer.4": 719.4308, "encoder_q-layer.5": 717.2226, "encoder_q-layer.6": 606.2795, "encoder_q-layer.7": 497.0389, "encoder_q-layer.8": 317.8746, "encoder_q-layer.9": 182.5797, "epoch": 0.13, "inbatch_neg_score": 0.2156, "inbatch_pos_score": 0.8052, "learning_rate": 4.838888888888889e-05, "loss": 3.999, "norm_diff": 0.0476, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 871.0591, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.213, "query_norm": 1.5054, "queue_k_norm": 1.4601, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5349, "sent_len_1": 66.8358, "sent_max_len_0": 128.0, "sent_max_len_1": 189.895, "stdk": 0.0477, "stdq": 0.0441, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 12900 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.9801, "doc_norm": 1.4544, "encoder_q-embeddings": 831.8998, "encoder_q-layer.0": 588.6066, "encoder_q-layer.1": 727.7207, "encoder_q-layer.10": 206.1723, "encoder_q-layer.11": 542.5445, "encoder_q-layer.2": 856.7277, "encoder_q-layer.3": 892.5495, "encoder_q-layer.4": 952.4522, "encoder_q-layer.5": 849.66, "encoder_q-layer.6": 670.2434, "encoder_q-layer.7": 480.575, "encoder_q-layer.8": 292.2965, "encoder_q-layer.9": 189.3088, "epoch": 0.13, "inbatch_neg_score": 0.1982, "inbatch_pos_score": 0.7954, "learning_rate": 4.8333333333333334e-05, "loss": 3.9801, "norm_diff": 0.0412, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1003.9375, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1974, "query_norm": 1.4956, "queue_k_norm": 1.4552, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.3934, "sent_len_1": 66.5846, "sent_max_len_0": 127.9887, "sent_max_len_1": 187.0813, "stdk": 0.0477, "stdq": 0.0441, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 13000 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.9767, "doc_norm": 1.4531, "encoder_q-embeddings": 2882.895, "encoder_q-layer.0": 2046.2373, "encoder_q-layer.1": 2343.554, "encoder_q-layer.10": 201.055, "encoder_q-layer.11": 491.8831, "encoder_q-layer.2": 2824.616, "encoder_q-layer.3": 2716.7651, "encoder_q-layer.4": 2527.4299, "encoder_q-layer.5": 2262.6106, "encoder_q-layer.6": 1617.656, "encoder_q-layer.7": 1113.8308, "encoder_q-layer.8": 586.1891, "encoder_q-layer.9": 214.6304, "epoch": 0.13, "inbatch_neg_score": 0.1814, "inbatch_pos_score": 0.7627, "learning_rate": 4.8277777777777776e-05, "loss": 3.9767, "norm_diff": 0.0124, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3019.3928, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1797, "query_norm": 1.4585, "queue_k_norm": 1.4481, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.3671, "sent_len_1": 66.7703, "sent_max_len_0": 127.9963, "sent_max_len_1": 188.125, "stdk": 0.0478, "stdq": 0.043, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 13100 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.9583, "doc_norm": 1.445, "encoder_q-embeddings": 1193.7732, "encoder_q-layer.0": 911.8041, "encoder_q-layer.1": 957.4974, "encoder_q-layer.10": 195.1377, "encoder_q-layer.11": 497.1541, "encoder_q-layer.2": 1047.168, "encoder_q-layer.3": 1123.2655, "encoder_q-layer.4": 1193.0824, "encoder_q-layer.5": 1156.7236, "encoder_q-layer.6": 1095.5287, "encoder_q-layer.7": 851.3163, "encoder_q-layer.8": 555.9937, "encoder_q-layer.9": 220.9848, "epoch": 0.13, "inbatch_neg_score": 0.2088, "inbatch_pos_score": 0.8057, "learning_rate": 4.8222222222222225e-05, "loss": 3.9583, "norm_diff": 0.0257, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1364.8189, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2056, "query_norm": 1.4706, "queue_k_norm": 1.4445, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6512, "sent_len_1": 66.9243, "sent_max_len_0": 128.0, "sent_max_len_1": 191.5275, "stdk": 0.0477, "stdq": 0.0439, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 13200 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.979, "doc_norm": 1.4346, "encoder_q-embeddings": 3532.0715, "encoder_q-layer.0": 2783.6743, "encoder_q-layer.1": 2764.2039, "encoder_q-layer.10": 240.3751, "encoder_q-layer.11": 533.4269, "encoder_q-layer.2": 2951.6108, "encoder_q-layer.3": 3285.2749, "encoder_q-layer.4": 3401.4937, "encoder_q-layer.5": 3191.0632, "encoder_q-layer.6": 2752.156, "encoder_q-layer.7": 1964.6599, "encoder_q-layer.8": 1123.731, "encoder_q-layer.9": 333.0159, "epoch": 0.13, "inbatch_neg_score": 0.2088, "inbatch_pos_score": 0.7935, "learning_rate": 4.8166666666666674e-05, "loss": 3.979, "norm_diff": 0.0305, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3860.8845, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2064, "query_norm": 1.4651, "queue_k_norm": 1.4384, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.3905, "sent_len_1": 66.8065, "sent_max_len_0": 127.9912, "sent_max_len_1": 190.3512, "stdk": 0.0474, "stdq": 0.0437, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 13300 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9386, "doc_norm": 1.4295, "encoder_q-embeddings": 512.2905, "encoder_q-layer.0": 344.1226, "encoder_q-layer.1": 376.6283, "encoder_q-layer.10": 200.2297, "encoder_q-layer.11": 537.9089, "encoder_q-layer.2": 421.9718, "encoder_q-layer.3": 449.9577, "encoder_q-layer.4": 456.8662, "encoder_q-layer.5": 487.7169, "encoder_q-layer.6": 414.438, "encoder_q-layer.7": 310.3743, "encoder_q-layer.8": 264.0671, "encoder_q-layer.9": 192.0341, "epoch": 0.13, "inbatch_neg_score": 0.2036, "inbatch_pos_score": 0.7739, "learning_rate": 4.811111111111111e-05, "loss": 3.9386, "norm_diff": 0.0145, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 592.2179, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2002, "query_norm": 1.4334, "queue_k_norm": 1.4343, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.636, "sent_len_1": 66.7712, "sent_max_len_0": 128.0, "sent_max_len_1": 187.525, "stdk": 0.0474, "stdq": 0.0426, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 13400 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.9692, "doc_norm": 1.4289, "encoder_q-embeddings": 882.7989, "encoder_q-layer.0": 642.8174, "encoder_q-layer.1": 735.2681, "encoder_q-layer.10": 237.4112, "encoder_q-layer.11": 573.2514, "encoder_q-layer.2": 751.2281, "encoder_q-layer.3": 771.5388, "encoder_q-layer.4": 733.3499, "encoder_q-layer.5": 702.6493, "encoder_q-layer.6": 743.8546, "encoder_q-layer.7": 732.8325, "encoder_q-layer.8": 507.2711, "encoder_q-layer.9": 253.0866, "epoch": 0.13, "inbatch_neg_score": 0.2084, "inbatch_pos_score": 0.7808, "learning_rate": 4.805555555555556e-05, "loss": 3.9692, "norm_diff": 0.0564, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1004.2169, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2063, "query_norm": 1.4853, "queue_k_norm": 1.4284, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4988, "sent_len_1": 66.8274, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6425, "stdk": 0.0475, "stdq": 0.044, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 13500 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.9845, "doc_norm": 1.4335, "encoder_q-embeddings": 446.4859, "encoder_q-layer.0": 394.486, "encoder_q-layer.1": 443.1285, "encoder_q-layer.10": 229.8172, "encoder_q-layer.11": 617.2519, "encoder_q-layer.2": 544.3834, "encoder_q-layer.3": 575.8123, "encoder_q-layer.4": 551.3411, "encoder_q-layer.5": 556.5685, "encoder_q-layer.6": 643.2734, "encoder_q-layer.7": 599.6227, "encoder_q-layer.8": 376.844, "encoder_q-layer.9": 199.6167, "epoch": 0.13, "inbatch_neg_score": 0.1651, "inbatch_pos_score": 0.7681, "learning_rate": 4.8e-05, "loss": 3.9845, "norm_diff": 0.1899, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 725.8377, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1633, "query_norm": 1.6234, "queue_k_norm": 1.4276, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7274, "sent_len_1": 66.8145, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7575, "stdk": 0.0478, "stdq": 0.0445, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 13600 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.9536, "doc_norm": 1.4231, "encoder_q-embeddings": 426.7956, "encoder_q-layer.0": 306.3632, "encoder_q-layer.1": 347.4604, "encoder_q-layer.10": 203.7925, "encoder_q-layer.11": 443.1774, "encoder_q-layer.2": 392.0001, "encoder_q-layer.3": 428.814, "encoder_q-layer.4": 462.8127, "encoder_q-layer.5": 493.8662, "encoder_q-layer.6": 492.5912, "encoder_q-layer.7": 526.8166, "encoder_q-layer.8": 382.7576, "encoder_q-layer.9": 213.5119, "epoch": 0.13, "inbatch_neg_score": 0.1572, "inbatch_pos_score": 0.75, "learning_rate": 4.794444444444445e-05, "loss": 3.9536, "norm_diff": 0.1147, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 591.277, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1562, "query_norm": 1.5378, "queue_k_norm": 1.4248, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.8576, "sent_len_1": 66.7893, "sent_max_len_0": 128.0, "sent_max_len_1": 189.925, "stdk": 0.0475, "stdq": 0.0444, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 13700 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 3.9529, "doc_norm": 1.4174, "encoder_q-embeddings": 2097.8501, "encoder_q-layer.0": 1576.168, "encoder_q-layer.1": 1951.212, "encoder_q-layer.10": 431.5559, "encoder_q-layer.11": 1112.64, "encoder_q-layer.2": 2138.0662, "encoder_q-layer.3": 2234.4377, "encoder_q-layer.4": 2775.7593, "encoder_q-layer.5": 2536.4038, "encoder_q-layer.6": 2770.7483, "encoder_q-layer.7": 2626.386, "encoder_q-layer.8": 1791.1729, "encoder_q-layer.9": 503.3346, "epoch": 0.13, "inbatch_neg_score": 0.1787, "inbatch_pos_score": 0.7388, "learning_rate": 4.7888888888888886e-05, "loss": 3.9529, "norm_diff": 0.1222, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2986.6581, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1743, "query_norm": 1.5396, "queue_k_norm": 1.4222, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6745, "sent_len_1": 66.9, "sent_max_len_0": 127.995, "sent_max_len_1": 190.725, "stdk": 0.0474, "stdq": 0.0443, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 13800 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.9409, "doc_norm": 1.4166, "encoder_q-embeddings": 627.9582, "encoder_q-layer.0": 415.0849, "encoder_q-layer.1": 460.2468, "encoder_q-layer.10": 201.2237, "encoder_q-layer.11": 572.1221, "encoder_q-layer.2": 511.6462, "encoder_q-layer.3": 558.3589, "encoder_q-layer.4": 575.6423, "encoder_q-layer.5": 556.6241, "encoder_q-layer.6": 621.0123, "encoder_q-layer.7": 650.7845, "encoder_q-layer.8": 449.7071, "encoder_q-layer.9": 187.8036, "epoch": 0.14, "inbatch_neg_score": 0.1821, "inbatch_pos_score": 0.751, "learning_rate": 4.7833333333333335e-05, "loss": 3.9409, "norm_diff": 0.0715, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 761.3757, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1803, "query_norm": 1.4881, "queue_k_norm": 1.4156, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5475, "sent_len_1": 66.8108, "sent_max_len_0": 127.995, "sent_max_len_1": 190.2887, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 13900 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.9274, "doc_norm": 1.4123, "encoder_q-embeddings": 687.394, "encoder_q-layer.0": 524.3434, "encoder_q-layer.1": 592.6246, "encoder_q-layer.10": 175.3681, "encoder_q-layer.11": 485.2189, "encoder_q-layer.2": 646.2941, "encoder_q-layer.3": 649.6846, "encoder_q-layer.4": 639.1945, "encoder_q-layer.5": 640.9507, "encoder_q-layer.6": 625.6684, "encoder_q-layer.7": 580.7272, "encoder_q-layer.8": 455.3405, "encoder_q-layer.9": 198.9424, "epoch": 0.14, "inbatch_neg_score": 0.2033, "inbatch_pos_score": 0.7935, "learning_rate": 4.7777777777777784e-05, "loss": 3.9274, "norm_diff": 0.0664, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 834.8607, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2004, "query_norm": 1.4788, "queue_k_norm": 1.4108, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5313, "sent_len_1": 66.8135, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3587, "stdk": 0.0474, "stdq": 0.0429, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 14000 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.9207, "doc_norm": 1.4032, "encoder_q-embeddings": 1871.4392, "encoder_q-layer.0": 1633.6338, "encoder_q-layer.1": 1645.5273, "encoder_q-layer.10": 197.693, "encoder_q-layer.11": 542.2784, "encoder_q-layer.2": 1716.2496, "encoder_q-layer.3": 1666.9688, "encoder_q-layer.4": 1504.8525, "encoder_q-layer.5": 1336.1301, "encoder_q-layer.6": 1365.6995, "encoder_q-layer.7": 1105.9332, "encoder_q-layer.8": 598.1918, "encoder_q-layer.9": 269.591, "epoch": 0.14, "inbatch_neg_score": 0.1948, "inbatch_pos_score": 0.7764, "learning_rate": 4.7722222222222226e-05, "loss": 3.9207, "norm_diff": 0.1644, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1986.365, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1934, "query_norm": 1.5676, "queue_k_norm": 1.4045, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5608, "sent_len_1": 66.7681, "sent_max_len_0": 128.0, "sent_max_len_1": 192.61, "stdk": 0.0472, "stdq": 0.0441, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 14100 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9333, "doc_norm": 1.4029, "encoder_q-embeddings": 372.4076, "encoder_q-layer.0": 260.6894, "encoder_q-layer.1": 303.78, "encoder_q-layer.10": 213.9157, "encoder_q-layer.11": 530.054, "encoder_q-layer.2": 350.6402, "encoder_q-layer.3": 400.8733, "encoder_q-layer.4": 423.9517, "encoder_q-layer.5": 401.4339, "encoder_q-layer.6": 417.9214, "encoder_q-layer.7": 383.4362, "encoder_q-layer.8": 319.0243, "encoder_q-layer.9": 202.1911, "epoch": 0.14, "inbatch_neg_score": 0.2187, "inbatch_pos_score": 0.769, "learning_rate": 4.766666666666667e-05, "loss": 3.9333, "norm_diff": 0.2075, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 523.7317, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2153, "query_norm": 1.6103, "queue_k_norm": 1.4018, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7221, "sent_len_1": 66.6961, "sent_max_len_0": 127.9887, "sent_max_len_1": 190.685, "stdk": 0.0472, "stdq": 0.0437, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14200 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.9455, "doc_norm": 1.3992, "encoder_q-embeddings": 354.4218, "encoder_q-layer.0": 265.0338, "encoder_q-layer.1": 309.6699, "encoder_q-layer.10": 204.8625, "encoder_q-layer.11": 562.0129, "encoder_q-layer.2": 345.8843, "encoder_q-layer.3": 361.1559, "encoder_q-layer.4": 342.3157, "encoder_q-layer.5": 318.474, "encoder_q-layer.6": 317.6906, "encoder_q-layer.7": 314.0235, "encoder_q-layer.8": 301.2424, "encoder_q-layer.9": 206.4161, "epoch": 0.14, "inbatch_neg_score": 0.233, "inbatch_pos_score": 0.7729, "learning_rate": 4.761111111111111e-05, "loss": 3.9455, "norm_diff": 0.2154, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 489.6401, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2285, "query_norm": 1.6147, "queue_k_norm": 1.3994, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6544, "sent_len_1": 66.678, "sent_max_len_0": 127.995, "sent_max_len_1": 190.4475, "stdk": 0.0471, "stdq": 0.042, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14300 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.9395, "doc_norm": 1.3992, "encoder_q-embeddings": 277.3193, "encoder_q-layer.0": 198.979, "encoder_q-layer.1": 230.4752, "encoder_q-layer.10": 210.0844, "encoder_q-layer.11": 550.3181, "encoder_q-layer.2": 280.252, "encoder_q-layer.3": 286.834, "encoder_q-layer.4": 281.1815, "encoder_q-layer.5": 232.3588, "encoder_q-layer.6": 248.2735, "encoder_q-layer.7": 225.532, "encoder_q-layer.8": 236.5916, "encoder_q-layer.9": 199.0494, "epoch": 0.14, "inbatch_neg_score": 0.237, "inbatch_pos_score": 0.7949, "learning_rate": 4.755555555555556e-05, "loss": 3.9395, "norm_diff": 0.2736, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 409.1768, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.233, "query_norm": 1.6729, "queue_k_norm": 1.3982, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5979, "sent_len_1": 67.0639, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.9162, "stdk": 0.0471, "stdq": 0.0432, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14400 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.9424, "doc_norm": 1.3932, "encoder_q-embeddings": 599.4418, "encoder_q-layer.0": 466.7927, "encoder_q-layer.1": 528.8549, "encoder_q-layer.10": 191.1747, "encoder_q-layer.11": 467.4011, "encoder_q-layer.2": 525.6415, "encoder_q-layer.3": 531.1902, "encoder_q-layer.4": 525.718, "encoder_q-layer.5": 509.418, "encoder_q-layer.6": 522.8702, "encoder_q-layer.7": 474.5603, "encoder_q-layer.8": 314.0046, "encoder_q-layer.9": 175.7449, "epoch": 0.14, "inbatch_neg_score": 0.2478, "inbatch_pos_score": 0.8257, "learning_rate": 4.75e-05, "loss": 3.9424, "norm_diff": 0.1665, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 695.2265, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2455, "query_norm": 1.5597, "queue_k_norm": 1.3947, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5253, "sent_len_1": 66.7356, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.0838, "stdk": 0.0469, "stdq": 0.0443, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 14500 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.9532, "doc_norm": 1.3923, "encoder_q-embeddings": 101.2574, "encoder_q-layer.0": 65.0897, "encoder_q-layer.1": 70.9208, "encoder_q-layer.10": 113.7212, "encoder_q-layer.11": 247.1996, "encoder_q-layer.2": 79.5446, "encoder_q-layer.3": 87.2119, "encoder_q-layer.4": 87.67, "encoder_q-layer.5": 98.8027, "encoder_q-layer.6": 112.2808, "encoder_q-layer.7": 122.6052, "encoder_q-layer.8": 122.2925, "encoder_q-layer.9": 97.9424, "epoch": 0.14, "inbatch_neg_score": 0.2894, "inbatch_pos_score": 0.8584, "learning_rate": 4.7444444444444445e-05, "loss": 3.9532, "norm_diff": 0.3056, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 166.5068, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2839, "query_norm": 1.6979, "queue_k_norm": 1.3924, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5179, "sent_len_1": 66.9447, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5075, "stdk": 0.0468, "stdq": 0.0423, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 14600 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.9671, "doc_norm": 1.3914, "encoder_q-embeddings": 109.2747, "encoder_q-layer.0": 79.9783, "encoder_q-layer.1": 91.614, "encoder_q-layer.10": 48.4705, "encoder_q-layer.11": 116.162, "encoder_q-layer.2": 96.1963, "encoder_q-layer.3": 99.885, "encoder_q-layer.4": 94.7271, "encoder_q-layer.5": 90.4193, "encoder_q-layer.6": 100.9214, "encoder_q-layer.7": 93.4115, "encoder_q-layer.8": 57.8512, "encoder_q-layer.9": 42.6186, "epoch": 0.14, "inbatch_neg_score": 0.3077, "inbatch_pos_score": 0.8813, "learning_rate": 4.7388888888888894e-05, "loss": 3.9671, "norm_diff": 0.224, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 132.2918, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3054, "query_norm": 1.6154, "queue_k_norm": 1.3912, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5062, "sent_len_1": 66.7546, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.7012, "stdk": 0.0467, "stdq": 0.0446, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 14700 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.9645, "doc_norm": 1.3999, "encoder_q-embeddings": 118.1986, "encoder_q-layer.0": 84.6109, "encoder_q-layer.1": 98.01, "encoder_q-layer.10": 49.4066, "encoder_q-layer.11": 119.3815, "encoder_q-layer.2": 115.8879, "encoder_q-layer.3": 116.8283, "encoder_q-layer.4": 121.1821, "encoder_q-layer.5": 116.5276, "encoder_q-layer.6": 123.9427, "encoder_q-layer.7": 124.564, "encoder_q-layer.8": 92.8206, "encoder_q-layer.9": 44.4205, "epoch": 0.14, "inbatch_neg_score": 0.3176, "inbatch_pos_score": 0.9014, "learning_rate": 4.7333333333333336e-05, "loss": 3.9645, "norm_diff": 0.177, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 155.6826, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3135, "query_norm": 1.5768, "queue_k_norm": 1.3924, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6535, "sent_len_1": 66.6476, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4787, "stdk": 0.0469, "stdq": 0.0446, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 14800 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.9761, "doc_norm": 1.3872, "encoder_q-embeddings": 151.2502, "encoder_q-layer.0": 100.7463, "encoder_q-layer.1": 114.0323, "encoder_q-layer.10": 48.1116, "encoder_q-layer.11": 139.4457, "encoder_q-layer.2": 138.4239, "encoder_q-layer.3": 145.1298, "encoder_q-layer.4": 150.2883, "encoder_q-layer.5": 129.5031, "encoder_q-layer.6": 124.3448, "encoder_q-layer.7": 89.7055, "encoder_q-layer.8": 70.4009, "encoder_q-layer.9": 44.2518, "epoch": 0.15, "inbatch_neg_score": 0.3382, "inbatch_pos_score": 0.9033, "learning_rate": 4.727777777777778e-05, "loss": 3.9761, "norm_diff": 0.1927, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 174.5184, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3352, "query_norm": 1.5799, "queue_k_norm": 1.3949, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6342, "sent_len_1": 66.5168, "sent_max_len_0": 128.0, "sent_max_len_1": 188.105, "stdk": 0.0463, "stdq": 0.0446, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 14900 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 4.0545, "doc_norm": 1.3965, "encoder_q-embeddings": 686.3675, "encoder_q-layer.0": 451.9664, "encoder_q-layer.1": 500.3155, "encoder_q-layer.10": 51.9915, "encoder_q-layer.11": 163.3711, "encoder_q-layer.2": 518.0944, "encoder_q-layer.3": 570.2614, "encoder_q-layer.4": 561.1185, "encoder_q-layer.5": 333.0579, "encoder_q-layer.6": 301.4528, "encoder_q-layer.7": 247.2181, "encoder_q-layer.8": 103.5584, "encoder_q-layer.9": 48.6361, "epoch": 0.15, "inbatch_neg_score": 0.3096, "inbatch_pos_score": 0.8501, "learning_rate": 4.722222222222222e-05, "loss": 4.0545, "norm_diff": 0.127, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 645.4065, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3074, "query_norm": 1.5235, "queue_k_norm": 1.3951, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5009, "sent_len_1": 66.7196, "sent_max_len_0": 127.9975, "sent_max_len_1": 188.8025, "stdk": 0.0465, "stdq": 0.0437, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 15000 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 4.0392, "doc_norm": 1.3977, "encoder_q-embeddings": 245.6425, "encoder_q-layer.0": 184.1453, "encoder_q-layer.1": 191.0379, "encoder_q-layer.10": 48.1357, "encoder_q-layer.11": 163.4632, "encoder_q-layer.2": 185.4342, "encoder_q-layer.3": 186.8276, "encoder_q-layer.4": 186.2157, "encoder_q-layer.5": 169.1217, "encoder_q-layer.6": 128.7419, "encoder_q-layer.7": 124.8794, "encoder_q-layer.8": 77.1153, "encoder_q-layer.9": 48.1254, "epoch": 0.15, "inbatch_neg_score": 0.2874, "inbatch_pos_score": 0.8354, "learning_rate": 4.716666666666667e-05, "loss": 4.0392, "norm_diff": 0.0933, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 247.045, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2854, "query_norm": 1.491, "queue_k_norm": 1.3978, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4342, "sent_len_1": 66.6044, "sent_max_len_0": 128.0, "sent_max_len_1": 190.65, "stdk": 0.0465, "stdq": 0.0439, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 15100 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 4.0059, "doc_norm": 1.4007, "encoder_q-embeddings": 600.2662, "encoder_q-layer.0": 464.0216, "encoder_q-layer.1": 502.0555, "encoder_q-layer.10": 49.7401, "encoder_q-layer.11": 154.4386, "encoder_q-layer.2": 607.28, "encoder_q-layer.3": 662.6984, "encoder_q-layer.4": 617.4419, "encoder_q-layer.5": 630.6807, "encoder_q-layer.6": 449.3363, "encoder_q-layer.7": 390.9094, "encoder_q-layer.8": 178.693, "encoder_q-layer.9": 52.0982, "epoch": 0.15, "inbatch_neg_score": 0.2754, "inbatch_pos_score": 0.8516, "learning_rate": 4.711111111111111e-05, "loss": 4.0059, "norm_diff": 0.0854, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 711.0516, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2739, "query_norm": 1.4861, "queue_k_norm": 1.3987, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4063, "sent_len_1": 66.7179, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.625, "stdk": 0.0465, "stdq": 0.0445, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 15200 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9854, "doc_norm": 1.3932, "encoder_q-embeddings": 171.7647, "encoder_q-layer.0": 124.6395, "encoder_q-layer.1": 138.8629, "encoder_q-layer.10": 52.2233, "encoder_q-layer.11": 166.0545, "encoder_q-layer.2": 159.2583, "encoder_q-layer.3": 175.69, "encoder_q-layer.4": 186.0485, "encoder_q-layer.5": 159.1864, "encoder_q-layer.6": 139.8041, "encoder_q-layer.7": 111.3394, "encoder_q-layer.8": 73.7917, "encoder_q-layer.9": 46.0585, "epoch": 0.15, "inbatch_neg_score": 0.2563, "inbatch_pos_score": 0.8179, "learning_rate": 4.7055555555555555e-05, "loss": 3.9854, "norm_diff": 0.0727, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 208.2503, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2544, "query_norm": 1.4659, "queue_k_norm": 1.3987, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5796, "sent_len_1": 66.7591, "sent_max_len_0": 127.995, "sent_max_len_1": 187.8512, "stdk": 0.0462, "stdq": 0.0441, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 15300 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.9364, "doc_norm": 1.3946, "encoder_q-embeddings": 98.2659, "encoder_q-layer.0": 69.9116, "encoder_q-layer.1": 80.354, "encoder_q-layer.10": 47.3424, "encoder_q-layer.11": 135.4125, "encoder_q-layer.2": 89.3703, "encoder_q-layer.3": 93.3736, "encoder_q-layer.4": 94.203, "encoder_q-layer.5": 89.2558, "encoder_q-layer.6": 82.35, "encoder_q-layer.7": 81.5199, "encoder_q-layer.8": 60.5502, "encoder_q-layer.9": 41.2844, "epoch": 0.15, "inbatch_neg_score": 0.2431, "inbatch_pos_score": 0.8232, "learning_rate": 4.7e-05, "loss": 3.9364, "norm_diff": 0.0701, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 125.5042, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2417, "query_norm": 1.4647, "queue_k_norm": 1.3979, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.9601, "sent_len_1": 66.7331, "sent_max_len_0": 127.9975, "sent_max_len_1": 187.3825, "stdk": 0.0463, "stdq": 0.0439, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 15400 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.9364, "doc_norm": 1.3937, "encoder_q-embeddings": 68.9543, "encoder_q-layer.0": 48.3398, "encoder_q-layer.1": 53.7667, "encoder_q-layer.10": 50.7215, "encoder_q-layer.11": 144.3082, "encoder_q-layer.2": 62.3266, "encoder_q-layer.3": 66.7551, "encoder_q-layer.4": 66.5818, "encoder_q-layer.5": 59.5828, "encoder_q-layer.6": 64.1212, "encoder_q-layer.7": 58.2441, "encoder_q-layer.8": 52.4492, "encoder_q-layer.9": 42.9889, "epoch": 0.15, "inbatch_neg_score": 0.2185, "inbatch_pos_score": 0.7988, "learning_rate": 4.6944444444444446e-05, "loss": 3.9364, "norm_diff": 0.0749, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 102.3751, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2163, "query_norm": 1.4686, "queue_k_norm": 1.3948, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7234, "sent_len_1": 66.8204, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6175, "stdk": 0.0463, "stdq": 0.0441, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 15500 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.9782, "doc_norm": 1.3922, "encoder_q-embeddings": 68.5778, "encoder_q-layer.0": 46.9362, "encoder_q-layer.1": 53.1969, "encoder_q-layer.10": 47.8897, "encoder_q-layer.11": 151.0063, "encoder_q-layer.2": 62.5139, "encoder_q-layer.3": 66.0207, "encoder_q-layer.4": 64.5455, "encoder_q-layer.5": 70.6176, "encoder_q-layer.6": 81.4071, "encoder_q-layer.7": 79.381, "encoder_q-layer.8": 59.1474, "encoder_q-layer.9": 48.7727, "epoch": 0.15, "inbatch_neg_score": 0.2157, "inbatch_pos_score": 0.7817, "learning_rate": 4.6888888888888895e-05, "loss": 3.9782, "norm_diff": 0.0747, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 109.2311, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2133, "query_norm": 1.4669, "queue_k_norm": 1.3945, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5903, "sent_len_1": 66.8023, "sent_max_len_0": 128.0, "sent_max_len_1": 188.83, "stdk": 0.0464, "stdq": 0.0443, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 15600 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.9369, "doc_norm": 1.3895, "encoder_q-embeddings": 218.1293, "encoder_q-layer.0": 145.8685, "encoder_q-layer.1": 160.2072, "encoder_q-layer.10": 49.0334, "encoder_q-layer.11": 133.0958, "encoder_q-layer.2": 211.7158, "encoder_q-layer.3": 233.3526, "encoder_q-layer.4": 232.5226, "encoder_q-layer.5": 212.2959, "encoder_q-layer.6": 195.5407, "encoder_q-layer.7": 146.126, "encoder_q-layer.8": 75.904, "encoder_q-layer.9": 45.2196, "epoch": 0.15, "inbatch_neg_score": 0.1856, "inbatch_pos_score": 0.7651, "learning_rate": 4.683333333333334e-05, "loss": 3.9369, "norm_diff": 0.0785, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 254.6071, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1837, "query_norm": 1.468, "queue_k_norm": 1.3894, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6361, "sent_len_1": 66.8456, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1062, "stdk": 0.0464, "stdq": 0.0447, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 15700 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.9258, "doc_norm": 1.3828, "encoder_q-embeddings": 140.4813, "encoder_q-layer.0": 104.6733, "encoder_q-layer.1": 115.5573, "encoder_q-layer.10": 45.7633, "encoder_q-layer.11": 136.2687, "encoder_q-layer.2": 127.966, "encoder_q-layer.3": 127.3648, "encoder_q-layer.4": 134.2122, "encoder_q-layer.5": 122.7271, "encoder_q-layer.6": 115.5868, "encoder_q-layer.7": 94.8347, "encoder_q-layer.8": 62.2587, "encoder_q-layer.9": 41.3484, "epoch": 0.15, "inbatch_neg_score": 0.1835, "inbatch_pos_score": 0.7568, "learning_rate": 4.677777777777778e-05, "loss": 3.9258, "norm_diff": 0.0863, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 165.4756, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1801, "query_norm": 1.4691, "queue_k_norm": 1.3862, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4979, "sent_len_1": 66.8366, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.4963, "stdk": 0.0463, "stdq": 0.0441, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 15800 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.9362, "doc_norm": 1.3859, "encoder_q-embeddings": 125.9639, "encoder_q-layer.0": 86.0378, "encoder_q-layer.1": 95.0165, "encoder_q-layer.10": 44.7026, "encoder_q-layer.11": 134.9548, "encoder_q-layer.2": 106.1055, "encoder_q-layer.3": 115.7382, "encoder_q-layer.4": 129.0615, "encoder_q-layer.5": 112.1226, "encoder_q-layer.6": 119.2956, "encoder_q-layer.7": 102.3925, "encoder_q-layer.8": 60.9863, "encoder_q-layer.9": 42.4531, "epoch": 0.16, "inbatch_neg_score": 0.1573, "inbatch_pos_score": 0.7622, "learning_rate": 4.672222222222222e-05, "loss": 3.9362, "norm_diff": 0.0759, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 150.3743, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1553, "query_norm": 1.4618, "queue_k_norm": 1.3842, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7016, "sent_len_1": 66.8074, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5163, "stdk": 0.0465, "stdq": 0.0452, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 15900 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.935, "doc_norm": 1.373, "encoder_q-embeddings": 118.8427, "encoder_q-layer.0": 86.9172, "encoder_q-layer.1": 93.124, "encoder_q-layer.10": 46.4803, "encoder_q-layer.11": 134.8716, "encoder_q-layer.2": 107.4415, "encoder_q-layer.3": 110.7665, "encoder_q-layer.4": 107.1925, "encoder_q-layer.5": 101.547, "encoder_q-layer.6": 115.4582, "encoder_q-layer.7": 102.5843, "encoder_q-layer.8": 71.0576, "encoder_q-layer.9": 42.9454, "epoch": 0.16, "inbatch_neg_score": 0.1515, "inbatch_pos_score": 0.7046, "learning_rate": 4.666666666666667e-05, "loss": 3.935, "norm_diff": 0.0321, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 148.2303, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1495, "query_norm": 1.405, "queue_k_norm": 1.3811, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.654, "sent_len_1": 66.7306, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0875, "stdk": 0.0462, "stdq": 0.043, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16000 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.9482, "doc_norm": 1.3703, "encoder_q-embeddings": 117.0102, "encoder_q-layer.0": 75.8594, "encoder_q-layer.1": 82.1882, "encoder_q-layer.10": 47.0544, "encoder_q-layer.11": 115.2047, "encoder_q-layer.2": 93.3069, "encoder_q-layer.3": 97.4153, "encoder_q-layer.4": 98.0169, "encoder_q-layer.5": 95.2284, "encoder_q-layer.6": 86.9421, "encoder_q-layer.7": 83.5667, "encoder_q-layer.8": 66.0427, "encoder_q-layer.9": 40.2911, "epoch": 0.16, "inbatch_neg_score": 0.1112, "inbatch_pos_score": 0.6914, "learning_rate": 4.6611111111111114e-05, "loss": 3.9482, "norm_diff": 0.0478, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 130.9325, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1108, "query_norm": 1.4182, "queue_k_norm": 1.3786, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5667, "sent_len_1": 66.9079, "sent_max_len_0": 127.9887, "sent_max_len_1": 190.3638, "stdk": 0.0462, "stdq": 0.0443, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16100 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.8997, "doc_norm": 1.375, "encoder_q-embeddings": 486.3974, "encoder_q-layer.0": 349.0056, "encoder_q-layer.1": 404.3986, "encoder_q-layer.10": 41.5997, "encoder_q-layer.11": 119.1728, "encoder_q-layer.2": 465.0899, "encoder_q-layer.3": 510.5376, "encoder_q-layer.4": 586.5212, "encoder_q-layer.5": 598.0455, "encoder_q-layer.6": 639.2706, "encoder_q-layer.7": 659.7355, "encoder_q-layer.8": 313.7819, "encoder_q-layer.9": 47.4314, "epoch": 0.16, "inbatch_neg_score": 0.1029, "inbatch_pos_score": 0.7026, "learning_rate": 4.6555555555555556e-05, "loss": 3.8997, "norm_diff": 0.0486, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 659.3569, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.101, "query_norm": 1.4236, "queue_k_norm": 1.3756, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7566, "sent_len_1": 66.9884, "sent_max_len_0": 128.0, "sent_max_len_1": 192.7875, "stdk": 0.0465, "stdq": 0.0447, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16200 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.894, "doc_norm": 1.3715, "encoder_q-embeddings": 114.9876, "encoder_q-layer.0": 75.0975, "encoder_q-layer.1": 86.4187, "encoder_q-layer.10": 54.0708, "encoder_q-layer.11": 126.8935, "encoder_q-layer.2": 90.9847, "encoder_q-layer.3": 92.3021, "encoder_q-layer.4": 104.3983, "encoder_q-layer.5": 109.1251, "encoder_q-layer.6": 118.9829, "encoder_q-layer.7": 122.2081, "encoder_q-layer.8": 76.253, "encoder_q-layer.9": 44.1866, "epoch": 0.16, "inbatch_neg_score": 0.1649, "inbatch_pos_score": 0.7651, "learning_rate": 4.6500000000000005e-05, "loss": 3.894, "norm_diff": 0.0846, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 145.2216, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1643, "query_norm": 1.4561, "queue_k_norm": 1.3708, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7651, "sent_len_1": 66.7364, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5112, "stdk": 0.0465, "stdq": 0.0449, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16300 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.8899, "doc_norm": 1.3694, "encoder_q-embeddings": 105.9879, "encoder_q-layer.0": 69.6776, "encoder_q-layer.1": 73.0259, "encoder_q-layer.10": 51.2931, "encoder_q-layer.11": 134.4872, "encoder_q-layer.2": 82.3298, "encoder_q-layer.3": 81.2358, "encoder_q-layer.4": 80.8226, "encoder_q-layer.5": 71.8717, "encoder_q-layer.6": 73.1462, "encoder_q-layer.7": 71.4123, "encoder_q-layer.8": 63.7232, "encoder_q-layer.9": 46.74, "epoch": 0.16, "inbatch_neg_score": 0.1491, "inbatch_pos_score": 0.7158, "learning_rate": 4.644444444444445e-05, "loss": 3.8899, "norm_diff": 0.0278, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 120.7642, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1483, "query_norm": 1.3954, "queue_k_norm": 1.369, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7221, "sent_len_1": 66.7902, "sent_max_len_0": 128.0, "sent_max_len_1": 186.7875, "stdk": 0.0465, "stdq": 0.0435, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16400 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.8937, "doc_norm": 1.3688, "encoder_q-embeddings": 195.8061, "encoder_q-layer.0": 137.6205, "encoder_q-layer.1": 147.7545, "encoder_q-layer.10": 47.7688, "encoder_q-layer.11": 130.1084, "encoder_q-layer.2": 184.0878, "encoder_q-layer.3": 215.1972, "encoder_q-layer.4": 162.7357, "encoder_q-layer.5": 132.765, "encoder_q-layer.6": 138.843, "encoder_q-layer.7": 121.4215, "encoder_q-layer.8": 91.5112, "encoder_q-layer.9": 42.571, "epoch": 0.16, "inbatch_neg_score": 0.1338, "inbatch_pos_score": 0.7217, "learning_rate": 4.638888888888889e-05, "loss": 3.8937, "norm_diff": 0.029, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 217.0789, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1328, "query_norm": 1.3968, "queue_k_norm": 1.3675, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.743, "sent_len_1": 66.803, "sent_max_len_0": 127.9963, "sent_max_len_1": 192.1675, "stdk": 0.0466, "stdq": 0.044, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16500 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9006, "doc_norm": 1.3652, "encoder_q-embeddings": 152.8425, "encoder_q-layer.0": 107.3593, "encoder_q-layer.1": 118.4587, "encoder_q-layer.10": 45.661, "encoder_q-layer.11": 127.1703, "encoder_q-layer.2": 148.9623, "encoder_q-layer.3": 162.9388, "encoder_q-layer.4": 162.3302, "encoder_q-layer.5": 173.0497, "encoder_q-layer.6": 162.347, "encoder_q-layer.7": 146.2151, "encoder_q-layer.8": 109.3104, "encoder_q-layer.9": 47.2476, "epoch": 0.16, "inbatch_neg_score": 0.1038, "inbatch_pos_score": 0.6738, "learning_rate": 4.633333333333333e-05, "loss": 3.9006, "norm_diff": 0.0296, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 196.3661, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1024, "query_norm": 1.3926, "queue_k_norm": 1.3626, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.788, "sent_len_1": 66.628, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6225, "stdk": 0.0466, "stdq": 0.0443, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16600 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.9359, "doc_norm": 1.3619, "encoder_q-embeddings": 295.8063, "encoder_q-layer.0": 191.1639, "encoder_q-layer.1": 210.3992, "encoder_q-layer.10": 86.8317, "encoder_q-layer.11": 251.1226, "encoder_q-layer.2": 241.4262, "encoder_q-layer.3": 253.4207, "encoder_q-layer.4": 251.5928, "encoder_q-layer.5": 254.0243, "encoder_q-layer.6": 241.0334, "encoder_q-layer.7": 190.1155, "encoder_q-layer.8": 141.4251, "encoder_q-layer.9": 80.1912, "epoch": 0.16, "inbatch_neg_score": 0.1743, "inbatch_pos_score": 0.7593, "learning_rate": 4.627777777777778e-05, "loss": 3.9359, "norm_diff": 0.0659, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 327.2202, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1713, "query_norm": 1.4278, "queue_k_norm": 1.3622, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5265, "sent_len_1": 66.8326, "sent_max_len_0": 127.9963, "sent_max_len_1": 188.69, "stdk": 0.0465, "stdq": 0.0441, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16700 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.9239, "doc_norm": 1.3537, "encoder_q-embeddings": 209.5271, "encoder_q-layer.0": 124.7035, "encoder_q-layer.1": 134.5946, "encoder_q-layer.10": 99.6314, "encoder_q-layer.11": 238.9496, "encoder_q-layer.2": 161.2883, "encoder_q-layer.3": 166.0681, "encoder_q-layer.4": 174.1743, "encoder_q-layer.5": 174.8566, "encoder_q-layer.6": 190.8604, "encoder_q-layer.7": 188.6429, "encoder_q-layer.8": 141.7034, "encoder_q-layer.9": 84.8372, "epoch": 0.16, "inbatch_neg_score": 0.1375, "inbatch_pos_score": 0.7119, "learning_rate": 4.6222222222222224e-05, "loss": 3.9239, "norm_diff": 0.0523, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 252.4816, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1372, "query_norm": 1.4059, "queue_k_norm": 1.3607, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6799, "sent_len_1": 66.7118, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1337, "stdk": 0.0463, "stdq": 0.0441, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16800 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.9201, "doc_norm": 1.3543, "encoder_q-embeddings": 533.924, "encoder_q-layer.0": 335.3622, "encoder_q-layer.1": 353.9277, "encoder_q-layer.10": 89.3871, "encoder_q-layer.11": 235.4401, "encoder_q-layer.2": 401.0655, "encoder_q-layer.3": 433.1139, "encoder_q-layer.4": 450.3177, "encoder_q-layer.5": 371.7997, "encoder_q-layer.6": 292.2145, "encoder_q-layer.7": 232.5093, "encoder_q-layer.8": 158.0044, "encoder_q-layer.9": 85.6728, "epoch": 0.16, "inbatch_neg_score": 0.1234, "inbatch_pos_score": 0.6929, "learning_rate": 4.6166666666666666e-05, "loss": 3.9201, "norm_diff": 0.038, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 516.3858, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.123, "query_norm": 1.3923, "queue_k_norm": 1.3589, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.9137, "sent_len_1": 66.742, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1738, "stdk": 0.0464, "stdq": 0.0439, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16900 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.9414, "doc_norm": 1.357, "encoder_q-embeddings": 428.0726, "encoder_q-layer.0": 324.3796, "encoder_q-layer.1": 329.0556, "encoder_q-layer.10": 89.7308, "encoder_q-layer.11": 213.8756, "encoder_q-layer.2": 382.4053, "encoder_q-layer.3": 399.1373, "encoder_q-layer.4": 387.3417, "encoder_q-layer.5": 373.3512, "encoder_q-layer.6": 388.229, "encoder_q-layer.7": 355.2121, "encoder_q-layer.8": 266.0051, "encoder_q-layer.9": 98.4887, "epoch": 0.17, "inbatch_neg_score": 0.1027, "inbatch_pos_score": 0.6948, "learning_rate": 4.6111111111111115e-05, "loss": 3.9414, "norm_diff": 0.075, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 490.8928, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1011, "query_norm": 1.432, "queue_k_norm": 1.3548, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7068, "sent_len_1": 66.8056, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.9137, "stdk": 0.0465, "stdq": 0.0457, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 17000 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.9178, "doc_norm": 1.3585, "encoder_q-embeddings": 576.1531, "encoder_q-layer.0": 413.6543, "encoder_q-layer.1": 464.8547, "encoder_q-layer.10": 98.5647, "encoder_q-layer.11": 230.4515, "encoder_q-layer.2": 551.2227, "encoder_q-layer.3": 612.2803, "encoder_q-layer.4": 607.754, "encoder_q-layer.5": 638.2733, "encoder_q-layer.6": 595.5005, "encoder_q-layer.7": 794.442, "encoder_q-layer.8": 369.146, "encoder_q-layer.9": 118.0525, "epoch": 0.17, "inbatch_neg_score": 0.1514, "inbatch_pos_score": 0.7407, "learning_rate": 4.605555555555556e-05, "loss": 3.9178, "norm_diff": 0.0716, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 759.128, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.151, "query_norm": 1.43, "queue_k_norm": 1.3543, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.3773, "sent_len_1": 66.8449, "sent_max_len_0": 127.9875, "sent_max_len_1": 189.9325, "stdk": 0.0466, "stdq": 0.0456, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 17100 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.9092, "doc_norm": 1.3552, "encoder_q-embeddings": 151.8722, "encoder_q-layer.0": 114.4169, "encoder_q-layer.1": 126.7002, "encoder_q-layer.10": 87.8569, "encoder_q-layer.11": 234.9289, "encoder_q-layer.2": 98.372, "encoder_q-layer.3": 91.177, "encoder_q-layer.4": 95.3304, "encoder_q-layer.5": 90.4303, "encoder_q-layer.6": 93.6449, "encoder_q-layer.7": 96.1152, "encoder_q-layer.8": 102.3421, "encoder_q-layer.9": 84.0697, "epoch": 0.17, "inbatch_neg_score": 0.155, "inbatch_pos_score": 0.7402, "learning_rate": 4.600000000000001e-05, "loss": 3.9092, "norm_diff": 0.0583, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 185.9777, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1544, "query_norm": 1.4135, "queue_k_norm": 1.3512, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6788, "sent_len_1": 66.9067, "sent_max_len_0": 128.0, "sent_max_len_1": 190.275, "stdk": 0.0466, "stdq": 0.0451, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 17200 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.945, "doc_norm": 1.3496, "encoder_q-embeddings": 379.5452, "encoder_q-layer.0": 273.6328, "encoder_q-layer.1": 285.9629, "encoder_q-layer.10": 92.8731, "encoder_q-layer.11": 247.0635, "encoder_q-layer.2": 309.6139, "encoder_q-layer.3": 298.5076, "encoder_q-layer.4": 337.2933, "encoder_q-layer.5": 331.5935, "encoder_q-layer.6": 281.8842, "encoder_q-layer.7": 263.5883, "encoder_q-layer.8": 156.0638, "encoder_q-layer.9": 87.9287, "epoch": 0.17, "inbatch_neg_score": 0.1643, "inbatch_pos_score": 0.7422, "learning_rate": 4.594444444444444e-05, "loss": 3.945, "norm_diff": 0.0562, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 410.225, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1647, "query_norm": 1.4058, "queue_k_norm": 1.3513, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5882, "sent_len_1": 66.6038, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3088, "stdk": 0.0464, "stdq": 0.0446, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 17300 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.9301, "doc_norm": 1.3507, "encoder_q-embeddings": 635.6841, "encoder_q-layer.0": 471.3374, "encoder_q-layer.1": 462.5242, "encoder_q-layer.10": 102.5668, "encoder_q-layer.11": 262.6571, "encoder_q-layer.2": 536.8969, "encoder_q-layer.3": 504.3932, "encoder_q-layer.4": 565.4521, "encoder_q-layer.5": 503.9261, "encoder_q-layer.6": 463.4861, "encoder_q-layer.7": 411.4541, "encoder_q-layer.8": 252.7184, "encoder_q-layer.9": 119.5555, "epoch": 0.17, "inbatch_neg_score": 0.1499, "inbatch_pos_score": 0.7451, "learning_rate": 4.588888888888889e-05, "loss": 3.9301, "norm_diff": 0.0714, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 658.3881, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1494, "query_norm": 1.4221, "queue_k_norm": 1.352, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7066, "sent_len_1": 66.6564, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4787, "stdk": 0.0464, "stdq": 0.0454, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 17400 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.9136, "doc_norm": 1.3544, "encoder_q-embeddings": 246.2215, "encoder_q-layer.0": 166.2929, "encoder_q-layer.1": 176.1354, "encoder_q-layer.10": 115.9626, "encoder_q-layer.11": 279.6915, "encoder_q-layer.2": 205.8633, "encoder_q-layer.3": 225.5159, "encoder_q-layer.4": 270.0342, "encoder_q-layer.5": 260.4101, "encoder_q-layer.6": 273.8176, "encoder_q-layer.7": 231.253, "encoder_q-layer.8": 133.1767, "encoder_q-layer.9": 89.0363, "epoch": 0.17, "inbatch_neg_score": 0.1562, "inbatch_pos_score": 0.7256, "learning_rate": 4.5833333333333334e-05, "loss": 3.9136, "norm_diff": 0.0189, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 315.8603, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1554, "query_norm": 1.3728, "queue_k_norm": 1.3515, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7097, "sent_len_1": 66.7676, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5213, "stdk": 0.0466, "stdq": 0.0441, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 17500 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.9476, "doc_norm": 1.3525, "encoder_q-embeddings": 148.4117, "encoder_q-layer.0": 104.6336, "encoder_q-layer.1": 113.6051, "encoder_q-layer.10": 91.1598, "encoder_q-layer.11": 249.0083, "encoder_q-layer.2": 128.8011, "encoder_q-layer.3": 131.8181, "encoder_q-layer.4": 143.8814, "encoder_q-layer.5": 123.8587, "encoder_q-layer.6": 126.8292, "encoder_q-layer.7": 117.8842, "encoder_q-layer.8": 101.2336, "encoder_q-layer.9": 82.3403, "epoch": 0.17, "inbatch_neg_score": 0.1596, "inbatch_pos_score": 0.7607, "learning_rate": 4.577777777777778e-05, "loss": 3.9476, "norm_diff": 0.0556, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 202.473, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1595, "query_norm": 1.4081, "queue_k_norm": 1.3506, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5108, "sent_len_1": 66.6462, "sent_max_len_0": 128.0, "sent_max_len_1": 188.36, "stdk": 0.0465, "stdq": 0.0454, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 17600 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.9554, "doc_norm": 1.3506, "encoder_q-embeddings": 386.6497, "encoder_q-layer.0": 249.4272, "encoder_q-layer.1": 288.4667, "encoder_q-layer.10": 88.796, "encoder_q-layer.11": 244.155, "encoder_q-layer.2": 302.3511, "encoder_q-layer.3": 299.1961, "encoder_q-layer.4": 314.6632, "encoder_q-layer.5": 303.9231, "encoder_q-layer.6": 316.2852, "encoder_q-layer.7": 270.1153, "encoder_q-layer.8": 170.3074, "encoder_q-layer.9": 84.8112, "epoch": 0.17, "inbatch_neg_score": 0.1462, "inbatch_pos_score": 0.7378, "learning_rate": 4.572222222222222e-05, "loss": 3.9554, "norm_diff": 0.0378, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 407.6739, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1448, "query_norm": 1.3877, "queue_k_norm": 1.3502, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6051, "sent_len_1": 66.7142, "sent_max_len_0": 128.0, "sent_max_len_1": 191.635, "stdk": 0.0464, "stdq": 0.0452, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 17700 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.9353, "doc_norm": 1.3562, "encoder_q-embeddings": 123.8398, "encoder_q-layer.0": 87.9011, "encoder_q-layer.1": 96.9939, "encoder_q-layer.10": 93.4801, "encoder_q-layer.11": 260.4778, "encoder_q-layer.2": 111.3194, "encoder_q-layer.3": 113.9637, "encoder_q-layer.4": 115.3972, "encoder_q-layer.5": 117.8505, "encoder_q-layer.6": 114.2771, "encoder_q-layer.7": 109.7221, "encoder_q-layer.8": 100.6786, "encoder_q-layer.9": 88.9882, "epoch": 0.17, "inbatch_neg_score": 0.1438, "inbatch_pos_score": 0.7246, "learning_rate": 4.566666666666667e-05, "loss": 3.9353, "norm_diff": 0.0289, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 185.0428, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1417, "query_norm": 1.3314, "queue_k_norm": 1.3508, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6041, "sent_len_1": 66.9653, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6312, "stdk": 0.0467, "stdq": 0.0432, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 17800 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.8968, "doc_norm": 1.3526, "encoder_q-embeddings": 346.3051, "encoder_q-layer.0": 237.0117, "encoder_q-layer.1": 271.722, "encoder_q-layer.10": 90.2617, "encoder_q-layer.11": 228.6057, "encoder_q-layer.2": 312.6709, "encoder_q-layer.3": 310.5305, "encoder_q-layer.4": 311.5999, "encoder_q-layer.5": 315.3223, "encoder_q-layer.6": 275.4089, "encoder_q-layer.7": 218.6979, "encoder_q-layer.8": 144.6746, "encoder_q-layer.9": 88.5533, "epoch": 0.17, "inbatch_neg_score": 0.1358, "inbatch_pos_score": 0.707, "learning_rate": 4.561111111111112e-05, "loss": 3.8968, "norm_diff": 0.0118, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 379.4541, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1345, "query_norm": 1.3496, "queue_k_norm": 1.3524, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.8821, "sent_len_1": 66.6959, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1213, "stdk": 0.0466, "stdq": 0.0442, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 17900 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.9078, "doc_norm": 1.3485, "encoder_q-embeddings": 243.1019, "encoder_q-layer.0": 172.1128, "encoder_q-layer.1": 192.7829, "encoder_q-layer.10": 99.8203, "encoder_q-layer.11": 276.7333, "encoder_q-layer.2": 213.3159, "encoder_q-layer.3": 220.9751, "encoder_q-layer.4": 211.2242, "encoder_q-layer.5": 204.6863, "encoder_q-layer.6": 202.9141, "encoder_q-layer.7": 164.5236, "encoder_q-layer.8": 127.2251, "encoder_q-layer.9": 92.7859, "epoch": 0.18, "inbatch_neg_score": 0.1492, "inbatch_pos_score": 0.709, "learning_rate": 4.555555555555556e-05, "loss": 3.9078, "norm_diff": 0.0171, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 290.0621, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1478, "query_norm": 1.3422, "queue_k_norm": 1.3516, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7599, "sent_len_1": 66.804, "sent_max_len_0": 127.9925, "sent_max_len_1": 191.975, "stdk": 0.0464, "stdq": 0.0434, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 18000 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.9029, "doc_norm": 1.3513, "encoder_q-embeddings": 579.4786, "encoder_q-layer.0": 431.342, "encoder_q-layer.1": 452.2222, "encoder_q-layer.10": 94.5966, "encoder_q-layer.11": 247.9829, "encoder_q-layer.2": 381.3724, "encoder_q-layer.3": 389.9299, "encoder_q-layer.4": 386.4562, "encoder_q-layer.5": 394.2091, "encoder_q-layer.6": 298.8677, "encoder_q-layer.7": 235.827, "encoder_q-layer.8": 133.7776, "encoder_q-layer.9": 82.7383, "epoch": 0.18, "inbatch_neg_score": 0.15, "inbatch_pos_score": 0.7529, "learning_rate": 4.55e-05, "loss": 3.9029, "norm_diff": 0.0237, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 537.174, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.15, "query_norm": 1.3728, "queue_k_norm": 1.3515, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5571, "sent_len_1": 66.9846, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4112, "stdk": 0.0465, "stdq": 0.0449, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 18100 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.9082, "doc_norm": 1.3505, "encoder_q-embeddings": 161.9716, "encoder_q-layer.0": 114.5806, "encoder_q-layer.1": 121.3009, "encoder_q-layer.10": 91.0913, "encoder_q-layer.11": 245.4089, "encoder_q-layer.2": 126.7238, "encoder_q-layer.3": 130.2252, "encoder_q-layer.4": 127.6933, "encoder_q-layer.5": 122.6257, "encoder_q-layer.6": 126.414, "encoder_q-layer.7": 115.4995, "encoder_q-layer.8": 98.2336, "encoder_q-layer.9": 82.6969, "epoch": 0.18, "inbatch_neg_score": 0.1314, "inbatch_pos_score": 0.7026, "learning_rate": 4.5444444444444444e-05, "loss": 3.9082, "norm_diff": 0.0195, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 201.4797, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1306, "query_norm": 1.331, "queue_k_norm": 1.351, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6398, "sent_len_1": 66.7794, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6337, "stdk": 0.0465, "stdq": 0.0436, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 18200 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.922, "doc_norm": 1.3482, "encoder_q-embeddings": 160.6007, "encoder_q-layer.0": 107.7947, "encoder_q-layer.1": 121.2719, "encoder_q-layer.10": 100.2906, "encoder_q-layer.11": 262.0791, "encoder_q-layer.2": 135.5783, "encoder_q-layer.3": 146.1541, "encoder_q-layer.4": 148.2761, "encoder_q-layer.5": 146.7165, "encoder_q-layer.6": 144.2052, "encoder_q-layer.7": 147.9992, "encoder_q-layer.8": 112.7344, "encoder_q-layer.9": 87.0317, "epoch": 0.18, "inbatch_neg_score": 0.1289, "inbatch_pos_score": 0.7139, "learning_rate": 4.538888888888889e-05, "loss": 3.922, "norm_diff": 0.0294, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 219.3931, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.129, "query_norm": 1.3755, "queue_k_norm": 1.3522, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5169, "sent_len_1": 66.5345, "sent_max_len_0": 127.9788, "sent_max_len_1": 189.0875, "stdk": 0.0465, "stdq": 0.0445, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 18300 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.9178, "doc_norm": 1.3539, "encoder_q-embeddings": 359.2426, "encoder_q-layer.0": 253.3728, "encoder_q-layer.1": 291.4139, "encoder_q-layer.10": 94.171, "encoder_q-layer.11": 257.3727, "encoder_q-layer.2": 335.1048, "encoder_q-layer.3": 344.0554, "encoder_q-layer.4": 305.3002, "encoder_q-layer.5": 304.4402, "encoder_q-layer.6": 266.9291, "encoder_q-layer.7": 197.4305, "encoder_q-layer.8": 168.9363, "encoder_q-layer.9": 121.9087, "epoch": 0.18, "inbatch_neg_score": 0.1307, "inbatch_pos_score": 0.7104, "learning_rate": 4.5333333333333335e-05, "loss": 3.9178, "norm_diff": 0.011, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 403.7289, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1299, "query_norm": 1.3454, "queue_k_norm": 1.3514, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5867, "sent_len_1": 66.7833, "sent_max_len_0": 127.995, "sent_max_len_1": 189.5012, "stdk": 0.0467, "stdq": 0.0439, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 18400 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.8948, "doc_norm": 1.3485, "encoder_q-embeddings": 191.1251, "encoder_q-layer.0": 133.8897, "encoder_q-layer.1": 141.0235, "encoder_q-layer.10": 89.6171, "encoder_q-layer.11": 227.264, "encoder_q-layer.2": 168.3449, "encoder_q-layer.3": 173.1342, "encoder_q-layer.4": 205.4, "encoder_q-layer.5": 195.0257, "encoder_q-layer.6": 177.1876, "encoder_q-layer.7": 165.0112, "encoder_q-layer.8": 124.5874, "encoder_q-layer.9": 97.0163, "epoch": 0.18, "inbatch_neg_score": 0.1243, "inbatch_pos_score": 0.731, "learning_rate": 4.527777777777778e-05, "loss": 3.8948, "norm_diff": 0.0256, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 247.9117, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1237, "query_norm": 1.374, "queue_k_norm": 1.3535, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7092, "sent_len_1": 66.8868, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5637, "stdk": 0.0465, "stdq": 0.0452, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 18500 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.8952, "doc_norm": 1.3562, "encoder_q-embeddings": 157.2226, "encoder_q-layer.0": 104.6582, "encoder_q-layer.1": 105.8167, "encoder_q-layer.10": 86.7936, "encoder_q-layer.11": 225.9356, "encoder_q-layer.2": 128.0221, "encoder_q-layer.3": 137.9705, "encoder_q-layer.4": 147.8537, "encoder_q-layer.5": 155.8734, "encoder_q-layer.6": 180.8462, "encoder_q-layer.7": 169.9611, "encoder_q-layer.8": 154.7385, "encoder_q-layer.9": 89.9773, "epoch": 0.18, "inbatch_neg_score": 0.1253, "inbatch_pos_score": 0.7324, "learning_rate": 4.522222222222223e-05, "loss": 3.8952, "norm_diff": 0.0139, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 222.945, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1238, "query_norm": 1.3595, "queue_k_norm": 1.353, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5213, "sent_len_1": 66.7242, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4375, "stdk": 0.0468, "stdq": 0.0445, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 18600 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.9189, "doc_norm": 1.3462, "encoder_q-embeddings": 255.6388, "encoder_q-layer.0": 171.6222, "encoder_q-layer.1": 179.1928, "encoder_q-layer.10": 189.9466, "encoder_q-layer.11": 430.8868, "encoder_q-layer.2": 205.9063, "encoder_q-layer.3": 227.282, "encoder_q-layer.4": 245.6713, "encoder_q-layer.5": 250.218, "encoder_q-layer.6": 235.432, "encoder_q-layer.7": 242.5287, "encoder_q-layer.8": 226.5556, "encoder_q-layer.9": 176.558, "epoch": 0.18, "inbatch_neg_score": 0.122, "inbatch_pos_score": 0.7344, "learning_rate": 4.516666666666667e-05, "loss": 3.9189, "norm_diff": 0.0447, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 366.0876, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1221, "query_norm": 1.3909, "queue_k_norm": 1.352, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5417, "sent_len_1": 66.6359, "sent_max_len_0": 128.0, "sent_max_len_1": 190.795, "stdk": 0.0465, "stdq": 0.0457, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 18700 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.9055, "doc_norm": 1.3534, "encoder_q-embeddings": 235.7673, "encoder_q-layer.0": 158.7097, "encoder_q-layer.1": 167.946, "encoder_q-layer.10": 195.5995, "encoder_q-layer.11": 497.9937, "encoder_q-layer.2": 183.6299, "encoder_q-layer.3": 204.2178, "encoder_q-layer.4": 213.1703, "encoder_q-layer.5": 191.4971, "encoder_q-layer.6": 201.2991, "encoder_q-layer.7": 228.4733, "encoder_q-layer.8": 242.4537, "encoder_q-layer.9": 192.9163, "epoch": 0.18, "inbatch_neg_score": 0.1291, "inbatch_pos_score": 0.7153, "learning_rate": 4.511111111111112e-05, "loss": 3.9055, "norm_diff": 0.0282, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 365.1017, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1292, "query_norm": 1.3777, "queue_k_norm": 1.353, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.486, "sent_len_1": 66.7381, "sent_max_len_0": 127.9912, "sent_max_len_1": 189.7912, "stdk": 0.0468, "stdq": 0.045, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18800 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.886, "doc_norm": 1.3557, "encoder_q-embeddings": 370.9135, "encoder_q-layer.0": 274.9819, "encoder_q-layer.1": 293.3445, "encoder_q-layer.10": 221.3543, "encoder_q-layer.11": 480.5896, "encoder_q-layer.2": 346.8774, "encoder_q-layer.3": 352.5345, "encoder_q-layer.4": 367.6356, "encoder_q-layer.5": 355.3308, "encoder_q-layer.6": 314.5123, "encoder_q-layer.7": 262.0268, "encoder_q-layer.8": 231.4427, "encoder_q-layer.9": 177.9165, "epoch": 0.18, "inbatch_neg_score": 0.1385, "inbatch_pos_score": 0.7183, "learning_rate": 4.5055555555555554e-05, "loss": 3.886, "norm_diff": 0.0278, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 481.0864, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1382, "query_norm": 1.3426, "queue_k_norm": 1.3542, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4617, "sent_len_1": 67.0869, "sent_max_len_0": 127.995, "sent_max_len_1": 191.8038, "stdk": 0.0469, "stdq": 0.044, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18900 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.9052, "doc_norm": 1.3598, "encoder_q-embeddings": 326.2983, "encoder_q-layer.0": 215.6209, "encoder_q-layer.1": 249.1362, "encoder_q-layer.10": 181.2832, "encoder_q-layer.11": 472.5775, "encoder_q-layer.2": 308.9732, "encoder_q-layer.3": 339.3229, "encoder_q-layer.4": 355.8079, "encoder_q-layer.5": 392.1414, "encoder_q-layer.6": 321.9666, "encoder_q-layer.7": 240.4998, "encoder_q-layer.8": 224.1311, "encoder_q-layer.9": 168.7135, "epoch": 0.19, "inbatch_neg_score": 0.1454, "inbatch_pos_score": 0.729, "learning_rate": 4.5e-05, "loss": 3.9052, "norm_diff": 0.013, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 449.9391, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1455, "query_norm": 1.3552, "queue_k_norm": 1.3545, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7098, "sent_len_1": 66.8244, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.5312, "stdk": 0.047, "stdq": 0.0442, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 19000 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.9009, "doc_norm": 1.3495, "encoder_q-embeddings": 212.0729, "encoder_q-layer.0": 150.8517, "encoder_q-layer.1": 171.279, "encoder_q-layer.10": 193.8923, "encoder_q-layer.11": 526.906, "encoder_q-layer.2": 192.1191, "encoder_q-layer.3": 192.0621, "encoder_q-layer.4": 194.2249, "encoder_q-layer.5": 192.0197, "encoder_q-layer.6": 211.5083, "encoder_q-layer.7": 194.9301, "encoder_q-layer.8": 193.4064, "encoder_q-layer.9": 166.512, "epoch": 0.19, "inbatch_neg_score": 0.1442, "inbatch_pos_score": 0.7227, "learning_rate": 4.4944444444444445e-05, "loss": 3.9009, "norm_diff": 0.0258, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 356.0635, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1431, "query_norm": 1.3267, "queue_k_norm": 1.354, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6042, "sent_len_1": 66.6853, "sent_max_len_0": 127.99, "sent_max_len_1": 188.3775, "stdk": 0.0467, "stdq": 0.0435, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 19100 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.8907, "doc_norm": 1.3556, "encoder_q-embeddings": 565.5508, "encoder_q-layer.0": 398.1466, "encoder_q-layer.1": 449.76, "encoder_q-layer.10": 190.682, "encoder_q-layer.11": 489.582, "encoder_q-layer.2": 508.651, "encoder_q-layer.3": 488.5508, "encoder_q-layer.4": 364.3371, "encoder_q-layer.5": 363.5547, "encoder_q-layer.6": 360.9924, "encoder_q-layer.7": 307.1757, "encoder_q-layer.8": 257.8992, "encoder_q-layer.9": 188.4021, "epoch": 0.19, "inbatch_neg_score": 0.1438, "inbatch_pos_score": 0.7197, "learning_rate": 4.4888888888888894e-05, "loss": 3.8907, "norm_diff": 0.0207, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 626.4886, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1439, "query_norm": 1.335, "queue_k_norm": 1.3534, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6645, "sent_len_1": 66.6733, "sent_max_len_0": 127.9925, "sent_max_len_1": 190.3388, "stdk": 0.0469, "stdq": 0.0441, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 19200 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.8795, "doc_norm": 1.3566, "encoder_q-embeddings": 1081.5887, "encoder_q-layer.0": 905.7245, "encoder_q-layer.1": 839.7098, "encoder_q-layer.10": 176.6451, "encoder_q-layer.11": 438.6918, "encoder_q-layer.2": 975.4826, "encoder_q-layer.3": 948.4104, "encoder_q-layer.4": 1069.0829, "encoder_q-layer.5": 1135.6642, "encoder_q-layer.6": 1095.9291, "encoder_q-layer.7": 743.1978, "encoder_q-layer.8": 458.3871, "encoder_q-layer.9": 205.4919, "epoch": 0.19, "inbatch_neg_score": 0.1416, "inbatch_pos_score": 0.7549, "learning_rate": 4.483333333333333e-05, "loss": 3.8795, "norm_diff": 0.0195, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1259.8105, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1422, "query_norm": 1.3518, "queue_k_norm": 1.3544, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6518, "sent_len_1": 66.8123, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.5075, "stdk": 0.0469, "stdq": 0.0445, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 19300 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.8876, "doc_norm": 1.356, "encoder_q-embeddings": 289.2652, "encoder_q-layer.0": 188.6183, "encoder_q-layer.1": 201.2085, "encoder_q-layer.10": 178.4011, "encoder_q-layer.11": 475.1307, "encoder_q-layer.2": 234.3646, "encoder_q-layer.3": 252.0612, "encoder_q-layer.4": 259.0883, "encoder_q-layer.5": 260.9365, "encoder_q-layer.6": 267.3643, "encoder_q-layer.7": 267.6208, "encoder_q-layer.8": 229.1301, "encoder_q-layer.9": 174.2598, "epoch": 0.19, "inbatch_neg_score": 0.1432, "inbatch_pos_score": 0.7314, "learning_rate": 4.477777777777778e-05, "loss": 3.8876, "norm_diff": 0.0313, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 396.3295, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1425, "query_norm": 1.3248, "queue_k_norm": 1.3549, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4373, "sent_len_1": 66.6634, "sent_max_len_0": 127.9988, "sent_max_len_1": 188.095, "stdk": 0.0469, "stdq": 0.0434, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 19400 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.8988, "doc_norm": 1.3515, "encoder_q-embeddings": 651.3977, "encoder_q-layer.0": 440.6877, "encoder_q-layer.1": 482.8671, "encoder_q-layer.10": 174.6608, "encoder_q-layer.11": 465.9007, "encoder_q-layer.2": 558.5131, "encoder_q-layer.3": 577.6807, "encoder_q-layer.4": 619.4542, "encoder_q-layer.5": 665.3205, "encoder_q-layer.6": 711.8629, "encoder_q-layer.7": 643.3707, "encoder_q-layer.8": 314.292, "encoder_q-layer.9": 188.7018, "epoch": 0.19, "inbatch_neg_score": 0.1532, "inbatch_pos_score": 0.7515, "learning_rate": 4.472222222222223e-05, "loss": 3.8988, "norm_diff": 0.0143, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 791.0536, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1526, "query_norm": 1.3494, "queue_k_norm": 1.3553, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6647, "sent_len_1": 66.9369, "sent_max_len_0": 127.9988, "sent_max_len_1": 191.48, "stdk": 0.0467, "stdq": 0.0438, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 19500 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.8883, "doc_norm": 1.3579, "encoder_q-embeddings": 266.1316, "encoder_q-layer.0": 185.8211, "encoder_q-layer.1": 205.7943, "encoder_q-layer.10": 171.5263, "encoder_q-layer.11": 481.8735, "encoder_q-layer.2": 232.093, "encoder_q-layer.3": 251.1954, "encoder_q-layer.4": 247.9567, "encoder_q-layer.5": 257.7036, "encoder_q-layer.6": 226.1796, "encoder_q-layer.7": 212.2035, "encoder_q-layer.8": 183.3053, "encoder_q-layer.9": 152.3083, "epoch": 0.19, "inbatch_neg_score": 0.1441, "inbatch_pos_score": 0.7437, "learning_rate": 4.466666666666667e-05, "loss": 3.8883, "norm_diff": 0.0274, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 386.7284, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1434, "query_norm": 1.3305, "queue_k_norm": 1.3556, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5095, "sent_len_1": 66.672, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2125, "stdk": 0.047, "stdq": 0.0434, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 19600 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.8635, "doc_norm": 1.3586, "encoder_q-embeddings": 589.1138, "encoder_q-layer.0": 404.6903, "encoder_q-layer.1": 460.3463, "encoder_q-layer.10": 184.8665, "encoder_q-layer.11": 484.645, "encoder_q-layer.2": 583.9757, "encoder_q-layer.3": 672.5516, "encoder_q-layer.4": 587.1419, "encoder_q-layer.5": 602.1509, "encoder_q-layer.6": 502.7628, "encoder_q-layer.7": 405.8271, "encoder_q-layer.8": 269.0933, "encoder_q-layer.9": 169.8474, "epoch": 0.19, "inbatch_neg_score": 0.1312, "inbatch_pos_score": 0.7266, "learning_rate": 4.461111111111111e-05, "loss": 3.8635, "norm_diff": 0.0271, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 718.9629, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1301, "query_norm": 1.3386, "queue_k_norm": 1.356, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6955, "sent_len_1": 66.7661, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5062, "stdk": 0.047, "stdq": 0.0439, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 19700 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.8667, "doc_norm": 1.351, "encoder_q-embeddings": 2201.3438, "encoder_q-layer.0": 1507.1871, "encoder_q-layer.1": 1692.6409, "encoder_q-layer.10": 205.9475, "encoder_q-layer.11": 502.6085, "encoder_q-layer.2": 1848.77, "encoder_q-layer.3": 1533.681, "encoder_q-layer.4": 1570.943, "encoder_q-layer.5": 1511.2046, "encoder_q-layer.6": 1501.1678, "encoder_q-layer.7": 1216.7107, "encoder_q-layer.8": 755.9272, "encoder_q-layer.9": 216.0245, "epoch": 0.19, "inbatch_neg_score": 0.1263, "inbatch_pos_score": 0.7153, "learning_rate": 4.4555555555555555e-05, "loss": 3.8667, "norm_diff": 0.0188, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2189.2978, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1248, "query_norm": 1.3428, "queue_k_norm": 1.3534, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7801, "sent_len_1": 66.5256, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0462, "stdk": 0.0468, "stdq": 0.0443, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 19800 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.8661, "doc_norm": 1.351, "encoder_q-embeddings": 2025.0411, "encoder_q-layer.0": 1471.2938, "encoder_q-layer.1": 1835.0312, "encoder_q-layer.10": 172.4737, "encoder_q-layer.11": 462.674, "encoder_q-layer.2": 2014.9342, "encoder_q-layer.3": 2326.0049, "encoder_q-layer.4": 2370.0959, "encoder_q-layer.5": 2345.5476, "encoder_q-layer.6": 2359.2964, "encoder_q-layer.7": 1886.2856, "encoder_q-layer.8": 863.5931, "encoder_q-layer.9": 200.4811, "epoch": 0.19, "inbatch_neg_score": 0.1347, "inbatch_pos_score": 0.7407, "learning_rate": 4.4500000000000004e-05, "loss": 3.8661, "norm_diff": 0.0184, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2625.3004, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1342, "query_norm": 1.3567, "queue_k_norm": 1.3552, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6004, "sent_len_1": 66.9244, "sent_max_len_0": 128.0, "sent_max_len_1": 190.645, "stdk": 0.0468, "stdq": 0.0444, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 19900 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.8453, "doc_norm": 1.3496, "encoder_q-embeddings": 3255.4143, "encoder_q-layer.0": 2715.6372, "encoder_q-layer.1": 2530.4417, "encoder_q-layer.10": 171.2253, "encoder_q-layer.11": 420.5656, "encoder_q-layer.2": 3097.3789, "encoder_q-layer.3": 2415.0986, "encoder_q-layer.4": 1818.4052, "encoder_q-layer.5": 1086.2689, "encoder_q-layer.6": 1120.5854, "encoder_q-layer.7": 736.5075, "encoder_q-layer.8": 315.373, "encoder_q-layer.9": 159.7473, "epoch": 0.2, "inbatch_neg_score": 0.1306, "inbatch_pos_score": 0.6895, "learning_rate": 4.4444444444444447e-05, "loss": 3.8453, "norm_diff": 0.0284, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2960.7574, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1311, "query_norm": 1.3214, "queue_k_norm": 1.3549, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.82, "sent_len_1": 66.7736, "sent_max_len_0": 127.9938, "sent_max_len_1": 188.8375, "stdk": 0.0467, "stdq": 0.0433, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20000 }, { "dev_runtime": 26.7057, "dev_samples_per_second": 2.396, "dev_steps_per_second": 0.037, "epoch": 0.2, "step": 20000, "test_accuracy": 92.3583984375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4532972574234009, "test_doc_norm": 1.2997431755065918, "test_inbatch_neg_score": 0.44682252407073975, "test_inbatch_pos_score": 1.240107774734497, "test_loss": 0.4532972574234009, "test_loss_align": 1.1007963418960571, "test_loss_unif": 3.9472498893737793, "test_loss_unif_q@queue": 3.9472501277923584, "test_norm_diff": 0.06798030436038971, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.1290624439716339, "test_query_norm": 1.3677234649658203, "test_queue_k_norm": 1.3550655841827393, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0397089347243309, "test_stdq": 0.0394117534160614, "test_stdqueue_k": 0.04700636491179466, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.7057, "dev_samples_per_second": 2.396, "dev_steps_per_second": 0.037, "epoch": 0.2, "eval_beir-arguana_ndcg@10": 0.32997, "eval_beir-arguana_recall@10": 0.54765, "eval_beir-arguana_recall@100": 0.85277, "eval_beir-arguana_recall@20": 0.6835, "eval_beir-avg_ndcg@10": 0.3275635, "eval_beir-avg_recall@10": 0.39126066666666665, "eval_beir-avg_recall@100": 0.57617375, "eval_beir-avg_recall@20": 0.4501345, "eval_beir-cqadupstack_ndcg@10": 0.21237499999999998, "eval_beir-cqadupstack_recall@10": 0.2978566666666666, "eval_beir-cqadupstack_recall@100": 0.5169675, "eval_beir-cqadupstack_recall@20": 0.35734499999999997, "eval_beir-fiqa_ndcg@10": 0.18162, "eval_beir-fiqa_recall@10": 0.23274, "eval_beir-fiqa_recall@100": 0.49634, "eval_beir-fiqa_recall@20": 0.31683, "eval_beir-nfcorpus_ndcg@10": 0.2533, "eval_beir-nfcorpus_recall@10": 0.12521, "eval_beir-nfcorpus_recall@100": 0.24641, "eval_beir-nfcorpus_recall@20": 0.15121, "eval_beir-nq_ndcg@10": 0.19426, "eval_beir-nq_recall@10": 0.3321, "eval_beir-nq_recall@100": 0.66401, "eval_beir-nq_recall@20": 0.44518, "eval_beir-quora_ndcg@10": 0.74975, "eval_beir-quora_recall@10": 0.86258, "eval_beir-quora_recall@100": 0.97001, "eval_beir-quora_recall@20": 0.91135, "eval_beir-scidocs_ndcg@10": 0.13084, "eval_beir-scidocs_recall@10": 0.13692, "eval_beir-scidocs_recall@100": 0.33633, "eval_beir-scidocs_recall@20": 0.18685, "eval_beir-scifact_ndcg@10": 0.57333, "eval_beir-scifact_recall@10": 0.72278, "eval_beir-scifact_recall@100": 0.89356, "eval_beir-scifact_recall@20": 0.78428, "eval_beir-trec-covid_ndcg@10": 0.49627, "eval_beir-trec-covid_recall@10": 0.534, "eval_beir-trec-covid_recall@100": 0.373, "eval_beir-trec-covid_recall@20": 0.487, "eval_beir-webis-touche2020_ndcg@10": 0.15392, "eval_beir-webis-touche2020_recall@10": 0.12077, "eval_beir-webis-touche2020_recall@100": 0.41234, "eval_beir-webis-touche2020_recall@20": 0.1778, "eval_senteval-avg_sts": 0.7485481453497325, "eval_senteval-sickr_spearman": 0.7094978030295724, "eval_senteval-stsb_spearman": 0.7875984876698926, "step": 20000, "test_accuracy": 92.3583984375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4532972574234009, "test_doc_norm": 1.2997431755065918, "test_inbatch_neg_score": 0.44682252407073975, "test_inbatch_pos_score": 1.240107774734497, "test_loss": 0.4532972574234009, "test_loss_align": 1.1007963418960571, "test_loss_unif": 3.9472498893737793, "test_loss_unif_q@queue": 3.9472501277923584, "test_norm_diff": 0.06798030436038971, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.1290624439716339, "test_query_norm": 1.3677234649658203, "test_queue_k_norm": 1.3550655841827393, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0397089347243309, "test_stdq": 0.0394117534160614, "test_stdqueue_k": 0.04700636491179466, "test_stdqueue_q": 0.0 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.8571, "doc_norm": 1.3533, "encoder_q-embeddings": 551.2217, "encoder_q-layer.0": 357.9622, "encoder_q-layer.1": 387.5716, "encoder_q-layer.10": 174.9591, "encoder_q-layer.11": 469.3132, "encoder_q-layer.2": 426.6346, "encoder_q-layer.3": 426.7167, "encoder_q-layer.4": 474.8409, "encoder_q-layer.5": 474.8704, "encoder_q-layer.6": 541.2084, "encoder_q-layer.7": 562.3967, "encoder_q-layer.8": 351.6401, "encoder_q-layer.9": 163.8648, "epoch": 0.2, "inbatch_neg_score": 0.1333, "inbatch_pos_score": 0.731, "learning_rate": 4.438888888888889e-05, "loss": 3.8571, "norm_diff": 0.0238, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 649.9805, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1326, "query_norm": 1.3503, "queue_k_norm": 1.354, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5142, "sent_len_1": 66.8142, "sent_max_len_0": 127.9775, "sent_max_len_1": 190.0112, "stdk": 0.0469, "stdq": 0.0444, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20100 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.8353, "doc_norm": 1.3558, "encoder_q-embeddings": 754.3925, "encoder_q-layer.0": 533.5608, "encoder_q-layer.1": 576.3531, "encoder_q-layer.10": 179.5916, "encoder_q-layer.11": 481.0899, "encoder_q-layer.2": 625.8697, "encoder_q-layer.3": 687.2671, "encoder_q-layer.4": 702.237, "encoder_q-layer.5": 737.9841, "encoder_q-layer.6": 765.2175, "encoder_q-layer.7": 703.8678, "encoder_q-layer.8": 399.3055, "encoder_q-layer.9": 184.76, "epoch": 0.2, "inbatch_neg_score": 0.126, "inbatch_pos_score": 0.7217, "learning_rate": 4.433333333333334e-05, "loss": 3.8353, "norm_diff": 0.0313, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 901.9221, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1248, "query_norm": 1.3245, "queue_k_norm": 1.3541, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7413, "sent_len_1": 66.6071, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8587, "stdk": 0.047, "stdq": 0.0441, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20200 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.8395, "doc_norm": 1.3437, "encoder_q-embeddings": 225.8385, "encoder_q-layer.0": 154.7932, "encoder_q-layer.1": 176.0182, "encoder_q-layer.10": 183.0402, "encoder_q-layer.11": 448.5835, "encoder_q-layer.2": 202.1684, "encoder_q-layer.3": 201.8231, "encoder_q-layer.4": 209.0318, "encoder_q-layer.5": 216.6036, "encoder_q-layer.6": 247.9307, "encoder_q-layer.7": 238.0286, "encoder_q-layer.8": 232.1489, "encoder_q-layer.9": 163.5558, "epoch": 0.2, "inbatch_neg_score": 0.1289, "inbatch_pos_score": 0.7231, "learning_rate": 4.427777777777778e-05, "loss": 3.8395, "norm_diff": 0.0267, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 358.7837, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1279, "query_norm": 1.3681, "queue_k_norm": 1.3544, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4307, "sent_len_1": 66.6, "sent_max_len_0": 128.0, "sent_max_len_1": 189.445, "stdk": 0.0466, "stdq": 0.045, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20300 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.8561, "doc_norm": 1.3568, "encoder_q-embeddings": 1226.3292, "encoder_q-layer.0": 876.9609, "encoder_q-layer.1": 995.2627, "encoder_q-layer.10": 207.8447, "encoder_q-layer.11": 529.3334, "encoder_q-layer.2": 1033.4486, "encoder_q-layer.3": 1063.2505, "encoder_q-layer.4": 933.2578, "encoder_q-layer.5": 896.2583, "encoder_q-layer.6": 932.4158, "encoder_q-layer.7": 890.6226, "encoder_q-layer.8": 486.3077, "encoder_q-layer.9": 199.6875, "epoch": 0.2, "inbatch_neg_score": 0.117, "inbatch_pos_score": 0.708, "learning_rate": 4.422222222222222e-05, "loss": 3.8561, "norm_diff": 0.0243, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1302.7433, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1165, "query_norm": 1.3325, "queue_k_norm": 1.3527, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4848, "sent_len_1": 66.7092, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7125, "stdk": 0.047, "stdq": 0.0441, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20400 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.8425, "doc_norm": 1.354, "encoder_q-embeddings": 221.6666, "encoder_q-layer.0": 152.4658, "encoder_q-layer.1": 172.4022, "encoder_q-layer.10": 191.1923, "encoder_q-layer.11": 471.0406, "encoder_q-layer.2": 194.3909, "encoder_q-layer.3": 212.3781, "encoder_q-layer.4": 221.6207, "encoder_q-layer.5": 230.8092, "encoder_q-layer.6": 231.6697, "encoder_q-layer.7": 243.4378, "encoder_q-layer.8": 237.1169, "encoder_q-layer.9": 168.4083, "epoch": 0.2, "inbatch_neg_score": 0.1088, "inbatch_pos_score": 0.7075, "learning_rate": 4.4166666666666665e-05, "loss": 3.8425, "norm_diff": 0.0197, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 358.6876, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1086, "query_norm": 1.3424, "queue_k_norm": 1.3525, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7735, "sent_len_1": 66.7802, "sent_max_len_0": 127.9912, "sent_max_len_1": 189.8562, "stdk": 0.0469, "stdq": 0.0449, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20500 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.8577, "doc_norm": 1.3515, "encoder_q-embeddings": 518.1207, "encoder_q-layer.0": 391.1146, "encoder_q-layer.1": 434.7253, "encoder_q-layer.10": 206.5135, "encoder_q-layer.11": 440.404, "encoder_q-layer.2": 510.0896, "encoder_q-layer.3": 539.5595, "encoder_q-layer.4": 508.1175, "encoder_q-layer.5": 424.2583, "encoder_q-layer.6": 459.9745, "encoder_q-layer.7": 450.9956, "encoder_q-layer.8": 335.4348, "encoder_q-layer.9": 196.4643, "epoch": 0.2, "inbatch_neg_score": 0.0907, "inbatch_pos_score": 0.6846, "learning_rate": 4.4111111111111114e-05, "loss": 3.8577, "norm_diff": 0.0209, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 642.2013, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0911, "query_norm": 1.3313, "queue_k_norm": 1.3527, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7215, "sent_len_1": 66.7821, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.1275, "stdk": 0.0469, "stdq": 0.0449, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20600 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.8545, "doc_norm": 1.349, "encoder_q-embeddings": 6119.8525, "encoder_q-layer.0": 4449.7129, "encoder_q-layer.1": 4056.0957, "encoder_q-layer.10": 358.7365, "encoder_q-layer.11": 889.9792, "encoder_q-layer.2": 4901.6025, "encoder_q-layer.3": 5894.6387, "encoder_q-layer.4": 5909.7275, "encoder_q-layer.5": 7019.2139, "encoder_q-layer.6": 6845.9438, "encoder_q-layer.7": 6342.1201, "encoder_q-layer.8": 4853.3115, "encoder_q-layer.9": 829.1781, "epoch": 0.2, "inbatch_neg_score": 0.1104, "inbatch_pos_score": 0.686, "learning_rate": 4.4055555555555557e-05, "loss": 3.8545, "norm_diff": 0.0578, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7601.1825, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1097, "query_norm": 1.2912, "queue_k_norm": 1.351, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7689, "sent_len_1": 66.8259, "sent_max_len_0": 127.9938, "sent_max_len_1": 187.6538, "stdk": 0.0469, "stdq": 0.0431, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20700 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.8408, "doc_norm": 1.3486, "encoder_q-embeddings": 955.9053, "encoder_q-layer.0": 698.7919, "encoder_q-layer.1": 799.5128, "encoder_q-layer.10": 439.9704, "encoder_q-layer.11": 870.8439, "encoder_q-layer.2": 868.2888, "encoder_q-layer.3": 935.7921, "encoder_q-layer.4": 986.5264, "encoder_q-layer.5": 956.6926, "encoder_q-layer.6": 881.2711, "encoder_q-layer.7": 769.7325, "encoder_q-layer.8": 663.849, "encoder_q-layer.9": 356.8274, "epoch": 0.2, "inbatch_neg_score": 0.0942, "inbatch_pos_score": 0.6792, "learning_rate": 4.4000000000000006e-05, "loss": 3.8408, "norm_diff": 0.0427, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1197.7078, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0936, "query_norm": 1.3058, "queue_k_norm": 1.35, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5898, "sent_len_1": 66.7032, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6337, "stdk": 0.0468, "stdq": 0.0439, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20800 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.849, "doc_norm": 1.3465, "encoder_q-embeddings": 429.7493, "encoder_q-layer.0": 289.2836, "encoder_q-layer.1": 315.4372, "encoder_q-layer.10": 363.5622, "encoder_q-layer.11": 870.4051, "encoder_q-layer.2": 350.1143, "encoder_q-layer.3": 378.0157, "encoder_q-layer.4": 407.2095, "encoder_q-layer.5": 426.0043, "encoder_q-layer.6": 447.1826, "encoder_q-layer.7": 476.7109, "encoder_q-layer.8": 505.0712, "encoder_q-layer.9": 370.6302, "epoch": 0.2, "inbatch_neg_score": 0.106, "inbatch_pos_score": 0.7056, "learning_rate": 4.394444444444445e-05, "loss": 3.849, "norm_diff": 0.0184, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 678.5245, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1066, "query_norm": 1.3294, "queue_k_norm": 1.351, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6398, "sent_len_1": 66.6089, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6012, "stdk": 0.0468, "stdq": 0.0446, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 20900 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.8416, "doc_norm": 1.346, "encoder_q-embeddings": 1277.6453, "encoder_q-layer.0": 932.6703, "encoder_q-layer.1": 987.9236, "encoder_q-layer.10": 393.0883, "encoder_q-layer.11": 879.1566, "encoder_q-layer.2": 1165.3003, "encoder_q-layer.3": 1132.3431, "encoder_q-layer.4": 1086.431, "encoder_q-layer.5": 1085.9227, "encoder_q-layer.6": 1080.9056, "encoder_q-layer.7": 1051.4586, "encoder_q-layer.8": 816.9343, "encoder_q-layer.9": 423.5492, "epoch": 0.21, "inbatch_neg_score": 0.1121, "inbatch_pos_score": 0.752, "learning_rate": 4.388888888888889e-05, "loss": 3.8416, "norm_diff": 0.0257, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1478.6878, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1107, "query_norm": 1.3712, "queue_k_norm": 1.3489, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7381, "sent_len_1": 66.829, "sent_max_len_0": 128.0, "sent_max_len_1": 188.295, "stdk": 0.0468, "stdq": 0.0459, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 21000 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.8409, "doc_norm": 1.3502, "encoder_q-embeddings": 953.6544, "encoder_q-layer.0": 615.155, "encoder_q-layer.1": 744.9752, "encoder_q-layer.10": 362.9015, "encoder_q-layer.11": 916.1037, "encoder_q-layer.2": 837.2083, "encoder_q-layer.3": 772.5844, "encoder_q-layer.4": 708.3731, "encoder_q-layer.5": 705.2854, "encoder_q-layer.6": 712.2845, "encoder_q-layer.7": 651.0258, "encoder_q-layer.8": 503.2701, "encoder_q-layer.9": 333.0715, "epoch": 0.21, "inbatch_neg_score": 0.107, "inbatch_pos_score": 0.6899, "learning_rate": 4.383333333333334e-05, "loss": 3.8409, "norm_diff": 0.0214, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1072.7344, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1071, "query_norm": 1.3337, "queue_k_norm": 1.3479, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.516, "sent_len_1": 66.7296, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6125, "stdk": 0.047, "stdq": 0.0443, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 21100 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.8297, "doc_norm": 1.3491, "encoder_q-embeddings": 553.1418, "encoder_q-layer.0": 379.9221, "encoder_q-layer.1": 399.3843, "encoder_q-layer.10": 348.225, "encoder_q-layer.11": 916.4127, "encoder_q-layer.2": 475.071, "encoder_q-layer.3": 491.5464, "encoder_q-layer.4": 528.554, "encoder_q-layer.5": 527.8, "encoder_q-layer.6": 456.6912, "encoder_q-layer.7": 488.158, "encoder_q-layer.8": 443.855, "encoder_q-layer.9": 355.022, "epoch": 0.21, "inbatch_neg_score": 0.1098, "inbatch_pos_score": 0.7197, "learning_rate": 4.377777777777778e-05, "loss": 3.8297, "norm_diff": 0.0179, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 775.9636, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.109, "query_norm": 1.3438, "queue_k_norm": 1.3475, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.59, "sent_len_1": 66.589, "sent_max_len_0": 127.9912, "sent_max_len_1": 188.9588, "stdk": 0.047, "stdq": 0.0446, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 21200 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.8165, "doc_norm": 1.3488, "encoder_q-embeddings": 746.7514, "encoder_q-layer.0": 537.7691, "encoder_q-layer.1": 577.314, "encoder_q-layer.10": 353.7815, "encoder_q-layer.11": 827.9166, "encoder_q-layer.2": 687.8779, "encoder_q-layer.3": 696.0586, "encoder_q-layer.4": 661.75, "encoder_q-layer.5": 682.2339, "encoder_q-layer.6": 679.9427, "encoder_q-layer.7": 719.5403, "encoder_q-layer.8": 517.1862, "encoder_q-layer.9": 336.5033, "epoch": 0.21, "inbatch_neg_score": 0.1113, "inbatch_pos_score": 0.71, "learning_rate": 4.3722222222222224e-05, "loss": 3.8165, "norm_diff": 0.0133, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 943.597, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1103, "query_norm": 1.3383, "queue_k_norm": 1.3503, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7567, "sent_len_1": 66.8846, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5025, "stdk": 0.047, "stdq": 0.0447, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21300 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.8097, "doc_norm": 1.3483, "encoder_q-embeddings": 497.3821, "encoder_q-layer.0": 369.3507, "encoder_q-layer.1": 438.4397, "encoder_q-layer.10": 337.1913, "encoder_q-layer.11": 816.2186, "encoder_q-layer.2": 499.4397, "encoder_q-layer.3": 569.2717, "encoder_q-layer.4": 530.5953, "encoder_q-layer.5": 562.4185, "encoder_q-layer.6": 551.5959, "encoder_q-layer.7": 572.2709, "encoder_q-layer.8": 521.4017, "encoder_q-layer.9": 340.0655, "epoch": 0.21, "inbatch_neg_score": 0.117, "inbatch_pos_score": 0.7227, "learning_rate": 4.3666666666666666e-05, "loss": 3.8097, "norm_diff": 0.0204, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 781.9538, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1154, "query_norm": 1.3454, "queue_k_norm": 1.3496, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.8772, "sent_len_1": 66.7585, "sent_max_len_0": 128.0, "sent_max_len_1": 190.305, "stdk": 0.0469, "stdq": 0.045, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21400 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.8289, "doc_norm": 1.3529, "encoder_q-embeddings": 406.2677, "encoder_q-layer.0": 280.5229, "encoder_q-layer.1": 299.3098, "encoder_q-layer.10": 343.2279, "encoder_q-layer.11": 901.5815, "encoder_q-layer.2": 335.5108, "encoder_q-layer.3": 357.2155, "encoder_q-layer.4": 354.7465, "encoder_q-layer.5": 358.4413, "encoder_q-layer.6": 364.3249, "encoder_q-layer.7": 384.11, "encoder_q-layer.8": 357.3859, "encoder_q-layer.9": 310.1239, "epoch": 0.21, "inbatch_neg_score": 0.1202, "inbatch_pos_score": 0.7285, "learning_rate": 4.3611111111111116e-05, "loss": 3.8289, "norm_diff": 0.0158, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 651.3212, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1196, "query_norm": 1.3402, "queue_k_norm": 1.3504, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6164, "sent_len_1": 66.7431, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3237, "stdk": 0.0471, "stdq": 0.0446, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21500 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.8018, "doc_norm": 1.3532, "encoder_q-embeddings": 391.7026, "encoder_q-layer.0": 268.0398, "encoder_q-layer.1": 287.6858, "encoder_q-layer.10": 329.1181, "encoder_q-layer.11": 870.5489, "encoder_q-layer.2": 329.8334, "encoder_q-layer.3": 353.4065, "encoder_q-layer.4": 366.9715, "encoder_q-layer.5": 360.9129, "encoder_q-layer.6": 418.7265, "encoder_q-layer.7": 373.8462, "encoder_q-layer.8": 355.8887, "encoder_q-layer.9": 299.8091, "epoch": 0.21, "inbatch_neg_score": 0.1224, "inbatch_pos_score": 0.7178, "learning_rate": 4.355555555555556e-05, "loss": 3.8018, "norm_diff": 0.037, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 634.2441, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1218, "query_norm": 1.3172, "queue_k_norm": 1.3515, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7819, "sent_len_1": 66.99, "sent_max_len_0": 127.9988, "sent_max_len_1": 188.6138, "stdk": 0.0471, "stdq": 0.0438, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21600 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.7974, "doc_norm": 1.3479, "encoder_q-embeddings": 1000.9813, "encoder_q-layer.0": 783.9073, "encoder_q-layer.1": 786.8348, "encoder_q-layer.10": 362.7757, "encoder_q-layer.11": 915.472, "encoder_q-layer.2": 761.3816, "encoder_q-layer.3": 750.381, "encoder_q-layer.4": 766.3036, "encoder_q-layer.5": 772.94, "encoder_q-layer.6": 585.4821, "encoder_q-layer.7": 507.5451, "encoder_q-layer.8": 435.888, "encoder_q-layer.9": 324.7284, "epoch": 0.21, "inbatch_neg_score": 0.1285, "inbatch_pos_score": 0.71, "learning_rate": 4.35e-05, "loss": 3.7974, "norm_diff": 0.0291, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1089.7144, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1284, "query_norm": 1.3212, "queue_k_norm": 1.3511, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.8642, "sent_len_1": 66.9813, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4263, "stdk": 0.0469, "stdq": 0.0437, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21700 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.8059, "doc_norm": 1.355, "encoder_q-embeddings": 406.9651, "encoder_q-layer.0": 288.3742, "encoder_q-layer.1": 304.6346, "encoder_q-layer.10": 412.858, "encoder_q-layer.11": 910.6919, "encoder_q-layer.2": 356.3942, "encoder_q-layer.3": 335.4362, "encoder_q-layer.4": 333.1385, "encoder_q-layer.5": 330.4194, "encoder_q-layer.6": 341.1608, "encoder_q-layer.7": 339.0038, "encoder_q-layer.8": 372.6168, "encoder_q-layer.9": 347.3814, "epoch": 0.21, "inbatch_neg_score": 0.1237, "inbatch_pos_score": 0.7354, "learning_rate": 4.344444444444445e-05, "loss": 3.8059, "norm_diff": 0.0132, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 631.6005, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1244, "query_norm": 1.3524, "queue_k_norm": 1.3514, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7242, "sent_len_1": 66.8811, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.225, "stdk": 0.0472, "stdq": 0.0451, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21800 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.813, "doc_norm": 1.3499, "encoder_q-embeddings": 6666.0884, "encoder_q-layer.0": 5319.8276, "encoder_q-layer.1": 6229.6787, "encoder_q-layer.10": 358.6563, "encoder_q-layer.11": 859.8044, "encoder_q-layer.2": 7412.5376, "encoder_q-layer.3": 7523.6475, "encoder_q-layer.4": 8592.458, "encoder_q-layer.5": 9367.999, "encoder_q-layer.6": 8768.2441, "encoder_q-layer.7": 5747.3115, "encoder_q-layer.8": 3313.3372, "encoder_q-layer.9": 636.266, "epoch": 0.21, "inbatch_neg_score": 0.126, "inbatch_pos_score": 0.73, "learning_rate": 4.338888888888889e-05, "loss": 3.813, "norm_diff": 0.0271, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9174.333, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1248, "query_norm": 1.3306, "queue_k_norm": 1.349, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5611, "sent_len_1": 66.4805, "sent_max_len_0": 128.0, "sent_max_len_1": 187.805, "stdk": 0.047, "stdq": 0.0444, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21900 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.8091, "doc_norm": 1.3501, "encoder_q-embeddings": 2480.6094, "encoder_q-layer.0": 1822.5787, "encoder_q-layer.1": 1976.4589, "encoder_q-layer.10": 315.1654, "encoder_q-layer.11": 822.3841, "encoder_q-layer.2": 2203.0986, "encoder_q-layer.3": 2168.03, "encoder_q-layer.4": 2257.2515, "encoder_q-layer.5": 2354.3477, "encoder_q-layer.6": 2617.4138, "encoder_q-layer.7": 2037.5317, "encoder_q-layer.8": 866.3499, "encoder_q-layer.9": 373.3218, "epoch": 0.21, "inbatch_neg_score": 0.1187, "inbatch_pos_score": 0.7295, "learning_rate": 4.3333333333333334e-05, "loss": 3.8091, "norm_diff": 0.0351, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2856.7673, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1194, "query_norm": 1.3208, "queue_k_norm": 1.351, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5513, "sent_len_1": 66.9017, "sent_max_len_0": 128.0, "sent_max_len_1": 189.12, "stdk": 0.047, "stdq": 0.0439, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 22000 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.8005, "doc_norm": 1.354, "encoder_q-embeddings": 454.3195, "encoder_q-layer.0": 282.41, "encoder_q-layer.1": 303.0137, "encoder_q-layer.10": 332.8831, "encoder_q-layer.11": 788.1265, "encoder_q-layer.2": 331.8298, "encoder_q-layer.3": 364.2173, "encoder_q-layer.4": 371.9166, "encoder_q-layer.5": 363.4718, "encoder_q-layer.6": 388.3487, "encoder_q-layer.7": 392.5764, "encoder_q-layer.8": 383.0605, "encoder_q-layer.9": 318.6465, "epoch": 0.22, "inbatch_neg_score": 0.11, "inbatch_pos_score": 0.7158, "learning_rate": 4.3277777777777776e-05, "loss": 3.8005, "norm_diff": 0.0368, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 617.0971, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.109, "query_norm": 1.3173, "queue_k_norm": 1.3501, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5843, "sent_len_1": 66.8924, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3787, "stdk": 0.0471, "stdq": 0.0441, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 22100 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.8024, "doc_norm": 1.3553, "encoder_q-embeddings": 539.3846, "encoder_q-layer.0": 338.2092, "encoder_q-layer.1": 372.1256, "encoder_q-layer.10": 337.4008, "encoder_q-layer.11": 829.5327, "encoder_q-layer.2": 423.9568, "encoder_q-layer.3": 466.5827, "encoder_q-layer.4": 509.4337, "encoder_q-layer.5": 504.9278, "encoder_q-layer.6": 555.5624, "encoder_q-layer.7": 455.7044, "encoder_q-layer.8": 394.995, "encoder_q-layer.9": 311.2281, "epoch": 0.22, "inbatch_neg_score": 0.1099, "inbatch_pos_score": 0.7524, "learning_rate": 4.3222222222222226e-05, "loss": 3.8024, "norm_diff": 0.0203, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 726.1408, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1102, "query_norm": 1.3593, "queue_k_norm": 1.3513, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6626, "sent_len_1": 66.7146, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.7463, "stdk": 0.0472, "stdq": 0.0458, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 22200 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.8137, "doc_norm": 1.3483, "encoder_q-embeddings": 2382.9331, "encoder_q-layer.0": 1778.2188, "encoder_q-layer.1": 1835.4368, "encoder_q-layer.10": 341.4946, "encoder_q-layer.11": 821.4415, "encoder_q-layer.2": 2633.1738, "encoder_q-layer.3": 2374.8274, "encoder_q-layer.4": 2296.2898, "encoder_q-layer.5": 2224.3191, "encoder_q-layer.6": 2260.1301, "encoder_q-layer.7": 2311.9927, "encoder_q-layer.8": 2312.8123, "encoder_q-layer.9": 607.0443, "epoch": 0.22, "inbatch_neg_score": 0.1187, "inbatch_pos_score": 0.7188, "learning_rate": 4.316666666666667e-05, "loss": 3.8137, "norm_diff": 0.0301, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3150.8733, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1196, "query_norm": 1.3182, "queue_k_norm": 1.3503, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5526, "sent_len_1": 66.8892, "sent_max_len_0": 127.99, "sent_max_len_1": 189.3025, "stdk": 0.047, "stdq": 0.0439, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 22300 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.8183, "doc_norm": 1.3433, "encoder_q-embeddings": 457.9995, "encoder_q-layer.0": 313.3121, "encoder_q-layer.1": 355.3496, "encoder_q-layer.10": 353.3523, "encoder_q-layer.11": 923.2453, "encoder_q-layer.2": 411.7694, "encoder_q-layer.3": 454.7651, "encoder_q-layer.4": 482.6721, "encoder_q-layer.5": 474.3675, "encoder_q-layer.6": 465.6811, "encoder_q-layer.7": 440.9239, "encoder_q-layer.8": 384.406, "encoder_q-layer.9": 319.5748, "epoch": 0.22, "inbatch_neg_score": 0.1236, "inbatch_pos_score": 0.7358, "learning_rate": 4.311111111111111e-05, "loss": 3.8183, "norm_diff": 0.02, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 719.3877, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1227, "query_norm": 1.3486, "queue_k_norm": 1.3502, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5803, "sent_len_1": 66.7589, "sent_max_len_0": 128.0, "sent_max_len_1": 187.1825, "stdk": 0.0467, "stdq": 0.0449, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 22400 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.785, "doc_norm": 1.3509, "encoder_q-embeddings": 1530.9359, "encoder_q-layer.0": 1107.5148, "encoder_q-layer.1": 1112.3423, "encoder_q-layer.10": 369.9207, "encoder_q-layer.11": 820.063, "encoder_q-layer.2": 1324.6287, "encoder_q-layer.3": 1215.7021, "encoder_q-layer.4": 1115.0627, "encoder_q-layer.5": 959.7265, "encoder_q-layer.6": 851.089, "encoder_q-layer.7": 636.7807, "encoder_q-layer.8": 537.9233, "encoder_q-layer.9": 328.5572, "epoch": 0.22, "inbatch_neg_score": 0.1165, "inbatch_pos_score": 0.6982, "learning_rate": 4.305555555555556e-05, "loss": 3.785, "norm_diff": 0.0394, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1523.9073, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.116, "query_norm": 1.3124, "queue_k_norm": 1.3527, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.9193, "sent_len_1": 66.8471, "sent_max_len_0": 127.9988, "sent_max_len_1": 191.2975, "stdk": 0.047, "stdq": 0.0439, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 22500 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.7776, "doc_norm": 1.3498, "encoder_q-embeddings": 251.3742, "encoder_q-layer.0": 165.7935, "encoder_q-layer.1": 186.9228, "encoder_q-layer.10": 167.2168, "encoder_q-layer.11": 409.7726, "encoder_q-layer.2": 212.376, "encoder_q-layer.3": 247.4759, "encoder_q-layer.4": 227.1718, "encoder_q-layer.5": 231.8541, "encoder_q-layer.6": 229.2096, "encoder_q-layer.7": 199.7781, "encoder_q-layer.8": 205.0949, "encoder_q-layer.9": 163.1175, "epoch": 0.22, "inbatch_neg_score": 0.1214, "inbatch_pos_score": 0.7192, "learning_rate": 4.3e-05, "loss": 3.7776, "norm_diff": 0.0178, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 347.451, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1206, "query_norm": 1.3327, "queue_k_norm": 1.3517, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7765, "sent_len_1": 66.8262, "sent_max_len_0": 127.9975, "sent_max_len_1": 187.9575, "stdk": 0.047, "stdq": 0.0443, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 22600 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.7793, "doc_norm": 1.3443, "encoder_q-embeddings": 947.0262, "encoder_q-layer.0": 729.1731, "encoder_q-layer.1": 760.1464, "encoder_q-layer.10": 183.0103, "encoder_q-layer.11": 414.7955, "encoder_q-layer.2": 885.2103, "encoder_q-layer.3": 946.6344, "encoder_q-layer.4": 936.0244, "encoder_q-layer.5": 929.5696, "encoder_q-layer.6": 745.9501, "encoder_q-layer.7": 442.4337, "encoder_q-layer.8": 282.2178, "encoder_q-layer.9": 173.3818, "epoch": 0.22, "inbatch_neg_score": 0.1174, "inbatch_pos_score": 0.7051, "learning_rate": 4.294444444444445e-05, "loss": 3.7793, "norm_diff": 0.035, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1070.2454, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1177, "query_norm": 1.3107, "queue_k_norm": 1.3515, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7041, "sent_len_1": 66.9485, "sent_max_len_0": 128.0, "sent_max_len_1": 190.44, "stdk": 0.0468, "stdq": 0.0439, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 22700 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.8091, "doc_norm": 1.3517, "encoder_q-embeddings": 192.7737, "encoder_q-layer.0": 129.7228, "encoder_q-layer.1": 136.43, "encoder_q-layer.10": 154.2933, "encoder_q-layer.11": 400.1254, "encoder_q-layer.2": 156.832, "encoder_q-layer.3": 166.2216, "encoder_q-layer.4": 168.6596, "encoder_q-layer.5": 169.9108, "encoder_q-layer.6": 175.9098, "encoder_q-layer.7": 172.0333, "encoder_q-layer.8": 171.7622, "encoder_q-layer.9": 143.2877, "epoch": 0.22, "inbatch_neg_score": 0.126, "inbatch_pos_score": 0.73, "learning_rate": 4.2888888888888886e-05, "loss": 3.8091, "norm_diff": 0.029, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 293.7209, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1268, "query_norm": 1.3239, "queue_k_norm": 1.3496, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6518, "sent_len_1": 66.8438, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9375, "stdk": 0.047, "stdq": 0.0441, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 22800 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.8018, "doc_norm": 1.3477, "encoder_q-embeddings": 357.7165, "encoder_q-layer.0": 258.6755, "encoder_q-layer.1": 288.2072, "encoder_q-layer.10": 164.1257, "encoder_q-layer.11": 444.3792, "encoder_q-layer.2": 347.8618, "encoder_q-layer.3": 362.0359, "encoder_q-layer.4": 330.5571, "encoder_q-layer.5": 305.9081, "encoder_q-layer.6": 301.5775, "encoder_q-layer.7": 276.5104, "encoder_q-layer.8": 220.6186, "encoder_q-layer.9": 163.4184, "epoch": 0.22, "inbatch_neg_score": 0.126, "inbatch_pos_score": 0.6973, "learning_rate": 4.2833333333333335e-05, "loss": 3.8018, "norm_diff": 0.0639, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 456.0197, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1251, "query_norm": 1.2838, "queue_k_norm": 1.3538, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5049, "sent_len_1": 66.6681, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9812, "stdk": 0.0469, "stdq": 0.0426, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 22900 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.7985, "doc_norm": 1.3542, "encoder_q-embeddings": 454.9732, "encoder_q-layer.0": 324.7047, "encoder_q-layer.1": 384.3638, "encoder_q-layer.10": 180.5007, "encoder_q-layer.11": 392.7128, "encoder_q-layer.2": 502.0698, "encoder_q-layer.3": 527.551, "encoder_q-layer.4": 684.0004, "encoder_q-layer.5": 593.0962, "encoder_q-layer.6": 438.7404, "encoder_q-layer.7": 317.7796, "encoder_q-layer.8": 237.7991, "encoder_q-layer.9": 166.1038, "epoch": 0.22, "inbatch_neg_score": 0.1233, "inbatch_pos_score": 0.7324, "learning_rate": 4.277777777777778e-05, "loss": 3.7985, "norm_diff": 0.0187, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 633.5487, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1237, "query_norm": 1.3355, "queue_k_norm": 1.3532, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6227, "sent_len_1": 66.9047, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.545, "stdk": 0.0471, "stdq": 0.0443, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23000 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.8083, "doc_norm": 1.3572, "encoder_q-embeddings": 613.1642, "encoder_q-layer.0": 414.2847, "encoder_q-layer.1": 423.6113, "encoder_q-layer.10": 166.7528, "encoder_q-layer.11": 415.5231, "encoder_q-layer.2": 508.8585, "encoder_q-layer.3": 538.8178, "encoder_q-layer.4": 533.8806, "encoder_q-layer.5": 510.7761, "encoder_q-layer.6": 523.8236, "encoder_q-layer.7": 404.4873, "encoder_q-layer.8": 229.6449, "encoder_q-layer.9": 158.8057, "epoch": 0.23, "inbatch_neg_score": 0.1454, "inbatch_pos_score": 0.7275, "learning_rate": 4.272222222222223e-05, "loss": 3.8083, "norm_diff": 0.0283, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 675.1756, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1444, "query_norm": 1.3292, "queue_k_norm": 1.3539, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6072, "sent_len_1": 66.8352, "sent_max_len_0": 127.9862, "sent_max_len_1": 191.4412, "stdk": 0.0472, "stdq": 0.0439, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23100 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.7989, "doc_norm": 1.351, "encoder_q-embeddings": 197.7, "encoder_q-layer.0": 145.4522, "encoder_q-layer.1": 155.5701, "encoder_q-layer.10": 164.6618, "encoder_q-layer.11": 389.8264, "encoder_q-layer.2": 172.4497, "encoder_q-layer.3": 185.2386, "encoder_q-layer.4": 183.9115, "encoder_q-layer.5": 178.4339, "encoder_q-layer.6": 192.8804, "encoder_q-layer.7": 157.3048, "encoder_q-layer.8": 162.4778, "encoder_q-layer.9": 146.2093, "epoch": 0.23, "inbatch_neg_score": 0.143, "inbatch_pos_score": 0.731, "learning_rate": 4.266666666666667e-05, "loss": 3.7989, "norm_diff": 0.0366, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 295.4459, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1416, "query_norm": 1.315, "queue_k_norm": 1.3562, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6662, "sent_len_1": 66.5481, "sent_max_len_0": 127.9988, "sent_max_len_1": 187.9563, "stdk": 0.047, "stdq": 0.0434, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23200 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.8075, "doc_norm": 1.3587, "encoder_q-embeddings": 372.2062, "encoder_q-layer.0": 243.5394, "encoder_q-layer.1": 254.8999, "encoder_q-layer.10": 163.6101, "encoder_q-layer.11": 395.5081, "encoder_q-layer.2": 293.2967, "encoder_q-layer.3": 302.0604, "encoder_q-layer.4": 328.785, "encoder_q-layer.5": 322.2125, "encoder_q-layer.6": 312.6622, "encoder_q-layer.7": 285.8482, "encoder_q-layer.8": 220.7417, "encoder_q-layer.9": 160.888, "epoch": 0.23, "inbatch_neg_score": 0.1464, "inbatch_pos_score": 0.7617, "learning_rate": 4.261111111111111e-05, "loss": 3.8075, "norm_diff": 0.0132, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 443.0979, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.146, "query_norm": 1.3542, "queue_k_norm": 1.3552, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5188, "sent_len_1": 66.5946, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.9787, "stdk": 0.0472, "stdq": 0.0449, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23300 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.8008, "doc_norm": 1.352, "encoder_q-embeddings": 162.2314, "encoder_q-layer.0": 109.1894, "encoder_q-layer.1": 116.2346, "encoder_q-layer.10": 163.5607, "encoder_q-layer.11": 421.0049, "encoder_q-layer.2": 129.8377, "encoder_q-layer.3": 136.017, "encoder_q-layer.4": 140.4872, "encoder_q-layer.5": 143.0993, "encoder_q-layer.6": 160.0287, "encoder_q-layer.7": 169.2202, "encoder_q-layer.8": 177.4418, "encoder_q-layer.9": 159.2567, "epoch": 0.23, "inbatch_neg_score": 0.1425, "inbatch_pos_score": 0.7568, "learning_rate": 4.255555555555556e-05, "loss": 3.8008, "norm_diff": 0.0187, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 281.7877, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1425, "query_norm": 1.349, "queue_k_norm": 1.3573, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5309, "sent_len_1": 66.9078, "sent_max_len_0": 127.995, "sent_max_len_1": 189.38, "stdk": 0.0469, "stdq": 0.0447, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 23400 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.7857, "doc_norm": 1.354, "encoder_q-embeddings": 190.6391, "encoder_q-layer.0": 123.382, "encoder_q-layer.1": 134.804, "encoder_q-layer.10": 165.0462, "encoder_q-layer.11": 413.4578, "encoder_q-layer.2": 150.2052, "encoder_q-layer.3": 159.0041, "encoder_q-layer.4": 158.5854, "encoder_q-layer.5": 161.8342, "encoder_q-layer.6": 176.2232, "encoder_q-layer.7": 163.9828, "encoder_q-layer.8": 164.5647, "encoder_q-layer.9": 148.893, "epoch": 0.23, "inbatch_neg_score": 0.1466, "inbatch_pos_score": 0.7749, "learning_rate": 4.25e-05, "loss": 3.7857, "norm_diff": 0.0264, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 295.6809, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1467, "query_norm": 1.3413, "queue_k_norm": 1.3582, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7168, "sent_len_1": 66.8181, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.7262, "stdk": 0.047, "stdq": 0.0443, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 23500 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7764, "doc_norm": 1.3601, "encoder_q-embeddings": 259.9012, "encoder_q-layer.0": 178.9238, "encoder_q-layer.1": 201.9012, "encoder_q-layer.10": 174.2613, "encoder_q-layer.11": 436.2602, "encoder_q-layer.2": 210.061, "encoder_q-layer.3": 214.8278, "encoder_q-layer.4": 216.1468, "encoder_q-layer.5": 189.015, "encoder_q-layer.6": 196.4997, "encoder_q-layer.7": 172.3049, "encoder_q-layer.8": 178.2055, "encoder_q-layer.9": 158.4228, "epoch": 0.23, "inbatch_neg_score": 0.1527, "inbatch_pos_score": 0.7549, "learning_rate": 4.2444444444444445e-05, "loss": 3.7764, "norm_diff": 0.0169, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 345.8545, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1527, "query_norm": 1.3588, "queue_k_norm": 1.3569, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6109, "sent_len_1": 66.815, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0987, "stdk": 0.0472, "stdq": 0.0448, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23600 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.8009, "doc_norm": 1.3592, "encoder_q-embeddings": 346.4308, "encoder_q-layer.0": 245.7152, "encoder_q-layer.1": 290.5626, "encoder_q-layer.10": 180.9912, "encoder_q-layer.11": 434.1124, "encoder_q-layer.2": 341.0268, "encoder_q-layer.3": 339.9049, "encoder_q-layer.4": 334.7585, "encoder_q-layer.5": 320.7549, "encoder_q-layer.6": 306.8021, "encoder_q-layer.7": 272.4517, "encoder_q-layer.8": 222.1849, "encoder_q-layer.9": 162.9416, "epoch": 0.23, "inbatch_neg_score": 0.1456, "inbatch_pos_score": 0.7549, "learning_rate": 4.238888888888889e-05, "loss": 3.8009, "norm_diff": 0.0199, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 450.0162, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1453, "query_norm": 1.34, "queue_k_norm": 1.3582, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5248, "sent_len_1": 66.7161, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3688, "stdk": 0.0472, "stdq": 0.0443, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23700 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7641, "doc_norm": 1.3523, "encoder_q-embeddings": 352.8316, "encoder_q-layer.0": 240.4485, "encoder_q-layer.1": 257.9659, "encoder_q-layer.10": 176.1357, "encoder_q-layer.11": 428.9634, "encoder_q-layer.2": 304.9905, "encoder_q-layer.3": 270.8497, "encoder_q-layer.4": 276.6199, "encoder_q-layer.5": 276.8734, "encoder_q-layer.6": 267.1439, "encoder_q-layer.7": 227.1968, "encoder_q-layer.8": 212.7255, "encoder_q-layer.9": 170.8193, "epoch": 0.23, "inbatch_neg_score": 0.1386, "inbatch_pos_score": 0.7417, "learning_rate": 4.233333333333334e-05, "loss": 3.7641, "norm_diff": 0.0161, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 422.5258, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1387, "query_norm": 1.3513, "queue_k_norm": 1.3595, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7967, "sent_len_1": 66.993, "sent_max_len_0": 128.0, "sent_max_len_1": 191.7763, "stdk": 0.0469, "stdq": 0.0451, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 23800 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.7597, "doc_norm": 1.3568, "encoder_q-embeddings": 269.4254, "encoder_q-layer.0": 180.8516, "encoder_q-layer.1": 201.1593, "encoder_q-layer.10": 173.8523, "encoder_q-layer.11": 440.4648, "encoder_q-layer.2": 230.3094, "encoder_q-layer.3": 229.9072, "encoder_q-layer.4": 238.4596, "encoder_q-layer.5": 247.6894, "encoder_q-layer.6": 274.0237, "encoder_q-layer.7": 244.4024, "encoder_q-layer.8": 200.4015, "encoder_q-layer.9": 166.0392, "epoch": 0.23, "inbatch_neg_score": 0.132, "inbatch_pos_score": 0.748, "learning_rate": 4.227777777777778e-05, "loss": 3.7597, "norm_diff": 0.0408, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 372.6179, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1327, "query_norm": 1.316, "queue_k_norm": 1.361, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6707, "sent_len_1": 66.9101, "sent_max_len_0": 127.9912, "sent_max_len_1": 189.6075, "stdk": 0.0471, "stdq": 0.0438, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 23900 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.7731, "doc_norm": 1.3576, "encoder_q-embeddings": 269.5442, "encoder_q-layer.0": 180.8217, "encoder_q-layer.1": 202.5275, "encoder_q-layer.10": 162.096, "encoder_q-layer.11": 437.6111, "encoder_q-layer.2": 221.5869, "encoder_q-layer.3": 234.9691, "encoder_q-layer.4": 246.961, "encoder_q-layer.5": 242.2858, "encoder_q-layer.6": 271.694, "encoder_q-layer.7": 261.9846, "encoder_q-layer.8": 245.2418, "encoder_q-layer.9": 157.3813, "epoch": 0.23, "inbatch_neg_score": 0.1215, "inbatch_pos_score": 0.6982, "learning_rate": 4.222222222222222e-05, "loss": 3.7731, "norm_diff": 0.0608, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 381.8636, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1225, "query_norm": 1.2968, "queue_k_norm": 1.3602, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.3985, "sent_len_1": 66.5684, "sent_max_len_0": 128.0, "sent_max_len_1": 190.17, "stdk": 0.0471, "stdq": 0.0434, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 24000 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.7755, "doc_norm": 1.3562, "encoder_q-embeddings": 184.5584, "encoder_q-layer.0": 128.8255, "encoder_q-layer.1": 138.9713, "encoder_q-layer.10": 171.1591, "encoder_q-layer.11": 415.6893, "encoder_q-layer.2": 155.8508, "encoder_q-layer.3": 157.5774, "encoder_q-layer.4": 157.3665, "encoder_q-layer.5": 154.9553, "encoder_q-layer.6": 167.535, "encoder_q-layer.7": 162.2681, "encoder_q-layer.8": 179.4111, "encoder_q-layer.9": 151.5068, "epoch": 0.24, "inbatch_neg_score": 0.118, "inbatch_pos_score": 0.7246, "learning_rate": 4.216666666666667e-05, "loss": 3.7755, "norm_diff": 0.0345, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 293.7175, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1169, "query_norm": 1.3234, "queue_k_norm": 1.359, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6115, "sent_len_1": 66.8684, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6163, "stdk": 0.0471, "stdq": 0.0445, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 24100 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.7655, "doc_norm": 1.3569, "encoder_q-embeddings": 294.6273, "encoder_q-layer.0": 186.8882, "encoder_q-layer.1": 223.5059, "encoder_q-layer.10": 159.9229, "encoder_q-layer.11": 398.1171, "encoder_q-layer.2": 246.0483, "encoder_q-layer.3": 253.161, "encoder_q-layer.4": 220.4548, "encoder_q-layer.5": 220.9802, "encoder_q-layer.6": 211.9226, "encoder_q-layer.7": 164.5676, "encoder_q-layer.8": 169.2525, "encoder_q-layer.9": 147.3584, "epoch": 0.24, "inbatch_neg_score": 0.1142, "inbatch_pos_score": 0.7407, "learning_rate": 4.211111111111111e-05, "loss": 3.7655, "norm_diff": 0.0327, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 357.0599, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1137, "query_norm": 1.3243, "queue_k_norm": 1.3582, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6406, "sent_len_1": 66.7714, "sent_max_len_0": 128.0, "sent_max_len_1": 186.6375, "stdk": 0.0471, "stdq": 0.0448, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 24200 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.7604, "doc_norm": 1.3618, "encoder_q-embeddings": 275.6784, "encoder_q-layer.0": 186.9898, "encoder_q-layer.1": 208.5125, "encoder_q-layer.10": 178.0866, "encoder_q-layer.11": 429.5746, "encoder_q-layer.2": 243.3088, "encoder_q-layer.3": 264.3227, "encoder_q-layer.4": 271.3762, "encoder_q-layer.5": 258.008, "encoder_q-layer.6": 260.8535, "encoder_q-layer.7": 222.2956, "encoder_q-layer.8": 227.1616, "encoder_q-layer.9": 165.2231, "epoch": 0.24, "inbatch_neg_score": 0.1109, "inbatch_pos_score": 0.7256, "learning_rate": 4.205555555555556e-05, "loss": 3.7604, "norm_diff": 0.0336, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 382.9281, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1104, "query_norm": 1.3282, "queue_k_norm": 1.3581, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.8915, "sent_len_1": 66.5647, "sent_max_len_0": 128.0, "sent_max_len_1": 188.95, "stdk": 0.0474, "stdq": 0.0451, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 24300 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.7503, "doc_norm": 1.3587, "encoder_q-embeddings": 178.8586, "encoder_q-layer.0": 120.2524, "encoder_q-layer.1": 124.5828, "encoder_q-layer.10": 154.4412, "encoder_q-layer.11": 397.8536, "encoder_q-layer.2": 139.8436, "encoder_q-layer.3": 144.7573, "encoder_q-layer.4": 147.7808, "encoder_q-layer.5": 146.2005, "encoder_q-layer.6": 166.3415, "encoder_q-layer.7": 155.4869, "encoder_q-layer.8": 167.39, "encoder_q-layer.9": 144.5406, "epoch": 0.24, "inbatch_neg_score": 0.1147, "inbatch_pos_score": 0.751, "learning_rate": 4.2e-05, "loss": 3.7503, "norm_diff": 0.0379, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 280.2738, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1137, "query_norm": 1.3239, "queue_k_norm": 1.3582, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7208, "sent_len_1": 66.9123, "sent_max_len_0": 127.9938, "sent_max_len_1": 188.4325, "stdk": 0.0473, "stdq": 0.0447, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 24400 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.7614, "doc_norm": 1.35, "encoder_q-embeddings": 197.3807, "encoder_q-layer.0": 139.4481, "encoder_q-layer.1": 153.7218, "encoder_q-layer.10": 170.9752, "encoder_q-layer.11": 401.3859, "encoder_q-layer.2": 164.1809, "encoder_q-layer.3": 164.6412, "encoder_q-layer.4": 160.5298, "encoder_q-layer.5": 143.1613, "encoder_q-layer.6": 159.3412, "encoder_q-layer.7": 152.0508, "encoder_q-layer.8": 171.1719, "encoder_q-layer.9": 151.7886, "epoch": 0.24, "inbatch_neg_score": 0.118, "inbatch_pos_score": 0.7144, "learning_rate": 4.194444444444445e-05, "loss": 3.7614, "norm_diff": 0.0277, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 295.1165, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1191, "query_norm": 1.3291, "queue_k_norm": 1.357, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5111, "sent_len_1": 66.7795, "sent_max_len_0": 128.0, "sent_max_len_1": 191.18, "stdk": 0.047, "stdq": 0.0448, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 24500 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.7616, "doc_norm": 1.3582, "encoder_q-embeddings": 443.1695, "encoder_q-layer.0": 348.3844, "encoder_q-layer.1": 364.1354, "encoder_q-layer.10": 333.8098, "encoder_q-layer.11": 803.3564, "encoder_q-layer.2": 319.5727, "encoder_q-layer.3": 312.0909, "encoder_q-layer.4": 311.5317, "encoder_q-layer.5": 316.6595, "encoder_q-layer.6": 327.3993, "encoder_q-layer.7": 307.8696, "encoder_q-layer.8": 343.4843, "encoder_q-layer.9": 305.3763, "epoch": 0.24, "inbatch_neg_score": 0.1128, "inbatch_pos_score": 0.7095, "learning_rate": 4.188888888888889e-05, "loss": 3.7616, "norm_diff": 0.07, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 602.467, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1119, "query_norm": 1.2882, "queue_k_norm": 1.3574, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5132, "sent_len_1": 66.7143, "sent_max_len_0": 128.0, "sent_max_len_1": 189.76, "stdk": 0.0472, "stdq": 0.0432, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 24600 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.7501, "doc_norm": 1.3556, "encoder_q-embeddings": 853.1665, "encoder_q-layer.0": 613.6458, "encoder_q-layer.1": 645.8395, "encoder_q-layer.10": 375.8819, "encoder_q-layer.11": 852.3375, "encoder_q-layer.2": 816.4717, "encoder_q-layer.3": 777.5923, "encoder_q-layer.4": 648.3468, "encoder_q-layer.5": 619.724, "encoder_q-layer.6": 513.8243, "encoder_q-layer.7": 476.1887, "encoder_q-layer.8": 405.8267, "encoder_q-layer.9": 323.3201, "epoch": 0.24, "inbatch_neg_score": 0.1165, "inbatch_pos_score": 0.7173, "learning_rate": 4.183333333333334e-05, "loss": 3.7501, "norm_diff": 0.0316, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 982.1468, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.116, "query_norm": 1.3241, "queue_k_norm": 1.3573, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5773, "sent_len_1": 66.641, "sent_max_len_0": 128.0, "sent_max_len_1": 189.685, "stdk": 0.0472, "stdq": 0.0446, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 24700 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.7456, "doc_norm": 1.3568, "encoder_q-embeddings": 342.0883, "encoder_q-layer.0": 216.4034, "encoder_q-layer.1": 241.4105, "encoder_q-layer.10": 335.7007, "encoder_q-layer.11": 829.6301, "encoder_q-layer.2": 279.8575, "encoder_q-layer.3": 293.3982, "encoder_q-layer.4": 305.7881, "encoder_q-layer.5": 302.7013, "encoder_q-layer.6": 327.8279, "encoder_q-layer.7": 337.9767, "encoder_q-layer.8": 351.7181, "encoder_q-layer.9": 310.5223, "epoch": 0.24, "inbatch_neg_score": 0.1134, "inbatch_pos_score": 0.7222, "learning_rate": 4.177777777777778e-05, "loss": 3.7456, "norm_diff": 0.0309, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 565.159, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1133, "query_norm": 1.3265, "queue_k_norm": 1.3562, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7585, "sent_len_1": 66.9654, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2125, "stdk": 0.0473, "stdq": 0.0448, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 24800 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.7374, "doc_norm": 1.3549, "encoder_q-embeddings": 460.1381, "encoder_q-layer.0": 323.7534, "encoder_q-layer.1": 340.7857, "encoder_q-layer.10": 345.9028, "encoder_q-layer.11": 897.4807, "encoder_q-layer.2": 289.0247, "encoder_q-layer.3": 279.8627, "encoder_q-layer.4": 275.4364, "encoder_q-layer.5": 283.5023, "encoder_q-layer.6": 307.6125, "encoder_q-layer.7": 317.8042, "encoder_q-layer.8": 348.7555, "encoder_q-layer.9": 312.7047, "epoch": 0.24, "inbatch_neg_score": 0.1215, "inbatch_pos_score": 0.7363, "learning_rate": 4.172222222222222e-05, "loss": 3.7374, "norm_diff": 0.0223, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 620.1548, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1197, "query_norm": 1.3384, "queue_k_norm": 1.3577, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.723, "sent_len_1": 66.8566, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9412, "stdk": 0.0472, "stdq": 0.0451, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 24900 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.765, "doc_norm": 1.3626, "encoder_q-embeddings": 817.5108, "encoder_q-layer.0": 598.244, "encoder_q-layer.1": 631.1122, "encoder_q-layer.10": 340.5235, "encoder_q-layer.11": 866.5327, "encoder_q-layer.2": 786.5286, "encoder_q-layer.3": 876.404, "encoder_q-layer.4": 872.7281, "encoder_q-layer.5": 728.2366, "encoder_q-layer.6": 575.2148, "encoder_q-layer.7": 452.572, "encoder_q-layer.8": 409.3481, "encoder_q-layer.9": 311.3935, "epoch": 0.24, "inbatch_neg_score": 0.119, "inbatch_pos_score": 0.7036, "learning_rate": 4.166666666666667e-05, "loss": 3.765, "norm_diff": 0.0599, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1026.1881, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1177, "query_norm": 1.3027, "queue_k_norm": 1.3573, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6448, "sent_len_1": 66.88, "sent_max_len_0": 128.0, "sent_max_len_1": 191.7562, "stdk": 0.0474, "stdq": 0.0438, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 25000 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.7352, "doc_norm": 1.3592, "encoder_q-embeddings": 435.5723, "encoder_q-layer.0": 324.3735, "encoder_q-layer.1": 334.8833, "encoder_q-layer.10": 326.4576, "encoder_q-layer.11": 814.4606, "encoder_q-layer.2": 362.8478, "encoder_q-layer.3": 388.4027, "encoder_q-layer.4": 399.2074, "encoder_q-layer.5": 369.7895, "encoder_q-layer.6": 382.6363, "encoder_q-layer.7": 366.967, "encoder_q-layer.8": 357.9899, "encoder_q-layer.9": 310.4913, "epoch": 0.25, "inbatch_neg_score": 0.1311, "inbatch_pos_score": 0.7539, "learning_rate": 4.1611111111111114e-05, "loss": 3.7352, "norm_diff": 0.0123, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 634.9313, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1313, "query_norm": 1.3486, "queue_k_norm": 1.3571, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5371, "sent_len_1": 66.8878, "sent_max_len_0": 127.9975, "sent_max_len_1": 192.1975, "stdk": 0.0474, "stdq": 0.045, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 25100 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.7507, "doc_norm": 1.3518, "encoder_q-embeddings": 926.7665, "encoder_q-layer.0": 632.7087, "encoder_q-layer.1": 668.3276, "encoder_q-layer.10": 316.8555, "encoder_q-layer.11": 865.0134, "encoder_q-layer.2": 732.8823, "encoder_q-layer.3": 702.5092, "encoder_q-layer.4": 779.1286, "encoder_q-layer.5": 749.9915, "encoder_q-layer.6": 703.7898, "encoder_q-layer.7": 583.3362, "encoder_q-layer.8": 483.4462, "encoder_q-layer.9": 317.5675, "epoch": 0.25, "inbatch_neg_score": 0.1264, "inbatch_pos_score": 0.7227, "learning_rate": 4.155555555555556e-05, "loss": 3.7507, "norm_diff": 0.0308, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1031.7363, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1254, "query_norm": 1.3212, "queue_k_norm": 1.3576, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.3847, "sent_len_1": 66.7316, "sent_max_len_0": 128.0, "sent_max_len_1": 189.335, "stdk": 0.0471, "stdq": 0.0443, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 25200 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.7564, "doc_norm": 1.3646, "encoder_q-embeddings": 650.0403, "encoder_q-layer.0": 463.7545, "encoder_q-layer.1": 531.6412, "encoder_q-layer.10": 346.5643, "encoder_q-layer.11": 888.3378, "encoder_q-layer.2": 647.9252, "encoder_q-layer.3": 653.1898, "encoder_q-layer.4": 657.8326, "encoder_q-layer.5": 582.9211, "encoder_q-layer.6": 545.7859, "encoder_q-layer.7": 570.7147, "encoder_q-layer.8": 411.388, "encoder_q-layer.9": 312.3246, "epoch": 0.25, "inbatch_neg_score": 0.1236, "inbatch_pos_score": 0.7275, "learning_rate": 4.15e-05, "loss": 3.7564, "norm_diff": 0.0379, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 881.0189, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1239, "query_norm": 1.3282, "queue_k_norm": 1.3587, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5579, "sent_len_1": 66.5902, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.6337, "stdk": 0.0475, "stdq": 0.0445, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 25300 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.7217, "doc_norm": 1.3621, "encoder_q-embeddings": 358.8708, "encoder_q-layer.0": 239.4191, "encoder_q-layer.1": 256.9503, "encoder_q-layer.10": 340.8175, "encoder_q-layer.11": 832.6398, "encoder_q-layer.2": 296.4247, "encoder_q-layer.3": 302.4897, "encoder_q-layer.4": 313.5304, "encoder_q-layer.5": 303.3897, "encoder_q-layer.6": 305.0152, "encoder_q-layer.7": 322.2231, "encoder_q-layer.8": 354.3423, "encoder_q-layer.9": 316.8783, "epoch": 0.25, "inbatch_neg_score": 0.1274, "inbatch_pos_score": 0.7363, "learning_rate": 4.144444444444445e-05, "loss": 3.7217, "norm_diff": 0.0346, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 577.1145, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1282, "query_norm": 1.3283, "queue_k_norm": 1.3595, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7076, "sent_len_1": 66.8799, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9538, "stdk": 0.0474, "stdq": 0.0445, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 25400 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.757, "doc_norm": 1.3659, "encoder_q-embeddings": 436.6176, "encoder_q-layer.0": 290.9691, "encoder_q-layer.1": 316.3727, "encoder_q-layer.10": 358.8671, "encoder_q-layer.11": 781.7358, "encoder_q-layer.2": 365.8804, "encoder_q-layer.3": 408.7874, "encoder_q-layer.4": 433.0444, "encoder_q-layer.5": 493.9584, "encoder_q-layer.6": 503.2086, "encoder_q-layer.7": 373.5388, "encoder_q-layer.8": 352.3829, "encoder_q-layer.9": 304.0918, "epoch": 0.25, "inbatch_neg_score": 0.1279, "inbatch_pos_score": 0.749, "learning_rate": 4.138888888888889e-05, "loss": 3.757, "norm_diff": 0.0385, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 646.5359, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.127, "query_norm": 1.3274, "queue_k_norm": 1.3609, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.427, "sent_len_1": 66.8325, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8887, "stdk": 0.0475, "stdq": 0.0445, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25500 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.7582, "doc_norm": 1.3594, "encoder_q-embeddings": 514.6096, "encoder_q-layer.0": 340.7436, "encoder_q-layer.1": 368.0806, "encoder_q-layer.10": 358.5483, "encoder_q-layer.11": 890.4617, "encoder_q-layer.2": 391.545, "encoder_q-layer.3": 417.7759, "encoder_q-layer.4": 407.757, "encoder_q-layer.5": 397.5239, "encoder_q-layer.6": 405.1075, "encoder_q-layer.7": 381.6176, "encoder_q-layer.8": 367.3252, "encoder_q-layer.9": 326.5726, "epoch": 0.25, "inbatch_neg_score": 0.1303, "inbatch_pos_score": 0.7227, "learning_rate": 4.133333333333333e-05, "loss": 3.7582, "norm_diff": 0.048, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 693.6241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1304, "query_norm": 1.3114, "queue_k_norm": 1.3614, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6517, "sent_len_1": 67.0917, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9225, "stdk": 0.0473, "stdq": 0.0442, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25600 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.7317, "doc_norm": 1.3557, "encoder_q-embeddings": 474.4588, "encoder_q-layer.0": 336.4996, "encoder_q-layer.1": 358.2989, "encoder_q-layer.10": 337.9182, "encoder_q-layer.11": 831.3555, "encoder_q-layer.2": 362.9608, "encoder_q-layer.3": 335.1188, "encoder_q-layer.4": 332.8047, "encoder_q-layer.5": 315.3557, "encoder_q-layer.6": 317.475, "encoder_q-layer.7": 346.4657, "encoder_q-layer.8": 345.8534, "encoder_q-layer.9": 296.451, "epoch": 0.25, "inbatch_neg_score": 0.1278, "inbatch_pos_score": 0.7422, "learning_rate": 4.127777777777778e-05, "loss": 3.7317, "norm_diff": 0.0319, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 625.3782, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1261, "query_norm": 1.3239, "queue_k_norm": 1.361, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6009, "sent_len_1": 66.9112, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3587, "stdk": 0.0472, "stdq": 0.0446, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25700 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.7248, "doc_norm": 1.3634, "encoder_q-embeddings": 293.0004, "encoder_q-layer.0": 185.2005, "encoder_q-layer.1": 198.5898, "encoder_q-layer.10": 357.5757, "encoder_q-layer.11": 888.3564, "encoder_q-layer.2": 220.8898, "encoder_q-layer.3": 221.6219, "encoder_q-layer.4": 229.4255, "encoder_q-layer.5": 250.0493, "encoder_q-layer.6": 274.464, "encoder_q-layer.7": 297.094, "encoder_q-layer.8": 345.1984, "encoder_q-layer.9": 323.2019, "epoch": 0.25, "inbatch_neg_score": 0.1268, "inbatch_pos_score": 0.748, "learning_rate": 4.1222222222222224e-05, "loss": 3.7248, "norm_diff": 0.0387, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 552.3886, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1277, "query_norm": 1.3246, "queue_k_norm": 1.3603, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.8133, "sent_len_1": 66.8012, "sent_max_len_0": 127.9875, "sent_max_len_1": 188.9925, "stdk": 0.0475, "stdq": 0.0449, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 25800 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.7303, "doc_norm": 1.3731, "encoder_q-embeddings": 447.1751, "encoder_q-layer.0": 300.1248, "encoder_q-layer.1": 291.9772, "encoder_q-layer.10": 388.986, "encoder_q-layer.11": 869.6096, "encoder_q-layer.2": 337.4511, "encoder_q-layer.3": 359.6604, "encoder_q-layer.4": 358.7879, "encoder_q-layer.5": 365.4788, "encoder_q-layer.6": 371.9411, "encoder_q-layer.7": 405.8719, "encoder_q-layer.8": 421.6292, "encoder_q-layer.9": 334.341, "epoch": 0.25, "inbatch_neg_score": 0.116, "inbatch_pos_score": 0.7368, "learning_rate": 4.116666666666667e-05, "loss": 3.7303, "norm_diff": 0.0748, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 656.7303, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1168, "query_norm": 1.2983, "queue_k_norm": 1.3616, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.574, "sent_len_1": 66.6753, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0062, "stdk": 0.0478, "stdq": 0.0439, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25900 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.7258, "doc_norm": 1.3607, "encoder_q-embeddings": 325.5944, "encoder_q-layer.0": 202.2785, "encoder_q-layer.1": 211.9325, "encoder_q-layer.10": 377.9514, "encoder_q-layer.11": 861.9652, "encoder_q-layer.2": 239.8693, "encoder_q-layer.3": 261.7965, "encoder_q-layer.4": 271.8151, "encoder_q-layer.5": 287.8405, "encoder_q-layer.6": 331.6373, "encoder_q-layer.7": 340.1442, "encoder_q-layer.8": 411.6153, "encoder_q-layer.9": 352.9811, "epoch": 0.25, "inbatch_neg_score": 0.118, "inbatch_pos_score": 0.7153, "learning_rate": 4.111111111111111e-05, "loss": 3.7258, "norm_diff": 0.0298, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 565.6673, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1172, "query_norm": 1.3352, "queue_k_norm": 1.3608, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7049, "sent_len_1": 66.7944, "sent_max_len_0": 127.9963, "sent_max_len_1": 188.72, "stdk": 0.0474, "stdq": 0.0454, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26000 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.7032, "doc_norm": 1.3597, "encoder_q-embeddings": 285.0451, "encoder_q-layer.0": 189.1489, "encoder_q-layer.1": 194.3281, "encoder_q-layer.10": 328.0672, "encoder_q-layer.11": 887.5942, "encoder_q-layer.2": 214.7321, "encoder_q-layer.3": 223.1271, "encoder_q-layer.4": 229.3601, "encoder_q-layer.5": 246.6264, "encoder_q-layer.6": 276.7376, "encoder_q-layer.7": 299.9438, "encoder_q-layer.8": 355.6362, "encoder_q-layer.9": 321.4252, "epoch": 0.25, "inbatch_neg_score": 0.1088, "inbatch_pos_score": 0.7344, "learning_rate": 4.105555555555556e-05, "loss": 3.7032, "norm_diff": 0.0597, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 555.4253, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.11, "query_norm": 1.3, "queue_k_norm": 1.3604, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.736, "sent_len_1": 66.7909, "sent_max_len_0": 127.9912, "sent_max_len_1": 190.4062, "stdk": 0.0473, "stdq": 0.0441, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26100 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.7212, "doc_norm": 1.3584, "encoder_q-embeddings": 348.0716, "encoder_q-layer.0": 252.7523, "encoder_q-layer.1": 274.2079, "encoder_q-layer.10": 335.1247, "encoder_q-layer.11": 824.356, "encoder_q-layer.2": 328.6178, "encoder_q-layer.3": 335.7925, "encoder_q-layer.4": 350.7004, "encoder_q-layer.5": 371.9881, "encoder_q-layer.6": 343.3, "encoder_q-layer.7": 322.9323, "encoder_q-layer.8": 323.9284, "encoder_q-layer.9": 304.328, "epoch": 0.26, "inbatch_neg_score": 0.1049, "inbatch_pos_score": 0.7124, "learning_rate": 4.1e-05, "loss": 3.7212, "norm_diff": 0.0626, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 582.6054, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1038, "query_norm": 1.2958, "queue_k_norm": 1.3627, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4979, "sent_len_1": 66.9508, "sent_max_len_0": 128.0, "sent_max_len_1": 192.6037, "stdk": 0.0473, "stdq": 0.0441, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26200 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.7252, "doc_norm": 1.3605, "encoder_q-embeddings": 305.1795, "encoder_q-layer.0": 207.0273, "encoder_q-layer.1": 220.8252, "encoder_q-layer.10": 324.1914, "encoder_q-layer.11": 789.8194, "encoder_q-layer.2": 254.2399, "encoder_q-layer.3": 271.0608, "encoder_q-layer.4": 286.2052, "encoder_q-layer.5": 273.4794, "encoder_q-layer.6": 298.5145, "encoder_q-layer.7": 309.1039, "encoder_q-layer.8": 350.6047, "encoder_q-layer.9": 299.3842, "epoch": 0.26, "inbatch_neg_score": 0.1046, "inbatch_pos_score": 0.7334, "learning_rate": 4.094444444444445e-05, "loss": 3.7252, "norm_diff": 0.0454, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 536.7802, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.103, "query_norm": 1.3151, "queue_k_norm": 1.3613, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4642, "sent_len_1": 66.853, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3762, "stdk": 0.0474, "stdq": 0.0449, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26300 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.7241, "doc_norm": 1.3551, "encoder_q-embeddings": 270.8231, "encoder_q-layer.0": 174.2923, "encoder_q-layer.1": 183.1766, "encoder_q-layer.10": 329.2335, "encoder_q-layer.11": 860.2533, "encoder_q-layer.2": 196.405, "encoder_q-layer.3": 203.9876, "encoder_q-layer.4": 219.8371, "encoder_q-layer.5": 225.0043, "encoder_q-layer.6": 255.0766, "encoder_q-layer.7": 275.1777, "encoder_q-layer.8": 325.7107, "encoder_q-layer.9": 313.6901, "epoch": 0.26, "inbatch_neg_score": 0.1015, "inbatch_pos_score": 0.7056, "learning_rate": 4.088888888888889e-05, "loss": 3.7241, "norm_diff": 0.077, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 523.236, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1017, "query_norm": 1.2782, "queue_k_norm": 1.3605, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6001, "sent_len_1": 66.8803, "sent_max_len_0": 127.9912, "sent_max_len_1": 188.5437, "stdk": 0.0472, "stdq": 0.0435, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26400 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.7095, "doc_norm": 1.3651, "encoder_q-embeddings": 611.7718, "encoder_q-layer.0": 445.7836, "encoder_q-layer.1": 514.8941, "encoder_q-layer.10": 365.8618, "encoder_q-layer.11": 838.8661, "encoder_q-layer.2": 614.3501, "encoder_q-layer.3": 554.5471, "encoder_q-layer.4": 642.676, "encoder_q-layer.5": 544.938, "encoder_q-layer.6": 567.8007, "encoder_q-layer.7": 472.3041, "encoder_q-layer.8": 359.216, "encoder_q-layer.9": 301.7739, "epoch": 0.26, "inbatch_neg_score": 0.0968, "inbatch_pos_score": 0.6943, "learning_rate": 4.0833333333333334e-05, "loss": 3.7095, "norm_diff": 0.0695, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 831.4299, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0973, "query_norm": 1.2957, "queue_k_norm": 1.3601, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.779, "sent_len_1": 66.7507, "sent_max_len_0": 128.0, "sent_max_len_1": 189.84, "stdk": 0.0476, "stdq": 0.0443, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26500 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.7323, "doc_norm": 1.3565, "encoder_q-embeddings": 859.5204, "encoder_q-layer.0": 614.0084, "encoder_q-layer.1": 633.3259, "encoder_q-layer.10": 703.6698, "encoder_q-layer.11": 1695.24, "encoder_q-layer.2": 715.7848, "encoder_q-layer.3": 739.9949, "encoder_q-layer.4": 736.1332, "encoder_q-layer.5": 672.1953, "encoder_q-layer.6": 697.6075, "encoder_q-layer.7": 638.7929, "encoder_q-layer.8": 700.3571, "encoder_q-layer.9": 604.2394, "epoch": 0.26, "inbatch_neg_score": 0.0976, "inbatch_pos_score": 0.7148, "learning_rate": 4.0777777777777783e-05, "loss": 3.7323, "norm_diff": 0.0289, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1230.1028, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0982, "query_norm": 1.3284, "queue_k_norm": 1.359, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4018, "sent_len_1": 66.5221, "sent_max_len_0": 127.9975, "sent_max_len_1": 186.5238, "stdk": 0.0474, "stdq": 0.0455, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26600 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.7245, "doc_norm": 1.3598, "encoder_q-embeddings": 725.924, "encoder_q-layer.0": 510.0121, "encoder_q-layer.1": 530.9851, "encoder_q-layer.10": 674.1282, "encoder_q-layer.11": 1682.5123, "encoder_q-layer.2": 582.5299, "encoder_q-layer.3": 617.7875, "encoder_q-layer.4": 636.1264, "encoder_q-layer.5": 597.0092, "encoder_q-layer.6": 623.5167, "encoder_q-layer.7": 615.924, "encoder_q-layer.8": 714.3014, "encoder_q-layer.9": 631.4978, "epoch": 0.26, "inbatch_neg_score": 0.1042, "inbatch_pos_score": 0.73, "learning_rate": 4.0722222222222226e-05, "loss": 3.7245, "norm_diff": 0.0607, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1155.3752, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1044, "query_norm": 1.2991, "queue_k_norm": 1.3584, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.3308, "sent_len_1": 66.5063, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.305, "stdk": 0.0475, "stdq": 0.0442, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26700 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.7202, "doc_norm": 1.3582, "encoder_q-embeddings": 2015.082, "encoder_q-layer.0": 1235.7554, "encoder_q-layer.1": 1310.2706, "encoder_q-layer.10": 656.8926, "encoder_q-layer.11": 1702.8279, "encoder_q-layer.2": 1445.8064, "encoder_q-layer.3": 1486.7087, "encoder_q-layer.4": 1536.181, "encoder_q-layer.5": 1391.8425, "encoder_q-layer.6": 1469.8325, "encoder_q-layer.7": 1175.3022, "encoder_q-layer.8": 906.4152, "encoder_q-layer.9": 623.0956, "epoch": 0.26, "inbatch_neg_score": 0.1058, "inbatch_pos_score": 0.7178, "learning_rate": 4.066666666666667e-05, "loss": 3.7202, "norm_diff": 0.0536, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2103.3342, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1067, "query_norm": 1.3046, "queue_k_norm": 1.358, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5028, "sent_len_1": 66.7822, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8613, "stdk": 0.0474, "stdq": 0.0443, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26800 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.6938, "doc_norm": 1.3563, "encoder_q-embeddings": 636.4672, "encoder_q-layer.0": 418.8726, "encoder_q-layer.1": 446.4644, "encoder_q-layer.10": 631.7808, "encoder_q-layer.11": 1748.8579, "encoder_q-layer.2": 494.982, "encoder_q-layer.3": 525.9588, "encoder_q-layer.4": 540.4283, "encoder_q-layer.5": 541.2128, "encoder_q-layer.6": 579.5715, "encoder_q-layer.7": 611.5917, "encoder_q-layer.8": 668.1641, "encoder_q-layer.9": 589.7095, "epoch": 0.26, "inbatch_neg_score": 0.1087, "inbatch_pos_score": 0.6968, "learning_rate": 4.061111111111111e-05, "loss": 3.6938, "norm_diff": 0.0824, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1127.6201, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1091, "query_norm": 1.2739, "queue_k_norm": 1.3592, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7797, "sent_len_1": 67.0142, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1875, "stdk": 0.0474, "stdq": 0.0434, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26900 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.6996, "doc_norm": 1.353, "encoder_q-embeddings": 610.1492, "encoder_q-layer.0": 411.4096, "encoder_q-layer.1": 426.5073, "encoder_q-layer.10": 632.5197, "encoder_q-layer.11": 1650.6398, "encoder_q-layer.2": 458.0187, "encoder_q-layer.3": 486.0215, "encoder_q-layer.4": 509.7263, "encoder_q-layer.5": 494.1087, "encoder_q-layer.6": 577.5765, "encoder_q-layer.7": 611.6134, "encoder_q-layer.8": 688.9394, "encoder_q-layer.9": 602.7495, "epoch": 0.26, "inbatch_neg_score": 0.1131, "inbatch_pos_score": 0.7109, "learning_rate": 4.055555555555556e-05, "loss": 3.6996, "norm_diff": 0.0603, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1075.154, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1141, "query_norm": 1.2926, "queue_k_norm": 1.3587, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7091, "sent_len_1": 66.7739, "sent_max_len_0": 127.9963, "sent_max_len_1": 188.8475, "stdk": 0.0473, "stdq": 0.0438, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27000 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.6823, "doc_norm": 1.3564, "encoder_q-embeddings": 553.5873, "encoder_q-layer.0": 358.1823, "encoder_q-layer.1": 370.1344, "encoder_q-layer.10": 646.0848, "encoder_q-layer.11": 1547.0597, "encoder_q-layer.2": 403.8387, "encoder_q-layer.3": 421.7768, "encoder_q-layer.4": 441.0554, "encoder_q-layer.5": 473.9887, "encoder_q-layer.6": 504.2322, "encoder_q-layer.7": 580.7479, "encoder_q-layer.8": 671.4791, "encoder_q-layer.9": 615.8334, "epoch": 0.26, "inbatch_neg_score": 0.116, "inbatch_pos_score": 0.7554, "learning_rate": 4.05e-05, "loss": 3.6823, "norm_diff": 0.0189, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 989.7414, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1154, "query_norm": 1.3428, "queue_k_norm": 1.359, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7808, "sent_len_1": 66.9501, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6562, "stdk": 0.0474, "stdq": 0.0456, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27100 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.68, "doc_norm": 1.3548, "encoder_q-embeddings": 677.6486, "encoder_q-layer.0": 464.4007, "encoder_q-layer.1": 489.5532, "encoder_q-layer.10": 666.0706, "encoder_q-layer.11": 1568.899, "encoder_q-layer.2": 564.7958, "encoder_q-layer.3": 621.0269, "encoder_q-layer.4": 647.0415, "encoder_q-layer.5": 624.4418, "encoder_q-layer.6": 680.6351, "encoder_q-layer.7": 683.0203, "encoder_q-layer.8": 691.5432, "encoder_q-layer.9": 622.8452, "epoch": 0.27, "inbatch_neg_score": 0.1177, "inbatch_pos_score": 0.7476, "learning_rate": 4.0444444444444444e-05, "loss": 3.68, "norm_diff": 0.0339, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1112.5869, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1157, "query_norm": 1.3209, "queue_k_norm": 1.3609, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4309, "sent_len_1": 66.6509, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2463, "stdk": 0.0473, "stdq": 0.0448, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27200 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.7134, "doc_norm": 1.3597, "encoder_q-embeddings": 574.7576, "encoder_q-layer.0": 378.7034, "encoder_q-layer.1": 386.7485, "encoder_q-layer.10": 608.3966, "encoder_q-layer.11": 1486.8662, "encoder_q-layer.2": 431.8412, "encoder_q-layer.3": 447.9872, "encoder_q-layer.4": 463.032, "encoder_q-layer.5": 496.924, "encoder_q-layer.6": 574.1587, "encoder_q-layer.7": 614.4226, "encoder_q-layer.8": 653.6326, "encoder_q-layer.9": 586.2809, "epoch": 0.27, "inbatch_neg_score": 0.1183, "inbatch_pos_score": 0.7446, "learning_rate": 4.038888888888889e-05, "loss": 3.7134, "norm_diff": 0.0311, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 977.3929, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1178, "query_norm": 1.3291, "queue_k_norm": 1.3609, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.3942, "sent_len_1": 66.872, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0488, "stdk": 0.0475, "stdq": 0.0449, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27300 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.7063, "doc_norm": 1.3642, "encoder_q-embeddings": 630.4447, "encoder_q-layer.0": 421.6337, "encoder_q-layer.1": 439.936, "encoder_q-layer.10": 620.8064, "encoder_q-layer.11": 1595.8964, "encoder_q-layer.2": 494.8785, "encoder_q-layer.3": 524.98, "encoder_q-layer.4": 539.7971, "encoder_q-layer.5": 585.468, "encoder_q-layer.6": 654.2856, "encoder_q-layer.7": 714.3572, "encoder_q-layer.8": 708.489, "encoder_q-layer.9": 596.7435, "epoch": 0.27, "inbatch_neg_score": 0.1149, "inbatch_pos_score": 0.7168, "learning_rate": 4.0333333333333336e-05, "loss": 3.7063, "norm_diff": 0.0641, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1089.3844, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1146, "query_norm": 1.3001, "queue_k_norm": 1.3603, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5774, "sent_len_1": 66.7859, "sent_max_len_0": 127.995, "sent_max_len_1": 190.4538, "stdk": 0.0476, "stdq": 0.044, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27400 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.6889, "doc_norm": 1.3658, "encoder_q-embeddings": 2648.4243, "encoder_q-layer.0": 2184.1028, "encoder_q-layer.1": 2152.8152, "encoder_q-layer.10": 644.0569, "encoder_q-layer.11": 1555.0568, "encoder_q-layer.2": 2413.7791, "encoder_q-layer.3": 2412.7644, "encoder_q-layer.4": 2353.3911, "encoder_q-layer.5": 2351.1433, "encoder_q-layer.6": 2191.7317, "encoder_q-layer.7": 1839.5208, "encoder_q-layer.8": 1141.8533, "encoder_q-layer.9": 645.7964, "epoch": 0.27, "inbatch_neg_score": 0.121, "inbatch_pos_score": 0.7417, "learning_rate": 4.027777777777778e-05, "loss": 3.6889, "norm_diff": 0.0444, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3116.3936, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1221, "query_norm": 1.3214, "queue_k_norm": 1.3614, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7202, "sent_len_1": 66.9135, "sent_max_len_0": 128.0, "sent_max_len_1": 188.835, "stdk": 0.0477, "stdq": 0.0445, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27500 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.6974, "doc_norm": 1.3713, "encoder_q-embeddings": 560.7314, "encoder_q-layer.0": 356.5671, "encoder_q-layer.1": 365.7554, "encoder_q-layer.10": 615.4901, "encoder_q-layer.11": 1570.0948, "encoder_q-layer.2": 413.337, "encoder_q-layer.3": 421.2264, "encoder_q-layer.4": 447.9373, "encoder_q-layer.5": 451.1346, "encoder_q-layer.6": 510.4789, "encoder_q-layer.7": 537.5485, "encoder_q-layer.8": 601.6923, "encoder_q-layer.9": 554.7525, "epoch": 0.27, "inbatch_neg_score": 0.1303, "inbatch_pos_score": 0.7876, "learning_rate": 4.022222222222222e-05, "loss": 3.6974, "norm_diff": 0.0256, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1001.4864, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1305, "query_norm": 1.3467, "queue_k_norm": 1.362, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.547, "sent_len_1": 66.8817, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9837, "stdk": 0.0479, "stdq": 0.0454, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27600 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.6764, "doc_norm": 1.3691, "encoder_q-embeddings": 804.7065, "encoder_q-layer.0": 534.5659, "encoder_q-layer.1": 587.6952, "encoder_q-layer.10": 702.7753, "encoder_q-layer.11": 1774.134, "encoder_q-layer.2": 614.5253, "encoder_q-layer.3": 630.2148, "encoder_q-layer.4": 660.9649, "encoder_q-layer.5": 661.2522, "encoder_q-layer.6": 726.2741, "encoder_q-layer.7": 750.2894, "encoder_q-layer.8": 775.1046, "encoder_q-layer.9": 658.7847, "epoch": 0.27, "inbatch_neg_score": 0.1286, "inbatch_pos_score": 0.7637, "learning_rate": 4.016666666666667e-05, "loss": 3.6764, "norm_diff": 0.0334, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1230.2484, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1289, "query_norm": 1.3357, "queue_k_norm": 1.3643, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5011, "sent_len_1": 66.6851, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.3713, "stdk": 0.0478, "stdq": 0.0452, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 27700 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.718, "doc_norm": 1.3612, "encoder_q-embeddings": 677.6375, "encoder_q-layer.0": 442.9862, "encoder_q-layer.1": 457.0582, "encoder_q-layer.10": 663.3539, "encoder_q-layer.11": 1663.3872, "encoder_q-layer.2": 526.514, "encoder_q-layer.3": 551.6449, "encoder_q-layer.4": 579.4297, "encoder_q-layer.5": 560.8918, "encoder_q-layer.6": 593.9142, "encoder_q-layer.7": 680.4009, "encoder_q-layer.8": 701.5035, "encoder_q-layer.9": 624.0288, "epoch": 0.27, "inbatch_neg_score": 0.122, "inbatch_pos_score": 0.7354, "learning_rate": 4.011111111111111e-05, "loss": 3.718, "norm_diff": 0.0513, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1126.397, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1213, "query_norm": 1.31, "queue_k_norm": 1.3643, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.3607, "sent_len_1": 66.5792, "sent_max_len_0": 128.0, "sent_max_len_1": 189.125, "stdk": 0.0475, "stdq": 0.0447, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 27800 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.7065, "doc_norm": 1.3656, "encoder_q-embeddings": 575.921, "encoder_q-layer.0": 372.7059, "encoder_q-layer.1": 391.8888, "encoder_q-layer.10": 650.5999, "encoder_q-layer.11": 1727.1687, "encoder_q-layer.2": 433.7715, "encoder_q-layer.3": 463.1167, "encoder_q-layer.4": 486.3477, "encoder_q-layer.5": 498.7974, "encoder_q-layer.6": 557.0652, "encoder_q-layer.7": 592.4938, "encoder_q-layer.8": 692.8522, "encoder_q-layer.9": 622.0032, "epoch": 0.27, "inbatch_neg_score": 0.1182, "inbatch_pos_score": 0.7432, "learning_rate": 4.0055555555555554e-05, "loss": 3.7065, "norm_diff": 0.0799, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1084.1356, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.119, "query_norm": 1.2857, "queue_k_norm": 1.3655, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4583, "sent_len_1": 66.7609, "sent_max_len_0": 128.0, "sent_max_len_1": 187.31, "stdk": 0.0476, "stdq": 0.044, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 27900 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.6782, "doc_norm": 1.3635, "encoder_q-embeddings": 924.8414, "encoder_q-layer.0": 682.123, "encoder_q-layer.1": 784.4939, "encoder_q-layer.10": 693.0145, "encoder_q-layer.11": 1677.0835, "encoder_q-layer.2": 791.2355, "encoder_q-layer.3": 766.28, "encoder_q-layer.4": 683.0537, "encoder_q-layer.5": 610.6081, "encoder_q-layer.6": 669.9675, "encoder_q-layer.7": 697.5362, "encoder_q-layer.8": 752.3054, "encoder_q-layer.9": 662.825, "epoch": 0.27, "inbatch_neg_score": 0.1167, "inbatch_pos_score": 0.7285, "learning_rate": 4e-05, "loss": 3.6782, "norm_diff": 0.0573, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1273.8831, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1165, "query_norm": 1.3062, "queue_k_norm": 1.3662, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.555, "sent_len_1": 67.1052, "sent_max_len_0": 128.0, "sent_max_len_1": 191.115, "stdk": 0.0476, "stdq": 0.0447, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 28000 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.6845, "doc_norm": 1.3611, "encoder_q-embeddings": 521.8284, "encoder_q-layer.0": 344.5964, "encoder_q-layer.1": 354.2745, "encoder_q-layer.10": 685.4271, "encoder_q-layer.11": 1679.7092, "encoder_q-layer.2": 393.3343, "encoder_q-layer.3": 413.0944, "encoder_q-layer.4": 445.4077, "encoder_q-layer.5": 466.0135, "encoder_q-layer.6": 539.7038, "encoder_q-layer.7": 574.6151, "encoder_q-layer.8": 683.2933, "encoder_q-layer.9": 635.0726, "epoch": 0.27, "inbatch_neg_score": 0.1117, "inbatch_pos_score": 0.7246, "learning_rate": 3.9944444444444446e-05, "loss": 3.6845, "norm_diff": 0.0412, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1045.4335, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1116, "query_norm": 1.3199, "queue_k_norm": 1.3653, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4397, "sent_len_1": 66.7581, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3875, "stdk": 0.0475, "stdq": 0.0454, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 28100 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.6709, "doc_norm": 1.3608, "encoder_q-embeddings": 623.3076, "encoder_q-layer.0": 398.9183, "encoder_q-layer.1": 400.3332, "encoder_q-layer.10": 653.6202, "encoder_q-layer.11": 1489.4541, "encoder_q-layer.2": 447.5018, "encoder_q-layer.3": 468.0643, "encoder_q-layer.4": 490.0763, "encoder_q-layer.5": 499.3664, "encoder_q-layer.6": 580.3186, "encoder_q-layer.7": 609.3723, "encoder_q-layer.8": 712.3212, "encoder_q-layer.9": 609.3327, "epoch": 0.28, "inbatch_neg_score": 0.1113, "inbatch_pos_score": 0.7275, "learning_rate": 3.9888888888888895e-05, "loss": 3.6709, "norm_diff": 0.0683, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 999.9816, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1104, "query_norm": 1.2925, "queue_k_norm": 1.3633, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6708, "sent_len_1": 66.7842, "sent_max_len_0": 127.99, "sent_max_len_1": 190.8787, "stdk": 0.0475, "stdq": 0.0441, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 28200 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.6823, "doc_norm": 1.3665, "encoder_q-embeddings": 539.3518, "encoder_q-layer.0": 350.7657, "encoder_q-layer.1": 369.8342, "encoder_q-layer.10": 676.5423, "encoder_q-layer.11": 1575.9968, "encoder_q-layer.2": 429.6344, "encoder_q-layer.3": 448.2645, "encoder_q-layer.4": 466.2077, "encoder_q-layer.5": 485.9501, "encoder_q-layer.6": 551.7445, "encoder_q-layer.7": 619.046, "encoder_q-layer.8": 692.0641, "encoder_q-layer.9": 618.4037, "epoch": 0.28, "inbatch_neg_score": 0.114, "inbatch_pos_score": 0.7363, "learning_rate": 3.983333333333333e-05, "loss": 3.6823, "norm_diff": 0.0686, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1013.0022, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1133, "query_norm": 1.2979, "queue_k_norm": 1.3612, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6497, "sent_len_1": 66.7071, "sent_max_len_0": 127.9925, "sent_max_len_1": 188.515, "stdk": 0.0477, "stdq": 0.0441, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 28300 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.6721, "doc_norm": 1.3654, "encoder_q-embeddings": 638.1461, "encoder_q-layer.0": 463.218, "encoder_q-layer.1": 480.6154, "encoder_q-layer.10": 636.5505, "encoder_q-layer.11": 1536.2681, "encoder_q-layer.2": 522.3107, "encoder_q-layer.3": 527.7631, "encoder_q-layer.4": 580.6296, "encoder_q-layer.5": 562.1338, "encoder_q-layer.6": 583.0247, "encoder_q-layer.7": 610.8491, "encoder_q-layer.8": 672.1314, "encoder_q-layer.9": 565.4762, "epoch": 0.28, "inbatch_neg_score": 0.114, "inbatch_pos_score": 0.7441, "learning_rate": 3.977777777777778e-05, "loss": 3.6721, "norm_diff": 0.062, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1056.7692, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1139, "query_norm": 1.3034, "queue_k_norm": 1.3635, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6782, "sent_len_1": 66.812, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4675, "stdk": 0.0477, "stdq": 0.0441, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 28400 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.6803, "doc_norm": 1.3673, "encoder_q-embeddings": 584.3727, "encoder_q-layer.0": 382.7974, "encoder_q-layer.1": 408.6432, "encoder_q-layer.10": 639.9258, "encoder_q-layer.11": 1595.5713, "encoder_q-layer.2": 468.242, "encoder_q-layer.3": 482.2108, "encoder_q-layer.4": 494.9388, "encoder_q-layer.5": 518.7546, "encoder_q-layer.6": 560.4579, "encoder_q-layer.7": 580.2681, "encoder_q-layer.8": 624.6415, "encoder_q-layer.9": 598.7725, "epoch": 0.28, "inbatch_neg_score": 0.1147, "inbatch_pos_score": 0.7573, "learning_rate": 3.972222222222222e-05, "loss": 3.6803, "norm_diff": 0.0303, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1040.3982, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1151, "query_norm": 1.3371, "queue_k_norm": 1.3634, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6168, "sent_len_1": 66.7779, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.1675, "stdk": 0.0478, "stdq": 0.0449, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 28500 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.6804, "doc_norm": 1.3567, "encoder_q-embeddings": 1069.1294, "encoder_q-layer.0": 689.3513, "encoder_q-layer.1": 719.9915, "encoder_q-layer.10": 1261.7524, "encoder_q-layer.11": 3106.459, "encoder_q-layer.2": 810.0848, "encoder_q-layer.3": 858.4899, "encoder_q-layer.4": 911.1342, "encoder_q-layer.5": 885.597, "encoder_q-layer.6": 1036.6559, "encoder_q-layer.7": 1181.3151, "encoder_q-layer.8": 1319.9038, "encoder_q-layer.9": 1175.1294, "epoch": 0.28, "inbatch_neg_score": 0.1109, "inbatch_pos_score": 0.731, "learning_rate": 3.966666666666667e-05, "loss": 3.6804, "norm_diff": 0.0232, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1979.9441, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1104, "query_norm": 1.3365, "queue_k_norm": 1.3636, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4346, "sent_len_1": 66.5967, "sent_max_len_0": 128.0, "sent_max_len_1": 190.63, "stdk": 0.0474, "stdq": 0.045, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 28600 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.6652, "doc_norm": 1.3621, "encoder_q-embeddings": 1483.9385, "encoder_q-layer.0": 951.7963, "encoder_q-layer.1": 1033.4312, "encoder_q-layer.10": 1377.9448, "encoder_q-layer.11": 3180.092, "encoder_q-layer.2": 1189.2141, "encoder_q-layer.3": 1186.5779, "encoder_q-layer.4": 1218.8165, "encoder_q-layer.5": 1252.5618, "encoder_q-layer.6": 1312.801, "encoder_q-layer.7": 1370.7864, "encoder_q-layer.8": 1390.9982, "encoder_q-layer.9": 1249.4384, "epoch": 0.28, "inbatch_neg_score": 0.1134, "inbatch_pos_score": 0.7085, "learning_rate": 3.961111111111111e-05, "loss": 3.6652, "norm_diff": 0.036, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2282.6071, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.112, "query_norm": 1.326, "queue_k_norm": 1.3638, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.3583, "sent_len_1": 67.0263, "sent_max_len_0": 128.0, "sent_max_len_1": 188.105, "stdk": 0.0476, "stdq": 0.0446, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 28700 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.686, "doc_norm": 1.3612, "encoder_q-embeddings": 1135.1187, "encoder_q-layer.0": 719.2528, "encoder_q-layer.1": 754.6346, "encoder_q-layer.10": 1428.0154, "encoder_q-layer.11": 3159.0759, "encoder_q-layer.2": 838.8551, "encoder_q-layer.3": 831.8137, "encoder_q-layer.4": 831.6024, "encoder_q-layer.5": 873.1046, "encoder_q-layer.6": 996.5211, "encoder_q-layer.7": 1145.1283, "encoder_q-layer.8": 1468.8029, "encoder_q-layer.9": 1295.1362, "epoch": 0.28, "inbatch_neg_score": 0.1167, "inbatch_pos_score": 0.7344, "learning_rate": 3.9555555555555556e-05, "loss": 3.686, "norm_diff": 0.0244, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1997.0492, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1166, "query_norm": 1.3519, "queue_k_norm": 1.3642, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5414, "sent_len_1": 67.0535, "sent_max_len_0": 127.9975, "sent_max_len_1": 191.3013, "stdk": 0.0476, "stdq": 0.0456, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 28800 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.6717, "doc_norm": 1.3625, "encoder_q-embeddings": 2020.1946, "encoder_q-layer.0": 1454.0292, "encoder_q-layer.1": 1583.3893, "encoder_q-layer.10": 1406.439, "encoder_q-layer.11": 3297.7305, "encoder_q-layer.2": 1856.7426, "encoder_q-layer.3": 2008.8943, "encoder_q-layer.4": 2061.6396, "encoder_q-layer.5": 2314.5808, "encoder_q-layer.6": 2393.0391, "encoder_q-layer.7": 2198.9961, "encoder_q-layer.8": 2022.475, "encoder_q-layer.9": 1267.4865, "epoch": 0.28, "inbatch_neg_score": 0.1216, "inbatch_pos_score": 0.7202, "learning_rate": 3.9500000000000005e-05, "loss": 3.6717, "norm_diff": 0.0475, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3063.015, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1214, "query_norm": 1.315, "queue_k_norm": 1.3644, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7278, "sent_len_1": 66.7813, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9913, "stdk": 0.0476, "stdq": 0.0442, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 28900 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.6787, "doc_norm": 1.3684, "encoder_q-embeddings": 1097.4495, "encoder_q-layer.0": 735.8154, "encoder_q-layer.1": 796.5815, "encoder_q-layer.10": 1205.5625, "encoder_q-layer.11": 2936.9373, "encoder_q-layer.2": 863.2534, "encoder_q-layer.3": 898.301, "encoder_q-layer.4": 1002.6834, "encoder_q-layer.5": 988.024, "encoder_q-layer.6": 1162.4092, "encoder_q-layer.7": 1256.1918, "encoder_q-layer.8": 1313.5266, "encoder_q-layer.9": 1144.8539, "epoch": 0.28, "inbatch_neg_score": 0.123, "inbatch_pos_score": 0.7671, "learning_rate": 3.944444444444445e-05, "loss": 3.6787, "norm_diff": 0.0181, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1961.9307, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1229, "query_norm": 1.3528, "queue_k_norm": 1.3645, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.2935, "sent_len_1": 66.5045, "sent_max_len_0": 127.9975, "sent_max_len_1": 191.1562, "stdk": 0.0478, "stdq": 0.0457, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 29000 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.6575, "doc_norm": 1.3584, "encoder_q-embeddings": 1596.0636, "encoder_q-layer.0": 1073.8557, "encoder_q-layer.1": 1079.2845, "encoder_q-layer.10": 1250.1501, "encoder_q-layer.11": 2971.5171, "encoder_q-layer.2": 1209.0789, "encoder_q-layer.3": 1173.5199, "encoder_q-layer.4": 1056.7675, "encoder_q-layer.5": 1090.0884, "encoder_q-layer.6": 1247.3978, "encoder_q-layer.7": 1304.5647, "encoder_q-layer.8": 1323.1188, "encoder_q-layer.9": 1119.783, "epoch": 0.28, "inbatch_neg_score": 0.129, "inbatch_pos_score": 0.7441, "learning_rate": 3.938888888888889e-05, "loss": 3.6575, "norm_diff": 0.0412, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2180.8039, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1289, "query_norm": 1.3173, "queue_k_norm": 1.3649, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6026, "sent_len_1": 66.7566, "sent_max_len_0": 127.99, "sent_max_len_1": 190.5288, "stdk": 0.0474, "stdq": 0.0443, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 29100 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.6551, "doc_norm": 1.3647, "encoder_q-embeddings": 2375.395, "encoder_q-layer.0": 1549.6786, "encoder_q-layer.1": 1777.1084, "encoder_q-layer.10": 1477.3275, "encoder_q-layer.11": 3250.8096, "encoder_q-layer.2": 2138.3181, "encoder_q-layer.3": 2201.7644, "encoder_q-layer.4": 2459.7471, "encoder_q-layer.5": 2624.2512, "encoder_q-layer.6": 2692.3787, "encoder_q-layer.7": 2132.7791, "encoder_q-layer.8": 1987.746, "encoder_q-layer.9": 1278.8488, "epoch": 0.29, "inbatch_neg_score": 0.1228, "inbatch_pos_score": 0.7622, "learning_rate": 3.933333333333333e-05, "loss": 3.6551, "norm_diff": 0.0264, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3280.2724, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1232, "query_norm": 1.3384, "queue_k_norm": 1.3657, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6661, "sent_len_1": 66.9695, "sent_max_len_0": 128.0, "sent_max_len_1": 191.9588, "stdk": 0.0476, "stdq": 0.0455, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 29200 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.6647, "doc_norm": 1.3583, "encoder_q-embeddings": 1458.1921, "encoder_q-layer.0": 947.8936, "encoder_q-layer.1": 1072.7928, "encoder_q-layer.10": 1314.161, "encoder_q-layer.11": 3297.7729, "encoder_q-layer.2": 1217.2621, "encoder_q-layer.3": 1228.7875, "encoder_q-layer.4": 1173.0839, "encoder_q-layer.5": 1146.2786, "encoder_q-layer.6": 1176.5503, "encoder_q-layer.7": 1198.7595, "encoder_q-layer.8": 1374.3845, "encoder_q-layer.9": 1173.0255, "epoch": 0.29, "inbatch_neg_score": 0.1178, "inbatch_pos_score": 0.7451, "learning_rate": 3.927777777777778e-05, "loss": 3.6647, "norm_diff": 0.056, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2290.6287, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1183, "query_norm": 1.3024, "queue_k_norm": 1.3666, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.8508, "sent_len_1": 66.8383, "sent_max_len_0": 127.9988, "sent_max_len_1": 191.0213, "stdk": 0.0475, "stdq": 0.0442, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 29300 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.6607, "doc_norm": 1.373, "encoder_q-embeddings": 1131.4269, "encoder_q-layer.0": 697.9001, "encoder_q-layer.1": 716.0029, "encoder_q-layer.10": 1283.3844, "encoder_q-layer.11": 3227.6292, "encoder_q-layer.2": 787.4813, "encoder_q-layer.3": 820.2982, "encoder_q-layer.4": 889.6324, "encoder_q-layer.5": 884.4893, "encoder_q-layer.6": 1065.7303, "encoder_q-layer.7": 1167.2721, "encoder_q-layer.8": 1453.605, "encoder_q-layer.9": 1247.7491, "epoch": 0.29, "inbatch_neg_score": 0.1213, "inbatch_pos_score": 0.7446, "learning_rate": 3.922222222222223e-05, "loss": 3.6607, "norm_diff": 0.0774, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2065.5114, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.122, "query_norm": 1.2956, "queue_k_norm": 1.3657, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6474, "sent_len_1": 66.6271, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8088, "stdk": 0.048, "stdq": 0.044, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 29400 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.6763, "doc_norm": 1.3676, "encoder_q-embeddings": 1202.4669, "encoder_q-layer.0": 774.7161, "encoder_q-layer.1": 792.727, "encoder_q-layer.10": 1253.5734, "encoder_q-layer.11": 2936.6697, "encoder_q-layer.2": 908.2374, "encoder_q-layer.3": 952.5177, "encoder_q-layer.4": 1026.8671, "encoder_q-layer.5": 1113.3197, "encoder_q-layer.6": 1190.8615, "encoder_q-layer.7": 1218.8523, "encoder_q-layer.8": 1348.4901, "encoder_q-layer.9": 1195.3817, "epoch": 0.29, "inbatch_neg_score": 0.122, "inbatch_pos_score": 0.7402, "learning_rate": 3.9166666666666665e-05, "loss": 3.6763, "norm_diff": 0.0543, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1989.5856, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.121, "query_norm": 1.3133, "queue_k_norm": 1.3671, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7459, "sent_len_1": 66.8205, "sent_max_len_0": 127.9938, "sent_max_len_1": 187.6825, "stdk": 0.0477, "stdq": 0.0446, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 29500 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.6689, "doc_norm": 1.3643, "encoder_q-embeddings": 1291.9083, "encoder_q-layer.0": 832.884, "encoder_q-layer.1": 954.9411, "encoder_q-layer.10": 1245.0885, "encoder_q-layer.11": 3141.2656, "encoder_q-layer.2": 1118.1559, "encoder_q-layer.3": 1121.1595, "encoder_q-layer.4": 1241.3558, "encoder_q-layer.5": 1220.7428, "encoder_q-layer.6": 1227.1847, "encoder_q-layer.7": 1326.9625, "encoder_q-layer.8": 1353.4071, "encoder_q-layer.9": 1127.4912, "epoch": 0.29, "inbatch_neg_score": 0.1213, "inbatch_pos_score": 0.7495, "learning_rate": 3.9111111111111115e-05, "loss": 3.6689, "norm_diff": 0.0451, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2196.0603, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1218, "query_norm": 1.3192, "queue_k_norm": 1.3654, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5281, "sent_len_1": 66.7323, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.565, "stdk": 0.0476, "stdq": 0.0445, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 29600 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.6577, "doc_norm": 1.3668, "encoder_q-embeddings": 1099.5582, "encoder_q-layer.0": 770.6804, "encoder_q-layer.1": 806.4469, "encoder_q-layer.10": 1188.7305, "encoder_q-layer.11": 3034.9282, "encoder_q-layer.2": 876.8379, "encoder_q-layer.3": 910.824, "encoder_q-layer.4": 986.2327, "encoder_q-layer.5": 1033.4552, "encoder_q-layer.6": 1121.4181, "encoder_q-layer.7": 1160.4816, "encoder_q-layer.8": 1279.3895, "encoder_q-layer.9": 1119.6846, "epoch": 0.29, "inbatch_neg_score": 0.1272, "inbatch_pos_score": 0.748, "learning_rate": 3.905555555555556e-05, "loss": 3.6577, "norm_diff": 0.0443, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1978.203, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1281, "query_norm": 1.3225, "queue_k_norm": 1.3669, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6495, "sent_len_1": 66.7011, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4812, "stdk": 0.0477, "stdq": 0.0442, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 29700 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.6561, "doc_norm": 1.3686, "encoder_q-embeddings": 1233.2406, "encoder_q-layer.0": 792.2429, "encoder_q-layer.1": 818.7854, "encoder_q-layer.10": 1417.2006, "encoder_q-layer.11": 3268.4263, "encoder_q-layer.2": 967.1748, "encoder_q-layer.3": 985.3978, "encoder_q-layer.4": 1028.6844, "encoder_q-layer.5": 1036.5885, "encoder_q-layer.6": 1257.3242, "encoder_q-layer.7": 1356.615, "encoder_q-layer.8": 1548.6735, "encoder_q-layer.9": 1289.0613, "epoch": 0.29, "inbatch_neg_score": 0.1144, "inbatch_pos_score": 0.731, "learning_rate": 3.9000000000000006e-05, "loss": 3.6561, "norm_diff": 0.0566, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2147.9726, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1153, "query_norm": 1.3121, "queue_k_norm": 1.3673, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7073, "sent_len_1": 66.7822, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.4538, "stdk": 0.0477, "stdq": 0.0441, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 29800 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.696, "doc_norm": 1.3722, "encoder_q-embeddings": 1102.9471, "encoder_q-layer.0": 719.3687, "encoder_q-layer.1": 756.7296, "encoder_q-layer.10": 1287.2839, "encoder_q-layer.11": 2997.7166, "encoder_q-layer.2": 844.5563, "encoder_q-layer.3": 845.0155, "encoder_q-layer.4": 888.407, "encoder_q-layer.5": 921.7715, "encoder_q-layer.6": 1027.7441, "encoder_q-layer.7": 1145.2269, "encoder_q-layer.8": 1302.1497, "encoder_q-layer.9": 1182.3505, "epoch": 0.29, "inbatch_neg_score": 0.1075, "inbatch_pos_score": 0.7378, "learning_rate": 3.894444444444444e-05, "loss": 3.696, "norm_diff": 0.0391, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1937.9436, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1074, "query_norm": 1.3331, "queue_k_norm": 1.3678, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4695, "sent_len_1": 66.5963, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9575, "stdk": 0.0478, "stdq": 0.0447, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 29900 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.6362, "doc_norm": 1.3683, "encoder_q-embeddings": 1028.6825, "encoder_q-layer.0": 679.584, "encoder_q-layer.1": 695.0249, "encoder_q-layer.10": 1326.8285, "encoder_q-layer.11": 3287.4011, "encoder_q-layer.2": 758.5038, "encoder_q-layer.3": 756.6951, "encoder_q-layer.4": 799.1171, "encoder_q-layer.5": 830.4103, "encoder_q-layer.6": 920.5366, "encoder_q-layer.7": 1056.8646, "encoder_q-layer.8": 1248.103, "encoder_q-layer.9": 1180.2412, "epoch": 0.29, "inbatch_neg_score": 0.1038, "inbatch_pos_score": 0.7222, "learning_rate": 3.888888888888889e-05, "loss": 3.6362, "norm_diff": 0.0469, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1964.9102, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1042, "query_norm": 1.3214, "queue_k_norm": 1.3688, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7293, "sent_len_1": 66.7281, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1863, "stdk": 0.0476, "stdq": 0.0438, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30000 }, { "dev_runtime": 26.6113, "dev_samples_per_second": 2.405, "dev_steps_per_second": 0.038, "epoch": 0.29, "step": 30000, "test_accuracy": 93.29833984375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4043920636177063, "test_doc_norm": 1.3354803323745728, "test_inbatch_neg_score": 0.43266260623931885, "test_inbatch_pos_score": 1.300647258758545, "test_loss": 0.4043920636177063, "test_loss_align": 1.1955745220184326, "test_loss_unif": 3.9609198570251465, "test_loss_unif_q@queue": 3.9609198570251465, "test_norm_diff": 0.07348982244729996, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.09710794687271118, "test_query_norm": 1.4089703559875488, "test_queue_k_norm": 1.3686840534210205, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04133227467536926, "test_stdq": 0.04135624319314957, "test_stdqueue_k": 0.04776468873023987, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.6113, "dev_samples_per_second": 2.405, "dev_steps_per_second": 0.038, "epoch": 0.29, "eval_beir-arguana_ndcg@10": 0.35546, "eval_beir-arguana_recall@10": 0.59886, "eval_beir-arguana_recall@100": 0.88691, "eval_beir-arguana_recall@20": 0.73471, "eval_beir-avg_ndcg@10": 0.33956583333333334, "eval_beir-avg_recall@10": 0.4067254166666666, "eval_beir-avg_recall@100": 0.5910660833333333, "eval_beir-avg_recall@20": 0.46572966666666665, "eval_beir-cqadupstack_ndcg@10": 0.23872833333333332, "eval_beir-cqadupstack_recall@10": 0.32809416666666663, "eval_beir-cqadupstack_recall@100": 0.5536108333333334, "eval_beir-cqadupstack_recall@20": 0.39151666666666673, "eval_beir-fiqa_ndcg@10": 0.2063, "eval_beir-fiqa_recall@10": 0.2663, "eval_beir-fiqa_recall@100": 0.52064, "eval_beir-fiqa_recall@20": 0.33855, "eval_beir-nfcorpus_ndcg@10": 0.26474, "eval_beir-nfcorpus_recall@10": 0.12899, "eval_beir-nfcorpus_recall@100": 0.25565, "eval_beir-nfcorpus_recall@20": 0.1594, "eval_beir-nq_ndcg@10": 0.22955, "eval_beir-nq_recall@10": 0.38613, "eval_beir-nq_recall@100": 0.72219, "eval_beir-nq_recall@20": 0.49358, "eval_beir-quora_ndcg@10": 0.75177, "eval_beir-quora_recall@10": 0.86017, "eval_beir-quora_recall@100": 0.96835, "eval_beir-quora_recall@20": 0.90436, "eval_beir-scidocs_ndcg@10": 0.13916, "eval_beir-scidocs_recall@10": 0.14502, "eval_beir-scidocs_recall@100": 0.34228, "eval_beir-scidocs_recall@20": 0.19785, "eval_beir-scifact_ndcg@10": 0.59518, "eval_beir-scifact_recall@10": 0.75261, "eval_beir-scifact_recall@100": 0.89922, "eval_beir-scifact_recall@20": 0.80289, "eval_beir-trec-covid_ndcg@10": 0.47071, "eval_beir-trec-covid_recall@10": 0.496, "eval_beir-trec-covid_recall@100": 0.364, "eval_beir-trec-covid_recall@20": 0.465, "eval_beir-webis-touche2020_ndcg@10": 0.14406, "eval_beir-webis-touche2020_recall@10": 0.10508, "eval_beir-webis-touche2020_recall@100": 0.39781, "eval_beir-webis-touche2020_recall@20": 0.16944, "eval_senteval-avg_sts": 0.747651452994704, "eval_senteval-sickr_spearman": 0.7104784059965329, "eval_senteval-stsb_spearman": 0.7848244999928753, "step": 30000, "test_accuracy": 93.29833984375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4043920636177063, "test_doc_norm": 1.3354803323745728, "test_inbatch_neg_score": 0.43266260623931885, "test_inbatch_pos_score": 1.300647258758545, "test_loss": 0.4043920636177063, "test_loss_align": 1.1955745220184326, "test_loss_unif": 3.9609198570251465, "test_loss_unif_q@queue": 3.9609198570251465, "test_norm_diff": 0.07348982244729996, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.09710794687271118, "test_query_norm": 1.4089703559875488, "test_queue_k_norm": 1.3686840534210205, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04133227467536926, "test_stdq": 0.04135624319314957, "test_stdqueue_k": 0.04776468873023987, "test_stdqueue_q": 0.0 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.6537, "doc_norm": 1.3799, "encoder_q-embeddings": 1098.0074, "encoder_q-layer.0": 691.4932, "encoder_q-layer.1": 720.4187, "encoder_q-layer.10": 1284.0515, "encoder_q-layer.11": 3296.5923, "encoder_q-layer.2": 789.4342, "encoder_q-layer.3": 837.857, "encoder_q-layer.4": 884.0397, "encoder_q-layer.5": 941.7459, "encoder_q-layer.6": 1068.2158, "encoder_q-layer.7": 1149.3229, "encoder_q-layer.8": 1292.0774, "encoder_q-layer.9": 1155.1387, "epoch": 0.29, "inbatch_neg_score": 0.0904, "inbatch_pos_score": 0.7104, "learning_rate": 3.883333333333333e-05, "loss": 3.6537, "norm_diff": 0.0421, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2032.2302, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0894, "query_norm": 1.3392, "queue_k_norm": 1.3722, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7143, "sent_len_1": 66.7861, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9913, "stdk": 0.048, "stdq": 0.0444, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 30100 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.6555, "doc_norm": 1.3736, "encoder_q-embeddings": 1527.7684, "encoder_q-layer.0": 1033.2024, "encoder_q-layer.1": 1068.4722, "encoder_q-layer.10": 1240.1293, "encoder_q-layer.11": 3175.1277, "encoder_q-layer.2": 1183.611, "encoder_q-layer.3": 1240.99, "encoder_q-layer.4": 1272.0608, "encoder_q-layer.5": 1220.7037, "encoder_q-layer.6": 1101.8331, "encoder_q-layer.7": 1089.0791, "encoder_q-layer.8": 1237.4669, "encoder_q-layer.9": 1103.5732, "epoch": 0.29, "inbatch_neg_score": 0.0806, "inbatch_pos_score": 0.6934, "learning_rate": 3.877777777777778e-05, "loss": 3.6555, "norm_diff": 0.025, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2198.4576, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0809, "query_norm": 1.3499, "queue_k_norm": 1.371, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5763, "sent_len_1": 66.834, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.3487, "stdk": 0.0478, "stdq": 0.0442, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30200 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.6769, "doc_norm": 1.3695, "encoder_q-embeddings": 1554.5176, "encoder_q-layer.0": 997.9687, "encoder_q-layer.1": 1128.0385, "encoder_q-layer.10": 1232.4231, "encoder_q-layer.11": 3332.5449, "encoder_q-layer.2": 1221.343, "encoder_q-layer.3": 1228.5128, "encoder_q-layer.4": 1257.7823, "encoder_q-layer.5": 1222.3326, "encoder_q-layer.6": 1303.7924, "encoder_q-layer.7": 1309.3225, "encoder_q-layer.8": 1421.9249, "encoder_q-layer.9": 1247.9078, "epoch": 0.3, "inbatch_neg_score": 0.0709, "inbatch_pos_score": 0.6885, "learning_rate": 3.8722222222222225e-05, "loss": 3.6769, "norm_diff": 0.0151, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2326.7338, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0711, "query_norm": 1.3595, "queue_k_norm": 1.3701, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4631, "sent_len_1": 66.3817, "sent_max_len_0": 127.9938, "sent_max_len_1": 187.8262, "stdk": 0.0477, "stdq": 0.0444, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30300 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.6604, "doc_norm": 1.3711, "encoder_q-embeddings": 1214.9297, "encoder_q-layer.0": 806.8443, "encoder_q-layer.1": 860.8585, "encoder_q-layer.10": 1346.5817, "encoder_q-layer.11": 3295.1199, "encoder_q-layer.2": 995.329, "encoder_q-layer.3": 990.4992, "encoder_q-layer.4": 1005.499, "encoder_q-layer.5": 1050.8162, "encoder_q-layer.6": 1250.2983, "encoder_q-layer.7": 1199.1368, "encoder_q-layer.8": 1305.7739, "encoder_q-layer.9": 1220.7487, "epoch": 0.3, "inbatch_neg_score": 0.0762, "inbatch_pos_score": 0.7012, "learning_rate": 3.866666666666667e-05, "loss": 3.6604, "norm_diff": 0.0245, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2139.2975, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0759, "query_norm": 1.3623, "queue_k_norm": 1.3701, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4536, "sent_len_1": 66.5237, "sent_max_len_0": 128.0, "sent_max_len_1": 186.8887, "stdk": 0.0478, "stdq": 0.0446, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30400 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.6203, "doc_norm": 1.3713, "encoder_q-embeddings": 1345.9163, "encoder_q-layer.0": 916.1301, "encoder_q-layer.1": 949.3579, "encoder_q-layer.10": 1230.6005, "encoder_q-layer.11": 3220.4355, "encoder_q-layer.2": 1038.397, "encoder_q-layer.3": 1024.4077, "encoder_q-layer.4": 1048.5359, "encoder_q-layer.5": 1032.2502, "encoder_q-layer.6": 1132.9132, "encoder_q-layer.7": 1260.958, "encoder_q-layer.8": 1311.8258, "encoder_q-layer.9": 1168.8158, "epoch": 0.3, "inbatch_neg_score": 0.0799, "inbatch_pos_score": 0.6943, "learning_rate": 3.8611111111111116e-05, "loss": 3.6203, "norm_diff": 0.0316, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2140.9775, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0803, "query_norm": 1.3435, "queue_k_norm": 1.3706, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6012, "sent_len_1": 66.6949, "sent_max_len_0": 128.0, "sent_max_len_1": 187.1763, "stdk": 0.0478, "stdq": 0.044, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 30500 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.6261, "doc_norm": 1.3777, "encoder_q-embeddings": 2277.854, "encoder_q-layer.0": 1499.5002, "encoder_q-layer.1": 1548.7191, "encoder_q-layer.10": 2604.0474, "encoder_q-layer.11": 6588.1826, "encoder_q-layer.2": 1703.134, "encoder_q-layer.3": 1750.0569, "encoder_q-layer.4": 1862.9132, "encoder_q-layer.5": 1973.095, "encoder_q-layer.6": 2180.5232, "encoder_q-layer.7": 2269.2493, "encoder_q-layer.8": 2547.0701, "encoder_q-layer.9": 2307.7637, "epoch": 0.3, "inbatch_neg_score": 0.078, "inbatch_pos_score": 0.7192, "learning_rate": 3.855555555555556e-05, "loss": 3.6261, "norm_diff": 0.024, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4082.4703, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0792, "query_norm": 1.3537, "queue_k_norm": 1.3726, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6765, "sent_len_1": 66.8979, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.7862, "stdk": 0.048, "stdq": 0.0448, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 30600 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.6543, "doc_norm": 1.3758, "encoder_q-embeddings": 2650.0847, "encoder_q-layer.0": 1821.6421, "encoder_q-layer.1": 2006.1482, "encoder_q-layer.10": 2719.657, "encoder_q-layer.11": 6484.2256, "encoder_q-layer.2": 2153.7944, "encoder_q-layer.3": 2221.1931, "encoder_q-layer.4": 2359.3989, "encoder_q-layer.5": 2575.0586, "encoder_q-layer.6": 2701.0972, "encoder_q-layer.7": 2720.0007, "encoder_q-layer.8": 2885.8464, "encoder_q-layer.9": 2468.2349, "epoch": 0.3, "inbatch_neg_score": 0.0855, "inbatch_pos_score": 0.7178, "learning_rate": 3.85e-05, "loss": 3.6543, "norm_diff": 0.0422, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4459.9508, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0854, "query_norm": 1.3336, "queue_k_norm": 1.3729, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.3747, "sent_len_1": 66.6681, "sent_max_len_0": 128.0, "sent_max_len_1": 188.69, "stdk": 0.048, "stdq": 0.0441, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 30700 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.6333, "doc_norm": 1.3724, "encoder_q-embeddings": 2914.1113, "encoder_q-layer.0": 1918.9421, "encoder_q-layer.1": 2081.0977, "encoder_q-layer.10": 2783.6606, "encoder_q-layer.11": 6466.6133, "encoder_q-layer.2": 2276.2607, "encoder_q-layer.3": 2490.5928, "encoder_q-layer.4": 2553.0154, "encoder_q-layer.5": 2642.4275, "encoder_q-layer.6": 2935.5032, "encoder_q-layer.7": 2945.0825, "encoder_q-layer.8": 3039.7964, "encoder_q-layer.9": 2478.6824, "epoch": 0.3, "inbatch_neg_score": 0.0854, "inbatch_pos_score": 0.7324, "learning_rate": 3.844444444444444e-05, "loss": 3.6333, "norm_diff": 0.0235, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4650.4799, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0845, "query_norm": 1.3488, "queue_k_norm": 1.3713, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5627, "sent_len_1": 66.6693, "sent_max_len_0": 128.0, "sent_max_len_1": 189.55, "stdk": 0.0479, "stdq": 0.0456, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 30800 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.6358, "doc_norm": 1.3717, "encoder_q-embeddings": 3552.5435, "encoder_q-layer.0": 2323.1123, "encoder_q-layer.1": 2420.2319, "encoder_q-layer.10": 2658.936, "encoder_q-layer.11": 6298.6621, "encoder_q-layer.2": 2694.1221, "encoder_q-layer.3": 2842.1196, "encoder_q-layer.4": 2821.196, "encoder_q-layer.5": 2757.5461, "encoder_q-layer.6": 2901.5784, "encoder_q-layer.7": 2860.0139, "encoder_q-layer.8": 2869.5591, "encoder_q-layer.9": 2445.2095, "epoch": 0.3, "inbatch_neg_score": 0.0812, "inbatch_pos_score": 0.7261, "learning_rate": 3.838888888888889e-05, "loss": 3.6358, "norm_diff": 0.0472, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4880.2639, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0821, "query_norm": 1.328, "queue_k_norm": 1.372, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5207, "sent_len_1": 66.9101, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.2225, "stdk": 0.0479, "stdq": 0.0446, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 30900 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.6241, "doc_norm": 1.3709, "encoder_q-embeddings": 4270.3745, "encoder_q-layer.0": 2837.8511, "encoder_q-layer.1": 2802.3838, "encoder_q-layer.10": 2408.0837, "encoder_q-layer.11": 6617.8047, "encoder_q-layer.2": 3182.5291, "encoder_q-layer.3": 3407.4641, "encoder_q-layer.4": 3647.2595, "encoder_q-layer.5": 3051.0476, "encoder_q-layer.6": 2960.4692, "encoder_q-layer.7": 2577.3447, "encoder_q-layer.8": 2529.562, "encoder_q-layer.9": 2209.4658, "epoch": 0.3, "inbatch_neg_score": 0.0911, "inbatch_pos_score": 0.6919, "learning_rate": 3.8333333333333334e-05, "loss": 3.6241, "norm_diff": 0.0924, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5333.0784, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0908, "query_norm": 1.2785, "queue_k_norm": 1.3717, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5417, "sent_len_1": 66.7671, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.8738, "stdk": 0.0479, "stdq": 0.0429, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 31000 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.6422, "doc_norm": 1.3657, "encoder_q-embeddings": 4638.6841, "encoder_q-layer.0": 3220.5986, "encoder_q-layer.1": 3731.9795, "encoder_q-layer.10": 2657.873, "encoder_q-layer.11": 6793.4858, "encoder_q-layer.2": 4379.4116, "encoder_q-layer.3": 3886.0945, "encoder_q-layer.4": 3890.3071, "encoder_q-layer.5": 3799.6982, "encoder_q-layer.6": 3533.9849, "encoder_q-layer.7": 3124.0173, "encoder_q-layer.8": 2793.4094, "encoder_q-layer.9": 2420.3516, "epoch": 0.3, "inbatch_neg_score": 0.0865, "inbatch_pos_score": 0.7119, "learning_rate": 3.827777777777778e-05, "loss": 3.6422, "norm_diff": 0.0427, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6025.8728, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0861, "query_norm": 1.3229, "queue_k_norm": 1.3699, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4356, "sent_len_1": 66.6566, "sent_max_len_0": 128.0, "sent_max_len_1": 189.71, "stdk": 0.0477, "stdq": 0.045, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 31100 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.6287, "doc_norm": 1.3718, "encoder_q-embeddings": 2559.9834, "encoder_q-layer.0": 1635.8701, "encoder_q-layer.1": 1713.2615, "encoder_q-layer.10": 2792.2812, "encoder_q-layer.11": 7022.4355, "encoder_q-layer.2": 1894.7524, "encoder_q-layer.3": 1976.7124, "encoder_q-layer.4": 2006.3181, "encoder_q-layer.5": 2094.5894, "encoder_q-layer.6": 2352.8889, "encoder_q-layer.7": 2422.3684, "encoder_q-layer.8": 2760.1042, "encoder_q-layer.9": 2554.6228, "epoch": 0.3, "inbatch_neg_score": 0.0896, "inbatch_pos_score": 0.7031, "learning_rate": 3.8222222222222226e-05, "loss": 3.6287, "norm_diff": 0.0463, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4492.3598, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0902, "query_norm": 1.3255, "queue_k_norm": 1.3704, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5812, "sent_len_1": 66.5907, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.3313, "stdk": 0.048, "stdq": 0.0451, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 31200 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.6489, "doc_norm": 1.363, "encoder_q-embeddings": 2215.2876, "encoder_q-layer.0": 1450.1704, "encoder_q-layer.1": 1547.3923, "encoder_q-layer.10": 2728.8828, "encoder_q-layer.11": 6668.8604, "encoder_q-layer.2": 1734.9017, "encoder_q-layer.3": 1785.6263, "encoder_q-layer.4": 1916.9877, "encoder_q-layer.5": 1931.2341, "encoder_q-layer.6": 2114.241, "encoder_q-layer.7": 2409.6663, "encoder_q-layer.8": 2649.2939, "encoder_q-layer.9": 2426.9023, "epoch": 0.31, "inbatch_neg_score": 0.0896, "inbatch_pos_score": 0.7173, "learning_rate": 3.816666666666667e-05, "loss": 3.6489, "norm_diff": 0.0695, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4220.7601, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0907, "query_norm": 1.2935, "queue_k_norm": 1.3666, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6428, "sent_len_1": 66.6122, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9575, "stdk": 0.0477, "stdq": 0.0439, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 31300 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.6362, "doc_norm": 1.362, "encoder_q-embeddings": 3353.7532, "encoder_q-layer.0": 2478.4875, "encoder_q-layer.1": 2413.689, "encoder_q-layer.10": 2611.957, "encoder_q-layer.11": 6442.2827, "encoder_q-layer.2": 2766.0178, "encoder_q-layer.3": 2777.4954, "encoder_q-layer.4": 2813.9944, "encoder_q-layer.5": 2877.7566, "encoder_q-layer.6": 3001.8962, "encoder_q-layer.7": 2651.6362, "encoder_q-layer.8": 2601.8003, "encoder_q-layer.9": 2231.9739, "epoch": 0.31, "inbatch_neg_score": 0.0836, "inbatch_pos_score": 0.6699, "learning_rate": 3.811111111111112e-05, "loss": 3.6362, "norm_diff": 0.0973, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4815.1598, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.084, "query_norm": 1.2647, "queue_k_norm": 1.3676, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4272, "sent_len_1": 66.7883, "sent_max_len_0": 128.0, "sent_max_len_1": 187.2012, "stdk": 0.0477, "stdq": 0.0431, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 31400 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.6321, "doc_norm": 1.3706, "encoder_q-embeddings": 2353.7612, "encoder_q-layer.0": 1471.7966, "encoder_q-layer.1": 1544.5808, "encoder_q-layer.10": 2580.2202, "encoder_q-layer.11": 6622.7021, "encoder_q-layer.2": 1694.608, "encoder_q-layer.3": 1775.5316, "encoder_q-layer.4": 1846.1455, "encoder_q-layer.5": 1958.6477, "encoder_q-layer.6": 2136.0342, "encoder_q-layer.7": 2442.7634, "encoder_q-layer.8": 2792.6611, "encoder_q-layer.9": 2504.9719, "epoch": 0.31, "inbatch_neg_score": 0.0884, "inbatch_pos_score": 0.7192, "learning_rate": 3.805555555555555e-05, "loss": 3.6321, "norm_diff": 0.041, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4183.5637, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0884, "query_norm": 1.3296, "queue_k_norm": 1.3658, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5063, "sent_len_1": 66.8614, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.87, "stdk": 0.048, "stdq": 0.0449, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 31500 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.6249, "doc_norm": 1.3677, "encoder_q-embeddings": 2873.1687, "encoder_q-layer.0": 1982.2078, "encoder_q-layer.1": 2097.1501, "encoder_q-layer.10": 2673.7798, "encoder_q-layer.11": 6403.168, "encoder_q-layer.2": 2443.6118, "encoder_q-layer.3": 2537.4182, "encoder_q-layer.4": 2684.6855, "encoder_q-layer.5": 2622.8923, "encoder_q-layer.6": 2809.437, "encoder_q-layer.7": 3003.625, "encoder_q-layer.8": 3133.6277, "encoder_q-layer.9": 2466.3677, "epoch": 0.31, "inbatch_neg_score": 0.0922, "inbatch_pos_score": 0.7192, "learning_rate": 3.8e-05, "loss": 3.6249, "norm_diff": 0.0638, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4625.337, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0922, "query_norm": 1.3039, "queue_k_norm": 1.3681, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4891, "sent_len_1": 66.962, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8975, "stdk": 0.0479, "stdq": 0.0448, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 31600 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.6055, "doc_norm": 1.3609, "encoder_q-embeddings": 2986.9814, "encoder_q-layer.0": 1964.7164, "encoder_q-layer.1": 2137.2119, "encoder_q-layer.10": 2720.0164, "encoder_q-layer.11": 6386.6514, "encoder_q-layer.2": 2331.7366, "encoder_q-layer.3": 2522.3508, "encoder_q-layer.4": 2961.7014, "encoder_q-layer.5": 3031.2993, "encoder_q-layer.6": 3281.4363, "encoder_q-layer.7": 3507.4797, "encoder_q-layer.8": 3273.3535, "encoder_q-layer.9": 2460.532, "epoch": 0.31, "inbatch_neg_score": 0.0927, "inbatch_pos_score": 0.7236, "learning_rate": 3.7944444444444444e-05, "loss": 3.6055, "norm_diff": 0.0378, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4862.2407, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0919, "query_norm": 1.3231, "queue_k_norm": 1.3655, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7751, "sent_len_1": 66.6141, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1362, "stdk": 0.0477, "stdq": 0.0453, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 31700 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.6572, "doc_norm": 1.3678, "encoder_q-embeddings": 4533.1436, "encoder_q-layer.0": 3267.0071, "encoder_q-layer.1": 3447.7161, "encoder_q-layer.10": 2617.9106, "encoder_q-layer.11": 6560.1631, "encoder_q-layer.2": 3800.188, "encoder_q-layer.3": 4450.5132, "encoder_q-layer.4": 4425.1035, "encoder_q-layer.5": 4764.665, "encoder_q-layer.6": 4402.041, "encoder_q-layer.7": 2864.4663, "encoder_q-layer.8": 2802.1904, "encoder_q-layer.9": 2515.2957, "epoch": 0.31, "inbatch_neg_score": 0.0968, "inbatch_pos_score": 0.7368, "learning_rate": 3.7888888888888894e-05, "loss": 3.6572, "norm_diff": 0.0679, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6049.779, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0968, "query_norm": 1.3, "queue_k_norm": 1.3658, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4207, "sent_len_1": 66.5859, "sent_max_len_0": 127.995, "sent_max_len_1": 188.4675, "stdk": 0.048, "stdq": 0.0446, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 31800 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.6297, "doc_norm": 1.3706, "encoder_q-embeddings": 2117.9624, "encoder_q-layer.0": 1374.7303, "encoder_q-layer.1": 1444.8705, "encoder_q-layer.10": 2563.3108, "encoder_q-layer.11": 6413.3564, "encoder_q-layer.2": 1650.4838, "encoder_q-layer.3": 1653.9093, "encoder_q-layer.4": 1752.1995, "encoder_q-layer.5": 1871.4327, "encoder_q-layer.6": 2112.6721, "encoder_q-layer.7": 2315.7493, "encoder_q-layer.8": 2675.5188, "encoder_q-layer.9": 2485.7446, "epoch": 0.31, "inbatch_neg_score": 0.0938, "inbatch_pos_score": 0.7217, "learning_rate": 3.7833333333333336e-05, "loss": 3.6297, "norm_diff": 0.0897, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4036.9003, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0948, "query_norm": 1.2809, "queue_k_norm": 1.3674, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7296, "sent_len_1": 66.9096, "sent_max_len_0": 127.995, "sent_max_len_1": 190.04, "stdk": 0.0481, "stdq": 0.044, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 31900 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.6435, "doc_norm": 1.3644, "encoder_q-embeddings": 2576.9077, "encoder_q-layer.0": 1684.3044, "encoder_q-layer.1": 1838.1427, "encoder_q-layer.10": 2859.1379, "encoder_q-layer.11": 6360.1777, "encoder_q-layer.2": 1975.2993, "encoder_q-layer.3": 2046.5792, "encoder_q-layer.4": 2130.6277, "encoder_q-layer.5": 2142.8022, "encoder_q-layer.6": 2455.4363, "encoder_q-layer.7": 2859.5806, "encoder_q-layer.8": 3261.5554, "encoder_q-layer.9": 2575.2734, "epoch": 0.31, "inbatch_neg_score": 0.0952, "inbatch_pos_score": 0.7144, "learning_rate": 3.777777777777778e-05, "loss": 3.6435, "norm_diff": 0.0727, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4375.3617, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0959, "query_norm": 1.2916, "queue_k_norm": 1.3668, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.3241, "sent_len_1": 66.7558, "sent_max_len_0": 128.0, "sent_max_len_1": 187.435, "stdk": 0.0479, "stdq": 0.0445, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 32000 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.5853, "doc_norm": 1.3659, "encoder_q-embeddings": 2497.9431, "encoder_q-layer.0": 1652.1798, "encoder_q-layer.1": 1716.5958, "encoder_q-layer.10": 2925.3623, "encoder_q-layer.11": 6665.5591, "encoder_q-layer.2": 1935.8372, "encoder_q-layer.3": 1867.1251, "encoder_q-layer.4": 1921.6284, "encoder_q-layer.5": 1916.9496, "encoder_q-layer.6": 2150.1697, "encoder_q-layer.7": 2618.729, "encoder_q-layer.8": 3009.709, "encoder_q-layer.9": 2697.0569, "epoch": 0.31, "inbatch_neg_score": 0.0997, "inbatch_pos_score": 0.7461, "learning_rate": 3.772222222222223e-05, "loss": 3.5853, "norm_diff": 0.1013, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4220.7936, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0995, "query_norm": 1.2646, "queue_k_norm": 1.366, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7348, "sent_len_1": 66.8641, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2413, "stdk": 0.0479, "stdq": 0.0433, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 32100 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.6183, "doc_norm": 1.3685, "encoder_q-embeddings": 4451.5449, "encoder_q-layer.0": 3133.5771, "encoder_q-layer.1": 3366.7766, "encoder_q-layer.10": 2573.4929, "encoder_q-layer.11": 6211.2026, "encoder_q-layer.2": 3791.7598, "encoder_q-layer.3": 3814.0449, "encoder_q-layer.4": 4039.6187, "encoder_q-layer.5": 3964.8938, "encoder_q-layer.6": 3751.5405, "encoder_q-layer.7": 3385.9402, "encoder_q-layer.8": 3209.1399, "encoder_q-layer.9": 2520.4688, "epoch": 0.31, "inbatch_neg_score": 0.098, "inbatch_pos_score": 0.7578, "learning_rate": 3.766666666666667e-05, "loss": 3.6183, "norm_diff": 0.0516, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5757.1531, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0987, "query_norm": 1.3169, "queue_k_norm": 1.3663, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7979, "sent_len_1": 66.7209, "sent_max_len_0": 128.0, "sent_max_len_1": 188.28, "stdk": 0.048, "stdq": 0.0456, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 32200 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.6132, "doc_norm": 1.3624, "encoder_q-embeddings": 2203.8284, "encoder_q-layer.0": 1446.0291, "encoder_q-layer.1": 1530.3694, "encoder_q-layer.10": 2627.0989, "encoder_q-layer.11": 6490.9795, "encoder_q-layer.2": 1726.7511, "encoder_q-layer.3": 1782.6184, "encoder_q-layer.4": 1943.9011, "encoder_q-layer.5": 2001.8973, "encoder_q-layer.6": 2083.2498, "encoder_q-layer.7": 2275.8215, "encoder_q-layer.8": 2548.822, "encoder_q-layer.9": 2406.2939, "epoch": 0.32, "inbatch_neg_score": 0.1076, "inbatch_pos_score": 0.71, "learning_rate": 3.761111111111111e-05, "loss": 3.6132, "norm_diff": 0.0859, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4135.7575, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1055, "query_norm": 1.2765, "queue_k_norm": 1.3672, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5747, "sent_len_1": 66.5867, "sent_max_len_0": 127.99, "sent_max_len_1": 189.0875, "stdk": 0.0478, "stdq": 0.0439, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 32300 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.6142, "doc_norm": 1.3649, "encoder_q-embeddings": 3024.6294, "encoder_q-layer.0": 2139.4358, "encoder_q-layer.1": 2273.1621, "encoder_q-layer.10": 2437.8621, "encoder_q-layer.11": 5878.6733, "encoder_q-layer.2": 2809.939, "encoder_q-layer.3": 2916.2917, "encoder_q-layer.4": 3167.5203, "encoder_q-layer.5": 3266.1174, "encoder_q-layer.6": 3303.8708, "encoder_q-layer.7": 3493.9009, "encoder_q-layer.8": 3919.8818, "encoder_q-layer.9": 2688.5574, "epoch": 0.32, "inbatch_neg_score": 0.1122, "inbatch_pos_score": 0.752, "learning_rate": 3.7555555555555554e-05, "loss": 3.6142, "norm_diff": 0.0533, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4944.0458, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1108, "query_norm": 1.3116, "queue_k_norm": 1.3669, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7989, "sent_len_1": 66.7673, "sent_max_len_0": 127.9988, "sent_max_len_1": 191.0075, "stdk": 0.0479, "stdq": 0.0449, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 32400 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.6138, "doc_norm": 1.3681, "encoder_q-embeddings": 2783.3367, "encoder_q-layer.0": 1788.7639, "encoder_q-layer.1": 1971.7379, "encoder_q-layer.10": 2538.9661, "encoder_q-layer.11": 6196.231, "encoder_q-layer.2": 2089.4189, "encoder_q-layer.3": 2143.0159, "encoder_q-layer.4": 2347.5781, "encoder_q-layer.5": 2276.3782, "encoder_q-layer.6": 2421.8357, "encoder_q-layer.7": 2630.3923, "encoder_q-layer.8": 2640.3254, "encoder_q-layer.9": 2380.2725, "epoch": 0.32, "inbatch_neg_score": 0.1096, "inbatch_pos_score": 0.7412, "learning_rate": 3.7500000000000003e-05, "loss": 3.6138, "norm_diff": 0.0805, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4392.7014, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1102, "query_norm": 1.2876, "queue_k_norm": 1.3683, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6377, "sent_len_1": 66.718, "sent_max_len_0": 127.9925, "sent_max_len_1": 190.8738, "stdk": 0.048, "stdq": 0.044, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 32500 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.6142, "doc_norm": 1.3696, "encoder_q-embeddings": 5320.3643, "encoder_q-layer.0": 3316.2878, "encoder_q-layer.1": 3415.3428, "encoder_q-layer.10": 4943.5625, "encoder_q-layer.11": 12670.1533, "encoder_q-layer.2": 3806.105, "encoder_q-layer.3": 3908.6165, "encoder_q-layer.4": 3979.9668, "encoder_q-layer.5": 4321.7695, "encoder_q-layer.6": 4947.8745, "encoder_q-layer.7": 4851.5483, "encoder_q-layer.8": 4924.0586, "encoder_q-layer.9": 4481.8037, "epoch": 0.32, "inbatch_neg_score": 0.1067, "inbatch_pos_score": 0.7241, "learning_rate": 3.7444444444444446e-05, "loss": 3.6142, "norm_diff": 0.0785, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8420.7278, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1077, "query_norm": 1.2911, "queue_k_norm": 1.3664, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.603, "sent_len_1": 66.8334, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1175, "stdk": 0.048, "stdq": 0.0443, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 32600 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.6183, "doc_norm": 1.3673, "encoder_q-embeddings": 4857.9292, "encoder_q-layer.0": 3100.656, "encoder_q-layer.1": 3197.5161, "encoder_q-layer.10": 5389.9814, "encoder_q-layer.11": 12886.5723, "encoder_q-layer.2": 3613.752, "encoder_q-layer.3": 3782.5701, "encoder_q-layer.4": 4021.697, "encoder_q-layer.5": 4033.4832, "encoder_q-layer.6": 4572.5854, "encoder_q-layer.7": 5227.4912, "encoder_q-layer.8": 5536.3223, "encoder_q-layer.9": 5019.5273, "epoch": 0.32, "inbatch_neg_score": 0.1113, "inbatch_pos_score": 0.7422, "learning_rate": 3.738888888888889e-05, "loss": 3.6183, "norm_diff": 0.0483, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8436.2163, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1107, "query_norm": 1.319, "queue_k_norm": 1.3701, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6702, "sent_len_1": 66.5001, "sent_max_len_0": 128.0, "sent_max_len_1": 187.4663, "stdk": 0.0479, "stdq": 0.045, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 32700 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.6365, "doc_norm": 1.3585, "encoder_q-embeddings": 10977.418, "encoder_q-layer.0": 8449.8945, "encoder_q-layer.1": 9774.2959, "encoder_q-layer.10": 5527.5181, "encoder_q-layer.11": 12345.543, "encoder_q-layer.2": 12339.6152, "encoder_q-layer.3": 13700.6777, "encoder_q-layer.4": 14410.4834, "encoder_q-layer.5": 11999.8291, "encoder_q-layer.6": 11133.7832, "encoder_q-layer.7": 7455.9438, "encoder_q-layer.8": 5772.4727, "encoder_q-layer.9": 4881.4873, "epoch": 0.32, "inbatch_neg_score": 0.1168, "inbatch_pos_score": 0.7065, "learning_rate": 3.733333333333334e-05, "loss": 3.6365, "norm_diff": 0.0561, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15221.8012, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1156, "query_norm": 1.3023, "queue_k_norm": 1.3676, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5162, "sent_len_1": 66.842, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.3462, "stdk": 0.0476, "stdq": 0.0443, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 32800 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.5703, "doc_norm": 1.3706, "encoder_q-embeddings": 42786.6367, "encoder_q-layer.0": 35929.7617, "encoder_q-layer.1": 34547.9688, "encoder_q-layer.10": 5504.3584, "encoder_q-layer.11": 12080.9814, "encoder_q-layer.2": 41105.4766, "encoder_q-layer.3": 34229.6836, "encoder_q-layer.4": 34914.6953, "encoder_q-layer.5": 31554.4258, "encoder_q-layer.6": 35208.1445, "encoder_q-layer.7": 24773.8223, "encoder_q-layer.8": 13054.6865, "encoder_q-layer.9": 5647.144, "epoch": 0.32, "inbatch_neg_score": 0.1164, "inbatch_pos_score": 0.7739, "learning_rate": 3.727777777777778e-05, "loss": 3.5703, "norm_diff": 0.026, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 45923.8139, "preclip_grad_norm_avg": 0.0004, "q@queue_neg_score": 0.1166, "query_norm": 1.3475, "queue_k_norm": 1.3667, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.8877, "sent_len_1": 66.9797, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8075, "stdk": 0.048, "stdq": 0.0458, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 32900 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.6135, "doc_norm": 1.3734, "encoder_q-embeddings": 4072.6545, "encoder_q-layer.0": 2761.5942, "encoder_q-layer.1": 2997.7747, "encoder_q-layer.10": 4453.4609, "encoder_q-layer.11": 10981.3486, "encoder_q-layer.2": 3554.5591, "encoder_q-layer.3": 3861.2974, "encoder_q-layer.4": 3975.5269, "encoder_q-layer.5": 3944.2891, "encoder_q-layer.6": 4236.4917, "encoder_q-layer.7": 4614.3223, "encoder_q-layer.8": 5211.5767, "encoder_q-layer.9": 4552.4961, "epoch": 0.32, "inbatch_neg_score": 0.1261, "inbatch_pos_score": 0.7676, "learning_rate": 3.722222222222222e-05, "loss": 3.6135, "norm_diff": 0.0295, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7485.1716, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.126, "query_norm": 1.3443, "queue_k_norm": 1.3694, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5352, "sent_len_1": 66.5105, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6138, "stdk": 0.048, "stdq": 0.0452, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 33000 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.5998, "doc_norm": 1.373, "encoder_q-embeddings": 4391.3276, "encoder_q-layer.0": 2955.2961, "encoder_q-layer.1": 3049.8098, "encoder_q-layer.10": 4850.9268, "encoder_q-layer.11": 12343.7334, "encoder_q-layer.2": 3305.1704, "encoder_q-layer.3": 3399.3105, "encoder_q-layer.4": 3697.9998, "encoder_q-layer.5": 3779.0166, "encoder_q-layer.6": 4182.876, "encoder_q-layer.7": 4616.4497, "encoder_q-layer.8": 5525.5288, "encoder_q-layer.9": 4772.019, "epoch": 0.32, "inbatch_neg_score": 0.1243, "inbatch_pos_score": 0.7466, "learning_rate": 3.7166666666666664e-05, "loss": 3.5998, "norm_diff": 0.054, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7985.4552, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1243, "query_norm": 1.3191, "queue_k_norm": 1.3714, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4191, "sent_len_1": 66.9851, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1138, "stdk": 0.048, "stdq": 0.0445, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 33100 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.6117, "doc_norm": 1.3694, "encoder_q-embeddings": 4245.3794, "encoder_q-layer.0": 2737.8079, "encoder_q-layer.1": 2839.8994, "encoder_q-layer.10": 4896.0654, "encoder_q-layer.11": 11757.1611, "encoder_q-layer.2": 3243.5334, "encoder_q-layer.3": 3201.8132, "encoder_q-layer.4": 3376.9907, "encoder_q-layer.5": 3524.9436, "encoder_q-layer.6": 4077.5037, "encoder_q-layer.7": 4510.1597, "encoder_q-layer.8": 5040.895, "encoder_q-layer.9": 4781.2173, "epoch": 0.32, "inbatch_neg_score": 0.1259, "inbatch_pos_score": 0.7612, "learning_rate": 3.7111111111111113e-05, "loss": 3.6117, "norm_diff": 0.0575, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7590.8334, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1265, "query_norm": 1.312, "queue_k_norm": 1.3722, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4995, "sent_len_1": 66.732, "sent_max_len_0": 127.9988, "sent_max_len_1": 187.8013, "stdk": 0.0479, "stdq": 0.0444, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 33200 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.6128, "doc_norm": 1.3739, "encoder_q-embeddings": 4717.7705, "encoder_q-layer.0": 3040.188, "encoder_q-layer.1": 3207.5293, "encoder_q-layer.10": 5061.3838, "encoder_q-layer.11": 13021.3564, "encoder_q-layer.2": 3507.9028, "encoder_q-layer.3": 3729.8359, "encoder_q-layer.4": 3883.8491, "encoder_q-layer.5": 3873.5295, "encoder_q-layer.6": 4185.3281, "encoder_q-layer.7": 4522.1929, "encoder_q-layer.8": 5367.0723, "encoder_q-layer.9": 4675.2046, "epoch": 0.33, "inbatch_neg_score": 0.1233, "inbatch_pos_score": 0.7461, "learning_rate": 3.705555555555556e-05, "loss": 3.6128, "norm_diff": 0.073, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8272.4915, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.124, "query_norm": 1.3009, "queue_k_norm": 1.3711, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5399, "sent_len_1": 66.4745, "sent_max_len_0": 127.9963, "sent_max_len_1": 188.5275, "stdk": 0.048, "stdq": 0.0443, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 33300 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.6034, "doc_norm": 1.3697, "encoder_q-embeddings": 9382.7959, "encoder_q-layer.0": 6433.2773, "encoder_q-layer.1": 6922.6855, "encoder_q-layer.10": 5094.6099, "encoder_q-layer.11": 11711.0068, "encoder_q-layer.2": 8202.0117, "encoder_q-layer.3": 8244.1377, "encoder_q-layer.4": 7780.1235, "encoder_q-layer.5": 7943.3516, "encoder_q-layer.6": 7914.2632, "encoder_q-layer.7": 7290.5508, "encoder_q-layer.8": 6121.3809, "encoder_q-layer.9": 4913.4272, "epoch": 0.33, "inbatch_neg_score": 0.1287, "inbatch_pos_score": 0.7583, "learning_rate": 3.7e-05, "loss": 3.6034, "norm_diff": 0.0379, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11633.1167, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1281, "query_norm": 1.3358, "queue_k_norm": 1.3749, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5176, "sent_len_1": 66.9543, "sent_max_len_0": 127.9988, "sent_max_len_1": 187.2375, "stdk": 0.0479, "stdq": 0.0455, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 33400 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.6084, "doc_norm": 1.376, "encoder_q-embeddings": 4109.2095, "encoder_q-layer.0": 2663.4819, "encoder_q-layer.1": 2742.4546, "encoder_q-layer.10": 4934.5874, "encoder_q-layer.11": 12379.8955, "encoder_q-layer.2": 3096.55, "encoder_q-layer.3": 3225.5186, "encoder_q-layer.4": 3499.8787, "encoder_q-layer.5": 3541.3291, "encoder_q-layer.6": 4009.7122, "encoder_q-layer.7": 4339.3457, "encoder_q-layer.8": 5241.084, "encoder_q-layer.9": 4772.0327, "epoch": 0.33, "inbatch_neg_score": 0.124, "inbatch_pos_score": 0.7603, "learning_rate": 3.694444444444445e-05, "loss": 3.6084, "norm_diff": 0.0599, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7690.3014, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1234, "query_norm": 1.3162, "queue_k_norm": 1.3754, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6843, "sent_len_1": 66.7498, "sent_max_len_0": 128.0, "sent_max_len_1": 190.285, "stdk": 0.0481, "stdq": 0.045, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 33500 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.5919, "doc_norm": 1.3776, "encoder_q-embeddings": 4129.3569, "encoder_q-layer.0": 2612.7175, "encoder_q-layer.1": 2613.4302, "encoder_q-layer.10": 5018.7104, "encoder_q-layer.11": 12922.5293, "encoder_q-layer.2": 3007.8389, "encoder_q-layer.3": 3085.5586, "encoder_q-layer.4": 3344.4761, "encoder_q-layer.5": 3382.9746, "encoder_q-layer.6": 4152.0835, "encoder_q-layer.7": 4530.3687, "encoder_q-layer.8": 5200.5684, "encoder_q-layer.9": 4753.9253, "epoch": 0.33, "inbatch_neg_score": 0.1211, "inbatch_pos_score": 0.7168, "learning_rate": 3.688888888888889e-05, "loss": 3.5919, "norm_diff": 0.1151, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7922.3138, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1205, "query_norm": 1.2625, "queue_k_norm": 1.3749, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7812, "sent_len_1": 66.7192, "sent_max_len_0": 127.9925, "sent_max_len_1": 189.3663, "stdk": 0.0481, "stdq": 0.0432, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 33600 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.5796, "doc_norm": 1.3744, "encoder_q-embeddings": 9118.6387, "encoder_q-layer.0": 7225.7739, "encoder_q-layer.1": 7865.3774, "encoder_q-layer.10": 5501.8516, "encoder_q-layer.11": 11930.0166, "encoder_q-layer.2": 8351.7285, "encoder_q-layer.3": 9203.4766, "encoder_q-layer.4": 9867.0693, "encoder_q-layer.5": 11043.6133, "encoder_q-layer.6": 11739.2148, "encoder_q-layer.7": 12360.7246, "encoder_q-layer.8": 7953.5518, "encoder_q-layer.9": 5011.1919, "epoch": 0.33, "inbatch_neg_score": 0.1113, "inbatch_pos_score": 0.7173, "learning_rate": 3.683333333333334e-05, "loss": 3.5796, "norm_diff": 0.1063, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13853.0465, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1099, "query_norm": 1.2681, "queue_k_norm": 1.3773, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5796, "sent_len_1": 66.6728, "sent_max_len_0": 127.9925, "sent_max_len_1": 189.56, "stdk": 0.048, "stdq": 0.0435, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 33700 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.6116, "doc_norm": 1.3795, "encoder_q-embeddings": 4141.5811, "encoder_q-layer.0": 2644.9573, "encoder_q-layer.1": 2783.6355, "encoder_q-layer.10": 5382.3262, "encoder_q-layer.11": 11958.543, "encoder_q-layer.2": 2973.1982, "encoder_q-layer.3": 3188.4688, "encoder_q-layer.4": 3318.6189, "encoder_q-layer.5": 3491.0239, "encoder_q-layer.6": 4020.8467, "encoder_q-layer.7": 4554.1504, "encoder_q-layer.8": 5408.2808, "encoder_q-layer.9": 4755.7256, "epoch": 0.33, "inbatch_neg_score": 0.1116, "inbatch_pos_score": 0.7793, "learning_rate": 3.677777777777778e-05, "loss": 3.6116, "norm_diff": 0.0637, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7604.1412, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1129, "query_norm": 1.3158, "queue_k_norm": 1.3757, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6537, "sent_len_1": 66.8908, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8913, "stdk": 0.0482, "stdq": 0.0452, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 33800 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.5836, "doc_norm": 1.3765, "encoder_q-embeddings": 4156.7573, "encoder_q-layer.0": 2734.6926, "encoder_q-layer.1": 2788.6038, "encoder_q-layer.10": 4975.7729, "encoder_q-layer.11": 11070.3086, "encoder_q-layer.2": 3114.2708, "encoder_q-layer.3": 3204.3254, "encoder_q-layer.4": 3358.1021, "encoder_q-layer.5": 3536.6523, "encoder_q-layer.6": 3876.6182, "encoder_q-layer.7": 4248.5581, "encoder_q-layer.8": 4883.3501, "encoder_q-layer.9": 4505.5713, "epoch": 0.33, "inbatch_neg_score": 0.1058, "inbatch_pos_score": 0.7417, "learning_rate": 3.672222222222222e-05, "loss": 3.5836, "norm_diff": 0.0942, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7193.5859, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.107, "query_norm": 1.2823, "queue_k_norm": 1.3756, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7076, "sent_len_1": 66.6604, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1775, "stdk": 0.0481, "stdq": 0.044, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 33900 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.5941, "doc_norm": 1.3742, "encoder_q-embeddings": 6352.9365, "encoder_q-layer.0": 4852.9634, "encoder_q-layer.1": 4824.1147, "encoder_q-layer.10": 4594.5586, "encoder_q-layer.11": 11240.3965, "encoder_q-layer.2": 5591.4712, "encoder_q-layer.3": 5560.7295, "encoder_q-layer.4": 5818.7212, "encoder_q-layer.5": 5725.9004, "encoder_q-layer.6": 5508.418, "encoder_q-layer.7": 6693.144, "encoder_q-layer.8": 5636.1787, "encoder_q-layer.9": 4571.7222, "epoch": 0.33, "inbatch_neg_score": 0.1025, "inbatch_pos_score": 0.728, "learning_rate": 3.6666666666666666e-05, "loss": 3.5941, "norm_diff": 0.0921, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9136.5792, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1029, "query_norm": 1.282, "queue_k_norm": 1.3741, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6031, "sent_len_1": 66.8422, "sent_max_len_0": 128.0, "sent_max_len_1": 189.53, "stdk": 0.048, "stdq": 0.0439, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 34000 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.5844, "doc_norm": 1.3742, "encoder_q-embeddings": 4160.7305, "encoder_q-layer.0": 2696.9429, "encoder_q-layer.1": 2860.0769, "encoder_q-layer.10": 5395.6147, "encoder_q-layer.11": 12882.8438, "encoder_q-layer.2": 3226.8643, "encoder_q-layer.3": 3364.9907, "encoder_q-layer.4": 3594.5571, "encoder_q-layer.5": 3841.5896, "encoder_q-layer.6": 4181.3999, "encoder_q-layer.7": 4546.5366, "encoder_q-layer.8": 5580.7896, "encoder_q-layer.9": 4967.4619, "epoch": 0.33, "inbatch_neg_score": 0.094, "inbatch_pos_score": 0.7256, "learning_rate": 3.6611111111111115e-05, "loss": 3.5844, "norm_diff": 0.0843, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7845.5577, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0947, "query_norm": 1.2899, "queue_k_norm": 1.376, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4959, "sent_len_1": 66.9549, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.4263, "stdk": 0.048, "stdq": 0.0445, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 34100 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.6269, "doc_norm": 1.3786, "encoder_q-embeddings": 3932.0974, "encoder_q-layer.0": 2640.812, "encoder_q-layer.1": 2749.1597, "encoder_q-layer.10": 4722.5049, "encoder_q-layer.11": 11109.8262, "encoder_q-layer.2": 3123.3157, "encoder_q-layer.3": 3249.2114, "encoder_q-layer.4": 3460.4504, "encoder_q-layer.5": 3532.8696, "encoder_q-layer.6": 3862.7664, "encoder_q-layer.7": 4412.6885, "encoder_q-layer.8": 4925.0166, "encoder_q-layer.9": 4733.3071, "epoch": 0.33, "inbatch_neg_score": 0.0991, "inbatch_pos_score": 0.7256, "learning_rate": 3.655555555555556e-05, "loss": 3.6269, "norm_diff": 0.0885, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7234.8029, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0987, "query_norm": 1.2901, "queue_k_norm": 1.3754, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4216, "sent_len_1": 66.394, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6838, "stdk": 0.0482, "stdq": 0.0443, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 34200 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.5894, "doc_norm": 1.3751, "encoder_q-embeddings": 5437.0283, "encoder_q-layer.0": 3529.6311, "encoder_q-layer.1": 3685.9731, "encoder_q-layer.10": 5664.1372, "encoder_q-layer.11": 12440.2354, "encoder_q-layer.2": 4191.2026, "encoder_q-layer.3": 4110.5347, "encoder_q-layer.4": 4556.749, "encoder_q-layer.5": 4696.0698, "encoder_q-layer.6": 4804.4038, "encoder_q-layer.7": 4903.0884, "encoder_q-layer.8": 5512.624, "encoder_q-layer.9": 4834.9678, "epoch": 0.33, "inbatch_neg_score": 0.0953, "inbatch_pos_score": 0.6948, "learning_rate": 3.65e-05, "loss": 3.5894, "norm_diff": 0.0943, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8445.557, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0948, "query_norm": 1.2808, "queue_k_norm": 1.3738, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.64, "sent_len_1": 66.7665, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.3575, "stdk": 0.0481, "stdq": 0.0438, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 34300 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.5903, "doc_norm": 1.37, "encoder_q-embeddings": 4233.4854, "encoder_q-layer.0": 2734.0952, "encoder_q-layer.1": 2845.2512, "encoder_q-layer.10": 5059.3291, "encoder_q-layer.11": 11748.3506, "encoder_q-layer.2": 3200.2769, "encoder_q-layer.3": 3382.0442, "encoder_q-layer.4": 3605.5974, "encoder_q-layer.5": 3872.9919, "encoder_q-layer.6": 4097.8667, "encoder_q-layer.7": 4392.0869, "encoder_q-layer.8": 5357.1011, "encoder_q-layer.9": 4743.5464, "epoch": 0.34, "inbatch_neg_score": 0.0973, "inbatch_pos_score": 0.7266, "learning_rate": 3.644444444444445e-05, "loss": 3.5903, "norm_diff": 0.0843, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7664.6807, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0978, "query_norm": 1.2857, "queue_k_norm": 1.3749, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7004, "sent_len_1": 66.8254, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.8275, "stdk": 0.0479, "stdq": 0.044, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 34400 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.5836, "doc_norm": 1.3719, "encoder_q-embeddings": 4738.6885, "encoder_q-layer.0": 3129.4065, "encoder_q-layer.1": 3350.5435, "encoder_q-layer.10": 4795.7334, "encoder_q-layer.11": 12089.8496, "encoder_q-layer.2": 3955.0774, "encoder_q-layer.3": 4093.5542, "encoder_q-layer.4": 4474.0942, "encoder_q-layer.5": 4559.7964, "encoder_q-layer.6": 5211.4819, "encoder_q-layer.7": 5217.8149, "encoder_q-layer.8": 5342.6509, "encoder_q-layer.9": 4488.667, "epoch": 0.34, "inbatch_neg_score": 0.0978, "inbatch_pos_score": 0.7275, "learning_rate": 3.638888888888889e-05, "loss": 3.5836, "norm_diff": 0.1007, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8126.6297, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0975, "query_norm": 1.2712, "queue_k_norm": 1.3727, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5988, "sent_len_1": 66.4817, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6113, "stdk": 0.048, "stdq": 0.0432, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 34500 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.573, "doc_norm": 1.3712, "encoder_q-embeddings": 8773.0693, "encoder_q-layer.0": 5759.145, "encoder_q-layer.1": 5930.8906, "encoder_q-layer.10": 9808.8828, "encoder_q-layer.11": 22832.4258, "encoder_q-layer.2": 6461.6836, "encoder_q-layer.3": 6577.0205, "encoder_q-layer.4": 7006.2817, "encoder_q-layer.5": 7019.9775, "encoder_q-layer.6": 7879.5308, "encoder_q-layer.7": 8800.1406, "encoder_q-layer.8": 10523.9883, "encoder_q-layer.9": 9797.333, "epoch": 0.34, "inbatch_neg_score": 0.0942, "inbatch_pos_score": 0.7383, "learning_rate": 3.633333333333333e-05, "loss": 3.573, "norm_diff": 0.0576, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14977.1556, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0942, "query_norm": 1.3136, "queue_k_norm": 1.3722, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6795, "sent_len_1": 66.8307, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.4338, "stdk": 0.048, "stdq": 0.045, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 34600 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.5972, "doc_norm": 1.3705, "encoder_q-embeddings": 8184.2349, "encoder_q-layer.0": 5196.9658, "encoder_q-layer.1": 5448.71, "encoder_q-layer.10": 9249.1846, "encoder_q-layer.11": 21640.6953, "encoder_q-layer.2": 6047.1982, "encoder_q-layer.3": 6234.8335, "encoder_q-layer.4": 6645.6812, "encoder_q-layer.5": 6603.063, "encoder_q-layer.6": 7726.7202, "encoder_q-layer.7": 8511.666, "encoder_q-layer.8": 9968.1953, "encoder_q-layer.9": 9050.0889, "epoch": 0.34, "inbatch_neg_score": 0.0923, "inbatch_pos_score": 0.7476, "learning_rate": 3.6277777777777776e-05, "loss": 3.5972, "norm_diff": 0.0681, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14094.9569, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0921, "query_norm": 1.3023, "queue_k_norm": 1.3739, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.596, "sent_len_1": 66.8845, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.2688, "stdk": 0.048, "stdq": 0.0445, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 34700 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.5587, "doc_norm": 1.3779, "encoder_q-embeddings": 8986.7568, "encoder_q-layer.0": 5998.75, "encoder_q-layer.1": 6247.5376, "encoder_q-layer.10": 10408.4443, "encoder_q-layer.11": 24076.4062, "encoder_q-layer.2": 6729.9795, "encoder_q-layer.3": 7001.9517, "encoder_q-layer.4": 7139.2354, "encoder_q-layer.5": 7714.4331, "encoder_q-layer.6": 8258.0312, "encoder_q-layer.7": 9630.9014, "encoder_q-layer.8": 10353.9062, "encoder_q-layer.9": 9676.0703, "epoch": 0.34, "inbatch_neg_score": 0.0947, "inbatch_pos_score": 0.7129, "learning_rate": 3.6222222222222225e-05, "loss": 3.5587, "norm_diff": 0.0807, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15759.4786, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0956, "query_norm": 1.2972, "queue_k_norm": 1.3727, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4676, "sent_len_1": 66.7317, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.9638, "stdk": 0.0483, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 34800 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.5671, "doc_norm": 1.3735, "encoder_q-embeddings": 4829.4839, "encoder_q-layer.0": 3229.873, "encoder_q-layer.1": 3432.9285, "encoder_q-layer.10": 5056.1465, "encoder_q-layer.11": 11733.8906, "encoder_q-layer.2": 3687.5686, "encoder_q-layer.3": 3940.238, "encoder_q-layer.4": 4154.7568, "encoder_q-layer.5": 4075.0752, "encoder_q-layer.6": 4311.5146, "encoder_q-layer.7": 4875.4312, "encoder_q-layer.8": 5464.6924, "encoder_q-layer.9": 4966.6392, "epoch": 0.34, "inbatch_neg_score": 0.0946, "inbatch_pos_score": 0.7148, "learning_rate": 3.6166666666666674e-05, "loss": 3.5671, "norm_diff": 0.1043, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7902.6849, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0948, "query_norm": 1.2692, "queue_k_norm": 1.3723, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7423, "sent_len_1": 66.8766, "sent_max_len_0": 127.9838, "sent_max_len_1": 189.4338, "stdk": 0.0481, "stdq": 0.0434, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 34900 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.5811, "doc_norm": 1.3778, "encoder_q-embeddings": 2227.9578, "encoder_q-layer.0": 1465.2783, "encoder_q-layer.1": 1523.4249, "encoder_q-layer.10": 2361.0637, "encoder_q-layer.11": 5694.1333, "encoder_q-layer.2": 1695.4611, "encoder_q-layer.3": 1715.5272, "encoder_q-layer.4": 1793.1747, "encoder_q-layer.5": 1768.9944, "encoder_q-layer.6": 1959.1982, "encoder_q-layer.7": 2306.1548, "encoder_q-layer.8": 2578.9272, "encoder_q-layer.9": 2317.1323, "epoch": 0.34, "inbatch_neg_score": 0.0953, "inbatch_pos_score": 0.7148, "learning_rate": 3.611111111111111e-05, "loss": 3.5811, "norm_diff": 0.1181, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3786.8294, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0956, "query_norm": 1.2598, "queue_k_norm": 1.3728, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6502, "sent_len_1": 66.6655, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8988, "stdk": 0.0483, "stdq": 0.0433, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 35000 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.5946, "doc_norm": 1.3715, "encoder_q-embeddings": 2066.5745, "encoder_q-layer.0": 1327.0743, "encoder_q-layer.1": 1357.9575, "encoder_q-layer.10": 2371.8394, "encoder_q-layer.11": 5644.5713, "encoder_q-layer.2": 1523.6118, "encoder_q-layer.3": 1693.2654, "encoder_q-layer.4": 1718.8947, "encoder_q-layer.5": 1766.3982, "encoder_q-layer.6": 1970.074, "encoder_q-layer.7": 2046.5177, "encoder_q-layer.8": 2307.0964, "encoder_q-layer.9": 2085.9836, "epoch": 0.34, "inbatch_neg_score": 0.0949, "inbatch_pos_score": 0.7192, "learning_rate": 3.605555555555556e-05, "loss": 3.5946, "norm_diff": 0.1112, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3617.9935, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0951, "query_norm": 1.2603, "queue_k_norm": 1.3731, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5434, "sent_len_1": 66.5919, "sent_max_len_0": 127.9938, "sent_max_len_1": 188.5975, "stdk": 0.0481, "stdq": 0.0435, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 35100 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.563, "doc_norm": 1.3722, "encoder_q-embeddings": 1972.0518, "encoder_q-layer.0": 1287.9846, "encoder_q-layer.1": 1354.9236, "encoder_q-layer.10": 2488.4878, "encoder_q-layer.11": 5541.2241, "encoder_q-layer.2": 1436.3895, "encoder_q-layer.3": 1479.9398, "encoder_q-layer.4": 1532.059, "encoder_q-layer.5": 1556.3806, "encoder_q-layer.6": 1789.9371, "encoder_q-layer.7": 1978.8488, "encoder_q-layer.8": 2334.1853, "encoder_q-layer.9": 2172.9956, "epoch": 0.34, "inbatch_neg_score": 0.1, "inbatch_pos_score": 0.7388, "learning_rate": 3.6e-05, "loss": 3.563, "norm_diff": 0.0899, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3507.0866, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1004, "query_norm": 1.2823, "queue_k_norm": 1.3705, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6102, "sent_len_1": 66.7971, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8237, "stdk": 0.0481, "stdq": 0.0444, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 35200 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.5664, "doc_norm": 1.3649, "encoder_q-embeddings": 2214.8025, "encoder_q-layer.0": 1369.7717, "encoder_q-layer.1": 1399.2334, "encoder_q-layer.10": 2465.124, "encoder_q-layer.11": 5856.6436, "encoder_q-layer.2": 1567.0929, "encoder_q-layer.3": 1614.3772, "encoder_q-layer.4": 1758.6606, "encoder_q-layer.5": 1812.4442, "encoder_q-layer.6": 1957.9312, "encoder_q-layer.7": 2038.347, "encoder_q-layer.8": 2370.4817, "encoder_q-layer.9": 2227.7532, "epoch": 0.34, "inbatch_neg_score": 0.0992, "inbatch_pos_score": 0.7153, "learning_rate": 3.594444444444445e-05, "loss": 3.5664, "norm_diff": 0.0928, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3722.9673, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0994, "query_norm": 1.2722, "queue_k_norm": 1.3709, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7145, "sent_len_1": 66.7717, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5062, "stdk": 0.0478, "stdq": 0.044, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 35300 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.561, "doc_norm": 1.3789, "encoder_q-embeddings": 2056.9993, "encoder_q-layer.0": 1325.0498, "encoder_q-layer.1": 1360.9313, "encoder_q-layer.10": 2399.8022, "encoder_q-layer.11": 5526.1562, "encoder_q-layer.2": 1543.9426, "encoder_q-layer.3": 1615.5396, "encoder_q-layer.4": 1787.4503, "encoder_q-layer.5": 1850.0486, "encoder_q-layer.6": 2161.769, "encoder_q-layer.7": 2335.543, "encoder_q-layer.8": 2630.156, "encoder_q-layer.9": 2488.0042, "epoch": 0.35, "inbatch_neg_score": 0.101, "inbatch_pos_score": 0.7373, "learning_rate": 3.5888888888888886e-05, "loss": 3.561, "norm_diff": 0.0936, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3669.2275, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1005, "query_norm": 1.2853, "queue_k_norm": 1.3731, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6881, "sent_len_1": 66.7874, "sent_max_len_0": 128.0, "sent_max_len_1": 190.305, "stdk": 0.0483, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 35400 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.5598, "doc_norm": 1.3684, "encoder_q-embeddings": 2620.2783, "encoder_q-layer.0": 1895.1146, "encoder_q-layer.1": 1896.4669, "encoder_q-layer.10": 2428.1228, "encoder_q-layer.11": 5561.6587, "encoder_q-layer.2": 2206.6379, "encoder_q-layer.3": 2514.1179, "encoder_q-layer.4": 2364.1753, "encoder_q-layer.5": 2494.9587, "encoder_q-layer.6": 2524.5298, "encoder_q-layer.7": 2544.5879, "encoder_q-layer.8": 2889.0151, "encoder_q-layer.9": 2489.1011, "epoch": 0.35, "inbatch_neg_score": 0.1014, "inbatch_pos_score": 0.7588, "learning_rate": 3.5833333333333335e-05, "loss": 3.5598, "norm_diff": 0.0816, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4147.6602, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1021, "query_norm": 1.2868, "queue_k_norm": 1.3713, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5619, "sent_len_1": 66.8495, "sent_max_len_0": 128.0, "sent_max_len_1": 189.435, "stdk": 0.048, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 35500 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.5515, "doc_norm": 1.3768, "encoder_q-embeddings": 2017.9297, "encoder_q-layer.0": 1346.5167, "encoder_q-layer.1": 1424.4745, "encoder_q-layer.10": 2367.3242, "encoder_q-layer.11": 5461.0405, "encoder_q-layer.2": 1629.1079, "encoder_q-layer.3": 1631.3804, "encoder_q-layer.4": 1723.3047, "encoder_q-layer.5": 1804.905, "encoder_q-layer.6": 1977.6338, "encoder_q-layer.7": 2208.3813, "encoder_q-layer.8": 2420.2668, "encoder_q-layer.9": 2259.686, "epoch": 0.35, "inbatch_neg_score": 0.1003, "inbatch_pos_score": 0.7412, "learning_rate": 3.577777777777778e-05, "loss": 3.5515, "norm_diff": 0.0826, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3556.4731, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1006, "query_norm": 1.2942, "queue_k_norm": 1.372, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6087, "sent_len_1": 66.7355, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.715, "stdk": 0.0483, "stdq": 0.0448, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 35600 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.5443, "doc_norm": 1.368, "encoder_q-embeddings": 2435.0491, "encoder_q-layer.0": 1547.5524, "encoder_q-layer.1": 1616.6068, "encoder_q-layer.10": 2627.1135, "encoder_q-layer.11": 6234.8418, "encoder_q-layer.2": 1871.3738, "encoder_q-layer.3": 1972.3241, "encoder_q-layer.4": 1970.8318, "encoder_q-layer.5": 1977.4181, "encoder_q-layer.6": 2091.5059, "encoder_q-layer.7": 2347.9097, "encoder_q-layer.8": 2633.4519, "encoder_q-layer.9": 2547.2727, "epoch": 0.35, "inbatch_neg_score": 0.0951, "inbatch_pos_score": 0.7241, "learning_rate": 3.5722222222222226e-05, "loss": 3.5443, "norm_diff": 0.0844, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4012.5598, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0954, "query_norm": 1.2836, "queue_k_norm": 1.3719, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7632, "sent_len_1": 66.7503, "sent_max_len_0": 128.0, "sent_max_len_1": 186.6012, "stdk": 0.0479, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 35700 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.5634, "doc_norm": 1.3721, "encoder_q-embeddings": 2111.9075, "encoder_q-layer.0": 1333.275, "encoder_q-layer.1": 1345.4449, "encoder_q-layer.10": 2433.9009, "encoder_q-layer.11": 5492.0669, "encoder_q-layer.2": 1502.9381, "encoder_q-layer.3": 1546.9874, "encoder_q-layer.4": 1668.1542, "encoder_q-layer.5": 1723.5533, "encoder_q-layer.6": 1967.4908, "encoder_q-layer.7": 2158.7393, "encoder_q-layer.8": 2510.5408, "encoder_q-layer.9": 2393.5525, "epoch": 0.35, "inbatch_neg_score": 0.0891, "inbatch_pos_score": 0.73, "learning_rate": 3.566666666666667e-05, "loss": 3.5634, "norm_diff": 0.074, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3640.5963, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0897, "query_norm": 1.2982, "queue_k_norm": 1.371, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6963, "sent_len_1": 66.8894, "sent_max_len_0": 127.9912, "sent_max_len_1": 191.1488, "stdk": 0.0481, "stdq": 0.0452, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 35800 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.5721, "doc_norm": 1.373, "encoder_q-embeddings": 2088.2305, "encoder_q-layer.0": 1361.54, "encoder_q-layer.1": 1404.1892, "encoder_q-layer.10": 2397.2686, "encoder_q-layer.11": 5661.9639, "encoder_q-layer.2": 1505.9729, "encoder_q-layer.3": 1606.2568, "encoder_q-layer.4": 1561.2957, "encoder_q-layer.5": 1623.0597, "encoder_q-layer.6": 1853.5826, "encoder_q-layer.7": 2001.1929, "encoder_q-layer.8": 2456.6597, "encoder_q-layer.9": 2250.1365, "epoch": 0.35, "inbatch_neg_score": 0.0971, "inbatch_pos_score": 0.7393, "learning_rate": 3.561111111111111e-05, "loss": 3.5721, "norm_diff": 0.0854, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3605.8191, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0969, "query_norm": 1.2875, "queue_k_norm": 1.3722, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6954, "sent_len_1": 66.4336, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7925, "stdk": 0.0481, "stdq": 0.0445, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 35900 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.5662, "doc_norm": 1.3722, "encoder_q-embeddings": 2303.887, "encoder_q-layer.0": 1492.2688, "encoder_q-layer.1": 1572.4215, "encoder_q-layer.10": 2579.4658, "encoder_q-layer.11": 5556.127, "encoder_q-layer.2": 1797.3586, "encoder_q-layer.3": 1871.7969, "encoder_q-layer.4": 2022.3153, "encoder_q-layer.5": 1983.9958, "encoder_q-layer.6": 2204.9775, "encoder_q-layer.7": 2241.52, "encoder_q-layer.8": 2623.7214, "encoder_q-layer.9": 2341.8848, "epoch": 0.35, "inbatch_neg_score": 0.096, "inbatch_pos_score": 0.7627, "learning_rate": 3.555555555555556e-05, "loss": 3.5662, "norm_diff": 0.048, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3797.311, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0963, "query_norm": 1.3242, "queue_k_norm": 1.3697, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5919, "sent_len_1": 66.7234, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7388, "stdk": 0.0481, "stdq": 0.0459, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 36000 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.567, "doc_norm": 1.3697, "encoder_q-embeddings": 2480.8157, "encoder_q-layer.0": 1722.6554, "encoder_q-layer.1": 1767.2878, "encoder_q-layer.10": 2634.3882, "encoder_q-layer.11": 5724.7056, "encoder_q-layer.2": 2037.9363, "encoder_q-layer.3": 2106.6941, "encoder_q-layer.4": 2277.3752, "encoder_q-layer.5": 2380.5881, "encoder_q-layer.6": 2383.7388, "encoder_q-layer.7": 2523.8389, "encoder_q-layer.8": 2716.0874, "encoder_q-layer.9": 2469.4285, "epoch": 0.35, "inbatch_neg_score": 0.0909, "inbatch_pos_score": 0.7129, "learning_rate": 3.55e-05, "loss": 3.567, "norm_diff": 0.0959, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4038.9499, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0914, "query_norm": 1.2738, "queue_k_norm": 1.3697, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6494, "sent_len_1": 66.8858, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1362, "stdk": 0.0481, "stdq": 0.0441, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 36100 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.567, "doc_norm": 1.377, "encoder_q-embeddings": 2197.8872, "encoder_q-layer.0": 1420.0215, "encoder_q-layer.1": 1549.3523, "encoder_q-layer.10": 2393.6577, "encoder_q-layer.11": 5397.3525, "encoder_q-layer.2": 1771.2579, "encoder_q-layer.3": 1930.1882, "encoder_q-layer.4": 2127.8474, "encoder_q-layer.5": 2129.9812, "encoder_q-layer.6": 2298.1987, "encoder_q-layer.7": 2565.6814, "encoder_q-layer.8": 2549.207, "encoder_q-layer.9": 2337.7852, "epoch": 0.35, "inbatch_neg_score": 0.0984, "inbatch_pos_score": 0.7319, "learning_rate": 3.5444444444444445e-05, "loss": 3.567, "norm_diff": 0.0772, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3743.6389, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.098, "query_norm": 1.2998, "queue_k_norm": 1.372, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.595, "sent_len_1": 66.8252, "sent_max_len_0": 127.9938, "sent_max_len_1": 188.0137, "stdk": 0.0483, "stdq": 0.0447, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 36200 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.5595, "doc_norm": 1.3655, "encoder_q-embeddings": 2444.6089, "encoder_q-layer.0": 1666.8104, "encoder_q-layer.1": 1767.9611, "encoder_q-layer.10": 2274.4851, "encoder_q-layer.11": 5151.0117, "encoder_q-layer.2": 2118.1931, "encoder_q-layer.3": 2348.0784, "encoder_q-layer.4": 2554.7625, "encoder_q-layer.5": 2601.1062, "encoder_q-layer.6": 2773.2903, "encoder_q-layer.7": 2716.748, "encoder_q-layer.8": 2729.6189, "encoder_q-layer.9": 2249.719, "epoch": 0.35, "inbatch_neg_score": 0.0982, "inbatch_pos_score": 0.748, "learning_rate": 3.538888888888889e-05, "loss": 3.5595, "norm_diff": 0.0542, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4015.7116, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0991, "query_norm": 1.3113, "queue_k_norm": 1.3689, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6692, "sent_len_1": 66.9503, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5513, "stdk": 0.0479, "stdq": 0.045, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 36300 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.5433, "doc_norm": 1.3661, "encoder_q-embeddings": 2511.5095, "encoder_q-layer.0": 1540.8893, "encoder_q-layer.1": 1663.2301, "encoder_q-layer.10": 2521.7405, "encoder_q-layer.11": 5668.876, "encoder_q-layer.2": 1872.5513, "encoder_q-layer.3": 1961.7706, "encoder_q-layer.4": 2106.4187, "encoder_q-layer.5": 2316.8308, "encoder_q-layer.6": 2560.1179, "encoder_q-layer.7": 2691.8618, "encoder_q-layer.8": 2794.5142, "encoder_q-layer.9": 2481.1028, "epoch": 0.36, "inbatch_neg_score": 0.101, "inbatch_pos_score": 0.71, "learning_rate": 3.5333333333333336e-05, "loss": 3.5433, "norm_diff": 0.0694, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4000.714, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1005, "query_norm": 1.2967, "queue_k_norm": 1.3722, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7043, "sent_len_1": 67.0156, "sent_max_len_0": 127.9912, "sent_max_len_1": 189.2675, "stdk": 0.0479, "stdq": 0.0445, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 36400 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.5528, "doc_norm": 1.3738, "encoder_q-embeddings": 3615.6558, "encoder_q-layer.0": 2909.7341, "encoder_q-layer.1": 3034.166, "encoder_q-layer.10": 2455.4175, "encoder_q-layer.11": 5430.3198, "encoder_q-layer.2": 3389.1238, "encoder_q-layer.3": 3418.7207, "encoder_q-layer.4": 3060.946, "encoder_q-layer.5": 2634.0215, "encoder_q-layer.6": 2653.3018, "encoder_q-layer.7": 2613.8591, "encoder_q-layer.8": 2717.4819, "encoder_q-layer.9": 2305.0156, "epoch": 0.36, "inbatch_neg_score": 0.1041, "inbatch_pos_score": 0.7598, "learning_rate": 3.527777777777778e-05, "loss": 3.5528, "norm_diff": 0.062, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4758.4843, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1047, "query_norm": 1.3118, "queue_k_norm": 1.3716, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5562, "sent_len_1": 67.118, "sent_max_len_0": 128.0, "sent_max_len_1": 190.925, "stdk": 0.0482, "stdq": 0.0447, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 36500 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.5331, "doc_norm": 1.3677, "encoder_q-embeddings": 1874.1816, "encoder_q-layer.0": 1180.4812, "encoder_q-layer.1": 1207.1719, "encoder_q-layer.10": 2183.4844, "encoder_q-layer.11": 5047.2056, "encoder_q-layer.2": 1353.0229, "encoder_q-layer.3": 1359.4521, "encoder_q-layer.4": 1417.4216, "encoder_q-layer.5": 1485.6041, "encoder_q-layer.6": 1648.1074, "encoder_q-layer.7": 1879.361, "encoder_q-layer.8": 2287.1445, "encoder_q-layer.9": 2080.3857, "epoch": 0.36, "inbatch_neg_score": 0.1091, "inbatch_pos_score": 0.7588, "learning_rate": 3.522222222222222e-05, "loss": 3.5331, "norm_diff": 0.0661, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3244.5711, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1089, "query_norm": 1.3016, "queue_k_norm": 1.3708, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6295, "sent_len_1": 67.0392, "sent_max_len_0": 127.9938, "sent_max_len_1": 191.015, "stdk": 0.0479, "stdq": 0.0441, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 36600 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.5509, "doc_norm": 1.3739, "encoder_q-embeddings": 2742.531, "encoder_q-layer.0": 2008.4819, "encoder_q-layer.1": 2144.4712, "encoder_q-layer.10": 2405.8037, "encoder_q-layer.11": 5717.0918, "encoder_q-layer.2": 2348.8831, "encoder_q-layer.3": 2425.1814, "encoder_q-layer.4": 2871.2229, "encoder_q-layer.5": 3488.3723, "encoder_q-layer.6": 3202.2622, "encoder_q-layer.7": 3004.0859, "encoder_q-layer.8": 2698.7559, "encoder_q-layer.9": 2231.6353, "epoch": 0.36, "inbatch_neg_score": 0.1123, "inbatch_pos_score": 0.7446, "learning_rate": 3.516666666666667e-05, "loss": 3.5509, "norm_diff": 0.0654, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4513.4289, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1124, "query_norm": 1.3085, "queue_k_norm": 1.3726, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4942, "sent_len_1": 66.8476, "sent_max_len_0": 127.995, "sent_max_len_1": 188.1287, "stdk": 0.0481, "stdq": 0.0442, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 36700 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.5609, "doc_norm": 1.3747, "encoder_q-embeddings": 2044.1078, "encoder_q-layer.0": 1296.8252, "encoder_q-layer.1": 1328.4427, "encoder_q-layer.10": 2570.5474, "encoder_q-layer.11": 5430.9521, "encoder_q-layer.2": 1448.066, "encoder_q-layer.3": 1511.3699, "encoder_q-layer.4": 1532.7308, "encoder_q-layer.5": 1591.5775, "encoder_q-layer.6": 1800.7737, "encoder_q-layer.7": 2212.9954, "encoder_q-layer.8": 2566.8933, "encoder_q-layer.9": 2355.9846, "epoch": 0.36, "inbatch_neg_score": 0.1181, "inbatch_pos_score": 0.7446, "learning_rate": 3.511111111111111e-05, "loss": 3.5609, "norm_diff": 0.0537, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3531.2975, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1191, "query_norm": 1.321, "queue_k_norm": 1.3719, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6021, "sent_len_1": 66.9605, "sent_max_len_0": 128.0, "sent_max_len_1": 188.46, "stdk": 0.0482, "stdq": 0.0445, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 36800 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.5412, "doc_norm": 1.3716, "encoder_q-embeddings": 2201.1055, "encoder_q-layer.0": 1402.0759, "encoder_q-layer.1": 1434.9507, "encoder_q-layer.10": 2227.1379, "encoder_q-layer.11": 5270.3369, "encoder_q-layer.2": 1571.8042, "encoder_q-layer.3": 1560.2019, "encoder_q-layer.4": 1722.5499, "encoder_q-layer.5": 1768.5732, "encoder_q-layer.6": 2019.7747, "encoder_q-layer.7": 2168.3457, "encoder_q-layer.8": 2398.2085, "encoder_q-layer.9": 2200.1482, "epoch": 0.36, "inbatch_neg_score": 0.1253, "inbatch_pos_score": 0.7881, "learning_rate": 3.505555555555556e-05, "loss": 3.5412, "norm_diff": 0.037, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3539.6716, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1248, "query_norm": 1.3348, "queue_k_norm": 1.3729, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6759, "sent_len_1": 66.7766, "sent_max_len_0": 127.9912, "sent_max_len_1": 189.6113, "stdk": 0.048, "stdq": 0.0449, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 36900 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.5476, "doc_norm": 1.3786, "encoder_q-embeddings": 4017.3584, "encoder_q-layer.0": 2573.3308, "encoder_q-layer.1": 2696.3223, "encoder_q-layer.10": 4586.6348, "encoder_q-layer.11": 11357.4932, "encoder_q-layer.2": 2901.7141, "encoder_q-layer.3": 3005.2583, "encoder_q-layer.4": 3352.6343, "encoder_q-layer.5": 3267.3518, "encoder_q-layer.6": 3607.7808, "encoder_q-layer.7": 4233.0186, "encoder_q-layer.8": 5124.7158, "encoder_q-layer.9": 4576.5913, "epoch": 0.36, "inbatch_neg_score": 0.1303, "inbatch_pos_score": 0.7725, "learning_rate": 3.5e-05, "loss": 3.5476, "norm_diff": 0.0561, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7074.5469, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1294, "query_norm": 1.3225, "queue_k_norm": 1.373, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5168, "sent_len_1": 66.7788, "sent_max_len_0": 127.99, "sent_max_len_1": 189.44, "stdk": 0.0483, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 37000 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.5535, "doc_norm": 1.376, "encoder_q-embeddings": 4291.0498, "encoder_q-layer.0": 2635.3647, "encoder_q-layer.1": 2651.3494, "encoder_q-layer.10": 5117.978, "encoder_q-layer.11": 11574.2188, "encoder_q-layer.2": 2873.2798, "encoder_q-layer.3": 3025.9614, "encoder_q-layer.4": 3308.1501, "encoder_q-layer.5": 3334.5635, "encoder_q-layer.6": 3846.2678, "encoder_q-layer.7": 4301.7612, "encoder_q-layer.8": 5303.5703, "encoder_q-layer.9": 4787.5552, "epoch": 0.36, "inbatch_neg_score": 0.1254, "inbatch_pos_score": 0.751, "learning_rate": 3.4944444444444446e-05, "loss": 3.5535, "norm_diff": 0.0691, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7405.788, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1249, "query_norm": 1.3069, "queue_k_norm": 1.3722, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5522, "sent_len_1": 66.8067, "sent_max_len_0": 127.9975, "sent_max_len_1": 192.235, "stdk": 0.0481, "stdq": 0.0444, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 37100 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.5394, "doc_norm": 1.3782, "encoder_q-embeddings": 12898.7646, "encoder_q-layer.0": 9107.5049, "encoder_q-layer.1": 10896.3145, "encoder_q-layer.10": 5386.0967, "encoder_q-layer.11": 10720.209, "encoder_q-layer.2": 12234.7002, "encoder_q-layer.3": 11906.3906, "encoder_q-layer.4": 11696.1631, "encoder_q-layer.5": 12533.5, "encoder_q-layer.6": 14080.7881, "encoder_q-layer.7": 13678.3633, "encoder_q-layer.8": 10241.2051, "encoder_q-layer.9": 5070.3613, "epoch": 0.36, "inbatch_neg_score": 0.1268, "inbatch_pos_score": 0.7593, "learning_rate": 3.4888888888888895e-05, "loss": 3.5394, "norm_diff": 0.0958, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16748.4372, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.1265, "query_norm": 1.2824, "queue_k_norm": 1.3741, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6402, "sent_len_1": 66.8523, "sent_max_len_0": 127.9875, "sent_max_len_1": 189.8175, "stdk": 0.0482, "stdq": 0.0436, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 37200 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.5591, "doc_norm": 1.3793, "encoder_q-embeddings": 4758.312, "encoder_q-layer.0": 3197.9363, "encoder_q-layer.1": 3289.0393, "encoder_q-layer.10": 4715.8398, "encoder_q-layer.11": 10976.6699, "encoder_q-layer.2": 3804.356, "encoder_q-layer.3": 3898.3, "encoder_q-layer.4": 3802.4724, "encoder_q-layer.5": 3708.0061, "encoder_q-layer.6": 3747.1487, "encoder_q-layer.7": 3988.1404, "encoder_q-layer.8": 4932.0723, "encoder_q-layer.9": 4673.6982, "epoch": 0.36, "inbatch_neg_score": 0.1209, "inbatch_pos_score": 0.7764, "learning_rate": 3.483333333333334e-05, "loss": 3.5591, "norm_diff": 0.0642, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7423.1729, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1206, "query_norm": 1.315, "queue_k_norm": 1.3739, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5964, "sent_len_1": 66.621, "sent_max_len_0": 128.0, "sent_max_len_1": 186.2088, "stdk": 0.0482, "stdq": 0.0452, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 37300 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.5386, "doc_norm": 1.369, "encoder_q-embeddings": 4757.7178, "encoder_q-layer.0": 3017.4253, "encoder_q-layer.1": 3249.2231, "encoder_q-layer.10": 4921.0435, "encoder_q-layer.11": 11824.0938, "encoder_q-layer.2": 3628.1436, "encoder_q-layer.3": 3687.9109, "encoder_q-layer.4": 3992.6064, "encoder_q-layer.5": 4096.2651, "encoder_q-layer.6": 4412.9785, "encoder_q-layer.7": 4382.5996, "encoder_q-layer.8": 5142.1523, "encoder_q-layer.9": 4567.6924, "epoch": 0.37, "inbatch_neg_score": 0.1179, "inbatch_pos_score": 0.7437, "learning_rate": 3.477777777777778e-05, "loss": 3.5386, "norm_diff": 0.0805, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7651.9779, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1188, "query_norm": 1.2885, "queue_k_norm": 1.3752, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.8285, "sent_len_1": 66.8492, "sent_max_len_0": 128.0, "sent_max_len_1": 191.8512, "stdk": 0.0479, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 37400 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.5316, "doc_norm": 1.373, "encoder_q-embeddings": 4146.1831, "encoder_q-layer.0": 2614.301, "encoder_q-layer.1": 2759.6528, "encoder_q-layer.10": 4666.9951, "encoder_q-layer.11": 11233.1035, "encoder_q-layer.2": 3029.4126, "encoder_q-layer.3": 3054.7617, "encoder_q-layer.4": 3307.9897, "encoder_q-layer.5": 3548.1829, "encoder_q-layer.6": 3964.2148, "encoder_q-layer.7": 4500.8491, "encoder_q-layer.8": 5090.3652, "encoder_q-layer.9": 4750.5024, "epoch": 0.37, "inbatch_neg_score": 0.1135, "inbatch_pos_score": 0.7573, "learning_rate": 3.472222222222222e-05, "loss": 3.5316, "norm_diff": 0.0802, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7204.6735, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.113, "query_norm": 1.2928, "queue_k_norm": 1.3757, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6829, "sent_len_1": 66.7016, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6975, "stdk": 0.048, "stdq": 0.0446, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 37500 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.5606, "doc_norm": 1.3702, "encoder_q-embeddings": 5057.0, "encoder_q-layer.0": 3354.6072, "encoder_q-layer.1": 3561.3259, "encoder_q-layer.10": 5145.3838, "encoder_q-layer.11": 11324.7246, "encoder_q-layer.2": 4165.0063, "encoder_q-layer.3": 4570.2603, "encoder_q-layer.4": 4467.6348, "encoder_q-layer.5": 4563.1904, "encoder_q-layer.6": 4973.4087, "encoder_q-layer.7": 5102.6372, "encoder_q-layer.8": 5676.0854, "encoder_q-layer.9": 4718.5107, "epoch": 0.37, "inbatch_neg_score": 0.1064, "inbatch_pos_score": 0.731, "learning_rate": 3.466666666666667e-05, "loss": 3.5606, "norm_diff": 0.0865, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8074.9893, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1054, "query_norm": 1.2837, "queue_k_norm": 1.375, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6353, "sent_len_1": 66.626, "sent_max_len_0": 127.9975, "sent_max_len_1": 188.6413, "stdk": 0.0479, "stdq": 0.0443, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 37600 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.5209, "doc_norm": 1.3789, "encoder_q-embeddings": 3951.9702, "encoder_q-layer.0": 2591.2588, "encoder_q-layer.1": 2673.6182, "encoder_q-layer.10": 4876.8003, "encoder_q-layer.11": 11087.7422, "encoder_q-layer.2": 3101.5271, "encoder_q-layer.3": 3219.2422, "encoder_q-layer.4": 3203.9558, "encoder_q-layer.5": 3393.4744, "encoder_q-layer.6": 3988.8376, "encoder_q-layer.7": 4174.8076, "encoder_q-layer.8": 4708.1816, "encoder_q-layer.9": 4458.6548, "epoch": 0.37, "inbatch_neg_score": 0.1034, "inbatch_pos_score": 0.7617, "learning_rate": 3.4611111111111114e-05, "loss": 3.5209, "norm_diff": 0.0996, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7022.7569, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1038, "query_norm": 1.2793, "queue_k_norm": 1.3731, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7137, "sent_len_1": 66.6052, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.82, "stdk": 0.0482, "stdq": 0.0443, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 37700 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.5391, "doc_norm": 1.375, "encoder_q-embeddings": 4321.1016, "encoder_q-layer.0": 2915.9131, "encoder_q-layer.1": 3070.6433, "encoder_q-layer.10": 4619.2393, "encoder_q-layer.11": 10686.0088, "encoder_q-layer.2": 3308.833, "encoder_q-layer.3": 3218.4636, "encoder_q-layer.4": 3246.3274, "encoder_q-layer.5": 3416.2563, "encoder_q-layer.6": 3871.9006, "encoder_q-layer.7": 4261.8984, "encoder_q-layer.8": 4988.3066, "encoder_q-layer.9": 4548.2344, "epoch": 0.37, "inbatch_neg_score": 0.1011, "inbatch_pos_score": 0.7368, "learning_rate": 3.4555555555555556e-05, "loss": 3.5391, "norm_diff": 0.0883, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7155.9886, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0995, "query_norm": 1.2867, "queue_k_norm": 1.3737, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5688, "sent_len_1": 66.6353, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4288, "stdk": 0.0481, "stdq": 0.0447, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 37800 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.5383, "doc_norm": 1.3696, "encoder_q-embeddings": 3932.2974, "encoder_q-layer.0": 2554.3962, "encoder_q-layer.1": 2724.2078, "encoder_q-layer.10": 4734.269, "encoder_q-layer.11": 10672.3584, "encoder_q-layer.2": 3033.177, "encoder_q-layer.3": 3153.3457, "encoder_q-layer.4": 3324.3777, "encoder_q-layer.5": 3388.7878, "encoder_q-layer.6": 3852.071, "encoder_q-layer.7": 4439.9678, "encoder_q-layer.8": 5218.1494, "encoder_q-layer.9": 4715.8374, "epoch": 0.37, "inbatch_neg_score": 0.0978, "inbatch_pos_score": 0.7261, "learning_rate": 3.45e-05, "loss": 3.5383, "norm_diff": 0.0831, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7029.4886, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0978, "query_norm": 1.2865, "queue_k_norm": 1.3722, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4875, "sent_len_1": 66.8118, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4988, "stdk": 0.0479, "stdq": 0.0447, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 37900 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.5271, "doc_norm": 1.3742, "encoder_q-embeddings": 4509.7485, "encoder_q-layer.0": 2850.2776, "encoder_q-layer.1": 3002.0784, "encoder_q-layer.10": 5032.6831, "encoder_q-layer.11": 11257.7686, "encoder_q-layer.2": 3301.4075, "encoder_q-layer.3": 3443.4055, "encoder_q-layer.4": 3594.301, "encoder_q-layer.5": 3719.4814, "encoder_q-layer.6": 4077.1548, "encoder_q-layer.7": 4470.9595, "encoder_q-layer.8": 5234.085, "encoder_q-layer.9": 4780.3433, "epoch": 0.37, "inbatch_neg_score": 0.0915, "inbatch_pos_score": 0.7256, "learning_rate": 3.444444444444445e-05, "loss": 3.5271, "norm_diff": 0.0907, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7373.0838, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0905, "query_norm": 1.2835, "queue_k_norm": 1.3737, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5167, "sent_len_1": 66.7849, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0488, "stdk": 0.0481, "stdq": 0.0447, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 38000 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.5208, "doc_norm": 1.3669, "encoder_q-embeddings": 4623.186, "encoder_q-layer.0": 3017.5947, "encoder_q-layer.1": 3157.2219, "encoder_q-layer.10": 5017.4482, "encoder_q-layer.11": 11097.0029, "encoder_q-layer.2": 3653.2859, "encoder_q-layer.3": 3704.2219, "encoder_q-layer.4": 3890.2039, "encoder_q-layer.5": 3918.7439, "encoder_q-layer.6": 4431.5776, "encoder_q-layer.7": 4763.0225, "encoder_q-layer.8": 5621.668, "encoder_q-layer.9": 4874.6128, "epoch": 0.37, "inbatch_neg_score": 0.0876, "inbatch_pos_score": 0.7178, "learning_rate": 3.438888888888889e-05, "loss": 3.5208, "norm_diff": 0.0779, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7670.2548, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.088, "query_norm": 1.289, "queue_k_norm": 1.3731, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7006, "sent_len_1": 66.7672, "sent_max_len_0": 127.9838, "sent_max_len_1": 190.4412, "stdk": 0.0478, "stdq": 0.0448, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 38100 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.522, "doc_norm": 1.3662, "encoder_q-embeddings": 4226.5376, "encoder_q-layer.0": 2739.1067, "encoder_q-layer.1": 2881.0049, "encoder_q-layer.10": 4764.5845, "encoder_q-layer.11": 10674.3691, "encoder_q-layer.2": 3384.8357, "encoder_q-layer.3": 3572.6123, "encoder_q-layer.4": 3979.1707, "encoder_q-layer.5": 4184.2188, "encoder_q-layer.6": 4755.1572, "encoder_q-layer.7": 5017.6465, "encoder_q-layer.8": 5345.1572, "encoder_q-layer.9": 4946.0449, "epoch": 0.37, "inbatch_neg_score": 0.0788, "inbatch_pos_score": 0.7329, "learning_rate": 3.433333333333333e-05, "loss": 3.522, "norm_diff": 0.0656, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7374.6504, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0793, "query_norm": 1.3006, "queue_k_norm": 1.3718, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4771, "sent_len_1": 66.8263, "sent_max_len_0": 127.9925, "sent_max_len_1": 190.0538, "stdk": 0.0478, "stdq": 0.0454, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 38200 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.5237, "doc_norm": 1.3711, "encoder_q-embeddings": 4176.9746, "encoder_q-layer.0": 2752.6675, "encoder_q-layer.1": 2811.5361, "encoder_q-layer.10": 4817.2485, "encoder_q-layer.11": 11029.7637, "encoder_q-layer.2": 3315.0176, "encoder_q-layer.3": 3395.6548, "encoder_q-layer.4": 3701.5376, "encoder_q-layer.5": 3778.9951, "encoder_q-layer.6": 4016.3281, "encoder_q-layer.7": 4268.251, "encoder_q-layer.8": 4887.7148, "encoder_q-layer.9": 4620.9185, "epoch": 0.37, "inbatch_neg_score": 0.0826, "inbatch_pos_score": 0.7056, "learning_rate": 3.427777777777778e-05, "loss": 3.5237, "norm_diff": 0.0953, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7232.4612, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0822, "query_norm": 1.2757, "queue_k_norm": 1.3711, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7027, "sent_len_1": 66.8085, "sent_max_len_0": 127.9875, "sent_max_len_1": 189.335, "stdk": 0.048, "stdq": 0.0443, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 38300 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.5203, "doc_norm": 1.3654, "encoder_q-embeddings": 3902.229, "encoder_q-layer.0": 2695.5869, "encoder_q-layer.1": 2821.21, "encoder_q-layer.10": 5118.9546, "encoder_q-layer.11": 10969.7812, "encoder_q-layer.2": 3099.8062, "encoder_q-layer.3": 3150.373, "encoder_q-layer.4": 3289.0991, "encoder_q-layer.5": 3471.3076, "encoder_q-layer.6": 3983.6877, "encoder_q-layer.7": 4268.2129, "encoder_q-layer.8": 4750.7446, "encoder_q-layer.9": 4633.0234, "epoch": 0.37, "inbatch_neg_score": 0.0769, "inbatch_pos_score": 0.709, "learning_rate": 3.4222222222222224e-05, "loss": 3.5203, "norm_diff": 0.08, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7090.2378, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.076, "query_norm": 1.2855, "queue_k_norm": 1.3715, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7782, "sent_len_1": 66.7692, "sent_max_len_0": 128.0, "sent_max_len_1": 191.06, "stdk": 0.0479, "stdq": 0.0447, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 38400 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.5386, "doc_norm": 1.3744, "encoder_q-embeddings": 4266.0024, "encoder_q-layer.0": 2912.9187, "encoder_q-layer.1": 2895.0085, "encoder_q-layer.10": 5165.5771, "encoder_q-layer.11": 11404.0996, "encoder_q-layer.2": 3257.7712, "encoder_q-layer.3": 3332.9807, "encoder_q-layer.4": 3687.9585, "encoder_q-layer.5": 3864.2517, "encoder_q-layer.6": 4285.645, "encoder_q-layer.7": 4517.4062, "encoder_q-layer.8": 5101.9497, "encoder_q-layer.9": 4675.5986, "epoch": 0.38, "inbatch_neg_score": 0.0754, "inbatch_pos_score": 0.7202, "learning_rate": 3.4166666666666666e-05, "loss": 3.5386, "norm_diff": 0.0749, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7487.8404, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0748, "query_norm": 1.2994, "queue_k_norm": 1.3706, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6217, "sent_len_1": 66.7708, "sent_max_len_0": 127.975, "sent_max_len_1": 188.5188, "stdk": 0.0482, "stdq": 0.0451, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 38500 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.5349, "doc_norm": 1.3757, "encoder_q-embeddings": 4118.7998, "encoder_q-layer.0": 2722.873, "encoder_q-layer.1": 2780.2129, "encoder_q-layer.10": 4698.9756, "encoder_q-layer.11": 10641.4395, "encoder_q-layer.2": 3118.2983, "encoder_q-layer.3": 3195.3105, "encoder_q-layer.4": 3422.1509, "encoder_q-layer.5": 3468.6111, "encoder_q-layer.6": 3823.105, "encoder_q-layer.7": 4147.8813, "encoder_q-layer.8": 4851.439, "encoder_q-layer.9": 4505.2227, "epoch": 0.38, "inbatch_neg_score": 0.0826, "inbatch_pos_score": 0.6948, "learning_rate": 3.411111111111111e-05, "loss": 3.5349, "norm_diff": 0.0968, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7002.9861, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0826, "query_norm": 1.2789, "queue_k_norm": 1.3703, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6328, "sent_len_1": 66.9146, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9338, "stdk": 0.0483, "stdq": 0.0441, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 38600 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.5394, "doc_norm": 1.3753, "encoder_q-embeddings": 4106.1802, "encoder_q-layer.0": 2606.3857, "encoder_q-layer.1": 2670.5505, "encoder_q-layer.10": 4978.6284, "encoder_q-layer.11": 11302.0225, "encoder_q-layer.2": 2942.0061, "encoder_q-layer.3": 3025.8289, "encoder_q-layer.4": 3264.3228, "encoder_q-layer.5": 3404.7634, "encoder_q-layer.6": 3893.7717, "encoder_q-layer.7": 4424.7939, "encoder_q-layer.8": 5259.8027, "encoder_q-layer.9": 4728.8677, "epoch": 0.38, "inbatch_neg_score": 0.0806, "inbatch_pos_score": 0.7061, "learning_rate": 3.405555555555556e-05, "loss": 3.5394, "norm_diff": 0.0808, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7283.8066, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0792, "query_norm": 1.2945, "queue_k_norm": 1.3692, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5751, "sent_len_1": 67.0002, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8088, "stdk": 0.0483, "stdq": 0.0443, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 38700 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.5365, "doc_norm": 1.363, "encoder_q-embeddings": 3717.614, "encoder_q-layer.0": 2437.0457, "encoder_q-layer.1": 2467.2158, "encoder_q-layer.10": 4607.8276, "encoder_q-layer.11": 10468.9893, "encoder_q-layer.2": 2792.8994, "encoder_q-layer.3": 2854.2695, "encoder_q-layer.4": 2950.3445, "encoder_q-layer.5": 3017.1792, "encoder_q-layer.6": 3523.2754, "encoder_q-layer.7": 3750.6169, "encoder_q-layer.8": 4948.0679, "encoder_q-layer.9": 4470.6699, "epoch": 0.38, "inbatch_neg_score": 0.0824, "inbatch_pos_score": 0.7231, "learning_rate": 3.4000000000000007e-05, "loss": 3.5365, "norm_diff": 0.0423, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6648.1886, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0812, "query_norm": 1.3206, "queue_k_norm": 1.3682, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6599, "sent_len_1": 66.8206, "sent_max_len_0": 127.9938, "sent_max_len_1": 191.2937, "stdk": 0.0479, "stdq": 0.0451, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 38800 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.5265, "doc_norm": 1.3719, "encoder_q-embeddings": 3917.8943, "encoder_q-layer.0": 2585.0251, "encoder_q-layer.1": 2619.3489, "encoder_q-layer.10": 4401.5337, "encoder_q-layer.11": 10625.4287, "encoder_q-layer.2": 2962.9985, "encoder_q-layer.3": 3043.7864, "encoder_q-layer.4": 3110.3105, "encoder_q-layer.5": 3397.4878, "encoder_q-layer.6": 3902.03, "encoder_q-layer.7": 4127.4395, "encoder_q-layer.8": 5037.6411, "encoder_q-layer.9": 4430.771, "epoch": 0.38, "inbatch_neg_score": 0.0908, "inbatch_pos_score": 0.7314, "learning_rate": 3.394444444444444e-05, "loss": 3.5265, "norm_diff": 0.0653, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6846.0887, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0914, "query_norm": 1.3066, "queue_k_norm": 1.3672, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4488, "sent_len_1": 66.631, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4, "stdk": 0.0482, "stdq": 0.0443, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 38900 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.5252, "doc_norm": 1.3677, "encoder_q-embeddings": 11563.7051, "encoder_q-layer.0": 8127.8857, "encoder_q-layer.1": 8890.1279, "encoder_q-layer.10": 10733.4453, "encoder_q-layer.11": 23553.1523, "encoder_q-layer.2": 9513.5254, "encoder_q-layer.3": 9858.6465, "encoder_q-layer.4": 10196.1846, "encoder_q-layer.5": 10039.9561, "encoder_q-layer.6": 11233.6865, "encoder_q-layer.7": 12042.9287, "encoder_q-layer.8": 11987.7783, "encoder_q-layer.9": 9983.96, "epoch": 0.38, "inbatch_neg_score": 0.0951, "inbatch_pos_score": 0.7266, "learning_rate": 3.388888888888889e-05, "loss": 3.5252, "norm_diff": 0.0544, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17905.8166, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.0953, "query_norm": 1.3133, "queue_k_norm": 1.3689, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4884, "sent_len_1": 66.791, "sent_max_len_0": 128.0, "sent_max_len_1": 189.285, "stdk": 0.0481, "stdq": 0.0449, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 39000 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.5284, "doc_norm": 1.3655, "encoder_q-embeddings": 8395.6338, "encoder_q-layer.0": 5630.9014, "encoder_q-layer.1": 5702.1948, "encoder_q-layer.10": 9122.5938, "encoder_q-layer.11": 22115.8164, "encoder_q-layer.2": 6384.9873, "encoder_q-layer.3": 6457.062, "encoder_q-layer.4": 7022.7168, "encoder_q-layer.5": 7409.1416, "encoder_q-layer.6": 8127.4355, "encoder_q-layer.7": 8736.4941, "encoder_q-layer.8": 10162.9434, "encoder_q-layer.9": 8890.6836, "epoch": 0.38, "inbatch_neg_score": 0.0958, "inbatch_pos_score": 0.7368, "learning_rate": 3.3833333333333334e-05, "loss": 3.5284, "norm_diff": 0.0615, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14326.769, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.095, "query_norm": 1.304, "queue_k_norm": 1.3686, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4244, "sent_len_1": 66.8443, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9762, "stdk": 0.048, "stdq": 0.0447, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 39100 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.5561, "doc_norm": 1.3716, "encoder_q-embeddings": 9585.0078, "encoder_q-layer.0": 6322.4419, "encoder_q-layer.1": 6672.499, "encoder_q-layer.10": 8967.9092, "encoder_q-layer.11": 20502.0352, "encoder_q-layer.2": 7464.7837, "encoder_q-layer.3": 7688.6406, "encoder_q-layer.4": 8031.9917, "encoder_q-layer.5": 7834.8184, "encoder_q-layer.6": 9123.04, "encoder_q-layer.7": 9701.5615, "encoder_q-layer.8": 10229.5957, "encoder_q-layer.9": 9067.4727, "epoch": 0.38, "inbatch_neg_score": 0.0979, "inbatch_pos_score": 0.77, "learning_rate": 3.377777777777778e-05, "loss": 3.5561, "norm_diff": 0.0385, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14800.5919, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0976, "query_norm": 1.3331, "queue_k_norm": 1.3688, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.2037, "sent_len_1": 66.7712, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.9263, "stdk": 0.0482, "stdq": 0.046, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 39200 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.5052, "doc_norm": 1.3607, "encoder_q-embeddings": 9699.7959, "encoder_q-layer.0": 6379.8101, "encoder_q-layer.1": 7021.4746, "encoder_q-layer.10": 9273.3389, "encoder_q-layer.11": 22368.8633, "encoder_q-layer.2": 7899.2246, "encoder_q-layer.3": 7747.8525, "encoder_q-layer.4": 8093.2246, "encoder_q-layer.5": 8112.4312, "encoder_q-layer.6": 9956.5283, "encoder_q-layer.7": 9873.2588, "encoder_q-layer.8": 10852.4336, "encoder_q-layer.9": 9070.0908, "epoch": 0.38, "inbatch_neg_score": 0.0932, "inbatch_pos_score": 0.7334, "learning_rate": 3.3722222222222225e-05, "loss": 3.5052, "norm_diff": 0.07, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15527.1987, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0939, "query_norm": 1.2906, "queue_k_norm": 1.3714, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.8087, "sent_len_1": 66.8923, "sent_max_len_0": 127.9988, "sent_max_len_1": 188.705, "stdk": 0.0478, "stdq": 0.0448, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 39300 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.5232, "doc_norm": 1.3715, "encoder_q-embeddings": 8857.0977, "encoder_q-layer.0": 5832.522, "encoder_q-layer.1": 6078.2935, "encoder_q-layer.10": 9367.9023, "encoder_q-layer.11": 21336.6758, "encoder_q-layer.2": 6908.188, "encoder_q-layer.3": 7277.9248, "encoder_q-layer.4": 7561.4492, "encoder_q-layer.5": 7862.0918, "encoder_q-layer.6": 8210.0996, "encoder_q-layer.7": 9303.6572, "encoder_q-layer.8": 10579.8604, "encoder_q-layer.9": 9458.6934, "epoch": 0.38, "inbatch_neg_score": 0.0985, "inbatch_pos_score": 0.7246, "learning_rate": 3.366666666666667e-05, "loss": 3.5232, "norm_diff": 0.0808, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14650.0015, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0988, "query_norm": 1.2907, "queue_k_norm": 1.3705, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.699, "sent_len_1": 66.6953, "sent_max_len_0": 127.97, "sent_max_len_1": 187.3575, "stdk": 0.0482, "stdq": 0.0445, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 39400 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.533, "doc_norm": 1.3625, "encoder_q-embeddings": 8740.5088, "encoder_q-layer.0": 5627.1479, "encoder_q-layer.1": 5827.7637, "encoder_q-layer.10": 9727.3711, "encoder_q-layer.11": 21620.8652, "encoder_q-layer.2": 6660.7891, "encoder_q-layer.3": 6992.1455, "encoder_q-layer.4": 7648.7808, "encoder_q-layer.5": 8103.5664, "encoder_q-layer.6": 8947.7148, "encoder_q-layer.7": 9597.9463, "encoder_q-layer.8": 10960.126, "encoder_q-layer.9": 9983.0215, "epoch": 0.39, "inbatch_neg_score": 0.0966, "inbatch_pos_score": 0.7344, "learning_rate": 3.3611111111111116e-05, "loss": 3.533, "norm_diff": 0.0628, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14937.8502, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0965, "query_norm": 1.2997, "queue_k_norm": 1.372, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6659, "sent_len_1": 66.6316, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5337, "stdk": 0.0479, "stdq": 0.0453, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 39500 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.5166, "doc_norm": 1.3681, "encoder_q-embeddings": 8782.8145, "encoder_q-layer.0": 5433.2144, "encoder_q-layer.1": 5768.0693, "encoder_q-layer.10": 9886.8301, "encoder_q-layer.11": 22196.873, "encoder_q-layer.2": 6469.3672, "encoder_q-layer.3": 6477.0327, "encoder_q-layer.4": 7242.5913, "encoder_q-layer.5": 7347.333, "encoder_q-layer.6": 8314.6396, "encoder_q-layer.7": 9085.5625, "encoder_q-layer.8": 10042.0, "encoder_q-layer.9": 9100.2266, "epoch": 0.39, "inbatch_neg_score": 0.0966, "inbatch_pos_score": 0.7212, "learning_rate": 3.355555555555556e-05, "loss": 3.5166, "norm_diff": 0.1098, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14579.8274, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0964, "query_norm": 1.2584, "queue_k_norm": 1.369, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5856, "sent_len_1": 66.7766, "sent_max_len_0": 128.0, "sent_max_len_1": 189.66, "stdk": 0.0481, "stdq": 0.0436, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 39600 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.5111, "doc_norm": 1.3707, "encoder_q-embeddings": 8914.9932, "encoder_q-layer.0": 5956.6377, "encoder_q-layer.1": 6541.8179, "encoder_q-layer.10": 10468.0547, "encoder_q-layer.11": 23130.3516, "encoder_q-layer.2": 7226.0439, "encoder_q-layer.3": 7357.8608, "encoder_q-layer.4": 7785.709, "encoder_q-layer.5": 8990.0977, "encoder_q-layer.6": 9987.6484, "encoder_q-layer.7": 9893.8418, "encoder_q-layer.8": 10893.4678, "encoder_q-layer.9": 9768.1406, "epoch": 0.39, "inbatch_neg_score": 0.1004, "inbatch_pos_score": 0.7192, "learning_rate": 3.35e-05, "loss": 3.5111, "norm_diff": 0.0892, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15819.3036, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1006, "query_norm": 1.2815, "queue_k_norm": 1.3721, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.689, "sent_len_1": 66.9049, "sent_max_len_0": 128.0, "sent_max_len_1": 190.295, "stdk": 0.0482, "stdq": 0.0445, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 39700 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.5193, "doc_norm": 1.3689, "encoder_q-embeddings": 10564.6045, "encoder_q-layer.0": 6956.0625, "encoder_q-layer.1": 7084.415, "encoder_q-layer.10": 9196.5146, "encoder_q-layer.11": 22910.9883, "encoder_q-layer.2": 8477.7549, "encoder_q-layer.3": 8711.1523, "encoder_q-layer.4": 8892.3369, "encoder_q-layer.5": 9409.9678, "encoder_q-layer.6": 9575.2158, "encoder_q-layer.7": 9359.6152, "encoder_q-layer.8": 10244.1299, "encoder_q-layer.9": 9865.0352, "epoch": 0.39, "inbatch_neg_score": 0.0986, "inbatch_pos_score": 0.7163, "learning_rate": 3.3444444444444443e-05, "loss": 3.5193, "norm_diff": 0.0887, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16512.1616, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.0983, "query_norm": 1.2802, "queue_k_norm": 1.3707, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5529, "sent_len_1": 66.8209, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5813, "stdk": 0.0481, "stdq": 0.0445, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 39800 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.5363, "doc_norm": 1.369, "encoder_q-embeddings": 8498.0654, "encoder_q-layer.0": 5781.4214, "encoder_q-layer.1": 5798.9546, "encoder_q-layer.10": 9110.0996, "encoder_q-layer.11": 22009.2363, "encoder_q-layer.2": 6575.832, "encoder_q-layer.3": 6814.1099, "encoder_q-layer.4": 7191.48, "encoder_q-layer.5": 7531.5884, "encoder_q-layer.6": 8009.3901, "encoder_q-layer.7": 9238.3662, "encoder_q-layer.8": 9821.4131, "encoder_q-layer.9": 8865.293, "epoch": 0.39, "inbatch_neg_score": 0.1019, "inbatch_pos_score": 0.7407, "learning_rate": 3.338888888888889e-05, "loss": 3.5363, "norm_diff": 0.0942, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14252.0906, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1011, "query_norm": 1.2747, "queue_k_norm": 1.3701, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4881, "sent_len_1": 66.8728, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8162, "stdk": 0.048, "stdq": 0.0441, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 39900 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.5237, "doc_norm": 1.3697, "encoder_q-embeddings": 8399.8438, "encoder_q-layer.0": 5621.8989, "encoder_q-layer.1": 5870.2305, "encoder_q-layer.10": 10595.8096, "encoder_q-layer.11": 20689.8574, "encoder_q-layer.2": 6655.0654, "encoder_q-layer.3": 6834.7192, "encoder_q-layer.4": 7302.7388, "encoder_q-layer.5": 7207.481, "encoder_q-layer.6": 7920.6543, "encoder_q-layer.7": 8502.5498, "encoder_q-layer.8": 9359.208, "encoder_q-layer.9": 8745.3154, "epoch": 0.39, "inbatch_neg_score": 0.096, "inbatch_pos_score": 0.7236, "learning_rate": 3.3333333333333335e-05, "loss": 3.5237, "norm_diff": 0.1032, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14023.9455, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.096, "query_norm": 1.2665, "queue_k_norm": 1.371, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5968, "sent_len_1": 66.6929, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7038, "stdk": 0.0481, "stdq": 0.044, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 40000 }, { "dev_runtime": 26.5468, "dev_samples_per_second": 2.411, "dev_steps_per_second": 0.038, "epoch": 0.39, "step": 40000, "test_accuracy": 93.5791015625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.38111406564712524, "test_doc_norm": 1.355682373046875, "test_inbatch_neg_score": 0.45529159903526306, "test_inbatch_pos_score": 1.36717689037323, "test_loss": 0.38111406564712524, "test_loss_align": 1.0851057767868042, "test_loss_unif": 3.9589462280273438, "test_loss_unif_q@queue": 3.9589459896087646, "test_norm_diff": 0.03944579139351845, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.10086508840322495, "test_query_norm": 1.3951282501220703, "test_queue_k_norm": 1.3709901571273804, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042180098593235016, "test_stdq": 0.042515359818935394, "test_stdqueue_k": 0.04822894558310509, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.5468, "dev_samples_per_second": 2.411, "dev_steps_per_second": 0.038, "epoch": 0.39, "eval_beir-arguana_ndcg@10": 0.34952, "eval_beir-arguana_recall@10": 0.58108, "eval_beir-arguana_recall@100": 0.89047, "eval_beir-arguana_recall@20": 0.72262, "eval_beir-avg_ndcg@10": 0.36372041666666666, "eval_beir-avg_recall@10": 0.43166208333333334, "eval_beir-avg_recall@100": 0.6141438333333333, "eval_beir-avg_recall@20": 0.48831383333333334, "eval_beir-cqadupstack_ndcg@10": 0.25401416666666665, "eval_beir-cqadupstack_recall@10": 0.3434208333333333, "eval_beir-cqadupstack_recall@100": 0.5655783333333333, "eval_beir-cqadupstack_recall@20": 0.4044683333333334, "eval_beir-fiqa_ndcg@10": 0.23373, "eval_beir-fiqa_recall@10": 0.30073, "eval_beir-fiqa_recall@100": 0.54973, "eval_beir-fiqa_recall@20": 0.36669, "eval_beir-nfcorpus_ndcg@10": 0.28263, "eval_beir-nfcorpus_recall@10": 0.14136, "eval_beir-nfcorpus_recall@100": 0.27451, "eval_beir-nfcorpus_recall@20": 0.17261, "eval_beir-nq_ndcg@10": 0.26307, "eval_beir-nq_recall@10": 0.43419, "eval_beir-nq_recall@100": 0.75956, "eval_beir-nq_recall@20": 0.54744, "eval_beir-quora_ndcg@10": 0.78898, "eval_beir-quora_recall@10": 0.89209, "eval_beir-quora_recall@100": 0.97913, "eval_beir-quora_recall@20": 0.93049, "eval_beir-scidocs_ndcg@10": 0.14023, "eval_beir-scidocs_recall@10": 0.14663, "eval_beir-scidocs_recall@100": 0.34527, "eval_beir-scidocs_recall@20": 0.19787, "eval_beir-scifact_ndcg@10": 0.59418, "eval_beir-scifact_recall@10": 0.75778, "eval_beir-scifact_recall@100": 0.90433, "eval_beir-scifact_recall@20": 0.79078, "eval_beir-trec-covid_ndcg@10": 0.56314, "eval_beir-trec-covid_recall@10": 0.602, "eval_beir-trec-covid_recall@100": 0.4426, "eval_beir-trec-covid_recall@20": 0.562, "eval_beir-webis-touche2020_ndcg@10": 0.16771, "eval_beir-webis-touche2020_recall@10": 0.11734, "eval_beir-webis-touche2020_recall@100": 0.43026, "eval_beir-webis-touche2020_recall@20": 0.18817, "eval_senteval-avg_sts": 0.7546020075187649, "eval_senteval-sickr_spearman": 0.7109271605772691, "eval_senteval-stsb_spearman": 0.7982768544602606, "step": 40000, "test_accuracy": 93.5791015625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.38111406564712524, "test_doc_norm": 1.355682373046875, "test_inbatch_neg_score": 0.45529159903526306, "test_inbatch_pos_score": 1.36717689037323, "test_loss": 0.38111406564712524, "test_loss_align": 1.0851057767868042, "test_loss_unif": 3.9589462280273438, "test_loss_unif_q@queue": 3.9589459896087646, "test_norm_diff": 0.03944579139351845, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.10086508840322495, "test_query_norm": 1.3951282501220703, "test_queue_k_norm": 1.3709901571273804, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042180098593235016, "test_stdq": 0.042515359818935394, "test_stdqueue_k": 0.04822894558310509, "test_stdqueue_q": 0.0 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.5037, "doc_norm": 1.3701, "encoder_q-embeddings": 9532.8369, "encoder_q-layer.0": 6412.0352, "encoder_q-layer.1": 6878.2202, "encoder_q-layer.10": 10081.8242, "encoder_q-layer.11": 21761.2832, "encoder_q-layer.2": 7849.292, "encoder_q-layer.3": 8367.1729, "encoder_q-layer.4": 8884.1367, "encoder_q-layer.5": 9451.5391, "encoder_q-layer.6": 10154.1357, "encoder_q-layer.7": 9734.1201, "encoder_q-layer.8": 10722.4131, "encoder_q-layer.9": 9620.4512, "epoch": 0.39, "inbatch_neg_score": 0.0971, "inbatch_pos_score": 0.729, "learning_rate": 3.327777777777778e-05, "loss": 3.5037, "norm_diff": 0.0995, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15496.6317, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0977, "query_norm": 1.2706, "queue_k_norm": 1.3699, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.855, "sent_len_1": 66.7, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.3475, "stdk": 0.0481, "stdq": 0.044, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 40100 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.495, "doc_norm": 1.3659, "encoder_q-embeddings": 8670.1621, "encoder_q-layer.0": 5619.1392, "encoder_q-layer.1": 5605.8154, "encoder_q-layer.10": 10388.5762, "encoder_q-layer.11": 22322.7168, "encoder_q-layer.2": 6359.8833, "encoder_q-layer.3": 6701.2642, "encoder_q-layer.4": 7300.5854, "encoder_q-layer.5": 7373.3037, "encoder_q-layer.6": 8369.2461, "encoder_q-layer.7": 9174.8887, "encoder_q-layer.8": 10620.582, "encoder_q-layer.9": 9827.0107, "epoch": 0.39, "inbatch_neg_score": 0.0983, "inbatch_pos_score": 0.752, "learning_rate": 3.322222222222222e-05, "loss": 3.495, "norm_diff": 0.0593, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14879.4274, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0977, "query_norm": 1.3066, "queue_k_norm": 1.3693, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6332, "sent_len_1": 66.8045, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.005, "stdk": 0.0479, "stdq": 0.0452, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 40200 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.5334, "doc_norm": 1.369, "encoder_q-embeddings": 8558.6025, "encoder_q-layer.0": 5628.0293, "encoder_q-layer.1": 5913.7549, "encoder_q-layer.10": 10057.4707, "encoder_q-layer.11": 20832.9629, "encoder_q-layer.2": 6464.958, "encoder_q-layer.3": 6874.6006, "encoder_q-layer.4": 7076.895, "encoder_q-layer.5": 7376.4033, "encoder_q-layer.6": 8365.7686, "encoder_q-layer.7": 9610.3311, "encoder_q-layer.8": 10274.835, "encoder_q-layer.9": 9100.7773, "epoch": 0.39, "inbatch_neg_score": 0.094, "inbatch_pos_score": 0.7256, "learning_rate": 3.316666666666667e-05, "loss": 3.5334, "norm_diff": 0.0873, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14436.4377, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0953, "query_norm": 1.2817, "queue_k_norm": 1.3695, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4199, "sent_len_1": 66.7134, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3288, "stdk": 0.0481, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 40300 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.5134, "doc_norm": 1.3663, "encoder_q-embeddings": 8823.9961, "encoder_q-layer.0": 5748.6865, "encoder_q-layer.1": 6195.4971, "encoder_q-layer.10": 8770.3545, "encoder_q-layer.11": 20946.4277, "encoder_q-layer.2": 6805.8677, "encoder_q-layer.3": 7087.8765, "encoder_q-layer.4": 7546.5645, "encoder_q-layer.5": 7295.0059, "encoder_q-layer.6": 7981.1528, "encoder_q-layer.7": 8496.3223, "encoder_q-layer.8": 9642.7178, "encoder_q-layer.9": 8546.2119, "epoch": 0.39, "inbatch_neg_score": 0.0944, "inbatch_pos_score": 0.7554, "learning_rate": 3.311111111111112e-05, "loss": 3.5134, "norm_diff": 0.0662, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14082.9975, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0942, "query_norm": 1.3001, "queue_k_norm": 1.3706, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.573, "sent_len_1": 66.7288, "sent_max_len_0": 127.9887, "sent_max_len_1": 189.5175, "stdk": 0.048, "stdq": 0.045, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 40400 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.5313, "doc_norm": 1.3645, "encoder_q-embeddings": 8793.8379, "encoder_q-layer.0": 5629.209, "encoder_q-layer.1": 5770.4189, "encoder_q-layer.10": 10530.8496, "encoder_q-layer.11": 22485.6758, "encoder_q-layer.2": 6449.8799, "encoder_q-layer.3": 6618.5181, "encoder_q-layer.4": 7208.3447, "encoder_q-layer.5": 7492.7529, "encoder_q-layer.6": 8268.1611, "encoder_q-layer.7": 9448.3379, "encoder_q-layer.8": 11179.5996, "encoder_q-layer.9": 10046.4766, "epoch": 0.4, "inbatch_neg_score": 0.0947, "inbatch_pos_score": 0.7173, "learning_rate": 3.3055555555555553e-05, "loss": 3.5313, "norm_diff": 0.0934, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14859.6262, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0952, "query_norm": 1.2712, "queue_k_norm": 1.3694, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6796, "sent_len_1": 66.9778, "sent_max_len_0": 128.0, "sent_max_len_1": 191.935, "stdk": 0.0479, "stdq": 0.0437, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 40500 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.5066, "doc_norm": 1.3654, "encoder_q-embeddings": 9594.0186, "encoder_q-layer.0": 6656.0259, "encoder_q-layer.1": 7320.0776, "encoder_q-layer.10": 9623.1406, "encoder_q-layer.11": 22151.0215, "encoder_q-layer.2": 8397.0254, "encoder_q-layer.3": 9114.5605, "encoder_q-layer.4": 9194.1729, "encoder_q-layer.5": 9914.3428, "encoder_q-layer.6": 9824.6934, "encoder_q-layer.7": 10182.5088, "encoder_q-layer.8": 10568.1602, "encoder_q-layer.9": 9599.9111, "epoch": 0.4, "inbatch_neg_score": 0.1025, "inbatch_pos_score": 0.7251, "learning_rate": 3.3e-05, "loss": 3.5066, "norm_diff": 0.0667, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15777.3057, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1017, "query_norm": 1.2987, "queue_k_norm": 1.3712, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4706, "sent_len_1": 66.8171, "sent_max_len_0": 127.995, "sent_max_len_1": 189.4775, "stdk": 0.0479, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 40600 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.4876, "doc_norm": 1.372, "encoder_q-embeddings": 8110.1187, "encoder_q-layer.0": 5427.8364, "encoder_q-layer.1": 5668.4897, "encoder_q-layer.10": 10672.5459, "encoder_q-layer.11": 22327.0918, "encoder_q-layer.2": 6306.1709, "encoder_q-layer.3": 6423.2139, "encoder_q-layer.4": 6739.5273, "encoder_q-layer.5": 6940.7466, "encoder_q-layer.6": 8264.917, "encoder_q-layer.7": 8828.2979, "encoder_q-layer.8": 10674.2148, "encoder_q-layer.9": 9748.9375, "epoch": 0.4, "inbatch_neg_score": 0.1027, "inbatch_pos_score": 0.7329, "learning_rate": 3.2944444444444445e-05, "loss": 3.4876, "norm_diff": 0.0568, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14578.2644, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1025, "query_norm": 1.3152, "queue_k_norm": 1.3697, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.8336, "sent_len_1": 66.8656, "sent_max_len_0": 127.9925, "sent_max_len_1": 189.5538, "stdk": 0.0481, "stdq": 0.045, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40700 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.4678, "doc_norm": 1.3753, "encoder_q-embeddings": 8159.7651, "encoder_q-layer.0": 5265.1709, "encoder_q-layer.1": 5559.6343, "encoder_q-layer.10": 9239.4805, "encoder_q-layer.11": 21313.7871, "encoder_q-layer.2": 6317.0303, "encoder_q-layer.3": 6376.8823, "encoder_q-layer.4": 6905.6694, "encoder_q-layer.5": 7088.2695, "encoder_q-layer.6": 7841.7036, "encoder_q-layer.7": 9001.1035, "encoder_q-layer.8": 9920.3369, "encoder_q-layer.9": 9366.3271, "epoch": 0.4, "inbatch_neg_score": 0.1082, "inbatch_pos_score": 0.7651, "learning_rate": 3.2888888888888894e-05, "loss": 3.4678, "norm_diff": 0.0506, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14116.547, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1075, "query_norm": 1.3247, "queue_k_norm": 1.3715, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7203, "sent_len_1": 66.807, "sent_max_len_0": 127.995, "sent_max_len_1": 189.2, "stdk": 0.0482, "stdq": 0.0451, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 40800 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.5005, "doc_norm": 1.3571, "encoder_q-embeddings": 8820.1641, "encoder_q-layer.0": 5807.2349, "encoder_q-layer.1": 6138.3569, "encoder_q-layer.10": 9688.2578, "encoder_q-layer.11": 20310.3867, "encoder_q-layer.2": 6724.1943, "encoder_q-layer.3": 7074.1553, "encoder_q-layer.4": 7634.3032, "encoder_q-layer.5": 7613.7178, "encoder_q-layer.6": 8892.627, "encoder_q-layer.7": 9158.5049, "encoder_q-layer.8": 10411.7852, "encoder_q-layer.9": 9512.2148, "epoch": 0.4, "inbatch_neg_score": 0.1167, "inbatch_pos_score": 0.7344, "learning_rate": 3.283333333333333e-05, "loss": 3.5005, "norm_diff": 0.0454, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14277.7185, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1156, "query_norm": 1.3139, "queue_k_norm": 1.3715, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6703, "sent_len_1": 66.7391, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2763, "stdk": 0.0476, "stdq": 0.0442, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 40900 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.4977, "doc_norm": 1.3762, "encoder_q-embeddings": 15540.1406, "encoder_q-layer.0": 9817.4795, "encoder_q-layer.1": 10092.2617, "encoder_q-layer.10": 19355.1152, "encoder_q-layer.11": 43917.5781, "encoder_q-layer.2": 11227.2773, "encoder_q-layer.3": 11395.5967, "encoder_q-layer.4": 12222.4541, "encoder_q-layer.5": 12811.8701, "encoder_q-layer.6": 15282.9658, "encoder_q-layer.7": 16835.709, "encoder_q-layer.8": 19464.9863, "encoder_q-layer.9": 18494.1172, "epoch": 0.4, "inbatch_neg_score": 0.1191, "inbatch_pos_score": 0.7524, "learning_rate": 3.277777777777778e-05, "loss": 3.4977, "norm_diff": 0.0562, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27727.6512, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.1179, "query_norm": 1.3201, "queue_k_norm": 1.3737, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7362, "sent_len_1": 67.0114, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8675, "stdk": 0.0482, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41000 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.5294, "doc_norm": 1.3699, "encoder_q-embeddings": 16763.8164, "encoder_q-layer.0": 10864.3447, "encoder_q-layer.1": 11625.5234, "encoder_q-layer.10": 18767.4238, "encoder_q-layer.11": 43288.7305, "encoder_q-layer.2": 12882.5928, "encoder_q-layer.3": 13249.6094, "encoder_q-layer.4": 14817.6826, "encoder_q-layer.5": 14937.9482, "encoder_q-layer.6": 16876.0078, "encoder_q-layer.7": 18753.0859, "encoder_q-layer.8": 21672.459, "encoder_q-layer.9": 18736.291, "epoch": 0.4, "inbatch_neg_score": 0.1196, "inbatch_pos_score": 0.769, "learning_rate": 3.272222222222223e-05, "loss": 3.5294, "norm_diff": 0.0422, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29524.7109, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.1192, "query_norm": 1.3314, "queue_k_norm": 1.3741, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4055, "sent_len_1": 66.6299, "sent_max_len_0": 127.9963, "sent_max_len_1": 188.4512, "stdk": 0.048, "stdq": 0.0449, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41100 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.5189, "doc_norm": 1.3733, "encoder_q-embeddings": 19291.4102, "encoder_q-layer.0": 12149.4521, "encoder_q-layer.1": 12693.7656, "encoder_q-layer.10": 19965.7559, "encoder_q-layer.11": 46167.4219, "encoder_q-layer.2": 13916.6826, "encoder_q-layer.3": 14672.2734, "encoder_q-layer.4": 14888.4385, "encoder_q-layer.5": 15076.0508, "encoder_q-layer.6": 16943.0059, "encoder_q-layer.7": 20123.7578, "encoder_q-layer.8": 23492.3691, "encoder_q-layer.9": 20183.7402, "epoch": 0.4, "inbatch_neg_score": 0.1313, "inbatch_pos_score": 0.75, "learning_rate": 3.266666666666667e-05, "loss": 3.5189, "norm_diff": 0.0413, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 31544.67, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.1311, "query_norm": 1.332, "queue_k_norm": 1.3734, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5699, "sent_len_1": 66.8884, "sent_max_len_0": 128.0, "sent_max_len_1": 191.78, "stdk": 0.0481, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41200 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.4909, "doc_norm": 1.3693, "encoder_q-embeddings": 15643.6729, "encoder_q-layer.0": 10243.9824, "encoder_q-layer.1": 11055.6338, "encoder_q-layer.10": 18368.082, "encoder_q-layer.11": 42469.4062, "encoder_q-layer.2": 11733.2734, "encoder_q-layer.3": 12508.3037, "encoder_q-layer.4": 12660.1338, "encoder_q-layer.5": 13281.6104, "encoder_q-layer.6": 15081.6572, "encoder_q-layer.7": 16798.2578, "encoder_q-layer.8": 20243.5723, "encoder_q-layer.9": 18873.6133, "epoch": 0.4, "inbatch_neg_score": 0.1238, "inbatch_pos_score": 0.7471, "learning_rate": 3.261111111111111e-05, "loss": 3.4909, "norm_diff": 0.0666, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27777.7728, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.1238, "query_norm": 1.3027, "queue_k_norm": 1.3753, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7867, "sent_len_1": 67.0448, "sent_max_len_0": 127.995, "sent_max_len_1": 191.9512, "stdk": 0.0479, "stdq": 0.0439, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41300 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.5147, "doc_norm": 1.3779, "encoder_q-embeddings": 17427.9512, "encoder_q-layer.0": 11367.2119, "encoder_q-layer.1": 11632.7988, "encoder_q-layer.10": 18661.7383, "encoder_q-layer.11": 44610.6016, "encoder_q-layer.2": 12595.0791, "encoder_q-layer.3": 13134.0078, "encoder_q-layer.4": 13939.9033, "encoder_q-layer.5": 14715.6992, "encoder_q-layer.6": 16283.6738, "encoder_q-layer.7": 17672.4824, "encoder_q-layer.8": 20195.209, "encoder_q-layer.9": 18727.0234, "epoch": 0.4, "inbatch_neg_score": 0.1326, "inbatch_pos_score": 0.7671, "learning_rate": 3.2555555555555555e-05, "loss": 3.5147, "norm_diff": 0.0438, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29147.6358, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.132, "query_norm": 1.3341, "queue_k_norm": 1.3745, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6578, "sent_len_1": 66.7794, "sent_max_len_0": 127.9862, "sent_max_len_1": 191.4837, "stdk": 0.0482, "stdq": 0.0451, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41400 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.5005, "doc_norm": 1.3725, "encoder_q-embeddings": 14391.6855, "encoder_q-layer.0": 9806.3652, "encoder_q-layer.1": 9999.2988, "encoder_q-layer.10": 18063.7656, "encoder_q-layer.11": 41446.5586, "encoder_q-layer.2": 11129.6553, "encoder_q-layer.3": 11449.3096, "encoder_q-layer.4": 11782.5527, "encoder_q-layer.5": 12426.8457, "encoder_q-layer.6": 13704.5059, "encoder_q-layer.7": 15180.3291, "encoder_q-layer.8": 17927.5977, "encoder_q-layer.9": 16989.959, "epoch": 0.41, "inbatch_neg_score": 0.1268, "inbatch_pos_score": 0.7803, "learning_rate": 3.2500000000000004e-05, "loss": 3.5005, "norm_diff": 0.0671, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26128.3547, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.127, "query_norm": 1.3054, "queue_k_norm": 1.3764, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.642, "sent_len_1": 66.6605, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.3725, "stdk": 0.048, "stdq": 0.0447, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41500 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.5007, "doc_norm": 1.3771, "encoder_q-embeddings": 17677.2832, "encoder_q-layer.0": 11576.7715, "encoder_q-layer.1": 12208.0186, "encoder_q-layer.10": 18704.9375, "encoder_q-layer.11": 44867.3359, "encoder_q-layer.2": 13570.8232, "encoder_q-layer.3": 14318.3008, "encoder_q-layer.4": 15207.5254, "encoder_q-layer.5": 15691.9082, "encoder_q-layer.6": 17617.375, "encoder_q-layer.7": 19165.457, "encoder_q-layer.8": 22006.8691, "encoder_q-layer.9": 20011.834, "epoch": 0.41, "inbatch_neg_score": 0.1223, "inbatch_pos_score": 0.7568, "learning_rate": 3.2444444444444446e-05, "loss": 3.5007, "norm_diff": 0.0804, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29548.8793, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.1226, "query_norm": 1.2966, "queue_k_norm": 1.3782, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.602, "sent_len_1": 66.8099, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2837, "stdk": 0.0481, "stdq": 0.0445, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 41600 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.5028, "doc_norm": 1.3772, "encoder_q-embeddings": 15884.1377, "encoder_q-layer.0": 10493.4541, "encoder_q-layer.1": 11127.4873, "encoder_q-layer.10": 21276.9258, "encoder_q-layer.11": 45456.2266, "encoder_q-layer.2": 12574.666, "encoder_q-layer.3": 13271.875, "encoder_q-layer.4": 13944.5596, "encoder_q-layer.5": 14607.7305, "encoder_q-layer.6": 16828.8867, "encoder_q-layer.7": 18436.4727, "encoder_q-layer.8": 21887.6895, "encoder_q-layer.9": 19873.75, "epoch": 0.41, "inbatch_neg_score": 0.114, "inbatch_pos_score": 0.7324, "learning_rate": 3.238888888888889e-05, "loss": 3.5028, "norm_diff": 0.0893, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29347.0796, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.113, "query_norm": 1.2879, "queue_k_norm": 1.3776, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4953, "sent_len_1": 66.5888, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8638, "stdk": 0.0481, "stdq": 0.0447, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41700 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.5003, "doc_norm": 1.3828, "encoder_q-embeddings": 17193.2031, "encoder_q-layer.0": 11105.8789, "encoder_q-layer.1": 12131.1934, "encoder_q-layer.10": 18056.377, "encoder_q-layer.11": 42601.6094, "encoder_q-layer.2": 13766.3359, "encoder_q-layer.3": 14114.1533, "encoder_q-layer.4": 14809.79, "encoder_q-layer.5": 15686.9707, "encoder_q-layer.6": 18901.2715, "encoder_q-layer.7": 18689.6992, "encoder_q-layer.8": 20512.7695, "encoder_q-layer.9": 18835.1426, "epoch": 0.41, "inbatch_neg_score": 0.1076, "inbatch_pos_score": 0.7734, "learning_rate": 3.233333333333333e-05, "loss": 3.5003, "norm_diff": 0.0837, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28938.0176, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.108, "query_norm": 1.2991, "queue_k_norm": 1.3773, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5999, "sent_len_1": 66.7975, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1975, "stdk": 0.0483, "stdq": 0.0451, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41800 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.5139, "doc_norm": 1.382, "encoder_q-embeddings": 15545.9199, "encoder_q-layer.0": 10019.9678, "encoder_q-layer.1": 10191.8018, "encoder_q-layer.10": 20210.3672, "encoder_q-layer.11": 42809.1445, "encoder_q-layer.2": 11423.9326, "encoder_q-layer.3": 11551.2197, "encoder_q-layer.4": 12044.9355, "encoder_q-layer.5": 12342.9277, "encoder_q-layer.6": 14717.2773, "encoder_q-layer.7": 16733.4863, "encoder_q-layer.8": 19807.8203, "encoder_q-layer.9": 18872.8926, "epoch": 0.41, "inbatch_neg_score": 0.1028, "inbatch_pos_score": 0.7559, "learning_rate": 3.227777777777778e-05, "loss": 3.5139, "norm_diff": 0.1044, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27526.1576, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.1031, "query_norm": 1.2776, "queue_k_norm": 1.377, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4078, "sent_len_1": 66.5649, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2837, "stdk": 0.0483, "stdq": 0.0445, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41900 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.5212, "doc_norm": 1.3771, "encoder_q-embeddings": 23362.1191, "encoder_q-layer.0": 15711.1777, "encoder_q-layer.1": 16597.9922, "encoder_q-layer.10": 18289.9668, "encoder_q-layer.11": 41941.0586, "encoder_q-layer.2": 18952.4453, "encoder_q-layer.3": 19412.4062, "encoder_q-layer.4": 21241.2832, "encoder_q-layer.5": 20900.6973, "encoder_q-layer.6": 23297.1172, "encoder_q-layer.7": 20456.1387, "encoder_q-layer.8": 21737.1035, "encoder_q-layer.9": 18620.0332, "epoch": 0.41, "inbatch_neg_score": 0.0981, "inbatch_pos_score": 0.7471, "learning_rate": 3.222222222222223e-05, "loss": 3.5212, "norm_diff": 0.1034, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 33392.9496, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.0985, "query_norm": 1.2738, "queue_k_norm": 1.3776, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.644, "sent_len_1": 66.7043, "sent_max_len_0": 127.9988, "sent_max_len_1": 187.6413, "stdk": 0.0482, "stdq": 0.0444, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 42000 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.5111, "doc_norm": 1.3785, "encoder_q-embeddings": 15803.1025, "encoder_q-layer.0": 10088.0732, "encoder_q-layer.1": 10868.8447, "encoder_q-layer.10": 17146.9766, "encoder_q-layer.11": 39392.1211, "encoder_q-layer.2": 11959.1816, "encoder_q-layer.3": 12618.4541, "encoder_q-layer.4": 13407.7871, "encoder_q-layer.5": 14443.7471, "encoder_q-layer.6": 15208.5273, "encoder_q-layer.7": 17428.9062, "encoder_q-layer.8": 19178.5977, "encoder_q-layer.9": 17338.5273, "epoch": 0.41, "inbatch_neg_score": 0.0965, "inbatch_pos_score": 0.7583, "learning_rate": 3.2166666666666665e-05, "loss": 3.5111, "norm_diff": 0.0948, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26732.9281, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.0958, "query_norm": 1.2837, "queue_k_norm": 1.3748, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5981, "sent_len_1": 66.7186, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5125, "stdk": 0.0482, "stdq": 0.0448, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42100 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.4979, "doc_norm": 1.377, "encoder_q-embeddings": 17495.0508, "encoder_q-layer.0": 11343.8438, "encoder_q-layer.1": 11961.6035, "encoder_q-layer.10": 18705.7461, "encoder_q-layer.11": 42601.5312, "encoder_q-layer.2": 13629.9014, "encoder_q-layer.3": 14199.2939, "encoder_q-layer.4": 15542.5547, "encoder_q-layer.5": 16952.502, "encoder_q-layer.6": 17676.9609, "encoder_q-layer.7": 19098.875, "encoder_q-layer.8": 20823.0566, "encoder_q-layer.9": 18459.7949, "epoch": 0.41, "inbatch_neg_score": 0.0945, "inbatch_pos_score": 0.7505, "learning_rate": 3.2111111111111114e-05, "loss": 3.4979, "norm_diff": 0.0964, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29217.881, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.0945, "query_norm": 1.2806, "queue_k_norm": 1.3764, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.483, "sent_len_1": 66.5898, "sent_max_len_0": 127.9988, "sent_max_len_1": 188.5238, "stdk": 0.0482, "stdq": 0.0448, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42200 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.5039, "doc_norm": 1.3737, "encoder_q-embeddings": 15884.0195, "encoder_q-layer.0": 10219.7646, "encoder_q-layer.1": 10458.207, "encoder_q-layer.10": 18830.916, "encoder_q-layer.11": 43392.5977, "encoder_q-layer.2": 11321.4727, "encoder_q-layer.3": 11598.8164, "encoder_q-layer.4": 12136.2246, "encoder_q-layer.5": 12409.1699, "encoder_q-layer.6": 14653.458, "encoder_q-layer.7": 16295.5947, "encoder_q-layer.8": 20111.1289, "encoder_q-layer.9": 18633.3965, "epoch": 0.41, "inbatch_neg_score": 0.0899, "inbatch_pos_score": 0.7192, "learning_rate": 3.2055555555555556e-05, "loss": 3.5039, "norm_diff": 0.0973, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27859.9955, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.0908, "query_norm": 1.2764, "queue_k_norm": 1.3777, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4938, "sent_len_1": 66.9925, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9588, "stdk": 0.0481, "stdq": 0.0446, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 42300 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.4805, "doc_norm": 1.3754, "encoder_q-embeddings": 34406.6211, "encoder_q-layer.0": 29332.9453, "encoder_q-layer.1": 32510.1328, "encoder_q-layer.10": 19221.459, "encoder_q-layer.11": 41412.5703, "encoder_q-layer.2": 40086.3594, "encoder_q-layer.3": 42331.6641, "encoder_q-layer.4": 45648.6992, "encoder_q-layer.5": 47437.0703, "encoder_q-layer.6": 43654.3203, "encoder_q-layer.7": 34425.4258, "encoder_q-layer.8": 31013.0879, "encoder_q-layer.9": 19270.8379, "epoch": 0.41, "inbatch_neg_score": 0.0847, "inbatch_pos_score": 0.7466, "learning_rate": 3.2000000000000005e-05, "loss": 3.4805, "norm_diff": 0.0967, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 55102.4634, "preclip_grad_norm_avg": 0.0005, "q@queue_neg_score": 0.0854, "query_norm": 1.2787, "queue_k_norm": 1.3744, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7254, "sent_len_1": 66.7076, "sent_max_len_0": 128.0, "sent_max_len_1": 191.7812, "stdk": 0.0482, "stdq": 0.0448, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42400 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.4974, "doc_norm": 1.3774, "encoder_q-embeddings": 44777.3828, "encoder_q-layer.0": 32929.3906, "encoder_q-layer.1": 30523.5957, "encoder_q-layer.10": 10063.7578, "encoder_q-layer.11": 21755.9336, "encoder_q-layer.2": 27412.8457, "encoder_q-layer.3": 28656.5898, "encoder_q-layer.4": 24236.7578, "encoder_q-layer.5": 20118.6309, "encoder_q-layer.6": 20784.4297, "encoder_q-layer.7": 19065.0742, "encoder_q-layer.8": 15637.9385, "encoder_q-layer.9": 9729.0254, "epoch": 0.41, "inbatch_neg_score": 0.0772, "inbatch_pos_score": 0.7222, "learning_rate": 3.194444444444444e-05, "loss": 3.4974, "norm_diff": 0.1025, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 40235.122, "preclip_grad_norm_avg": 0.0004, "q@queue_neg_score": 0.0768, "query_norm": 1.2749, "queue_k_norm": 1.3747, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7256, "sent_len_1": 66.9759, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.5225, "stdk": 0.0483, "stdq": 0.0448, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42500 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.4879, "doc_norm": 1.3757, "encoder_q-embeddings": 9100.8555, "encoder_q-layer.0": 5947.2739, "encoder_q-layer.1": 6238.1484, "encoder_q-layer.10": 10569.7676, "encoder_q-layer.11": 22191.9453, "encoder_q-layer.2": 7060.1328, "encoder_q-layer.3": 7298.8345, "encoder_q-layer.4": 7770.769, "encoder_q-layer.5": 8033.7949, "encoder_q-layer.6": 9085.0137, "encoder_q-layer.7": 10215.1309, "encoder_q-layer.8": 11807.1787, "encoder_q-layer.9": 10206.209, "epoch": 0.42, "inbatch_neg_score": 0.0791, "inbatch_pos_score": 0.7324, "learning_rate": 3.188888888888889e-05, "loss": 3.4879, "norm_diff": 0.0902, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15290.852, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0777, "query_norm": 1.2855, "queue_k_norm": 1.3746, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5942, "sent_len_1": 66.6764, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2763, "stdk": 0.0482, "stdq": 0.0452, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 42600 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.475, "doc_norm": 1.3797, "encoder_q-embeddings": 4736.3867, "encoder_q-layer.0": 3082.2891, "encoder_q-layer.1": 3408.9609, "encoder_q-layer.10": 4859.6118, "encoder_q-layer.11": 10668.9248, "encoder_q-layer.2": 4016.4377, "encoder_q-layer.3": 4215.2002, "encoder_q-layer.4": 4549.1616, "encoder_q-layer.5": 5032.4653, "encoder_q-layer.6": 5230.8823, "encoder_q-layer.7": 5217.9033, "encoder_q-layer.8": 5945.1333, "encoder_q-layer.9": 4754.7358, "epoch": 0.42, "inbatch_neg_score": 0.0773, "inbatch_pos_score": 0.6963, "learning_rate": 3.183333333333334e-05, "loss": 3.475, "norm_diff": 0.1298, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7964.4708, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0778, "query_norm": 1.2499, "queue_k_norm": 1.3722, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5202, "sent_len_1": 66.6234, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.36, "stdk": 0.0484, "stdq": 0.044, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42700 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.4808, "doc_norm": 1.371, "encoder_q-embeddings": 4081.6948, "encoder_q-layer.0": 2786.54, "encoder_q-layer.1": 2858.749, "encoder_q-layer.10": 5439.354, "encoder_q-layer.11": 11593.5127, "encoder_q-layer.2": 3251.6909, "encoder_q-layer.3": 3399.2498, "encoder_q-layer.4": 3562.7104, "encoder_q-layer.5": 3713.6396, "encoder_q-layer.6": 4363.7012, "encoder_q-layer.7": 4660.8306, "encoder_q-layer.8": 5719.2402, "encoder_q-layer.9": 5143.624, "epoch": 0.42, "inbatch_neg_score": 0.0769, "inbatch_pos_score": 0.6812, "learning_rate": 3.177777777777778e-05, "loss": 3.4808, "norm_diff": 0.105, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7475.8061, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0773, "query_norm": 1.266, "queue_k_norm": 1.3721, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7014, "sent_len_1": 66.7984, "sent_max_len_0": 127.9912, "sent_max_len_1": 190.6775, "stdk": 0.0481, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42800 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.4869, "doc_norm": 1.3783, "encoder_q-embeddings": 4029.7466, "encoder_q-layer.0": 2765.3054, "encoder_q-layer.1": 2979.2109, "encoder_q-layer.10": 4521.9443, "encoder_q-layer.11": 10509.874, "encoder_q-layer.2": 3377.7273, "encoder_q-layer.3": 3481.989, "encoder_q-layer.4": 3677.6333, "encoder_q-layer.5": 3632.9749, "encoder_q-layer.6": 4098.7969, "encoder_q-layer.7": 4480.5361, "encoder_q-layer.8": 5165.9717, "encoder_q-layer.9": 4342.792, "epoch": 0.42, "inbatch_neg_score": 0.0757, "inbatch_pos_score": 0.689, "learning_rate": 3.1722222222222224e-05, "loss": 3.4869, "norm_diff": 0.1343, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7118.3115, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.076, "query_norm": 1.244, "queue_k_norm": 1.372, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5312, "sent_len_1": 66.8587, "sent_max_len_0": 127.995, "sent_max_len_1": 189.085, "stdk": 0.0484, "stdq": 0.0437, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 42900 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.49, "doc_norm": 1.3745, "encoder_q-embeddings": 3845.5474, "encoder_q-layer.0": 2557.8223, "encoder_q-layer.1": 2688.6934, "encoder_q-layer.10": 4697.2549, "encoder_q-layer.11": 10536.8242, "encoder_q-layer.2": 2945.5916, "encoder_q-layer.3": 3011.1042, "encoder_q-layer.4": 3117.9265, "encoder_q-layer.5": 3251.8013, "encoder_q-layer.6": 3868.2202, "encoder_q-layer.7": 4202.4663, "encoder_q-layer.8": 4985.771, "encoder_q-layer.9": 4596.4653, "epoch": 0.42, "inbatch_neg_score": 0.077, "inbatch_pos_score": 0.7334, "learning_rate": 3.1666666666666666e-05, "loss": 3.49, "norm_diff": 0.0905, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6900.1131, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0763, "query_norm": 1.284, "queue_k_norm": 1.37, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6071, "sent_len_1": 66.8173, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9638, "stdk": 0.0483, "stdq": 0.0449, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 43000 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.5, "doc_norm": 1.3713, "encoder_q-embeddings": 3811.1604, "encoder_q-layer.0": 2496.7073, "encoder_q-layer.1": 2633.0371, "encoder_q-layer.10": 5107.8618, "encoder_q-layer.11": 10654.3076, "encoder_q-layer.2": 2901.4189, "encoder_q-layer.3": 2912.978, "encoder_q-layer.4": 3133.6538, "encoder_q-layer.5": 3237.2515, "encoder_q-layer.6": 3836.8003, "encoder_q-layer.7": 4361.5933, "encoder_q-layer.8": 5341.5532, "encoder_q-layer.9": 4765.8066, "epoch": 0.42, "inbatch_neg_score": 0.0728, "inbatch_pos_score": 0.7056, "learning_rate": 3.1611111111111115e-05, "loss": 3.5, "norm_diff": 0.1196, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6923.0685, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0719, "query_norm": 1.2517, "queue_k_norm": 1.3711, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.3245, "sent_len_1": 66.6716, "sent_max_len_0": 127.9862, "sent_max_len_1": 191.0563, "stdk": 0.0482, "stdq": 0.0434, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 43100 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.4901, "doc_norm": 1.3645, "encoder_q-embeddings": 4740.7681, "encoder_q-layer.0": 3078.4773, "encoder_q-layer.1": 3251.9287, "encoder_q-layer.10": 5042.2661, "encoder_q-layer.11": 10721.627, "encoder_q-layer.2": 3645.9714, "encoder_q-layer.3": 3758.6482, "encoder_q-layer.4": 3993.6187, "encoder_q-layer.5": 4179.7417, "encoder_q-layer.6": 4668.4512, "encoder_q-layer.7": 4870.3027, "encoder_q-layer.8": 5482.6274, "encoder_q-layer.9": 5025.395, "epoch": 0.42, "inbatch_neg_score": 0.0681, "inbatch_pos_score": 0.6895, "learning_rate": 3.155555555555556e-05, "loss": 3.4901, "norm_diff": 0.0861, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7528.3578, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0681, "query_norm": 1.2784, "queue_k_norm": 1.3706, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4968, "sent_len_1": 66.5058, "sent_max_len_0": 128.0, "sent_max_len_1": 187.0337, "stdk": 0.048, "stdq": 0.0445, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 43200 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.4662, "doc_norm": 1.3692, "encoder_q-embeddings": 4131.7842, "encoder_q-layer.0": 2675.2139, "encoder_q-layer.1": 2858.9993, "encoder_q-layer.10": 4583.0366, "encoder_q-layer.11": 10277.6963, "encoder_q-layer.2": 3243.3035, "encoder_q-layer.3": 3316.8264, "encoder_q-layer.4": 3418.8665, "encoder_q-layer.5": 3449.1763, "encoder_q-layer.6": 3856.813, "encoder_q-layer.7": 4207.6221, "encoder_q-layer.8": 4772.3828, "encoder_q-layer.9": 4551.9102, "epoch": 0.42, "inbatch_neg_score": 0.0773, "inbatch_pos_score": 0.7563, "learning_rate": 3.15e-05, "loss": 3.4662, "norm_diff": 0.0347, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6810.4264, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0757, "query_norm": 1.3373, "queue_k_norm": 1.3692, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6805, "sent_len_1": 66.7718, "sent_max_len_0": 128.0, "sent_max_len_1": 192.0813, "stdk": 0.0481, "stdq": 0.0458, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 43300 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.4726, "doc_norm": 1.3644, "encoder_q-embeddings": 3802.7976, "encoder_q-layer.0": 2423.7178, "encoder_q-layer.1": 2529.7354, "encoder_q-layer.10": 4836.4502, "encoder_q-layer.11": 10873.5469, "encoder_q-layer.2": 2775.939, "encoder_q-layer.3": 2846.0662, "encoder_q-layer.4": 3041.7415, "encoder_q-layer.5": 3139.6311, "encoder_q-layer.6": 3578.7822, "encoder_q-layer.7": 3997.4441, "encoder_q-layer.8": 5042.5693, "encoder_q-layer.9": 4704.3877, "epoch": 0.42, "inbatch_neg_score": 0.079, "inbatch_pos_score": 0.7324, "learning_rate": 3.144444444444445e-05, "loss": 3.4726, "norm_diff": 0.046, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6738.004, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0778, "query_norm": 1.3183, "queue_k_norm": 1.3711, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6096, "sent_len_1": 66.8506, "sent_max_len_0": 128.0, "sent_max_len_1": 189.445, "stdk": 0.048, "stdq": 0.0451, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 43400 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.4608, "doc_norm": 1.3674, "encoder_q-embeddings": 4242.0439, "encoder_q-layer.0": 2882.2732, "encoder_q-layer.1": 3012.8943, "encoder_q-layer.10": 4949.4683, "encoder_q-layer.11": 10406.4502, "encoder_q-layer.2": 3415.165, "encoder_q-layer.3": 3423.4407, "encoder_q-layer.4": 3591.5637, "encoder_q-layer.5": 3595.7468, "encoder_q-layer.6": 3899.1846, "encoder_q-layer.7": 4606.3916, "encoder_q-layer.8": 5309.1201, "encoder_q-layer.9": 4808.9946, "epoch": 0.42, "inbatch_neg_score": 0.0798, "inbatch_pos_score": 0.6958, "learning_rate": 3.138888888888889e-05, "loss": 3.4608, "norm_diff": 0.069, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7080.8475, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0798, "query_norm": 1.2984, "queue_k_norm": 1.3705, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6131, "sent_len_1": 66.9981, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.8862, "stdk": 0.0482, "stdq": 0.0439, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 43500 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.4828, "doc_norm": 1.3597, "encoder_q-embeddings": 5600.4004, "encoder_q-layer.0": 3849.3411, "encoder_q-layer.1": 4548.9805, "encoder_q-layer.10": 4531.4097, "encoder_q-layer.11": 10579.2959, "encoder_q-layer.2": 5332.7524, "encoder_q-layer.3": 5534.2979, "encoder_q-layer.4": 5889.23, "encoder_q-layer.5": 6849.2495, "encoder_q-layer.6": 7068.0645, "encoder_q-layer.7": 6782.5171, "encoder_q-layer.8": 5432.1094, "encoder_q-layer.9": 4471.5132, "epoch": 0.43, "inbatch_neg_score": 0.0873, "inbatch_pos_score": 0.7173, "learning_rate": 3.1333333333333334e-05, "loss": 3.4828, "norm_diff": 0.0319, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8981.0525, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0853, "query_norm": 1.3278, "queue_k_norm": 1.3698, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6462, "sent_len_1": 66.909, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7125, "stdk": 0.0478, "stdq": 0.0446, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 43600 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.455, "doc_norm": 1.3719, "encoder_q-embeddings": 4094.7549, "encoder_q-layer.0": 2766.9951, "encoder_q-layer.1": 2931.0327, "encoder_q-layer.10": 5119.0742, "encoder_q-layer.11": 10924.8955, "encoder_q-layer.2": 3499.4998, "encoder_q-layer.3": 3527.05, "encoder_q-layer.4": 3733.3032, "encoder_q-layer.5": 3867.2449, "encoder_q-layer.6": 4390.106, "encoder_q-layer.7": 4739.9624, "encoder_q-layer.8": 5129.2471, "encoder_q-layer.9": 4757.0093, "epoch": 0.43, "inbatch_neg_score": 0.0934, "inbatch_pos_score": 0.7539, "learning_rate": 3.1277777777777776e-05, "loss": 3.455, "norm_diff": 0.0328, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7164.6877, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0925, "query_norm": 1.3486, "queue_k_norm": 1.3707, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6981, "sent_len_1": 66.8893, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2312, "stdk": 0.0483, "stdq": 0.0455, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 43700 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.5036, "doc_norm": 1.377, "encoder_q-embeddings": 5126.0762, "encoder_q-layer.0": 3588.4294, "encoder_q-layer.1": 3895.468, "encoder_q-layer.10": 4860.2681, "encoder_q-layer.11": 10364.1074, "encoder_q-layer.2": 4453.6743, "encoder_q-layer.3": 4536.7734, "encoder_q-layer.4": 4583.6562, "encoder_q-layer.5": 4575.7373, "encoder_q-layer.6": 4615.3242, "encoder_q-layer.7": 4470.8296, "encoder_q-layer.8": 5165.5928, "encoder_q-layer.9": 4659.1099, "epoch": 0.43, "inbatch_neg_score": 0.0988, "inbatch_pos_score": 0.7495, "learning_rate": 3.1222222222222225e-05, "loss": 3.5036, "norm_diff": 0.0417, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7761.881, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0988, "query_norm": 1.3388, "queue_k_norm": 1.3707, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.3818, "sent_len_1": 66.8751, "sent_max_len_0": 127.9938, "sent_max_len_1": 188.395, "stdk": 0.0485, "stdq": 0.0454, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 43800 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.4813, "doc_norm": 1.3732, "encoder_q-embeddings": 5858.1665, "encoder_q-layer.0": 4155.481, "encoder_q-layer.1": 4608.3662, "encoder_q-layer.10": 4681.1118, "encoder_q-layer.11": 10388.541, "encoder_q-layer.2": 5525.3057, "encoder_q-layer.3": 5732.0396, "encoder_q-layer.4": 6136.9854, "encoder_q-layer.5": 6593.5669, "encoder_q-layer.6": 6258.2925, "encoder_q-layer.7": 6204.2173, "encoder_q-layer.8": 6049.2559, "encoder_q-layer.9": 4803.1733, "epoch": 0.43, "inbatch_neg_score": 0.1034, "inbatch_pos_score": 0.7441, "learning_rate": 3.116666666666667e-05, "loss": 3.4813, "norm_diff": 0.0712, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9077.9364, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1038, "query_norm": 1.302, "queue_k_norm": 1.371, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6829, "sent_len_1": 66.9972, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0538, "stdk": 0.0483, "stdq": 0.0445, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 43900 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.4957, "doc_norm": 1.3733, "encoder_q-embeddings": 3990.5715, "encoder_q-layer.0": 2688.2864, "encoder_q-layer.1": 2953.6392, "encoder_q-layer.10": 4895.5879, "encoder_q-layer.11": 10545.958, "encoder_q-layer.2": 3470.5039, "encoder_q-layer.3": 3585.4712, "encoder_q-layer.4": 3684.0771, "encoder_q-layer.5": 3888.2881, "encoder_q-layer.6": 4178.2334, "encoder_q-layer.7": 4591.165, "encoder_q-layer.8": 5341.9658, "encoder_q-layer.9": 4900.8774, "epoch": 0.43, "inbatch_neg_score": 0.1042, "inbatch_pos_score": 0.7627, "learning_rate": 3.111111111111111e-05, "loss": 3.4957, "norm_diff": 0.0554, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7063.9002, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1053, "query_norm": 1.3179, "queue_k_norm": 1.3702, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5757, "sent_len_1": 66.8859, "sent_max_len_0": 127.9975, "sent_max_len_1": 191.22, "stdk": 0.0483, "stdq": 0.0452, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 44000 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.479, "doc_norm": 1.3766, "encoder_q-embeddings": 4692.561, "encoder_q-layer.0": 3332.646, "encoder_q-layer.1": 3449.2812, "encoder_q-layer.10": 4845.7803, "encoder_q-layer.11": 10912.1504, "encoder_q-layer.2": 3921.8728, "encoder_q-layer.3": 4223.062, "encoder_q-layer.4": 4277.918, "encoder_q-layer.5": 4268.417, "encoder_q-layer.6": 4646.2163, "encoder_q-layer.7": 5008.3149, "encoder_q-layer.8": 5682.3379, "encoder_q-layer.9": 4955.9575, "epoch": 0.43, "inbatch_neg_score": 0.1102, "inbatch_pos_score": 0.7881, "learning_rate": 3.105555555555555e-05, "loss": 3.479, "norm_diff": 0.0476, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7677.0243, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1102, "query_norm": 1.3323, "queue_k_norm": 1.3729, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4585, "sent_len_1": 66.9279, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.84, "stdk": 0.0484, "stdq": 0.0458, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 44100 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.4846, "doc_norm": 1.3774, "encoder_q-embeddings": 4265.4966, "encoder_q-layer.0": 2860.677, "encoder_q-layer.1": 3000.5498, "encoder_q-layer.10": 5156.585, "encoder_q-layer.11": 11114.8506, "encoder_q-layer.2": 3546.6731, "encoder_q-layer.3": 3687.4321, "encoder_q-layer.4": 3872.5598, "encoder_q-layer.5": 4203.1997, "encoder_q-layer.6": 4585.7231, "encoder_q-layer.7": 4623.7666, "encoder_q-layer.8": 5683.8872, "encoder_q-layer.9": 4937.3696, "epoch": 0.43, "inbatch_neg_score": 0.1027, "inbatch_pos_score": 0.7612, "learning_rate": 3.1e-05, "loss": 3.4846, "norm_diff": 0.0818, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7434.0423, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1034, "query_norm": 1.2956, "queue_k_norm": 1.3716, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5297, "sent_len_1": 66.638, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2612, "stdk": 0.0484, "stdq": 0.045, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 44200 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.4598, "doc_norm": 1.3773, "encoder_q-embeddings": 2598.3259, "encoder_q-layer.0": 1798.402, "encoder_q-layer.1": 1973.2512, "encoder_q-layer.10": 2561.7891, "encoder_q-layer.11": 5208.4727, "encoder_q-layer.2": 2342.531, "encoder_q-layer.3": 2335.0344, "encoder_q-layer.4": 2456.6367, "encoder_q-layer.5": 2649.6621, "encoder_q-layer.6": 2921.2532, "encoder_q-layer.7": 2619.4478, "encoder_q-layer.8": 2766.8047, "encoder_q-layer.9": 2384.6746, "epoch": 0.43, "inbatch_neg_score": 0.1024, "inbatch_pos_score": 0.7598, "learning_rate": 3.094444444444445e-05, "loss": 3.4598, "norm_diff": 0.0776, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4017.674, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1021, "query_norm": 1.2997, "queue_k_norm": 1.3742, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7158, "sent_len_1": 66.5792, "sent_max_len_0": 127.9925, "sent_max_len_1": 188.9837, "stdk": 0.0483, "stdq": 0.0452, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 44300 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.4611, "doc_norm": 1.3746, "encoder_q-embeddings": 2020.3594, "encoder_q-layer.0": 1316.3265, "encoder_q-layer.1": 1392.0237, "encoder_q-layer.10": 2616.8965, "encoder_q-layer.11": 5681.5303, "encoder_q-layer.2": 1522.7941, "encoder_q-layer.3": 1628.7029, "encoder_q-layer.4": 1693.3798, "encoder_q-layer.5": 1864.3701, "encoder_q-layer.6": 2210.2566, "encoder_q-layer.7": 2322.8892, "encoder_q-layer.8": 2669.9746, "encoder_q-layer.9": 2376.967, "epoch": 0.43, "inbatch_neg_score": 0.0959, "inbatch_pos_score": 0.7217, "learning_rate": 3.088888888888889e-05, "loss": 3.4611, "norm_diff": 0.1054, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3641.9112, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0959, "query_norm": 1.2692, "queue_k_norm": 1.3738, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7296, "sent_len_1": 66.6921, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.335, "stdk": 0.0483, "stdq": 0.0444, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 44400 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.4863, "doc_norm": 1.372, "encoder_q-embeddings": 1975.0997, "encoder_q-layer.0": 1324.8571, "encoder_q-layer.1": 1380.9675, "encoder_q-layer.10": 2317.5847, "encoder_q-layer.11": 5723.522, "encoder_q-layer.2": 1520.91, "encoder_q-layer.3": 1599.1459, "encoder_q-layer.4": 1760.2667, "encoder_q-layer.5": 1770.3853, "encoder_q-layer.6": 1971.7506, "encoder_q-layer.7": 2170.415, "encoder_q-layer.8": 2599.7109, "encoder_q-layer.9": 2270.2842, "epoch": 0.43, "inbatch_neg_score": 0.0996, "inbatch_pos_score": 0.7275, "learning_rate": 3.0833333333333335e-05, "loss": 3.4863, "norm_diff": 0.1131, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3510.442, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1, "query_norm": 1.2589, "queue_k_norm": 1.3732, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4737, "sent_len_1": 66.6884, "sent_max_len_0": 127.995, "sent_max_len_1": 187.6975, "stdk": 0.0482, "stdq": 0.0438, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 44500 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.4601, "doc_norm": 1.3745, "encoder_q-embeddings": 2044.5433, "encoder_q-layer.0": 1352.5546, "encoder_q-layer.1": 1403.5789, "encoder_q-layer.10": 2366.9644, "encoder_q-layer.11": 5352.52, "encoder_q-layer.2": 1577.7605, "encoder_q-layer.3": 1698.1754, "encoder_q-layer.4": 1816.3383, "encoder_q-layer.5": 1880.1676, "encoder_q-layer.6": 2107.4021, "encoder_q-layer.7": 2372.8176, "encoder_q-layer.8": 2636.6418, "encoder_q-layer.9": 2334.8296, "epoch": 0.44, "inbatch_neg_score": 0.0967, "inbatch_pos_score": 0.7324, "learning_rate": 3.077777777777778e-05, "loss": 3.4601, "norm_diff": 0.0898, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3597.3415, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0966, "query_norm": 1.2848, "queue_k_norm": 1.3743, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5835, "sent_len_1": 66.7979, "sent_max_len_0": 127.9912, "sent_max_len_1": 189.7537, "stdk": 0.0483, "stdq": 0.0448, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 44600 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.4602, "doc_norm": 1.3716, "encoder_q-embeddings": 2615.4084, "encoder_q-layer.0": 1811.4203, "encoder_q-layer.1": 1910.0234, "encoder_q-layer.10": 2368.5664, "encoder_q-layer.11": 5659.5903, "encoder_q-layer.2": 2159.3342, "encoder_q-layer.3": 2329.9214, "encoder_q-layer.4": 2403.6553, "encoder_q-layer.5": 2606.9016, "encoder_q-layer.6": 2693.1445, "encoder_q-layer.7": 2662.2781, "encoder_q-layer.8": 2685.1204, "encoder_q-layer.9": 2392.0762, "epoch": 0.44, "inbatch_neg_score": 0.0949, "inbatch_pos_score": 0.729, "learning_rate": 3.0722222222222227e-05, "loss": 3.4602, "norm_diff": 0.1097, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4087.0193, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0945, "query_norm": 1.2619, "queue_k_norm": 1.3748, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5888, "sent_len_1": 66.7676, "sent_max_len_0": 128.0, "sent_max_len_1": 189.145, "stdk": 0.0481, "stdq": 0.0442, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 44700 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.4646, "doc_norm": 1.372, "encoder_q-embeddings": 3057.8245, "encoder_q-layer.0": 2129.1736, "encoder_q-layer.1": 2212.082, "encoder_q-layer.10": 2383.9685, "encoder_q-layer.11": 5104.2476, "encoder_q-layer.2": 2564.0686, "encoder_q-layer.3": 2623.0857, "encoder_q-layer.4": 2631.7537, "encoder_q-layer.5": 2615.1721, "encoder_q-layer.6": 2721.9653, "encoder_q-layer.7": 2707.4341, "encoder_q-layer.8": 2818.1257, "encoder_q-layer.9": 2318.2864, "epoch": 0.44, "inbatch_neg_score": 0.0936, "inbatch_pos_score": 0.7148, "learning_rate": 3.066666666666667e-05, "loss": 3.4646, "norm_diff": 0.1152, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4252.2216, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0925, "query_norm": 1.2568, "queue_k_norm": 1.3721, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6208, "sent_len_1": 66.5499, "sent_max_len_0": 128.0, "sent_max_len_1": 187.5188, "stdk": 0.0482, "stdq": 0.0439, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 44800 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.4739, "doc_norm": 1.3745, "encoder_q-embeddings": 4471.334, "encoder_q-layer.0": 3462.1514, "encoder_q-layer.1": 3912.0979, "encoder_q-layer.10": 2334.0613, "encoder_q-layer.11": 5233.0103, "encoder_q-layer.2": 4338.5532, "encoder_q-layer.3": 4358.3184, "encoder_q-layer.4": 4457.3032, "encoder_q-layer.5": 4177.6055, "encoder_q-layer.6": 3705.4314, "encoder_q-layer.7": 3320.3254, "encoder_q-layer.8": 2932.623, "encoder_q-layer.9": 2310.0999, "epoch": 0.44, "inbatch_neg_score": 0.0931, "inbatch_pos_score": 0.771, "learning_rate": 3.061111111111111e-05, "loss": 3.4739, "norm_diff": 0.0757, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5719.5476, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0928, "query_norm": 1.2988, "queue_k_norm": 1.3745, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6048, "sent_len_1": 66.7353, "sent_max_len_0": 127.9938, "sent_max_len_1": 188.95, "stdk": 0.0483, "stdq": 0.0453, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 44900 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.4563, "doc_norm": 1.3801, "encoder_q-embeddings": 2165.9719, "encoder_q-layer.0": 1397.4412, "encoder_q-layer.1": 1428.1393, "encoder_q-layer.10": 2332.5447, "encoder_q-layer.11": 5163.876, "encoder_q-layer.2": 1620.5771, "encoder_q-layer.3": 1702.0029, "encoder_q-layer.4": 1755.8967, "encoder_q-layer.5": 1841.3828, "encoder_q-layer.6": 2122.9939, "encoder_q-layer.7": 2269.7024, "encoder_q-layer.8": 2592.9177, "encoder_q-layer.9": 2238.3027, "epoch": 0.44, "inbatch_neg_score": 0.0903, "inbatch_pos_score": 0.7705, "learning_rate": 3.055555555555556e-05, "loss": 3.4563, "norm_diff": 0.0889, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3483.5661, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0908, "query_norm": 1.2912, "queue_k_norm": 1.3732, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7971, "sent_len_1": 66.7186, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0613, "stdk": 0.0485, "stdq": 0.0452, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45000 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.4624, "doc_norm": 1.3706, "encoder_q-embeddings": 2536.3826, "encoder_q-layer.0": 1665.3717, "encoder_q-layer.1": 1776.1846, "encoder_q-layer.10": 2398.6328, "encoder_q-layer.11": 5051.9673, "encoder_q-layer.2": 2039.9625, "encoder_q-layer.3": 2077.6184, "encoder_q-layer.4": 2046.8718, "encoder_q-layer.5": 2049.8062, "encoder_q-layer.6": 2107.4109, "encoder_q-layer.7": 2246.3774, "encoder_q-layer.8": 2625.3953, "encoder_q-layer.9": 2364.3799, "epoch": 0.44, "inbatch_neg_score": 0.0873, "inbatch_pos_score": 0.7573, "learning_rate": 3.05e-05, "loss": 3.4624, "norm_diff": 0.0769, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3729.7805, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.088, "query_norm": 1.2937, "queue_k_norm": 1.3755, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4989, "sent_len_1": 66.9707, "sent_max_len_0": 127.9925, "sent_max_len_1": 189.1687, "stdk": 0.0481, "stdq": 0.0452, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 45100 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.466, "doc_norm": 1.3694, "encoder_q-embeddings": 2115.5337, "encoder_q-layer.0": 1393.2347, "encoder_q-layer.1": 1500.0922, "encoder_q-layer.10": 2146.072, "encoder_q-layer.11": 4869.2114, "encoder_q-layer.2": 1663.722, "encoder_q-layer.3": 1758.0126, "encoder_q-layer.4": 1868.8641, "encoder_q-layer.5": 1879.7441, "encoder_q-layer.6": 2180.8613, "encoder_q-layer.7": 2265.0559, "encoder_q-layer.8": 2384.0198, "encoder_q-layer.9": 2120.428, "epoch": 0.44, "inbatch_neg_score": 0.0847, "inbatch_pos_score": 0.7319, "learning_rate": 3.044444444444445e-05, "loss": 3.466, "norm_diff": 0.0998, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3381.1971, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0851, "query_norm": 1.2696, "queue_k_norm": 1.3731, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.383, "sent_len_1": 66.9603, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0, "stdk": 0.0481, "stdq": 0.0445, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45200 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.4589, "doc_norm": 1.3746, "encoder_q-embeddings": 2739.9475, "encoder_q-layer.0": 1792.2821, "encoder_q-layer.1": 2004.9602, "encoder_q-layer.10": 2197.292, "encoder_q-layer.11": 5016.603, "encoder_q-layer.2": 2256.0642, "encoder_q-layer.3": 2361.5989, "encoder_q-layer.4": 2624.1418, "encoder_q-layer.5": 2483.0381, "encoder_q-layer.6": 2776.2112, "encoder_q-layer.7": 2439.5078, "encoder_q-layer.8": 2501.2012, "encoder_q-layer.9": 2137.958, "epoch": 0.44, "inbatch_neg_score": 0.0898, "inbatch_pos_score": 0.77, "learning_rate": 3.0388888888888887e-05, "loss": 3.4589, "norm_diff": 0.0844, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3935.6613, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0901, "query_norm": 1.2902, "queue_k_norm": 1.3754, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.585, "sent_len_1": 66.8139, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7725, "stdk": 0.0483, "stdq": 0.045, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 45300 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.4611, "doc_norm": 1.379, "encoder_q-embeddings": 2074.5237, "encoder_q-layer.0": 1315.068, "encoder_q-layer.1": 1389.6831, "encoder_q-layer.10": 2425.2822, "encoder_q-layer.11": 4995.0859, "encoder_q-layer.2": 1539.5078, "encoder_q-layer.3": 1634.1185, "encoder_q-layer.4": 1754.5297, "encoder_q-layer.5": 1798.8706, "encoder_q-layer.6": 2005.9222, "encoder_q-layer.7": 2071.1055, "encoder_q-layer.8": 2373.4031, "encoder_q-layer.9": 2231.1052, "epoch": 0.44, "inbatch_neg_score": 0.0924, "inbatch_pos_score": 0.7593, "learning_rate": 3.0333333333333337e-05, "loss": 3.4611, "norm_diff": 0.0763, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3406.1567, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0928, "query_norm": 1.3027, "queue_k_norm": 1.373, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5566, "sent_len_1": 66.6618, "sent_max_len_0": 128.0, "sent_max_len_1": 189.845, "stdk": 0.0485, "stdq": 0.0455, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45400 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.4743, "doc_norm": 1.378, "encoder_q-embeddings": 2051.7866, "encoder_q-layer.0": 1396.0767, "encoder_q-layer.1": 1490.7629, "encoder_q-layer.10": 2433.6677, "encoder_q-layer.11": 5100.4707, "encoder_q-layer.2": 1630.4727, "encoder_q-layer.3": 1618.9539, "encoder_q-layer.4": 1778.986, "encoder_q-layer.5": 1838.1255, "encoder_q-layer.6": 2084.5884, "encoder_q-layer.7": 2312.1089, "encoder_q-layer.8": 2737.6685, "encoder_q-layer.9": 2383.8628, "epoch": 0.44, "inbatch_neg_score": 0.0875, "inbatch_pos_score": 0.7295, "learning_rate": 3.0277777777777776e-05, "loss": 3.4743, "norm_diff": 0.1038, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3525.7332, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0884, "query_norm": 1.2742, "queue_k_norm": 1.3735, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5101, "sent_len_1": 66.9143, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9975, "stdk": 0.0484, "stdq": 0.0444, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 45500 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.4643, "doc_norm": 1.3744, "encoder_q-embeddings": 3163.9106, "encoder_q-layer.0": 2241.3828, "encoder_q-layer.1": 2629.5686, "encoder_q-layer.10": 2286.7629, "encoder_q-layer.11": 4990.7349, "encoder_q-layer.2": 3545.5117, "encoder_q-layer.3": 3769.3459, "encoder_q-layer.4": 3217.21, "encoder_q-layer.5": 2986.0542, "encoder_q-layer.6": 3196.1089, "encoder_q-layer.7": 3147.729, "encoder_q-layer.8": 2728.2778, "encoder_q-layer.9": 2225.6599, "epoch": 0.45, "inbatch_neg_score": 0.0885, "inbatch_pos_score": 0.7378, "learning_rate": 3.0222222222222225e-05, "loss": 3.4643, "norm_diff": 0.0963, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4720.1805, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0876, "query_norm": 1.2781, "queue_k_norm": 1.3723, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.617, "sent_len_1": 66.7731, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.8613, "stdk": 0.0483, "stdq": 0.0445, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45600 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.4767, "doc_norm": 1.3737, "encoder_q-embeddings": 1963.8853, "encoder_q-layer.0": 1305.4652, "encoder_q-layer.1": 1400.9061, "encoder_q-layer.10": 2343.4431, "encoder_q-layer.11": 5127.4688, "encoder_q-layer.2": 1636.8289, "encoder_q-layer.3": 1730.9835, "encoder_q-layer.4": 1765.6302, "encoder_q-layer.5": 1737.3602, "encoder_q-layer.6": 1886.6945, "encoder_q-layer.7": 2046.9481, "encoder_q-layer.8": 2370.075, "encoder_q-layer.9": 2220.0737, "epoch": 0.45, "inbatch_neg_score": 0.0932, "inbatch_pos_score": 0.7598, "learning_rate": 3.016666666666667e-05, "loss": 3.4767, "norm_diff": 0.0651, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3394.2313, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0929, "query_norm": 1.3086, "queue_k_norm": 1.3723, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.464, "sent_len_1": 66.7355, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.87, "stdk": 0.0483, "stdq": 0.0455, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45700 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.4856, "doc_norm": 1.3665, "encoder_q-embeddings": 2077.9277, "encoder_q-layer.0": 1450.9736, "encoder_q-layer.1": 1536.3461, "encoder_q-layer.10": 2254.4297, "encoder_q-layer.11": 4950.2842, "encoder_q-layer.2": 1707.0154, "encoder_q-layer.3": 1854.5792, "encoder_q-layer.4": 1961.4956, "encoder_q-layer.5": 2045.1105, "encoder_q-layer.6": 2076.3171, "encoder_q-layer.7": 2199.1023, "encoder_q-layer.8": 2477.749, "encoder_q-layer.9": 2154.6211, "epoch": 0.45, "inbatch_neg_score": 0.0911, "inbatch_pos_score": 0.7554, "learning_rate": 3.0111111111111113e-05, "loss": 3.4856, "norm_diff": 0.0752, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3461.7986, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0912, "query_norm": 1.2913, "queue_k_norm": 1.3713, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4563, "sent_len_1": 66.5212, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.9775, "stdk": 0.048, "stdq": 0.045, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45800 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.4613, "doc_norm": 1.3747, "encoder_q-embeddings": 2165.7344, "encoder_q-layer.0": 1388.558, "encoder_q-layer.1": 1485.6331, "encoder_q-layer.10": 2202.199, "encoder_q-layer.11": 5076.6538, "encoder_q-layer.2": 1682.3877, "encoder_q-layer.3": 1747.2633, "encoder_q-layer.4": 1830.7153, "encoder_q-layer.5": 1896.1067, "encoder_q-layer.6": 2098.7683, "encoder_q-layer.7": 2291.8943, "encoder_q-layer.8": 2667.7932, "encoder_q-layer.9": 2269.5576, "epoch": 0.45, "inbatch_neg_score": 0.0899, "inbatch_pos_score": 0.7427, "learning_rate": 3.005555555555556e-05, "loss": 3.4613, "norm_diff": 0.0867, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3565.8016, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0903, "query_norm": 1.288, "queue_k_norm": 1.3745, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6457, "sent_len_1": 66.9435, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.47, "stdk": 0.0483, "stdq": 0.0445, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 45900 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.4651, "doc_norm": 1.3754, "encoder_q-embeddings": 3505.7441, "encoder_q-layer.0": 2878.4639, "encoder_q-layer.1": 2802.4556, "encoder_q-layer.10": 2325.7935, "encoder_q-layer.11": 5230.8823, "encoder_q-layer.2": 2925.4788, "encoder_q-layer.3": 3051.167, "encoder_q-layer.4": 3016.2075, "encoder_q-layer.5": 2855.4653, "encoder_q-layer.6": 2733.5688, "encoder_q-layer.7": 2850.355, "encoder_q-layer.8": 2900.0247, "encoder_q-layer.9": 2333.2837, "epoch": 0.45, "inbatch_neg_score": 0.0985, "inbatch_pos_score": 0.7651, "learning_rate": 3e-05, "loss": 3.4651, "norm_diff": 0.0454, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4646.2022, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0978, "query_norm": 1.33, "queue_k_norm": 1.3742, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5481, "sent_len_1": 66.626, "sent_max_len_0": 127.9925, "sent_max_len_1": 190.0987, "stdk": 0.0483, "stdq": 0.0457, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 46000 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.4765, "doc_norm": 1.3702, "encoder_q-embeddings": 1981.0811, "encoder_q-layer.0": 1300.3635, "encoder_q-layer.1": 1378.3312, "encoder_q-layer.10": 2244.9358, "encoder_q-layer.11": 5005.2686, "encoder_q-layer.2": 1546.6759, "encoder_q-layer.3": 1615.4294, "encoder_q-layer.4": 1740.4984, "encoder_q-layer.5": 1799.0348, "encoder_q-layer.6": 1986.5399, "encoder_q-layer.7": 2290.6689, "encoder_q-layer.8": 2569.9458, "encoder_q-layer.9": 2226.9985, "epoch": 0.45, "inbatch_neg_score": 0.1051, "inbatch_pos_score": 0.7632, "learning_rate": 2.9944444444444446e-05, "loss": 3.4765, "norm_diff": 0.0626, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3352.4728, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1042, "query_norm": 1.3077, "queue_k_norm": 1.372, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5499, "sent_len_1": 66.8315, "sent_max_len_0": 127.9925, "sent_max_len_1": 190.045, "stdk": 0.0481, "stdq": 0.0445, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 46100 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.4504, "doc_norm": 1.3689, "encoder_q-embeddings": 2016.377, "encoder_q-layer.0": 1269.5487, "encoder_q-layer.1": 1345.9193, "encoder_q-layer.10": 2375.6755, "encoder_q-layer.11": 5479.5518, "encoder_q-layer.2": 1528.2831, "encoder_q-layer.3": 1557.7379, "encoder_q-layer.4": 1694.0902, "encoder_q-layer.5": 1795.2377, "encoder_q-layer.6": 2080.4441, "encoder_q-layer.7": 2286.614, "encoder_q-layer.8": 2743.137, "encoder_q-layer.9": 2327.7224, "epoch": 0.45, "inbatch_neg_score": 0.1052, "inbatch_pos_score": 0.7476, "learning_rate": 2.988888888888889e-05, "loss": 3.4504, "norm_diff": 0.0506, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3585.2645, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1045, "query_norm": 1.3182, "queue_k_norm": 1.373, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5459, "sent_len_1": 66.8561, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8913, "stdk": 0.0481, "stdq": 0.0448, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 46200 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4506, "doc_norm": 1.3747, "encoder_q-embeddings": 3887.7749, "encoder_q-layer.0": 2516.8081, "encoder_q-layer.1": 2635.0286, "encoder_q-layer.10": 4681.3862, "encoder_q-layer.11": 10273.0752, "encoder_q-layer.2": 2862.1826, "encoder_q-layer.3": 2993.2603, "encoder_q-layer.4": 3230.8906, "encoder_q-layer.5": 3440.959, "encoder_q-layer.6": 3982.7136, "encoder_q-layer.7": 4134.4907, "encoder_q-layer.8": 4965.4819, "encoder_q-layer.9": 4408.5815, "epoch": 0.45, "inbatch_neg_score": 0.1121, "inbatch_pos_score": 0.7432, "learning_rate": 2.9833333333333335e-05, "loss": 3.4506, "norm_diff": 0.0615, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6749.7202, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1123, "query_norm": 1.3132, "queue_k_norm": 1.3753, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7703, "sent_len_1": 66.7399, "sent_max_len_0": 127.9963, "sent_max_len_1": 188.4325, "stdk": 0.0483, "stdq": 0.0442, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 46300 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.4665, "doc_norm": 1.3782, "encoder_q-embeddings": 5497.1099, "encoder_q-layer.0": 4183.7266, "encoder_q-layer.1": 4588.3599, "encoder_q-layer.10": 5135.6426, "encoder_q-layer.11": 10444.291, "encoder_q-layer.2": 5291.0078, "encoder_q-layer.3": 5430.875, "encoder_q-layer.4": 6497.6929, "encoder_q-layer.5": 5788.1553, "encoder_q-layer.6": 5249.5933, "encoder_q-layer.7": 5144.6924, "encoder_q-layer.8": 5656.4692, "encoder_q-layer.9": 4614.5762, "epoch": 0.45, "inbatch_neg_score": 0.1143, "inbatch_pos_score": 0.79, "learning_rate": 2.9777777777777777e-05, "loss": 3.4665, "norm_diff": 0.0298, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8601.3116, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1143, "query_norm": 1.3538, "queue_k_norm": 1.3761, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5378, "sent_len_1": 66.6028, "sent_max_len_0": 127.995, "sent_max_len_1": 190.6438, "stdk": 0.0484, "stdq": 0.0459, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 46400 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.4547, "doc_norm": 1.3746, "encoder_q-embeddings": 3871.4973, "encoder_q-layer.0": 2491.6282, "encoder_q-layer.1": 2601.4854, "encoder_q-layer.10": 4511.1958, "encoder_q-layer.11": 10219.2412, "encoder_q-layer.2": 2904.1191, "encoder_q-layer.3": 3040.947, "encoder_q-layer.4": 3279.9731, "encoder_q-layer.5": 3277.6753, "encoder_q-layer.6": 3723.1895, "encoder_q-layer.7": 4243.1943, "encoder_q-layer.8": 4955.3232, "encoder_q-layer.9": 4496.4546, "epoch": 0.45, "inbatch_neg_score": 0.1208, "inbatch_pos_score": 0.75, "learning_rate": 2.9722222222222223e-05, "loss": 3.4547, "norm_diff": 0.0482, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6768.9138, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1201, "query_norm": 1.3265, "queue_k_norm": 1.375, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6074, "sent_len_1": 66.8577, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.7525, "stdk": 0.0482, "stdq": 0.0447, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 46500 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.4756, "doc_norm": 1.3728, "encoder_q-embeddings": 4076.5554, "encoder_q-layer.0": 2783.0798, "encoder_q-layer.1": 2852.5725, "encoder_q-layer.10": 4399.7412, "encoder_q-layer.11": 10234.5225, "encoder_q-layer.2": 3121.3936, "encoder_q-layer.3": 3200.3591, "encoder_q-layer.4": 3391.571, "encoder_q-layer.5": 3686.8418, "encoder_q-layer.6": 3904.1008, "encoder_q-layer.7": 4319.2354, "encoder_q-layer.8": 5244.7319, "encoder_q-layer.9": 4590.6245, "epoch": 0.45, "inbatch_neg_score": 0.1296, "inbatch_pos_score": 0.7803, "learning_rate": 2.9666666666666672e-05, "loss": 3.4756, "norm_diff": 0.0285, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6985.4388, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.129, "query_norm": 1.3462, "queue_k_norm": 1.378, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5902, "sent_len_1": 66.6441, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.9087, "stdk": 0.0481, "stdq": 0.045, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 46600 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.4711, "doc_norm": 1.3809, "encoder_q-embeddings": 4168.3057, "encoder_q-layer.0": 2715.6443, "encoder_q-layer.1": 2822.9006, "encoder_q-layer.10": 4641.3193, "encoder_q-layer.11": 10413.3564, "encoder_q-layer.2": 3174.0491, "encoder_q-layer.3": 3235.4836, "encoder_q-layer.4": 3420.3865, "encoder_q-layer.5": 3619.2725, "encoder_q-layer.6": 3922.426, "encoder_q-layer.7": 4378.3677, "encoder_q-layer.8": 5084.5537, "encoder_q-layer.9": 4564.0176, "epoch": 0.46, "inbatch_neg_score": 0.1344, "inbatch_pos_score": 0.7896, "learning_rate": 2.961111111111111e-05, "loss": 3.4711, "norm_diff": 0.0315, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7033.5444, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1344, "query_norm": 1.3494, "queue_k_norm": 1.3813, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5897, "sent_len_1": 66.7964, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2138, "stdk": 0.0484, "stdq": 0.0454, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 46700 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.4776, "doc_norm": 1.3777, "encoder_q-embeddings": 2100.6167, "encoder_q-layer.0": 1341.0708, "encoder_q-layer.1": 1388.2242, "encoder_q-layer.10": 2422.772, "encoder_q-layer.11": 5640.1128, "encoder_q-layer.2": 1547.7756, "encoder_q-layer.3": 1585.7938, "encoder_q-layer.4": 1710.8445, "encoder_q-layer.5": 1791.6143, "encoder_q-layer.6": 1993.5553, "encoder_q-layer.7": 2382.271, "encoder_q-layer.8": 2941.2705, "encoder_q-layer.9": 2384.8928, "epoch": 0.46, "inbatch_neg_score": 0.1389, "inbatch_pos_score": 0.7778, "learning_rate": 2.955555555555556e-05, "loss": 3.4776, "norm_diff": 0.0377, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3625.4118, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1383, "query_norm": 1.3401, "queue_k_norm": 1.3811, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4208, "sent_len_1": 66.7754, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.875, "stdk": 0.0482, "stdq": 0.0452, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 46800 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.4527, "doc_norm": 1.384, "encoder_q-embeddings": 2084.5483, "encoder_q-layer.0": 1366.5654, "encoder_q-layer.1": 1441.7565, "encoder_q-layer.10": 2455.9382, "encoder_q-layer.11": 5200.8701, "encoder_q-layer.2": 1569.0801, "encoder_q-layer.3": 1691.8119, "encoder_q-layer.4": 1822.2083, "encoder_q-layer.5": 1924.8018, "encoder_q-layer.6": 1984.8885, "encoder_q-layer.7": 2275.3682, "encoder_q-layer.8": 2673.3494, "encoder_q-layer.9": 2474.5989, "epoch": 0.46, "inbatch_neg_score": 0.1373, "inbatch_pos_score": 0.7949, "learning_rate": 2.95e-05, "loss": 3.4527, "norm_diff": 0.0415, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3558.9023, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1357, "query_norm": 1.3425, "queue_k_norm": 1.3835, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5358, "sent_len_1": 66.8848, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3137, "stdk": 0.0484, "stdq": 0.0455, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 46900 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.4315, "doc_norm": 1.3824, "encoder_q-embeddings": 2179.5432, "encoder_q-layer.0": 1448.9342, "encoder_q-layer.1": 1522.3719, "encoder_q-layer.10": 2270.9058, "encoder_q-layer.11": 5476.5947, "encoder_q-layer.2": 1783.2097, "encoder_q-layer.3": 1864.8459, "encoder_q-layer.4": 1952.278, "encoder_q-layer.5": 1946.8855, "encoder_q-layer.6": 2335.5103, "encoder_q-layer.7": 2469.3621, "encoder_q-layer.8": 2581.751, "encoder_q-layer.9": 2330.8936, "epoch": 0.46, "inbatch_neg_score": 0.1257, "inbatch_pos_score": 0.7695, "learning_rate": 2.9444444444444448e-05, "loss": 3.4315, "norm_diff": 0.0771, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3669.1841, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1266, "query_norm": 1.3053, "queue_k_norm": 1.3838, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.8981, "sent_len_1": 66.7863, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8063, "stdk": 0.0483, "stdq": 0.0449, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 47000 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.4719, "doc_norm": 1.3832, "encoder_q-embeddings": 1992.7717, "encoder_q-layer.0": 1402.9645, "encoder_q-layer.1": 1457.4915, "encoder_q-layer.10": 2364.1074, "encoder_q-layer.11": 5223.2559, "encoder_q-layer.2": 1620.4822, "encoder_q-layer.3": 1631.2557, "encoder_q-layer.4": 1750.0962, "encoder_q-layer.5": 1884.9899, "encoder_q-layer.6": 2085.1821, "encoder_q-layer.7": 2218.156, "encoder_q-layer.8": 2554.4875, "encoder_q-layer.9": 2268.0869, "epoch": 0.46, "inbatch_neg_score": 0.117, "inbatch_pos_score": 0.77, "learning_rate": 2.9388888888888887e-05, "loss": 3.4719, "norm_diff": 0.0857, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3490.2719, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1169, "query_norm": 1.2975, "queue_k_norm": 1.3835, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5115, "sent_len_1": 66.6655, "sent_max_len_0": 127.995, "sent_max_len_1": 188.92, "stdk": 0.0483, "stdq": 0.045, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 47100 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.4586, "doc_norm": 1.3835, "encoder_q-embeddings": 2001.0508, "encoder_q-layer.0": 1266.7435, "encoder_q-layer.1": 1328.9906, "encoder_q-layer.10": 2845.8555, "encoder_q-layer.11": 5538.8555, "encoder_q-layer.2": 1462.2563, "encoder_q-layer.3": 1486.3534, "encoder_q-layer.4": 1589.5967, "encoder_q-layer.5": 1700.7186, "encoder_q-layer.6": 1970.3422, "encoder_q-layer.7": 2227.0781, "encoder_q-layer.8": 2892.0459, "encoder_q-layer.9": 2683.6599, "epoch": 0.46, "inbatch_neg_score": 0.1147, "inbatch_pos_score": 0.7593, "learning_rate": 2.9333333333333336e-05, "loss": 3.4586, "norm_diff": 0.1122, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3632.1098, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1156, "query_norm": 1.2713, "queue_k_norm": 1.3824, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6452, "sent_len_1": 66.6296, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2463, "stdk": 0.0483, "stdq": 0.0442, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 47200 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.4526, "doc_norm": 1.3865, "encoder_q-embeddings": 2186.7773, "encoder_q-layer.0": 1386.2556, "encoder_q-layer.1": 1441.9812, "encoder_q-layer.10": 2565.7632, "encoder_q-layer.11": 5794.77, "encoder_q-layer.2": 1602.8829, "encoder_q-layer.3": 1648.5135, "encoder_q-layer.4": 1731.1627, "encoder_q-layer.5": 1833.9208, "encoder_q-layer.6": 2251.0271, "encoder_q-layer.7": 2480.5996, "encoder_q-layer.8": 3127.8455, "encoder_q-layer.9": 2521.1841, "epoch": 0.46, "inbatch_neg_score": 0.1107, "inbatch_pos_score": 0.7578, "learning_rate": 2.927777777777778e-05, "loss": 3.4526, "norm_diff": 0.0839, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3838.0844, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1111, "query_norm": 1.3026, "queue_k_norm": 1.3845, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7845, "sent_len_1": 66.7212, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6475, "stdk": 0.0484, "stdq": 0.0454, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 47300 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.4585, "doc_norm": 1.3847, "encoder_q-embeddings": 6930.9902, "encoder_q-layer.0": 5450.0928, "encoder_q-layer.1": 5709.1743, "encoder_q-layer.10": 2330.1243, "encoder_q-layer.11": 5832.2881, "encoder_q-layer.2": 5874.3481, "encoder_q-layer.3": 6460.4849, "encoder_q-layer.4": 6373.3008, "encoder_q-layer.5": 4113.4941, "encoder_q-layer.6": 4038.2585, "encoder_q-layer.7": 4291.3057, "encoder_q-layer.8": 4254.0503, "encoder_q-layer.9": 2676.1157, "epoch": 0.46, "inbatch_neg_score": 0.1027, "inbatch_pos_score": 0.7725, "learning_rate": 2.9222222222222224e-05, "loss": 3.4585, "norm_diff": 0.0991, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7884.7262, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1038, "query_norm": 1.2856, "queue_k_norm": 1.3828, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6021, "sent_len_1": 66.8493, "sent_max_len_0": 127.9938, "sent_max_len_1": 191.545, "stdk": 0.0484, "stdq": 0.045, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 47400 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4408, "doc_norm": 1.3834, "encoder_q-embeddings": 2167.4119, "encoder_q-layer.0": 1369.6416, "encoder_q-layer.1": 1455.0234, "encoder_q-layer.10": 2358.1541, "encoder_q-layer.11": 5309.9795, "encoder_q-layer.2": 1613.0288, "encoder_q-layer.3": 1670.0447, "encoder_q-layer.4": 1745.5688, "encoder_q-layer.5": 1723.8954, "encoder_q-layer.6": 1939.3727, "encoder_q-layer.7": 2176.677, "encoder_q-layer.8": 2678.1858, "encoder_q-layer.9": 2346.2576, "epoch": 0.46, "inbatch_neg_score": 0.0979, "inbatch_pos_score": 0.7681, "learning_rate": 2.916666666666667e-05, "loss": 3.4408, "norm_diff": 0.0774, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3539.1155, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0974, "query_norm": 1.306, "queue_k_norm": 1.3837, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.81, "sent_len_1": 66.6862, "sent_max_len_0": 127.9813, "sent_max_len_1": 190.8713, "stdk": 0.0484, "stdq": 0.0458, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 47500 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.469, "doc_norm": 1.3823, "encoder_q-embeddings": 1936.26, "encoder_q-layer.0": 1284.3021, "encoder_q-layer.1": 1376.3447, "encoder_q-layer.10": 2176.3933, "encoder_q-layer.11": 5228.9727, "encoder_q-layer.2": 1527.5264, "encoder_q-layer.3": 1609.9875, "encoder_q-layer.4": 1627.6901, "encoder_q-layer.5": 1677.0349, "encoder_q-layer.6": 1901.5415, "encoder_q-layer.7": 2126.9287, "encoder_q-layer.8": 2439.8867, "encoder_q-layer.9": 2194.2156, "epoch": 0.46, "inbatch_neg_score": 0.0942, "inbatch_pos_score": 0.7549, "learning_rate": 2.9111111111111112e-05, "loss": 3.469, "norm_diff": 0.0982, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3435.2156, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0945, "query_norm": 1.2841, "queue_k_norm": 1.3848, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6332, "sent_len_1": 66.8104, "sent_max_len_0": 127.9925, "sent_max_len_1": 188.3925, "stdk": 0.0483, "stdq": 0.0451, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 47600 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.4266, "doc_norm": 1.3877, "encoder_q-embeddings": 2210.8916, "encoder_q-layer.0": 1549.8474, "encoder_q-layer.1": 1676.6819, "encoder_q-layer.10": 2505.3232, "encoder_q-layer.11": 5452.0942, "encoder_q-layer.2": 1936.6229, "encoder_q-layer.3": 1972.3239, "encoder_q-layer.4": 2065.2227, "encoder_q-layer.5": 2058.7546, "encoder_q-layer.6": 2241.3408, "encoder_q-layer.7": 2393.6814, "encoder_q-layer.8": 2585.8384, "encoder_q-layer.9": 2252.2144, "epoch": 0.47, "inbatch_neg_score": 0.0893, "inbatch_pos_score": 0.7446, "learning_rate": 2.9055555555555558e-05, "loss": 3.4266, "norm_diff": 0.1167, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3815.1465, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0887, "query_norm": 1.271, "queue_k_norm": 1.3828, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7568, "sent_len_1": 66.8698, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6825, "stdk": 0.0485, "stdq": 0.0446, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 47700 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.4484, "doc_norm": 1.3799, "encoder_q-embeddings": 2114.3474, "encoder_q-layer.0": 1365.9524, "encoder_q-layer.1": 1445.2373, "encoder_q-layer.10": 2492.1658, "encoder_q-layer.11": 5730.1299, "encoder_q-layer.2": 1583.9762, "encoder_q-layer.3": 1696.5562, "encoder_q-layer.4": 1757.0803, "encoder_q-layer.5": 1923.1329, "encoder_q-layer.6": 2138.1179, "encoder_q-layer.7": 2240.8865, "encoder_q-layer.8": 2633.7512, "encoder_q-layer.9": 2309.5869, "epoch": 0.47, "inbatch_neg_score": 0.0849, "inbatch_pos_score": 0.7139, "learning_rate": 2.9e-05, "loss": 3.4484, "norm_diff": 0.1267, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3696.1938, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0843, "query_norm": 1.2532, "queue_k_norm": 1.3811, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7586, "sent_len_1": 66.4287, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5175, "stdk": 0.0483, "stdq": 0.044, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 47800 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.4542, "doc_norm": 1.3779, "encoder_q-embeddings": 1953.9437, "encoder_q-layer.0": 1312.4659, "encoder_q-layer.1": 1399.7532, "encoder_q-layer.10": 2361.1279, "encoder_q-layer.11": 5168.4238, "encoder_q-layer.2": 1463.1367, "encoder_q-layer.3": 1464.0596, "encoder_q-layer.4": 1569.8198, "encoder_q-layer.5": 1641.9264, "encoder_q-layer.6": 1830.168, "encoder_q-layer.7": 1973.6658, "encoder_q-layer.8": 2427.9614, "encoder_q-layer.9": 2213.9436, "epoch": 0.47, "inbatch_neg_score": 0.0834, "inbatch_pos_score": 0.7295, "learning_rate": 2.8944444444444446e-05, "loss": 3.4542, "norm_diff": 0.1186, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3377.551, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0831, "query_norm": 1.2593, "queue_k_norm": 1.3807, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7886, "sent_len_1": 66.457, "sent_max_len_0": 128.0, "sent_max_len_1": 191.5362, "stdk": 0.0483, "stdq": 0.0444, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 47900 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.4453, "doc_norm": 1.3802, "encoder_q-embeddings": 1887.686, "encoder_q-layer.0": 1221.9047, "encoder_q-layer.1": 1286.8123, "encoder_q-layer.10": 2475.5222, "encoder_q-layer.11": 5527.7378, "encoder_q-layer.2": 1444.061, "encoder_q-layer.3": 1519.3553, "encoder_q-layer.4": 1582.7556, "encoder_q-layer.5": 1729.1823, "encoder_q-layer.6": 2005.1659, "encoder_q-layer.7": 2279.6836, "encoder_q-layer.8": 2740.2241, "encoder_q-layer.9": 2448.7727, "epoch": 0.47, "inbatch_neg_score": 0.0764, "inbatch_pos_score": 0.731, "learning_rate": 2.8888888888888888e-05, "loss": 3.4453, "norm_diff": 0.1352, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3540.0443, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0759, "query_norm": 1.245, "queue_k_norm": 1.3801, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7691, "sent_len_1": 66.7009, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8475, "stdk": 0.0484, "stdq": 0.044, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 48000 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.4402, "doc_norm": 1.3789, "encoder_q-embeddings": 1828.6028, "encoder_q-layer.0": 1227.1132, "encoder_q-layer.1": 1320.7771, "encoder_q-layer.10": 2180.9827, "encoder_q-layer.11": 5168.0073, "encoder_q-layer.2": 1457.9067, "encoder_q-layer.3": 1500.1696, "encoder_q-layer.4": 1608.1661, "encoder_q-layer.5": 1643.3674, "encoder_q-layer.6": 1829.2858, "encoder_q-layer.7": 2007.6637, "encoder_q-layer.8": 2535.7551, "encoder_q-layer.9": 2216.6458, "epoch": 0.47, "inbatch_neg_score": 0.07, "inbatch_pos_score": 0.7153, "learning_rate": 2.8833333333333334e-05, "loss": 3.4402, "norm_diff": 0.1302, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3396.2146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0708, "query_norm": 1.2487, "queue_k_norm": 1.3783, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6236, "sent_len_1": 66.6395, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9087, "stdk": 0.0484, "stdq": 0.0441, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 48100 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.4354, "doc_norm": 1.38, "encoder_q-embeddings": 2461.5327, "encoder_q-layer.0": 1693.8455, "encoder_q-layer.1": 1791.4215, "encoder_q-layer.10": 2222.6685, "encoder_q-layer.11": 5248.73, "encoder_q-layer.2": 2081.8501, "encoder_q-layer.3": 2326.7722, "encoder_q-layer.4": 2535.8066, "encoder_q-layer.5": 2521.9539, "encoder_q-layer.6": 2948.1013, "encoder_q-layer.7": 3047.3596, "encoder_q-layer.8": 2887.28, "encoder_q-layer.9": 2291.0459, "epoch": 0.47, "inbatch_neg_score": 0.0631, "inbatch_pos_score": 0.7163, "learning_rate": 2.877777777777778e-05, "loss": 3.4354, "norm_diff": 0.1231, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4073.7118, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0634, "query_norm": 1.2569, "queue_k_norm": 1.3768, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5897, "sent_len_1": 66.8538, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.375, "stdk": 0.0484, "stdq": 0.0444, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 48200 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.4509, "doc_norm": 1.3726, "encoder_q-embeddings": 1968.8014, "encoder_q-layer.0": 1322.5474, "encoder_q-layer.1": 1431.7452, "encoder_q-layer.10": 2314.853, "encoder_q-layer.11": 5474.4766, "encoder_q-layer.2": 1676.2263, "encoder_q-layer.3": 1704.3677, "encoder_q-layer.4": 1952.4923, "encoder_q-layer.5": 1978.6636, "encoder_q-layer.6": 2013.2737, "encoder_q-layer.7": 2211.822, "encoder_q-layer.8": 2534.2957, "encoder_q-layer.9": 2243.8533, "epoch": 0.47, "inbatch_neg_score": 0.0652, "inbatch_pos_score": 0.7021, "learning_rate": 2.8722222222222222e-05, "loss": 3.4509, "norm_diff": 0.1158, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3597.6315, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0656, "query_norm": 1.2567, "queue_k_norm": 1.3774, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5047, "sent_len_1": 66.7715, "sent_max_len_0": 127.985, "sent_max_len_1": 190.885, "stdk": 0.0482, "stdq": 0.0443, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 48300 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.4381, "doc_norm": 1.3791, "encoder_q-embeddings": 2090.5513, "encoder_q-layer.0": 1464.1744, "encoder_q-layer.1": 1563.1382, "encoder_q-layer.10": 2746.0161, "encoder_q-layer.11": 5777.2197, "encoder_q-layer.2": 1785.3759, "encoder_q-layer.3": 1805.8801, "encoder_q-layer.4": 1928.4767, "encoder_q-layer.5": 2011.0089, "encoder_q-layer.6": 2153.8386, "encoder_q-layer.7": 2321.5483, "encoder_q-layer.8": 2863.7583, "encoder_q-layer.9": 2421.9238, "epoch": 0.47, "inbatch_neg_score": 0.0655, "inbatch_pos_score": 0.7373, "learning_rate": 2.8666666666666668e-05, "loss": 3.4381, "norm_diff": 0.1021, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3832.5476, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0657, "query_norm": 1.2769, "queue_k_norm": 1.3761, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7055, "sent_len_1": 66.723, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3537, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 48400 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.4562, "doc_norm": 1.373, "encoder_q-embeddings": 1919.9336, "encoder_q-layer.0": 1261.8768, "encoder_q-layer.1": 1337.4603, "encoder_q-layer.10": 2215.3042, "encoder_q-layer.11": 5180.8467, "encoder_q-layer.2": 1530.436, "encoder_q-layer.3": 1593.6276, "encoder_q-layer.4": 1653.5011, "encoder_q-layer.5": 1696.9349, "encoder_q-layer.6": 1863.7061, "encoder_q-layer.7": 2122.78, "encoder_q-layer.8": 2388.3992, "encoder_q-layer.9": 2180.6201, "epoch": 0.47, "inbatch_neg_score": 0.0645, "inbatch_pos_score": 0.7305, "learning_rate": 2.861111111111111e-05, "loss": 3.4562, "norm_diff": 0.0906, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3405.2336, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.065, "query_norm": 1.2824, "queue_k_norm": 1.3744, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.2777, "sent_len_1": 66.7166, "sent_max_len_0": 127.9988, "sent_max_len_1": 191.3913, "stdk": 0.0483, "stdq": 0.045, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 48500 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.4208, "doc_norm": 1.3764, "encoder_q-embeddings": 2235.4675, "encoder_q-layer.0": 1454.5024, "encoder_q-layer.1": 1539.2756, "encoder_q-layer.10": 2667.7483, "encoder_q-layer.11": 5088.2476, "encoder_q-layer.2": 1742.132, "encoder_q-layer.3": 1823.2653, "encoder_q-layer.4": 1997.46, "encoder_q-layer.5": 2105.9346, "encoder_q-layer.6": 2299.8103, "encoder_q-layer.7": 2678.8208, "encoder_q-layer.8": 2809.0098, "encoder_q-layer.9": 2548.374, "epoch": 0.47, "inbatch_neg_score": 0.0689, "inbatch_pos_score": 0.7417, "learning_rate": 2.855555555555556e-05, "loss": 3.4208, "norm_diff": 0.0618, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3672.6689, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0682, "query_norm": 1.3147, "queue_k_norm": 1.3743, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.854, "sent_len_1": 66.7818, "sent_max_len_0": 128.0, "sent_max_len_1": 188.285, "stdk": 0.0484, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 48600 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.4485, "doc_norm": 1.3736, "encoder_q-embeddings": 2062.8481, "encoder_q-layer.0": 1329.9331, "encoder_q-layer.1": 1404.2433, "encoder_q-layer.10": 2309.6892, "encoder_q-layer.11": 5354.4048, "encoder_q-layer.2": 1583.7867, "encoder_q-layer.3": 1740.3552, "encoder_q-layer.4": 1831.2107, "encoder_q-layer.5": 1907.6508, "encoder_q-layer.6": 2129.8196, "encoder_q-layer.7": 2305.9351, "encoder_q-layer.8": 2591.3342, "encoder_q-layer.9": 2394.9348, "epoch": 0.48, "inbatch_neg_score": 0.0652, "inbatch_pos_score": 0.7075, "learning_rate": 2.8499999999999998e-05, "loss": 3.4485, "norm_diff": 0.1065, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3571.0653, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0651, "query_norm": 1.2671, "queue_k_norm": 1.374, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5425, "sent_len_1": 66.8018, "sent_max_len_0": 127.9963, "sent_max_len_1": 192.0925, "stdk": 0.0484, "stdq": 0.0438, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 48700 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.4457, "doc_norm": 1.3801, "encoder_q-embeddings": 5043.8843, "encoder_q-layer.0": 3385.7961, "encoder_q-layer.1": 3619.6543, "encoder_q-layer.10": 4625.7402, "encoder_q-layer.11": 10168.6348, "encoder_q-layer.2": 4169.8452, "encoder_q-layer.3": 4228.8833, "encoder_q-layer.4": 4551.7197, "encoder_q-layer.5": 5091.9282, "encoder_q-layer.6": 4700.8794, "encoder_q-layer.7": 4853.3672, "encoder_q-layer.8": 5059.7012, "encoder_q-layer.9": 4283.4092, "epoch": 0.48, "inbatch_neg_score": 0.07, "inbatch_pos_score": 0.7354, "learning_rate": 2.8444444444444447e-05, "loss": 3.4457, "norm_diff": 0.0682, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7581.2723, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0697, "query_norm": 1.3119, "queue_k_norm": 1.3738, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6013, "sent_len_1": 66.7733, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1325, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 48800 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.4508, "doc_norm": 1.3768, "encoder_q-embeddings": 4980.4229, "encoder_q-layer.0": 3449.0474, "encoder_q-layer.1": 3650.6707, "encoder_q-layer.10": 4632.8867, "encoder_q-layer.11": 10473.7842, "encoder_q-layer.2": 4190.4883, "encoder_q-layer.3": 4550.7837, "encoder_q-layer.4": 5063.5986, "encoder_q-layer.5": 5391.9165, "encoder_q-layer.6": 5828.9912, "encoder_q-layer.7": 5888.4424, "encoder_q-layer.8": 5668.4209, "encoder_q-layer.9": 4590.6421, "epoch": 0.48, "inbatch_neg_score": 0.0725, "inbatch_pos_score": 0.7354, "learning_rate": 2.8388888888888893e-05, "loss": 3.4508, "norm_diff": 0.0707, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8125.3072, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0734, "query_norm": 1.3061, "queue_k_norm": 1.3736, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6468, "sent_len_1": 66.7128, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.4613, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 48900 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.4397, "doc_norm": 1.3737, "encoder_q-embeddings": 4167.9678, "encoder_q-layer.0": 2766.0967, "encoder_q-layer.1": 2844.7751, "encoder_q-layer.10": 4392.769, "encoder_q-layer.11": 10296.0557, "encoder_q-layer.2": 3210.4629, "encoder_q-layer.3": 3278.2673, "encoder_q-layer.4": 3393.6699, "encoder_q-layer.5": 3420.4111, "encoder_q-layer.6": 3775.5337, "encoder_q-layer.7": 4068.7051, "encoder_q-layer.8": 4946.2588, "encoder_q-layer.9": 4323.0454, "epoch": 0.48, "inbatch_neg_score": 0.0768, "inbatch_pos_score": 0.7222, "learning_rate": 2.8333333333333335e-05, "loss": 3.4397, "norm_diff": 0.0831, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6993.0114, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0764, "query_norm": 1.2906, "queue_k_norm": 1.3714, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5801, "sent_len_1": 66.5423, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9725, "stdk": 0.0484, "stdq": 0.0438, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 49000 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.446, "doc_norm": 1.3788, "encoder_q-embeddings": 4241.9727, "encoder_q-layer.0": 2791.5203, "encoder_q-layer.1": 3006.1877, "encoder_q-layer.10": 4479.3555, "encoder_q-layer.11": 10422.043, "encoder_q-layer.2": 3336.0183, "encoder_q-layer.3": 3365.7197, "encoder_q-layer.4": 3536.2068, "encoder_q-layer.5": 3700.4768, "encoder_q-layer.6": 4166.3354, "encoder_q-layer.7": 4846.144, "encoder_q-layer.8": 5248.3525, "encoder_q-layer.9": 4573.7407, "epoch": 0.48, "inbatch_neg_score": 0.0827, "inbatch_pos_score": 0.7329, "learning_rate": 2.827777777777778e-05, "loss": 3.446, "norm_diff": 0.0575, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7166.1193, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0823, "query_norm": 1.3213, "queue_k_norm": 1.3753, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4705, "sent_len_1": 66.8816, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6213, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 49100 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.448, "doc_norm": 1.373, "encoder_q-embeddings": 4744.3843, "encoder_q-layer.0": 3207.084, "encoder_q-layer.1": 3375.3376, "encoder_q-layer.10": 4656.5049, "encoder_q-layer.11": 10479.8984, "encoder_q-layer.2": 3858.1685, "encoder_q-layer.3": 3896.1777, "encoder_q-layer.4": 4148.2666, "encoder_q-layer.5": 4182.543, "encoder_q-layer.6": 4487.3779, "encoder_q-layer.7": 4815.2451, "encoder_q-layer.8": 5375.981, "encoder_q-layer.9": 4741.7651, "epoch": 0.48, "inbatch_neg_score": 0.0879, "inbatch_pos_score": 0.7466, "learning_rate": 2.8222222222222223e-05, "loss": 3.448, "norm_diff": 0.047, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7597.1833, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0862, "query_norm": 1.326, "queue_k_norm": 1.3719, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.3576, "sent_len_1": 66.8567, "sent_max_len_0": 128.0, "sent_max_len_1": 188.215, "stdk": 0.0483, "stdq": 0.0452, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 49200 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.4388, "doc_norm": 1.3812, "encoder_q-embeddings": 3948.3872, "encoder_q-layer.0": 2581.7544, "encoder_q-layer.1": 2697.0527, "encoder_q-layer.10": 4676.27, "encoder_q-layer.11": 10698.3965, "encoder_q-layer.2": 3029.1404, "encoder_q-layer.3": 2980.2539, "encoder_q-layer.4": 3198.4958, "encoder_q-layer.5": 3481.0999, "encoder_q-layer.6": 3812.2554, "encoder_q-layer.7": 4175.8066, "encoder_q-layer.8": 5035.9927, "encoder_q-layer.9": 4496.0381, "epoch": 0.48, "inbatch_neg_score": 0.0882, "inbatch_pos_score": 0.7354, "learning_rate": 2.816666666666667e-05, "loss": 3.4388, "norm_diff": 0.0694, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6861.3652, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0872, "query_norm": 1.3118, "queue_k_norm": 1.3727, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5993, "sent_len_1": 66.6938, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.8738, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 49300 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.451, "doc_norm": 1.3755, "encoder_q-embeddings": 4413.2817, "encoder_q-layer.0": 2977.9089, "encoder_q-layer.1": 3288.6758, "encoder_q-layer.10": 5106.665, "encoder_q-layer.11": 10742.7959, "encoder_q-layer.2": 3663.7715, "encoder_q-layer.3": 3727.5872, "encoder_q-layer.4": 3877.0764, "encoder_q-layer.5": 4182.5635, "encoder_q-layer.6": 4347.2695, "encoder_q-layer.7": 4899.3623, "encoder_q-layer.8": 5595.1914, "encoder_q-layer.9": 4634.4932, "epoch": 0.48, "inbatch_neg_score": 0.0974, "inbatch_pos_score": 0.7646, "learning_rate": 2.811111111111111e-05, "loss": 3.451, "norm_diff": 0.0686, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7410.3472, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0967, "query_norm": 1.3069, "queue_k_norm": 1.374, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5076, "sent_len_1": 66.8858, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2837, "stdk": 0.0484, "stdq": 0.0449, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 49400 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.4325, "doc_norm": 1.3764, "encoder_q-embeddings": 5419.4204, "encoder_q-layer.0": 3480.697, "encoder_q-layer.1": 3898.2598, "encoder_q-layer.10": 4453.1807, "encoder_q-layer.11": 10707.8477, "encoder_q-layer.2": 4496.7969, "encoder_q-layer.3": 4793.8354, "encoder_q-layer.4": 5271.8989, "encoder_q-layer.5": 5231.7432, "encoder_q-layer.6": 5035.4409, "encoder_q-layer.7": 5145.2686, "encoder_q-layer.8": 5430.7666, "encoder_q-layer.9": 4564.124, "epoch": 0.48, "inbatch_neg_score": 0.0978, "inbatch_pos_score": 0.7329, "learning_rate": 2.8055555555555557e-05, "loss": 3.4325, "norm_diff": 0.0753, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8097.9352, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0985, "query_norm": 1.3011, "queue_k_norm": 1.3742, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4123, "sent_len_1": 66.8274, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.5125, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 49500 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.4337, "doc_norm": 1.3806, "encoder_q-embeddings": 4395.6167, "encoder_q-layer.0": 3007.5315, "encoder_q-layer.1": 3167.762, "encoder_q-layer.10": 5084.3384, "encoder_q-layer.11": 12583.0762, "encoder_q-layer.2": 3470.4438, "encoder_q-layer.3": 3620.5649, "encoder_q-layer.4": 3858.0535, "encoder_q-layer.5": 4274.627, "encoder_q-layer.6": 4820.0562, "encoder_q-layer.7": 4976.5659, "encoder_q-layer.8": 5505.6606, "encoder_q-layer.9": 4959.8154, "epoch": 0.48, "inbatch_neg_score": 0.0963, "inbatch_pos_score": 0.7471, "learning_rate": 2.8000000000000003e-05, "loss": 3.4337, "norm_diff": 0.0871, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7877.6377, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0982, "query_norm": 1.2935, "queue_k_norm": 1.3774, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.8535, "sent_len_1": 66.7365, "sent_max_len_0": 127.9925, "sent_max_len_1": 190.71, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 49600 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.4486, "doc_norm": 1.3759, "encoder_q-embeddings": 4652.1147, "encoder_q-layer.0": 3142.2717, "encoder_q-layer.1": 3403.6394, "encoder_q-layer.10": 4598.9995, "encoder_q-layer.11": 10427.873, "encoder_q-layer.2": 3840.2461, "encoder_q-layer.3": 4261.1641, "encoder_q-layer.4": 4686.9707, "encoder_q-layer.5": 5401.1904, "encoder_q-layer.6": 5476.8745, "encoder_q-layer.7": 5185.3496, "encoder_q-layer.8": 5815.9434, "encoder_q-layer.9": 4689.4971, "epoch": 0.49, "inbatch_neg_score": 0.0899, "inbatch_pos_score": 0.7114, "learning_rate": 2.7944444444444445e-05, "loss": 3.4486, "norm_diff": 0.1185, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7762.3789, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0906, "query_norm": 1.2574, "queue_k_norm": 1.3768, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.3823, "sent_len_1": 66.7288, "sent_max_len_0": 128.0, "sent_max_len_1": 189.66, "stdk": 0.0485, "stdq": 0.044, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 49700 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.423, "doc_norm": 1.3804, "encoder_q-embeddings": 4178.8784, "encoder_q-layer.0": 2656.7119, "encoder_q-layer.1": 2914.3782, "encoder_q-layer.10": 4716.0347, "encoder_q-layer.11": 10733.21, "encoder_q-layer.2": 3187.6357, "encoder_q-layer.3": 3438.5659, "encoder_q-layer.4": 3465.0994, "encoder_q-layer.5": 3585.8076, "encoder_q-layer.6": 4055.9226, "encoder_q-layer.7": 4437.1489, "encoder_q-layer.8": 5101.7866, "encoder_q-layer.9": 4512.2417, "epoch": 0.49, "inbatch_neg_score": 0.0924, "inbatch_pos_score": 0.7656, "learning_rate": 2.788888888888889e-05, "loss": 3.423, "norm_diff": 0.0848, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7096.1202, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0924, "query_norm": 1.2956, "queue_k_norm": 1.3794, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5237, "sent_len_1": 66.8923, "sent_max_len_0": 127.995, "sent_max_len_1": 190.355, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 49800 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.4197, "doc_norm": 1.3729, "encoder_q-embeddings": 4972.083, "encoder_q-layer.0": 3276.8665, "encoder_q-layer.1": 3552.1133, "encoder_q-layer.10": 5269.0488, "encoder_q-layer.11": 11446.3818, "encoder_q-layer.2": 3949.6538, "encoder_q-layer.3": 4232.4639, "encoder_q-layer.4": 4473.7871, "encoder_q-layer.5": 4628.3403, "encoder_q-layer.6": 4892.4531, "encoder_q-layer.7": 5463.4199, "encoder_q-layer.8": 5770.2886, "encoder_q-layer.9": 4970.625, "epoch": 0.49, "inbatch_neg_score": 0.0897, "inbatch_pos_score": 0.7295, "learning_rate": 2.7833333333333333e-05, "loss": 3.4197, "norm_diff": 0.0887, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7978.1956, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0894, "query_norm": 1.2842, "queue_k_norm": 1.376, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.9146, "sent_len_1": 66.9008, "sent_max_len_0": 128.0, "sent_max_len_1": 192.2388, "stdk": 0.0483, "stdq": 0.0451, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 49900 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.437, "doc_norm": 1.3753, "encoder_q-embeddings": 4475.5898, "encoder_q-layer.0": 2959.1709, "encoder_q-layer.1": 3072.3857, "encoder_q-layer.10": 4720.8369, "encoder_q-layer.11": 10955.5869, "encoder_q-layer.2": 3497.7156, "encoder_q-layer.3": 3549.7407, "encoder_q-layer.4": 3794.9956, "encoder_q-layer.5": 4007.9421, "encoder_q-layer.6": 4344.2979, "encoder_q-layer.7": 4586.2939, "encoder_q-layer.8": 5404.6016, "encoder_q-layer.9": 4830.7886, "epoch": 0.49, "inbatch_neg_score": 0.0885, "inbatch_pos_score": 0.7373, "learning_rate": 2.777777777777778e-05, "loss": 3.437, "norm_diff": 0.0992, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7365.6215, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0892, "query_norm": 1.2761, "queue_k_norm": 1.3769, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6144, "sent_len_1": 66.9527, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1025, "stdk": 0.0484, "stdq": 0.0449, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 50000 }, { "dev_runtime": 26.4066, "dev_samples_per_second": 2.424, "dev_steps_per_second": 0.038, "epoch": 0.49, "step": 50000, "test_accuracy": 93.76220703125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3647818863391876, "test_doc_norm": 1.357397437095642, "test_inbatch_neg_score": 0.44864434003829956, "test_inbatch_pos_score": 1.389329433441162, "test_loss": 0.3647818863391876, "test_loss_align": 1.0548771619796753, "test_loss_unif": 3.961122989654541, "test_loss_unif_q@queue": 3.96112322807312, "test_norm_diff": 0.04334930330514908, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.09476882219314575, "test_query_norm": 1.4005849361419678, "test_queue_k_norm": 1.3767602443695068, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04243077337741852, "test_stdq": 0.0432288683950901, "test_stdqueue_k": 0.04854192957282066, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.4066, "dev_samples_per_second": 2.424, "dev_steps_per_second": 0.038, "epoch": 0.49, "eval_beir-arguana_ndcg@10": 0.37182, "eval_beir-arguana_recall@10": 0.6138, "eval_beir-arguana_recall@100": 0.91892, "eval_beir-arguana_recall@20": 0.75533, "eval_beir-avg_ndcg@10": 0.37263983333333334, "eval_beir-avg_recall@10": 0.44206991666666673, "eval_beir-avg_recall@100": 0.6229515833333334, "eval_beir-avg_recall@20": 0.5052514166666666, "eval_beir-cqadupstack_ndcg@10": 0.2641583333333334, "eval_beir-cqadupstack_recall@10": 0.35646916666666667, "eval_beir-cqadupstack_recall@100": 0.5810558333333333, "eval_beir-cqadupstack_recall@20": 0.4221641666666667, "eval_beir-fiqa_ndcg@10": 0.24308, "eval_beir-fiqa_recall@10": 0.31089, "eval_beir-fiqa_recall@100": 0.55576, "eval_beir-fiqa_recall@20": 0.38264, "eval_beir-nfcorpus_ndcg@10": 0.28521, "eval_beir-nfcorpus_recall@10": 0.14392, "eval_beir-nfcorpus_recall@100": 0.27264, "eval_beir-nfcorpus_recall@20": 0.17035, "eval_beir-nq_ndcg@10": 0.28096, "eval_beir-nq_recall@10": 0.45855, "eval_beir-nq_recall@100": 0.78317, "eval_beir-nq_recall@20": 0.56475, "eval_beir-quora_ndcg@10": 0.79827, "eval_beir-quora_recall@10": 0.89831, "eval_beir-quora_recall@100": 0.98026, "eval_beir-quora_recall@20": 0.93483, "eval_beir-scidocs_ndcg@10": 0.15073, "eval_beir-scidocs_recall@10": 0.15883, "eval_beir-scidocs_recall@100": 0.35993, "eval_beir-scidocs_recall@20": 0.21282, "eval_beir-scifact_ndcg@10": 0.61689, "eval_beir-scifact_recall@10": 0.77133, "eval_beir-scifact_recall@100": 0.89989, "eval_beir-scifact_recall@20": 0.83022, "eval_beir-trec-covid_ndcg@10": 0.55294, "eval_beir-trec-covid_recall@10": 0.6, "eval_beir-trec-covid_recall@100": 0.4492, "eval_beir-trec-covid_recall@20": 0.588, "eval_beir-webis-touche2020_ndcg@10": 0.16234, "eval_beir-webis-touche2020_recall@10": 0.1086, "eval_beir-webis-touche2020_recall@100": 0.42869, "eval_beir-webis-touche2020_recall@20": 0.19141, "eval_senteval-avg_sts": 0.76121727086381, "eval_senteval-sickr_spearman": 0.7248755845593028, "eval_senteval-stsb_spearman": 0.7975589571683172, "step": 50000, "test_accuracy": 93.76220703125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3647818863391876, "test_doc_norm": 1.357397437095642, "test_inbatch_neg_score": 0.44864434003829956, "test_inbatch_pos_score": 1.389329433441162, "test_loss": 0.3647818863391876, "test_loss_align": 1.0548771619796753, "test_loss_unif": 3.961122989654541, "test_loss_unif_q@queue": 3.96112322807312, "test_norm_diff": 0.04334930330514908, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.09476882219314575, "test_query_norm": 1.4005849361419678, "test_queue_k_norm": 1.3767602443695068, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04243077337741852, "test_stdq": 0.0432288683950901, "test_stdqueue_k": 0.04854192957282066, "test_stdqueue_q": 0.0 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.4339, "doc_norm": 1.3801, "encoder_q-embeddings": 4628.7583, "encoder_q-layer.0": 2956.2278, "encoder_q-layer.1": 3103.1946, "encoder_q-layer.10": 4540.0195, "encoder_q-layer.11": 10664.415, "encoder_q-layer.2": 3478.1758, "encoder_q-layer.3": 3648.0378, "encoder_q-layer.4": 3938.0554, "encoder_q-layer.5": 4000.3821, "encoder_q-layer.6": 4853.9194, "encoder_q-layer.7": 5133.4097, "encoder_q-layer.8": 5318.1831, "encoder_q-layer.9": 4572.3521, "epoch": 0.49, "inbatch_neg_score": 0.0851, "inbatch_pos_score": 0.7354, "learning_rate": 2.772222222222222e-05, "loss": 3.4339, "norm_diff": 0.1172, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7431.1551, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0856, "query_norm": 1.2629, "queue_k_norm": 1.3769, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5298, "sent_len_1": 66.9877, "sent_max_len_0": 127.99, "sent_max_len_1": 190.865, "stdk": 0.0486, "stdq": 0.0445, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 50100 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.4256, "doc_norm": 1.3707, "encoder_q-embeddings": 5275.6118, "encoder_q-layer.0": 3344.989, "encoder_q-layer.1": 3564.6946, "encoder_q-layer.10": 4927.7046, "encoder_q-layer.11": 10680.3281, "encoder_q-layer.2": 4238.71, "encoder_q-layer.3": 4310.0186, "encoder_q-layer.4": 4542.8984, "encoder_q-layer.5": 4566.2598, "encoder_q-layer.6": 4841.6299, "encoder_q-layer.7": 4837.167, "encoder_q-layer.8": 5049.2881, "encoder_q-layer.9": 4490.5225, "epoch": 0.49, "inbatch_neg_score": 0.0863, "inbatch_pos_score": 0.7373, "learning_rate": 2.7666666666666667e-05, "loss": 3.4256, "norm_diff": 0.0978, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7776.6007, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0861, "query_norm": 1.2729, "queue_k_norm": 1.3744, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7675, "sent_len_1": 66.7483, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5062, "stdk": 0.0482, "stdq": 0.0447, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50200 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.4143, "doc_norm": 1.3695, "encoder_q-embeddings": 4478.061, "encoder_q-layer.0": 2884.0583, "encoder_q-layer.1": 3085.2197, "encoder_q-layer.10": 4657.0884, "encoder_q-layer.11": 10942.4014, "encoder_q-layer.2": 3520.5574, "encoder_q-layer.3": 3752.9812, "encoder_q-layer.4": 4112.7783, "encoder_q-layer.5": 4231.458, "encoder_q-layer.6": 4603.0156, "encoder_q-layer.7": 5083.2251, "encoder_q-layer.8": 5421.7129, "encoder_q-layer.9": 4821.3086, "epoch": 0.49, "inbatch_neg_score": 0.083, "inbatch_pos_score": 0.7373, "learning_rate": 2.761111111111111e-05, "loss": 3.4143, "norm_diff": 0.0991, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7582.5937, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0823, "query_norm": 1.2704, "queue_k_norm": 1.3749, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6282, "sent_len_1": 66.8802, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5188, "stdk": 0.0482, "stdq": 0.0447, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 50300 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.4318, "doc_norm": 1.3803, "encoder_q-embeddings": 5143.0127, "encoder_q-layer.0": 3461.9802, "encoder_q-layer.1": 3526.8855, "encoder_q-layer.10": 4854.3574, "encoder_q-layer.11": 11245.5518, "encoder_q-layer.2": 4296.0771, "encoder_q-layer.3": 4570.1001, "encoder_q-layer.4": 5040.6143, "encoder_q-layer.5": 5355.4233, "encoder_q-layer.6": 5565.2451, "encoder_q-layer.7": 5460.9468, "encoder_q-layer.8": 5710.6953, "encoder_q-layer.9": 4606.0771, "epoch": 0.49, "inbatch_neg_score": 0.0774, "inbatch_pos_score": 0.7349, "learning_rate": 2.7555555555555555e-05, "loss": 3.4318, "norm_diff": 0.1075, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8145.0765, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0771, "query_norm": 1.2729, "queue_k_norm": 1.3748, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6391, "sent_len_1": 66.5504, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.5737, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 50400 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.4231, "doc_norm": 1.378, "encoder_q-embeddings": 4090.9644, "encoder_q-layer.0": 2747.9827, "encoder_q-layer.1": 2851.5649, "encoder_q-layer.10": 4648.8755, "encoder_q-layer.11": 10568.3418, "encoder_q-layer.2": 3287.5608, "encoder_q-layer.3": 3391.7568, "encoder_q-layer.4": 3599.1877, "encoder_q-layer.5": 3729.5269, "encoder_q-layer.6": 4308.2617, "encoder_q-layer.7": 4507.9385, "encoder_q-layer.8": 5129.71, "encoder_q-layer.9": 4555.6016, "epoch": 0.49, "inbatch_neg_score": 0.0782, "inbatch_pos_score": 0.7515, "learning_rate": 2.7500000000000004e-05, "loss": 3.4231, "norm_diff": 0.0836, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7057.6581, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0789, "query_norm": 1.2943, "queue_k_norm": 1.3764, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7467, "sent_len_1": 66.6191, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0825, "stdk": 0.0485, "stdq": 0.0456, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 50500 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.4583, "doc_norm": 1.3701, "encoder_q-embeddings": 3767.5874, "encoder_q-layer.0": 2522.7498, "encoder_q-layer.1": 2621.0452, "encoder_q-layer.10": 5240.3955, "encoder_q-layer.11": 11163.7871, "encoder_q-layer.2": 3028.6367, "encoder_q-layer.3": 3032.6702, "encoder_q-layer.4": 3120.7048, "encoder_q-layer.5": 3359.9004, "encoder_q-layer.6": 3758.9932, "encoder_q-layer.7": 4689.4189, "encoder_q-layer.8": 5336.7188, "encoder_q-layer.9": 4888.4751, "epoch": 0.49, "inbatch_neg_score": 0.0745, "inbatch_pos_score": 0.6816, "learning_rate": 2.7444444444444443e-05, "loss": 3.4583, "norm_diff": 0.139, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7088.2202, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0745, "query_norm": 1.2311, "queue_k_norm": 1.3768, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5597, "sent_len_1": 66.8983, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5475, "stdk": 0.0483, "stdq": 0.0433, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 50600 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4331, "doc_norm": 1.3692, "encoder_q-embeddings": 3862.2959, "encoder_q-layer.0": 2543.0151, "encoder_q-layer.1": 2643.6143, "encoder_q-layer.10": 4507.814, "encoder_q-layer.11": 10097.0547, "encoder_q-layer.2": 2916.1301, "encoder_q-layer.3": 3064.1199, "encoder_q-layer.4": 3205.9746, "encoder_q-layer.5": 3262.2126, "encoder_q-layer.6": 3654.8896, "encoder_q-layer.7": 4176.918, "encoder_q-layer.8": 5102.9951, "encoder_q-layer.9": 4398.1494, "epoch": 0.49, "inbatch_neg_score": 0.0727, "inbatch_pos_score": 0.7432, "learning_rate": 2.7388888888888892e-05, "loss": 3.4331, "norm_diff": 0.0905, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6646.625, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0732, "query_norm": 1.2787, "queue_k_norm": 1.3758, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5516, "sent_len_1": 66.9509, "sent_max_len_0": 127.9988, "sent_max_len_1": 192.1675, "stdk": 0.0482, "stdq": 0.0451, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 50700 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.407, "doc_norm": 1.3769, "encoder_q-embeddings": 7972.7524, "encoder_q-layer.0": 5289.4375, "encoder_q-layer.1": 5614.1519, "encoder_q-layer.10": 8817.5127, "encoder_q-layer.11": 19772.7773, "encoder_q-layer.2": 6442.7178, "encoder_q-layer.3": 6439.2485, "encoder_q-layer.4": 6800.7598, "encoder_q-layer.5": 7062.6196, "encoder_q-layer.6": 7726.2534, "encoder_q-layer.7": 8621.2354, "encoder_q-layer.8": 9985.7832, "encoder_q-layer.9": 8770.0117, "epoch": 0.5, "inbatch_neg_score": 0.0739, "inbatch_pos_score": 0.7227, "learning_rate": 2.733333333333333e-05, "loss": 3.407, "norm_diff": 0.1072, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13589.3215, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.074, "query_norm": 1.2697, "queue_k_norm": 1.3749, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7001, "sent_len_1": 66.8714, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9462, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 50800 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.4425, "doc_norm": 1.3782, "encoder_q-embeddings": 7263.4736, "encoder_q-layer.0": 5009.5532, "encoder_q-layer.1": 5287.7402, "encoder_q-layer.10": 8957.8271, "encoder_q-layer.11": 21173.459, "encoder_q-layer.2": 5986.5273, "encoder_q-layer.3": 6437.9478, "encoder_q-layer.4": 6782.8799, "encoder_q-layer.5": 6950.2012, "encoder_q-layer.6": 8003.6924, "encoder_q-layer.7": 9404.4316, "encoder_q-layer.8": 10196.6387, "encoder_q-layer.9": 8850.7295, "epoch": 0.5, "inbatch_neg_score": 0.0754, "inbatch_pos_score": 0.749, "learning_rate": 2.727777777777778e-05, "loss": 3.4425, "norm_diff": 0.1023, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13801.7568, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0748, "query_norm": 1.276, "queue_k_norm": 1.3754, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4645, "sent_len_1": 67.0228, "sent_max_len_0": 127.9887, "sent_max_len_1": 190.6087, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 50900 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.4355, "doc_norm": 1.3752, "encoder_q-embeddings": 8461.9814, "encoder_q-layer.0": 5601.373, "encoder_q-layer.1": 5661.79, "encoder_q-layer.10": 9506.0068, "encoder_q-layer.11": 21520.8145, "encoder_q-layer.2": 6526.0981, "encoder_q-layer.3": 7009.2842, "encoder_q-layer.4": 7543.6084, "encoder_q-layer.5": 7781.0806, "encoder_q-layer.6": 8570.2666, "encoder_q-layer.7": 9715.5908, "encoder_q-layer.8": 10956.2607, "encoder_q-layer.9": 9671.4463, "epoch": 0.5, "inbatch_neg_score": 0.0765, "inbatch_pos_score": 0.7388, "learning_rate": 2.7222222222222223e-05, "loss": 3.4355, "norm_diff": 0.0855, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14662.7085, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0765, "query_norm": 1.2897, "queue_k_norm": 1.3753, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5686, "sent_len_1": 66.6099, "sent_max_len_0": 128.0, "sent_max_len_1": 187.0563, "stdk": 0.0484, "stdq": 0.0454, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 51000 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.4225, "doc_norm": 1.3807, "encoder_q-embeddings": 11297.8076, "encoder_q-layer.0": 7655.519, "encoder_q-layer.1": 8466.8721, "encoder_q-layer.10": 9015.4736, "encoder_q-layer.11": 20552.9883, "encoder_q-layer.2": 9783.6807, "encoder_q-layer.3": 10289.8799, "encoder_q-layer.4": 11029.3232, "encoder_q-layer.5": 10950.8828, "encoder_q-layer.6": 11475.2764, "encoder_q-layer.7": 11819.8818, "encoder_q-layer.8": 11199.8018, "encoder_q-layer.9": 9274.335, "epoch": 0.5, "inbatch_neg_score": 0.0795, "inbatch_pos_score": 0.7393, "learning_rate": 2.716666666666667e-05, "loss": 3.4225, "norm_diff": 0.0998, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17041.3562, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.0787, "query_norm": 1.2809, "queue_k_norm": 1.3722, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7008, "sent_len_1": 66.4222, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7812, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 51100 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.4193, "doc_norm": 1.3688, "encoder_q-embeddings": 8329.7422, "encoder_q-layer.0": 5482.8032, "encoder_q-layer.1": 5870.7295, "encoder_q-layer.10": 10765.7705, "encoder_q-layer.11": 21324.5547, "encoder_q-layer.2": 6501.2124, "encoder_q-layer.3": 6911.7046, "encoder_q-layer.4": 7297.9526, "encoder_q-layer.5": 7609.7617, "encoder_q-layer.6": 8169.5161, "encoder_q-layer.7": 9004.8564, "encoder_q-layer.8": 11033.4736, "encoder_q-layer.9": 9906.9922, "epoch": 0.5, "inbatch_neg_score": 0.0806, "inbatch_pos_score": 0.7397, "learning_rate": 2.7111111111111114e-05, "loss": 3.4193, "norm_diff": 0.0579, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14531.667, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0816, "query_norm": 1.3109, "queue_k_norm": 1.374, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.3882, "sent_len_1": 66.7704, "sent_max_len_0": 127.995, "sent_max_len_1": 189.7488, "stdk": 0.0483, "stdq": 0.0461, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 51200 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.4424, "doc_norm": 1.3696, "encoder_q-embeddings": 7774.8052, "encoder_q-layer.0": 5015.6265, "encoder_q-layer.1": 5292.2168, "encoder_q-layer.10": 8541.0215, "encoder_q-layer.11": 20556.8652, "encoder_q-layer.2": 5908.9785, "encoder_q-layer.3": 6141.6807, "encoder_q-layer.4": 6595.8027, "encoder_q-layer.5": 6777.1953, "encoder_q-layer.6": 7484.166, "encoder_q-layer.7": 8115.5098, "encoder_q-layer.8": 9028.9922, "encoder_q-layer.9": 8376.627, "epoch": 0.5, "inbatch_neg_score": 0.0785, "inbatch_pos_score": 0.7324, "learning_rate": 2.7055555555555557e-05, "loss": 3.4424, "norm_diff": 0.1178, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13334.9792, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0781, "query_norm": 1.2518, "queue_k_norm": 1.3756, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.51, "sent_len_1": 66.6101, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0925, "stdk": 0.0483, "stdq": 0.0441, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 51300 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.4393, "doc_norm": 1.3654, "encoder_q-embeddings": 15263.7295, "encoder_q-layer.0": 9294.1973, "encoder_q-layer.1": 10287.7988, "encoder_q-layer.10": 4723.6299, "encoder_q-layer.11": 10489.8496, "encoder_q-layer.2": 12529.666, "encoder_q-layer.3": 13252.9111, "encoder_q-layer.4": 14841.7295, "encoder_q-layer.5": 15233.459, "encoder_q-layer.6": 13959.1631, "encoder_q-layer.7": 15726.1221, "encoder_q-layer.8": 12000.417, "encoder_q-layer.9": 4869.7471, "epoch": 0.5, "inbatch_neg_score": 0.08, "inbatch_pos_score": 0.7104, "learning_rate": 2.7000000000000002e-05, "loss": 3.4393, "norm_diff": 0.1034, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 19063.7018, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.0798, "query_norm": 1.262, "queue_k_norm": 1.3714, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7025, "sent_len_1": 66.7807, "sent_max_len_0": 127.995, "sent_max_len_1": 185.0538, "stdk": 0.0481, "stdq": 0.0443, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 51400 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.4174, "doc_norm": 1.386, "encoder_q-embeddings": 4767.5977, "encoder_q-layer.0": 3129.6167, "encoder_q-layer.1": 3179.6763, "encoder_q-layer.10": 4902.4683, "encoder_q-layer.11": 9995.0078, "encoder_q-layer.2": 3425.7231, "encoder_q-layer.3": 3688.04, "encoder_q-layer.4": 3791.1045, "encoder_q-layer.5": 3806.0696, "encoder_q-layer.6": 4258.8057, "encoder_q-layer.7": 4377.6514, "encoder_q-layer.8": 5218.8276, "encoder_q-layer.9": 4554.7905, "epoch": 0.5, "inbatch_neg_score": 0.0767, "inbatch_pos_score": 0.7559, "learning_rate": 2.6944444444444445e-05, "loss": 3.4174, "norm_diff": 0.1044, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7192.168, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0769, "query_norm": 1.2816, "queue_k_norm": 1.3765, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5856, "sent_len_1": 67.0538, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9087, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 51500 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.3974, "doc_norm": 1.3715, "encoder_q-embeddings": 2261.2844, "encoder_q-layer.0": 1433.0679, "encoder_q-layer.1": 1538.5283, "encoder_q-layer.10": 2533.78, "encoder_q-layer.11": 5153.2656, "encoder_q-layer.2": 1744.0599, "encoder_q-layer.3": 1845.4664, "encoder_q-layer.4": 1921.8408, "encoder_q-layer.5": 1992.254, "encoder_q-layer.6": 2375.1528, "encoder_q-layer.7": 2632.873, "encoder_q-layer.8": 2828.0552, "encoder_q-layer.9": 2457.8135, "epoch": 0.5, "inbatch_neg_score": 0.079, "inbatch_pos_score": 0.7505, "learning_rate": 2.688888888888889e-05, "loss": 3.3974, "norm_diff": 0.0607, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3714.54, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0794, "query_norm": 1.3108, "queue_k_norm": 1.3767, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5919, "sent_len_1": 66.6295, "sent_max_len_0": 128.0, "sent_max_len_1": 188.885, "stdk": 0.0484, "stdq": 0.0462, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 51600 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.4246, "doc_norm": 1.3781, "encoder_q-embeddings": 979.4158, "encoder_q-layer.0": 656.0124, "encoder_q-layer.1": 703.6392, "encoder_q-layer.10": 1168.3174, "encoder_q-layer.11": 2571.0625, "encoder_q-layer.2": 786.1404, "encoder_q-layer.3": 839.7662, "encoder_q-layer.4": 932.5943, "encoder_q-layer.5": 945.3496, "encoder_q-layer.6": 1028.7487, "encoder_q-layer.7": 1106.8254, "encoder_q-layer.8": 1255.2701, "encoder_q-layer.9": 1110.8691, "epoch": 0.5, "inbatch_neg_score": 0.0806, "inbatch_pos_score": 0.7578, "learning_rate": 2.6833333333333333e-05, "loss": 3.4246, "norm_diff": 0.0939, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1739.1826, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0806, "query_norm": 1.2842, "queue_k_norm": 1.3749, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5362, "sent_len_1": 66.6552, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0662, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 51700 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.4341, "doc_norm": 1.372, "encoder_q-embeddings": 1038.5792, "encoder_q-layer.0": 683.8088, "encoder_q-layer.1": 726.0693, "encoder_q-layer.10": 1313.8386, "encoder_q-layer.11": 2706.1082, "encoder_q-layer.2": 838.6794, "encoder_q-layer.3": 892.217, "encoder_q-layer.4": 937.9982, "encoder_q-layer.5": 1064.8898, "encoder_q-layer.6": 1233.0826, "encoder_q-layer.7": 1200.4735, "encoder_q-layer.8": 1311.421, "encoder_q-layer.9": 1168.2876, "epoch": 0.51, "inbatch_neg_score": 0.0747, "inbatch_pos_score": 0.6973, "learning_rate": 2.677777777777778e-05, "loss": 3.4341, "norm_diff": 0.1141, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1852.0145, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0746, "query_norm": 1.2579, "queue_k_norm": 1.3755, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5693, "sent_len_1": 66.7925, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.8913, "stdk": 0.0484, "stdq": 0.0443, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 51800 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.4259, "doc_norm": 1.3744, "encoder_q-embeddings": 981.927, "encoder_q-layer.0": 646.3043, "encoder_q-layer.1": 680.5529, "encoder_q-layer.10": 1309.8859, "encoder_q-layer.11": 2554.2986, "encoder_q-layer.2": 790.7012, "encoder_q-layer.3": 816.0157, "encoder_q-layer.4": 826.1254, "encoder_q-layer.5": 916.2997, "encoder_q-layer.6": 1022.139, "encoder_q-layer.7": 1087.9951, "encoder_q-layer.8": 1299.2405, "encoder_q-layer.9": 1160.9462, "epoch": 0.51, "inbatch_neg_score": 0.0763, "inbatch_pos_score": 0.728, "learning_rate": 2.6722222222222228e-05, "loss": 3.4259, "norm_diff": 0.1085, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1721.8262, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0764, "query_norm": 1.266, "queue_k_norm": 1.3747, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.3925, "sent_len_1": 66.9731, "sent_max_len_0": 128.0, "sent_max_len_1": 191.025, "stdk": 0.0485, "stdq": 0.0444, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 51900 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.4231, "doc_norm": 1.3795, "encoder_q-embeddings": 961.6373, "encoder_q-layer.0": 634.9868, "encoder_q-layer.1": 657.2492, "encoder_q-layer.10": 1254.5054, "encoder_q-layer.11": 2591.509, "encoder_q-layer.2": 717.6978, "encoder_q-layer.3": 743.3438, "encoder_q-layer.4": 786.3969, "encoder_q-layer.5": 801.0541, "encoder_q-layer.6": 905.9811, "encoder_q-layer.7": 1027.8213, "encoder_q-layer.8": 1221.7153, "encoder_q-layer.9": 1128.229, "epoch": 0.51, "inbatch_neg_score": 0.0757, "inbatch_pos_score": 0.7148, "learning_rate": 2.6666666666666667e-05, "loss": 3.4231, "norm_diff": 0.1069, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1697.3219, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0759, "query_norm": 1.2726, "queue_k_norm": 1.3744, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5707, "sent_len_1": 66.7221, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0925, "stdk": 0.0487, "stdq": 0.0445, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 52000 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.4236, "doc_norm": 1.3765, "encoder_q-embeddings": 1182.8428, "encoder_q-layer.0": 822.8549, "encoder_q-layer.1": 949.9822, "encoder_q-layer.10": 1168.2979, "encoder_q-layer.11": 2501.7864, "encoder_q-layer.2": 1090.4716, "encoder_q-layer.3": 1058.2577, "encoder_q-layer.4": 1041.4423, "encoder_q-layer.5": 1051.1938, "encoder_q-layer.6": 1084.1732, "encoder_q-layer.7": 1187.9316, "encoder_q-layer.8": 1311.873, "encoder_q-layer.9": 1162.6602, "epoch": 0.51, "inbatch_neg_score": 0.0829, "inbatch_pos_score": 0.7749, "learning_rate": 2.6611111111111116e-05, "loss": 3.4236, "norm_diff": 0.054, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1877.3784, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0826, "query_norm": 1.3226, "queue_k_norm": 1.3743, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5664, "sent_len_1": 66.6841, "sent_max_len_0": 127.995, "sent_max_len_1": 188.0012, "stdk": 0.0485, "stdq": 0.0462, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 52100 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.4205, "doc_norm": 1.3752, "encoder_q-embeddings": 1555.0001, "encoder_q-layer.0": 998.7541, "encoder_q-layer.1": 1078.8431, "encoder_q-layer.10": 1213.1467, "encoder_q-layer.11": 2640.8787, "encoder_q-layer.2": 1294.3676, "encoder_q-layer.3": 1456.0397, "encoder_q-layer.4": 1579.7897, "encoder_q-layer.5": 1501.9573, "encoder_q-layer.6": 1604.8162, "encoder_q-layer.7": 1487.406, "encoder_q-layer.8": 1513.566, "encoder_q-layer.9": 1212.3038, "epoch": 0.51, "inbatch_neg_score": 0.0819, "inbatch_pos_score": 0.7275, "learning_rate": 2.6555555555555555e-05, "loss": 3.4205, "norm_diff": 0.1021, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2224.6184, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0815, "query_norm": 1.273, "queue_k_norm": 1.3764, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6765, "sent_len_1": 66.6173, "sent_max_len_0": 127.995, "sent_max_len_1": 190.0687, "stdk": 0.0485, "stdq": 0.0444, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 52200 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.4069, "doc_norm": 1.3759, "encoder_q-embeddings": 1108.9803, "encoder_q-layer.0": 776.4315, "encoder_q-layer.1": 800.6837, "encoder_q-layer.10": 1182.3569, "encoder_q-layer.11": 2601.6133, "encoder_q-layer.2": 921.6266, "encoder_q-layer.3": 933.4803, "encoder_q-layer.4": 994.2538, "encoder_q-layer.5": 1023.6974, "encoder_q-layer.6": 1129.4421, "encoder_q-layer.7": 1232.6936, "encoder_q-layer.8": 1402.1989, "encoder_q-layer.9": 1190.45, "epoch": 0.51, "inbatch_neg_score": 0.0826, "inbatch_pos_score": 0.77, "learning_rate": 2.6500000000000004e-05, "loss": 3.4069, "norm_diff": 0.0564, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1850.4393, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0831, "query_norm": 1.3196, "queue_k_norm": 1.3754, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5785, "sent_len_1": 66.6614, "sent_max_len_0": 127.9813, "sent_max_len_1": 190.925, "stdk": 0.0485, "stdq": 0.0459, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 52300 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.4042, "doc_norm": 1.3738, "encoder_q-embeddings": 949.197, "encoder_q-layer.0": 635.5693, "encoder_q-layer.1": 675.0729, "encoder_q-layer.10": 1157.6385, "encoder_q-layer.11": 2587.8826, "encoder_q-layer.2": 752.9735, "encoder_q-layer.3": 787.996, "encoder_q-layer.4": 823.7699, "encoder_q-layer.5": 833.7372, "encoder_q-layer.6": 974.3846, "encoder_q-layer.7": 1059.8607, "encoder_q-layer.8": 1214.7047, "encoder_q-layer.9": 1143.3071, "epoch": 0.51, "inbatch_neg_score": 0.0897, "inbatch_pos_score": 0.7266, "learning_rate": 2.6444444444444443e-05, "loss": 3.4042, "norm_diff": 0.0951, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1719.1971, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0889, "query_norm": 1.2788, "queue_k_norm": 1.3754, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5352, "sent_len_1": 66.7406, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.6725, "stdk": 0.0484, "stdq": 0.0442, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 52400 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.4081, "doc_norm": 1.3747, "encoder_q-embeddings": 1082.5365, "encoder_q-layer.0": 718.4361, "encoder_q-layer.1": 769.9212, "encoder_q-layer.10": 1353.0394, "encoder_q-layer.11": 2746.9961, "encoder_q-layer.2": 846.6966, "encoder_q-layer.3": 880.4763, "encoder_q-layer.4": 960.4102, "encoder_q-layer.5": 1005.1828, "encoder_q-layer.6": 1086.2805, "encoder_q-layer.7": 1160.562, "encoder_q-layer.8": 1397.998, "encoder_q-layer.9": 1246.6813, "epoch": 0.51, "inbatch_neg_score": 0.0949, "inbatch_pos_score": 0.7534, "learning_rate": 2.6388888888888892e-05, "loss": 3.4081, "norm_diff": 0.0644, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1853.5508, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0955, "query_norm": 1.3104, "queue_k_norm": 1.376, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5642, "sent_len_1": 66.9597, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1163, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 52500 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.4279, "doc_norm": 1.3746, "encoder_q-embeddings": 1749.3423, "encoder_q-layer.0": 1204.8907, "encoder_q-layer.1": 1304.1698, "encoder_q-layer.10": 1134.1512, "encoder_q-layer.11": 2671.8308, "encoder_q-layer.2": 1612.5553, "encoder_q-layer.3": 1655.2776, "encoder_q-layer.4": 1687.7218, "encoder_q-layer.5": 1622.155, "encoder_q-layer.6": 1547.6113, "encoder_q-layer.7": 1551.2119, "encoder_q-layer.8": 1425.5974, "encoder_q-layer.9": 1102.4077, "epoch": 0.51, "inbatch_neg_score": 0.099, "inbatch_pos_score": 0.75, "learning_rate": 2.633333333333333e-05, "loss": 3.4279, "norm_diff": 0.0739, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2421.5331, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0991, "query_norm": 1.3007, "queue_k_norm": 1.3771, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4812, "sent_len_1": 66.837, "sent_max_len_0": 128.0, "sent_max_len_1": 188.21, "stdk": 0.0484, "stdq": 0.0444, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 52600 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.4071, "doc_norm": 1.374, "encoder_q-embeddings": 930.7278, "encoder_q-layer.0": 598.8253, "encoder_q-layer.1": 635.6402, "encoder_q-layer.10": 1247.5797, "encoder_q-layer.11": 2673.4128, "encoder_q-layer.2": 690.2636, "encoder_q-layer.3": 725.9344, "encoder_q-layer.4": 792.0555, "encoder_q-layer.5": 814.9105, "encoder_q-layer.6": 944.6433, "encoder_q-layer.7": 1090.6266, "encoder_q-layer.8": 1286.8259, "encoder_q-layer.9": 1168.2441, "epoch": 0.51, "inbatch_neg_score": 0.1088, "inbatch_pos_score": 0.7549, "learning_rate": 2.627777777777778e-05, "loss": 3.4071, "norm_diff": 0.0673, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1739.237, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1085, "query_norm": 1.3067, "queue_k_norm": 1.3763, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5946, "sent_len_1": 66.8316, "sent_max_len_0": 127.9887, "sent_max_len_1": 188.9863, "stdk": 0.0484, "stdq": 0.0444, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 52700 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.4271, "doc_norm": 1.3761, "encoder_q-embeddings": 1143.7441, "encoder_q-layer.0": 767.7645, "encoder_q-layer.1": 771.1997, "encoder_q-layer.10": 1218.6664, "encoder_q-layer.11": 2588.0242, "encoder_q-layer.2": 876.2143, "encoder_q-layer.3": 884.0453, "encoder_q-layer.4": 957.1294, "encoder_q-layer.5": 963.6417, "encoder_q-layer.6": 1053.2563, "encoder_q-layer.7": 1132.9259, "encoder_q-layer.8": 1289.3507, "encoder_q-layer.9": 1143.059, "epoch": 0.52, "inbatch_neg_score": 0.1091, "inbatch_pos_score": 0.7842, "learning_rate": 2.6222222222222226e-05, "loss": 3.4271, "norm_diff": 0.0497, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1827.3172, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.109, "query_norm": 1.3264, "queue_k_norm": 1.3748, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.375, "sent_len_1": 66.4776, "sent_max_len_0": 127.98, "sent_max_len_1": 188.5962, "stdk": 0.0484, "stdq": 0.0451, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 52800 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.4123, "doc_norm": 1.3779, "encoder_q-embeddings": 865.399, "encoder_q-layer.0": 580.9974, "encoder_q-layer.1": 623.4944, "encoder_q-layer.10": 1279.613, "encoder_q-layer.11": 2556.7197, "encoder_q-layer.2": 707.7552, "encoder_q-layer.3": 726.7812, "encoder_q-layer.4": 780.6286, "encoder_q-layer.5": 834.736, "encoder_q-layer.6": 905.5276, "encoder_q-layer.7": 1016.7511, "encoder_q-layer.8": 1222.6578, "encoder_q-layer.9": 1100.1887, "epoch": 0.52, "inbatch_neg_score": 0.117, "inbatch_pos_score": 0.7778, "learning_rate": 2.6166666666666668e-05, "loss": 3.4123, "norm_diff": 0.0562, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1643.1838, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1163, "query_norm": 1.3217, "queue_k_norm": 1.3793, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.3703, "sent_len_1": 66.6322, "sent_max_len_0": 127.995, "sent_max_len_1": 188.6662, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 52900 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.4084, "doc_norm": 1.3811, "encoder_q-embeddings": 1372.4189, "encoder_q-layer.0": 896.3552, "encoder_q-layer.1": 972.9694, "encoder_q-layer.10": 1121.6354, "encoder_q-layer.11": 2505.8845, "encoder_q-layer.2": 1066.3933, "encoder_q-layer.3": 1102.9164, "encoder_q-layer.4": 1107.1418, "encoder_q-layer.5": 1082.1405, "encoder_q-layer.6": 1166.2155, "encoder_q-layer.7": 1273.8801, "encoder_q-layer.8": 1314.2639, "encoder_q-layer.9": 1102.306, "epoch": 0.52, "inbatch_neg_score": 0.1209, "inbatch_pos_score": 0.7686, "learning_rate": 2.6111111111111114e-05, "loss": 3.4084, "norm_diff": 0.089, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1942.1509, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1213, "query_norm": 1.2921, "queue_k_norm": 1.3804, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6199, "sent_len_1": 66.8553, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.2012, "stdk": 0.0485, "stdq": 0.0438, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 53000 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.4154, "doc_norm": 1.3851, "encoder_q-embeddings": 958.8185, "encoder_q-layer.0": 651.9086, "encoder_q-layer.1": 695.1155, "encoder_q-layer.10": 1097.5868, "encoder_q-layer.11": 2734.3662, "encoder_q-layer.2": 808.6497, "encoder_q-layer.3": 833.1348, "encoder_q-layer.4": 887.1728, "encoder_q-layer.5": 899.5067, "encoder_q-layer.6": 1012.7991, "encoder_q-layer.7": 1061.5126, "encoder_q-layer.8": 1195.212, "encoder_q-layer.9": 1115.3817, "epoch": 0.52, "inbatch_neg_score": 0.1249, "inbatch_pos_score": 0.7505, "learning_rate": 2.6055555555555556e-05, "loss": 3.4154, "norm_diff": 0.0794, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1720.2146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1252, "query_norm": 1.3057, "queue_k_norm": 1.3799, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5547, "sent_len_1": 66.7009, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5575, "stdk": 0.0487, "stdq": 0.0445, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 53100 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.3863, "doc_norm": 1.3884, "encoder_q-embeddings": 1014.2026, "encoder_q-layer.0": 676.3602, "encoder_q-layer.1": 725.7214, "encoder_q-layer.10": 1182.7635, "encoder_q-layer.11": 2716.2764, "encoder_q-layer.2": 812.556, "encoder_q-layer.3": 831.3292, "encoder_q-layer.4": 891.1867, "encoder_q-layer.5": 944.3021, "encoder_q-layer.6": 1033.1975, "encoder_q-layer.7": 1213.6334, "encoder_q-layer.8": 1408.0009, "encoder_q-layer.9": 1167.7473, "epoch": 0.52, "inbatch_neg_score": 0.1205, "inbatch_pos_score": 0.7725, "learning_rate": 2.6000000000000002e-05, "loss": 3.3863, "norm_diff": 0.0833, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1825.3101, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1205, "query_norm": 1.3051, "queue_k_norm": 1.3846, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6831, "sent_len_1": 66.7909, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.36, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 53200 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4297, "doc_norm": 1.3762, "encoder_q-embeddings": 1275.0416, "encoder_q-layer.0": 889.6932, "encoder_q-layer.1": 923.2447, "encoder_q-layer.10": 1203.5605, "encoder_q-layer.11": 2683.1421, "encoder_q-layer.2": 1113.697, "encoder_q-layer.3": 1216.7614, "encoder_q-layer.4": 1378.1787, "encoder_q-layer.5": 1596.0975, "encoder_q-layer.6": 1769.1945, "encoder_q-layer.7": 1916.5629, "encoder_q-layer.8": 2197.4863, "encoder_q-layer.9": 1219.813, "epoch": 0.52, "inbatch_neg_score": 0.1192, "inbatch_pos_score": 0.7739, "learning_rate": 2.5944444444444444e-05, "loss": 3.4297, "norm_diff": 0.085, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2356.241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1196, "query_norm": 1.2913, "queue_k_norm": 1.3836, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4417, "sent_len_1": 66.6878, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2138, "stdk": 0.0483, "stdq": 0.0446, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 53300 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.4024, "doc_norm": 1.3888, "encoder_q-embeddings": 930.4224, "encoder_q-layer.0": 616.9965, "encoder_q-layer.1": 638.2123, "encoder_q-layer.10": 1122.9254, "encoder_q-layer.11": 2683.4434, "encoder_q-layer.2": 705.7187, "encoder_q-layer.3": 731.9866, "encoder_q-layer.4": 777.4758, "encoder_q-layer.5": 809.5891, "encoder_q-layer.6": 899.4291, "encoder_q-layer.7": 1002.2083, "encoder_q-layer.8": 1175.8157, "encoder_q-layer.9": 1112.3467, "epoch": 0.52, "inbatch_neg_score": 0.114, "inbatch_pos_score": 0.7627, "learning_rate": 2.588888888888889e-05, "loss": 3.4024, "norm_diff": 0.1049, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1672.2059, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1149, "query_norm": 1.2839, "queue_k_norm": 1.3842, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6972, "sent_len_1": 66.4615, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4588, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 53400 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.3929, "doc_norm": 1.394, "encoder_q-embeddings": 1043.5905, "encoder_q-layer.0": 670.6132, "encoder_q-layer.1": 699.2985, "encoder_q-layer.10": 1218.1841, "encoder_q-layer.11": 2728.0884, "encoder_q-layer.2": 765.0612, "encoder_q-layer.3": 797.0027, "encoder_q-layer.4": 836.7615, "encoder_q-layer.5": 862.8511, "encoder_q-layer.6": 952.3961, "encoder_q-layer.7": 1052.7388, "encoder_q-layer.8": 1260.1205, "encoder_q-layer.9": 1174.2554, "epoch": 0.52, "inbatch_neg_score": 0.111, "inbatch_pos_score": 0.7583, "learning_rate": 2.5833333333333336e-05, "loss": 3.3929, "norm_diff": 0.1206, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1806.2126, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1101, "query_norm": 1.2734, "queue_k_norm": 1.3876, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6175, "sent_len_1": 66.7752, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3963, "stdk": 0.0488, "stdq": 0.0445, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 53500 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.4074, "doc_norm": 1.3867, "encoder_q-embeddings": 899.7195, "encoder_q-layer.0": 580.2563, "encoder_q-layer.1": 627.4794, "encoder_q-layer.10": 1063.6683, "encoder_q-layer.11": 2526.9329, "encoder_q-layer.2": 700.2386, "encoder_q-layer.3": 720.4841, "encoder_q-layer.4": 753.4067, "encoder_q-layer.5": 807.6135, "encoder_q-layer.6": 922.205, "encoder_q-layer.7": 1035.6508, "encoder_q-layer.8": 1187.5641, "encoder_q-layer.9": 1057.2581, "epoch": 0.52, "inbatch_neg_score": 0.104, "inbatch_pos_score": 0.7861, "learning_rate": 2.5777777777777778e-05, "loss": 3.4074, "norm_diff": 0.098, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1603.9906, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1047, "query_norm": 1.2887, "queue_k_norm": 1.3867, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.75, "sent_len_1": 66.9583, "sent_max_len_0": 128.0, "sent_max_len_1": 192.8, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 53600 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.4001, "doc_norm": 1.3822, "encoder_q-embeddings": 4471.1904, "encoder_q-layer.0": 3180.5786, "encoder_q-layer.1": 3291.1821, "encoder_q-layer.10": 2267.2712, "encoder_q-layer.11": 5161.5571, "encoder_q-layer.2": 3446.6653, "encoder_q-layer.3": 3273.1707, "encoder_q-layer.4": 3381.1033, "encoder_q-layer.5": 3484.949, "encoder_q-layer.6": 4088.0986, "encoder_q-layer.7": 4043.6401, "encoder_q-layer.8": 4740.9116, "encoder_q-layer.9": 2396.1533, "epoch": 0.52, "inbatch_neg_score": 0.0951, "inbatch_pos_score": 0.7305, "learning_rate": 2.5722222222222224e-05, "loss": 3.4001, "norm_diff": 0.1319, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5687.2195, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0963, "query_norm": 1.2503, "queue_k_norm": 1.3851, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6222, "sent_len_1": 66.843, "sent_max_len_0": 127.99, "sent_max_len_1": 189.9412, "stdk": 0.0484, "stdq": 0.044, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 53700 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.4129, "doc_norm": 1.3801, "encoder_q-embeddings": 1956.5608, "encoder_q-layer.0": 1316.3237, "encoder_q-layer.1": 1361.8318, "encoder_q-layer.10": 2506.4531, "encoder_q-layer.11": 5524.8975, "encoder_q-layer.2": 1566.8229, "encoder_q-layer.3": 1585.5308, "encoder_q-layer.4": 1752.2123, "encoder_q-layer.5": 1783.1118, "encoder_q-layer.6": 2024.071, "encoder_q-layer.7": 2313.6318, "encoder_q-layer.8": 2658.4736, "encoder_q-layer.9": 2480.259, "epoch": 0.53, "inbatch_neg_score": 0.0923, "inbatch_pos_score": 0.7446, "learning_rate": 2.5666666666666666e-05, "loss": 3.4129, "norm_diff": 0.095, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3566.844, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0934, "query_norm": 1.2851, "queue_k_norm": 1.3876, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5422, "sent_len_1": 66.6775, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0675, "stdk": 0.0484, "stdq": 0.0454, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 53800 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.4257, "doc_norm": 1.3825, "encoder_q-embeddings": 1963.3568, "encoder_q-layer.0": 1278.443, "encoder_q-layer.1": 1351.5891, "encoder_q-layer.10": 2265.0891, "encoder_q-layer.11": 5567.7065, "encoder_q-layer.2": 1502.4718, "encoder_q-layer.3": 1632.9359, "encoder_q-layer.4": 1725.1909, "encoder_q-layer.5": 1791.9393, "encoder_q-layer.6": 1978.6027, "encoder_q-layer.7": 2168.9709, "encoder_q-layer.8": 2664.0361, "encoder_q-layer.9": 2332.1689, "epoch": 0.53, "inbatch_neg_score": 0.0874, "inbatch_pos_score": 0.7256, "learning_rate": 2.5611111111111115e-05, "loss": 3.4257, "norm_diff": 0.1362, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3603.4888, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0875, "query_norm": 1.2463, "queue_k_norm": 1.3854, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4888, "sent_len_1": 66.6209, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1788, "stdk": 0.0485, "stdq": 0.0439, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 53900 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.4006, "doc_norm": 1.3899, "encoder_q-embeddings": 1968.2732, "encoder_q-layer.0": 1280.4207, "encoder_q-layer.1": 1385.0217, "encoder_q-layer.10": 2359.0027, "encoder_q-layer.11": 5290.5308, "encoder_q-layer.2": 1813.8005, "encoder_q-layer.3": 1899.8623, "encoder_q-layer.4": 2001.0531, "encoder_q-layer.5": 1922.6852, "encoder_q-layer.6": 2126.6504, "encoder_q-layer.7": 2451.3625, "encoder_q-layer.8": 2605.5349, "encoder_q-layer.9": 2293.1372, "epoch": 0.53, "inbatch_neg_score": 0.085, "inbatch_pos_score": 0.7632, "learning_rate": 2.5555555555555554e-05, "loss": 3.4006, "norm_diff": 0.1128, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3658.3027, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.086, "query_norm": 1.277, "queue_k_norm": 1.3853, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5708, "sent_len_1": 66.8366, "sent_max_len_0": 128.0, "sent_max_len_1": 187.48, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 54000 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.3943, "doc_norm": 1.3832, "encoder_q-embeddings": 1741.9795, "encoder_q-layer.0": 1188.1545, "encoder_q-layer.1": 1269.0089, "encoder_q-layer.10": 2232.1782, "encoder_q-layer.11": 5235.6836, "encoder_q-layer.2": 1387.516, "encoder_q-layer.3": 1451.2415, "encoder_q-layer.4": 1500.5449, "encoder_q-layer.5": 1665.0833, "encoder_q-layer.6": 1866.6896, "encoder_q-layer.7": 2212.1621, "encoder_q-layer.8": 2453.1997, "encoder_q-layer.9": 2204.4551, "epoch": 0.53, "inbatch_neg_score": 0.082, "inbatch_pos_score": 0.7334, "learning_rate": 2.5500000000000003e-05, "loss": 3.3943, "norm_diff": 0.1222, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3380.3343, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0823, "query_norm": 1.261, "queue_k_norm": 1.3845, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6826, "sent_len_1": 66.683, "sent_max_len_0": 127.9975, "sent_max_len_1": 188.7562, "stdk": 0.0485, "stdq": 0.0445, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 54100 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3959, "doc_norm": 1.3775, "encoder_q-embeddings": 2494.1855, "encoder_q-layer.0": 1630.8517, "encoder_q-layer.1": 1773.2217, "encoder_q-layer.10": 2327.6047, "encoder_q-layer.11": 5446.3501, "encoder_q-layer.2": 2030.704, "encoder_q-layer.3": 2205.9512, "encoder_q-layer.4": 2487.811, "encoder_q-layer.5": 2587.7634, "encoder_q-layer.6": 2672.6382, "encoder_q-layer.7": 2866.2983, "encoder_q-layer.8": 2755.5945, "encoder_q-layer.9": 2357.2495, "epoch": 0.53, "inbatch_neg_score": 0.0818, "inbatch_pos_score": 0.7388, "learning_rate": 2.5444444444444442e-05, "loss": 3.3959, "norm_diff": 0.0944, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4020.1982, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0834, "query_norm": 1.2831, "queue_k_norm": 1.3836, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.586, "sent_len_1": 66.719, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0412, "stdk": 0.0484, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 54200 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.4144, "doc_norm": 1.3713, "encoder_q-embeddings": 2176.8647, "encoder_q-layer.0": 1467.6595, "encoder_q-layer.1": 1587.6914, "encoder_q-layer.10": 2465.4902, "encoder_q-layer.11": 5541.7568, "encoder_q-layer.2": 1809.5598, "encoder_q-layer.3": 1888.3333, "encoder_q-layer.4": 2022.0295, "encoder_q-layer.5": 2026.5612, "encoder_q-layer.6": 2213.7488, "encoder_q-layer.7": 2306.512, "encoder_q-layer.8": 2701.3916, "encoder_q-layer.9": 2526.8647, "epoch": 0.53, "inbatch_neg_score": 0.0804, "inbatch_pos_score": 0.7446, "learning_rate": 2.538888888888889e-05, "loss": 3.4144, "norm_diff": 0.0924, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3770.885, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0801, "query_norm": 1.279, "queue_k_norm": 1.3836, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5808, "sent_len_1": 66.7149, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.6538, "stdk": 0.0481, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 54300 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.3791, "doc_norm": 1.3809, "encoder_q-embeddings": 2178.9255, "encoder_q-layer.0": 1376.5001, "encoder_q-layer.1": 1505.5405, "encoder_q-layer.10": 2429.228, "encoder_q-layer.11": 5149.2646, "encoder_q-layer.2": 1626.7052, "encoder_q-layer.3": 1681.9629, "encoder_q-layer.4": 1769.9843, "encoder_q-layer.5": 1894.1248, "encoder_q-layer.6": 2082.6917, "encoder_q-layer.7": 2257.5286, "encoder_q-layer.8": 2520.864, "encoder_q-layer.9": 2236.5913, "epoch": 0.53, "inbatch_neg_score": 0.0805, "inbatch_pos_score": 0.7446, "learning_rate": 2.5333333333333337e-05, "loss": 3.3791, "norm_diff": 0.0986, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3594.8413, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0806, "query_norm": 1.2823, "queue_k_norm": 1.383, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.8352, "sent_len_1": 66.8869, "sent_max_len_0": 128.0, "sent_max_len_1": 189.995, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 54400 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.3874, "doc_norm": 1.3826, "encoder_q-embeddings": 2001.5297, "encoder_q-layer.0": 1343.5199, "encoder_q-layer.1": 1430.1863, "encoder_q-layer.10": 2602.3953, "encoder_q-layer.11": 5426.0361, "encoder_q-layer.2": 1552.9949, "encoder_q-layer.3": 1560.869, "encoder_q-layer.4": 1731.9862, "encoder_q-layer.5": 1779.1649, "encoder_q-layer.6": 1941.1279, "encoder_q-layer.7": 2190.0239, "encoder_q-layer.8": 2616.7214, "encoder_q-layer.9": 2443.0403, "epoch": 0.53, "inbatch_neg_score": 0.0773, "inbatch_pos_score": 0.7642, "learning_rate": 2.527777777777778e-05, "loss": 3.3874, "norm_diff": 0.0799, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3537.9012, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0769, "query_norm": 1.3027, "queue_k_norm": 1.3845, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.8702, "sent_len_1": 66.8353, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.8875, "stdk": 0.0486, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 54500 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.4, "doc_norm": 1.3759, "encoder_q-embeddings": 1907.8992, "encoder_q-layer.0": 1285.9739, "encoder_q-layer.1": 1369.1422, "encoder_q-layer.10": 2294.1438, "encoder_q-layer.11": 5364.8579, "encoder_q-layer.2": 1523.8281, "encoder_q-layer.3": 1614.7345, "encoder_q-layer.4": 1699.8734, "encoder_q-layer.5": 1713.3297, "encoder_q-layer.6": 1855.2734, "encoder_q-layer.7": 2056.1494, "encoder_q-layer.8": 2408.9973, "encoder_q-layer.9": 2212.2046, "epoch": 0.53, "inbatch_neg_score": 0.0759, "inbatch_pos_score": 0.7134, "learning_rate": 2.5222222222222225e-05, "loss": 3.4, "norm_diff": 0.1057, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3402.6317, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0753, "query_norm": 1.2702, "queue_k_norm": 1.3815, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5587, "sent_len_1": 66.9172, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8887, "stdk": 0.0484, "stdq": 0.0447, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 54600 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.4066, "doc_norm": 1.3816, "encoder_q-embeddings": 2034.7661, "encoder_q-layer.0": 1307.1986, "encoder_q-layer.1": 1361.2866, "encoder_q-layer.10": 2524.7546, "encoder_q-layer.11": 5621.0117, "encoder_q-layer.2": 1513.6122, "encoder_q-layer.3": 1587.2646, "encoder_q-layer.4": 1672.8275, "encoder_q-layer.5": 1821.4542, "encoder_q-layer.6": 1965.1567, "encoder_q-layer.7": 2241.2615, "encoder_q-layer.8": 2839.3137, "encoder_q-layer.9": 2550.8218, "epoch": 0.53, "inbatch_neg_score": 0.0697, "inbatch_pos_score": 0.73, "learning_rate": 2.5166666666666667e-05, "loss": 3.4066, "norm_diff": 0.1175, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3623.8479, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0704, "query_norm": 1.2641, "queue_k_norm": 1.3812, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6933, "sent_len_1": 67.0134, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8475, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 54700 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.3921, "doc_norm": 1.3838, "encoder_q-embeddings": 1917.5026, "encoder_q-layer.0": 1279.561, "encoder_q-layer.1": 1318.7089, "encoder_q-layer.10": 2149.2471, "encoder_q-layer.11": 5194.0845, "encoder_q-layer.2": 1468.9532, "encoder_q-layer.3": 1556.5505, "encoder_q-layer.4": 1659.6138, "encoder_q-layer.5": 1703.6818, "encoder_q-layer.6": 1925.2438, "encoder_q-layer.7": 2124.3262, "encoder_q-layer.8": 2370.9771, "encoder_q-layer.9": 2156.3491, "epoch": 0.54, "inbatch_neg_score": 0.0744, "inbatch_pos_score": 0.7734, "learning_rate": 2.5111111111111113e-05, "loss": 3.3921, "norm_diff": 0.0846, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3390.657, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.074, "query_norm": 1.2992, "queue_k_norm": 1.3813, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6523, "sent_len_1": 66.7411, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.4638, "stdk": 0.0487, "stdq": 0.0455, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 54800 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3885, "doc_norm": 1.3793, "encoder_q-embeddings": 1932.0464, "encoder_q-layer.0": 1251.6646, "encoder_q-layer.1": 1334.6322, "encoder_q-layer.10": 2521.6558, "encoder_q-layer.11": 5431.915, "encoder_q-layer.2": 1538.1248, "encoder_q-layer.3": 1554.8561, "encoder_q-layer.4": 1652.1738, "encoder_q-layer.5": 1838.1316, "encoder_q-layer.6": 2045.6121, "encoder_q-layer.7": 2206.8513, "encoder_q-layer.8": 2651.698, "encoder_q-layer.9": 2392.4619, "epoch": 0.54, "inbatch_neg_score": 0.0786, "inbatch_pos_score": 0.7559, "learning_rate": 2.5055555555555555e-05, "loss": 3.3885, "norm_diff": 0.0693, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3547.0309, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0781, "query_norm": 1.3101, "queue_k_norm": 1.3804, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5381, "sent_len_1": 66.9471, "sent_max_len_0": 127.995, "sent_max_len_1": 190.4613, "stdk": 0.0485, "stdq": 0.0457, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 54900 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.4033, "doc_norm": 1.3736, "encoder_q-embeddings": 1559.6681, "encoder_q-layer.0": 1141.3173, "encoder_q-layer.1": 1156.4301, "encoder_q-layer.10": 1181.2816, "encoder_q-layer.11": 2550.3521, "encoder_q-layer.2": 1362.329, "encoder_q-layer.3": 1312.7651, "encoder_q-layer.4": 1351.5828, "encoder_q-layer.5": 1315.3204, "encoder_q-layer.6": 1343.3165, "encoder_q-layer.7": 1479.692, "encoder_q-layer.8": 1352.2523, "encoder_q-layer.9": 1137.8712, "epoch": 0.54, "inbatch_neg_score": 0.0757, "inbatch_pos_score": 0.7466, "learning_rate": 2.5e-05, "loss": 3.4033, "norm_diff": 0.0815, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2176.4669, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0757, "query_norm": 1.2921, "queue_k_norm": 1.3798, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6943, "sent_len_1": 66.6046, "sent_max_len_0": 127.9925, "sent_max_len_1": 188.8275, "stdk": 0.0483, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 55000 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.4078, "doc_norm": 1.3794, "encoder_q-embeddings": 994.4346, "encoder_q-layer.0": 642.5956, "encoder_q-layer.1": 655.3304, "encoder_q-layer.10": 1284.5687, "encoder_q-layer.11": 2612.71, "encoder_q-layer.2": 753.9192, "encoder_q-layer.3": 775.6525, "encoder_q-layer.4": 835.5107, "encoder_q-layer.5": 883.2537, "encoder_q-layer.6": 1008.7103, "encoder_q-layer.7": 1153.63, "encoder_q-layer.8": 1333.1687, "encoder_q-layer.9": 1192.5498, "epoch": 0.54, "inbatch_neg_score": 0.0767, "inbatch_pos_score": 0.7378, "learning_rate": 2.4944444444444447e-05, "loss": 3.4078, "norm_diff": 0.0715, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1753.0944, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0762, "query_norm": 1.3079, "queue_k_norm": 1.379, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5148, "sent_len_1": 66.7338, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5175, "stdk": 0.0486, "stdq": 0.0454, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 55100 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.4126, "doc_norm": 1.3794, "encoder_q-embeddings": 993.1935, "encoder_q-layer.0": 681.7603, "encoder_q-layer.1": 714.2084, "encoder_q-layer.10": 1170.1497, "encoder_q-layer.11": 2621.7764, "encoder_q-layer.2": 808.6979, "encoder_q-layer.3": 846.0272, "encoder_q-layer.4": 913.7534, "encoder_q-layer.5": 958.8124, "encoder_q-layer.6": 1057.9207, "encoder_q-layer.7": 1156.7444, "encoder_q-layer.8": 1353.506, "encoder_q-layer.9": 1137.2354, "epoch": 0.54, "inbatch_neg_score": 0.0778, "inbatch_pos_score": 0.7275, "learning_rate": 2.488888888888889e-05, "loss": 3.4126, "norm_diff": 0.0919, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1784.4905, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0779, "query_norm": 1.2874, "queue_k_norm": 1.377, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5321, "sent_len_1": 66.7987, "sent_max_len_0": 128.0, "sent_max_len_1": 191.535, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 55200 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.3906, "doc_norm": 1.3831, "encoder_q-embeddings": 932.7383, "encoder_q-layer.0": 622.6948, "encoder_q-layer.1": 643.5336, "encoder_q-layer.10": 1109.6536, "encoder_q-layer.11": 2519.9849, "encoder_q-layer.2": 711.1971, "encoder_q-layer.3": 736.2167, "encoder_q-layer.4": 789.9405, "encoder_q-layer.5": 832.2885, "encoder_q-layer.6": 901.9156, "encoder_q-layer.7": 1036.625, "encoder_q-layer.8": 1254.0887, "encoder_q-layer.9": 1109.9568, "epoch": 0.54, "inbatch_neg_score": 0.0783, "inbatch_pos_score": 0.7773, "learning_rate": 2.4833333333333335e-05, "loss": 3.3906, "norm_diff": 0.048, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1665.3989, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0779, "query_norm": 1.335, "queue_k_norm": 1.3776, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5493, "sent_len_1": 66.7135, "sent_max_len_0": 127.9912, "sent_max_len_1": 188.9175, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 55300 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.4186, "doc_norm": 1.3764, "encoder_q-embeddings": 1099.2673, "encoder_q-layer.0": 734.9485, "encoder_q-layer.1": 771.9808, "encoder_q-layer.10": 1243.7164, "encoder_q-layer.11": 2598.1375, "encoder_q-layer.2": 875.6302, "encoder_q-layer.3": 922.3678, "encoder_q-layer.4": 941.2164, "encoder_q-layer.5": 981.2352, "encoder_q-layer.6": 1139.9696, "encoder_q-layer.7": 1224.8112, "encoder_q-layer.8": 1312.5413, "encoder_q-layer.9": 1108.1769, "epoch": 0.54, "inbatch_neg_score": 0.0834, "inbatch_pos_score": 0.7441, "learning_rate": 2.477777777777778e-05, "loss": 3.4186, "norm_diff": 0.0591, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1833.165, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0827, "query_norm": 1.3173, "queue_k_norm": 1.3787, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6406, "sent_len_1": 66.6602, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0687, "stdk": 0.0485, "stdq": 0.0455, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 55400 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.4072, "doc_norm": 1.3883, "encoder_q-embeddings": 983.2309, "encoder_q-layer.0": 634.1633, "encoder_q-layer.1": 679.3674, "encoder_q-layer.10": 1165.7311, "encoder_q-layer.11": 2588.9646, "encoder_q-layer.2": 775.4788, "encoder_q-layer.3": 811.1057, "encoder_q-layer.4": 864.0355, "encoder_q-layer.5": 932.6759, "encoder_q-layer.6": 997.9781, "encoder_q-layer.7": 1104.9502, "encoder_q-layer.8": 1307.0858, "encoder_q-layer.9": 1164.9498, "epoch": 0.54, "inbatch_neg_score": 0.0875, "inbatch_pos_score": 0.771, "learning_rate": 2.4722222222222223e-05, "loss": 3.4072, "norm_diff": 0.0573, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1718.2075, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0869, "query_norm": 1.3309, "queue_k_norm": 1.3756, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.2912, "sent_len_1": 66.6738, "sent_max_len_0": 128.0, "sent_max_len_1": 189.765, "stdk": 0.0489, "stdq": 0.0457, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 55500 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.4201, "doc_norm": 1.3879, "encoder_q-embeddings": 976.0988, "encoder_q-layer.0": 628.9004, "encoder_q-layer.1": 671.8848, "encoder_q-layer.10": 1298.749, "encoder_q-layer.11": 2672.0386, "encoder_q-layer.2": 748.6497, "encoder_q-layer.3": 769.3188, "encoder_q-layer.4": 813.555, "encoder_q-layer.5": 841.5255, "encoder_q-layer.6": 970.8179, "encoder_q-layer.7": 1061.8127, "encoder_q-layer.8": 1295.6012, "encoder_q-layer.9": 1161.1697, "epoch": 0.54, "inbatch_neg_score": 0.0879, "inbatch_pos_score": 0.7432, "learning_rate": 2.466666666666667e-05, "loss": 3.4201, "norm_diff": 0.0948, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1744.4351, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0881, "query_norm": 1.2932, "queue_k_norm": 1.378, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.3598, "sent_len_1": 66.7063, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.2713, "stdk": 0.0489, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 55600 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.4112, "doc_norm": 1.3807, "encoder_q-embeddings": 1079.8041, "encoder_q-layer.0": 761.0988, "encoder_q-layer.1": 818.184, "encoder_q-layer.10": 1132.1453, "encoder_q-layer.11": 2643.5005, "encoder_q-layer.2": 896.7754, "encoder_q-layer.3": 909.9016, "encoder_q-layer.4": 1001.4515, "encoder_q-layer.5": 1029.7368, "encoder_q-layer.6": 1087.5515, "encoder_q-layer.7": 1207.4701, "encoder_q-layer.8": 1334.9122, "encoder_q-layer.9": 1126.3341, "epoch": 0.54, "inbatch_neg_score": 0.0939, "inbatch_pos_score": 0.7417, "learning_rate": 2.461111111111111e-05, "loss": 3.4112, "norm_diff": 0.0956, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1823.4235, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0928, "query_norm": 1.2851, "queue_k_norm": 1.3769, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4941, "sent_len_1": 66.7941, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7375, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 55700 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.4153, "doc_norm": 1.3785, "encoder_q-embeddings": 943.322, "encoder_q-layer.0": 622.1273, "encoder_q-layer.1": 662.7246, "encoder_q-layer.10": 1198.004, "encoder_q-layer.11": 2754.7112, "encoder_q-layer.2": 750.0956, "encoder_q-layer.3": 750.488, "encoder_q-layer.4": 797.2441, "encoder_q-layer.5": 806.0763, "encoder_q-layer.6": 1007.2919, "encoder_q-layer.7": 1118.6281, "encoder_q-layer.8": 1258.4452, "encoder_q-layer.9": 1212.7112, "epoch": 0.54, "inbatch_neg_score": 0.0887, "inbatch_pos_score": 0.7271, "learning_rate": 2.4555555555555557e-05, "loss": 3.4153, "norm_diff": 0.0993, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1761.1447, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0887, "query_norm": 1.2792, "queue_k_norm": 1.3781, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5217, "sent_len_1": 66.7673, "sent_max_len_0": 127.9988, "sent_max_len_1": 188.7237, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 55800 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.4102, "doc_norm": 1.3715, "encoder_q-embeddings": 1007.0008, "encoder_q-layer.0": 637.4305, "encoder_q-layer.1": 669.0706, "encoder_q-layer.10": 1246.0177, "encoder_q-layer.11": 2633.4058, "encoder_q-layer.2": 749.2382, "encoder_q-layer.3": 782.0515, "encoder_q-layer.4": 836.5684, "encoder_q-layer.5": 844.1124, "encoder_q-layer.6": 1002.5527, "encoder_q-layer.7": 1091.6206, "encoder_q-layer.8": 1325.425, "encoder_q-layer.9": 1198.8347, "epoch": 0.55, "inbatch_neg_score": 0.0924, "inbatch_pos_score": 0.7554, "learning_rate": 2.45e-05, "loss": 3.4102, "norm_diff": 0.0797, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1729.8261, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0926, "query_norm": 1.2918, "queue_k_norm": 1.3762, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4869, "sent_len_1": 66.6405, "sent_max_len_0": 127.9875, "sent_max_len_1": 188.2337, "stdk": 0.0483, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 55900 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.3942, "doc_norm": 1.3836, "encoder_q-embeddings": 1005.3724, "encoder_q-layer.0": 679.7531, "encoder_q-layer.1": 731.3927, "encoder_q-layer.10": 1185.1337, "encoder_q-layer.11": 2632.3906, "encoder_q-layer.2": 855.8262, "encoder_q-layer.3": 847.771, "encoder_q-layer.4": 881.3544, "encoder_q-layer.5": 867.3369, "encoder_q-layer.6": 965.8864, "encoder_q-layer.7": 1077.8372, "encoder_q-layer.8": 1282.0011, "encoder_q-layer.9": 1170.4194, "epoch": 0.55, "inbatch_neg_score": 0.0863, "inbatch_pos_score": 0.7583, "learning_rate": 2.4444444444444445e-05, "loss": 3.3942, "norm_diff": 0.1039, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1748.5205, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.088, "query_norm": 1.2797, "queue_k_norm": 1.379, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.585, "sent_len_1": 66.9252, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 56000 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3929, "doc_norm": 1.3746, "encoder_q-embeddings": 982.6184, "encoder_q-layer.0": 621.835, "encoder_q-layer.1": 647.9423, "encoder_q-layer.10": 1096.047, "encoder_q-layer.11": 2642.5713, "encoder_q-layer.2": 721.3873, "encoder_q-layer.3": 735.6738, "encoder_q-layer.4": 832.5068, "encoder_q-layer.5": 885.2884, "encoder_q-layer.6": 937.3947, "encoder_q-layer.7": 1122.8492, "encoder_q-layer.8": 1271.9701, "encoder_q-layer.9": 1129.5464, "epoch": 0.55, "inbatch_neg_score": 0.0863, "inbatch_pos_score": 0.7319, "learning_rate": 2.4388888888888887e-05, "loss": 3.3929, "norm_diff": 0.1088, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1705.0785, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0872, "query_norm": 1.2658, "queue_k_norm": 1.3784, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6797, "sent_len_1": 67.0115, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4963, "stdk": 0.0484, "stdq": 0.0445, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 56100 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3946, "doc_norm": 1.3694, "encoder_q-embeddings": 1008.1173, "encoder_q-layer.0": 646.8773, "encoder_q-layer.1": 726.3981, "encoder_q-layer.10": 1134.0305, "encoder_q-layer.11": 2544.7683, "encoder_q-layer.2": 840.1349, "encoder_q-layer.3": 881.5953, "encoder_q-layer.4": 994.2336, "encoder_q-layer.5": 976.9236, "encoder_q-layer.6": 1082.618, "encoder_q-layer.7": 1180.4191, "encoder_q-layer.8": 1379.7351, "encoder_q-layer.9": 1164.5962, "epoch": 0.55, "inbatch_neg_score": 0.0854, "inbatch_pos_score": 0.751, "learning_rate": 2.4333333333333336e-05, "loss": 3.3946, "norm_diff": 0.084, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1765.9291, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0843, "query_norm": 1.2853, "queue_k_norm": 1.3784, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.8442, "sent_len_1": 66.4293, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.9263, "stdk": 0.0482, "stdq": 0.0454, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 56200 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.3939, "doc_norm": 1.3832, "encoder_q-embeddings": 1028.958, "encoder_q-layer.0": 677.1393, "encoder_q-layer.1": 727.1649, "encoder_q-layer.10": 1277.0853, "encoder_q-layer.11": 2612.0664, "encoder_q-layer.2": 812.4371, "encoder_q-layer.3": 809.2349, "encoder_q-layer.4": 926.6147, "encoder_q-layer.5": 925.5938, "encoder_q-layer.6": 1043.9734, "encoder_q-layer.7": 1165.0199, "encoder_q-layer.8": 1369.5741, "encoder_q-layer.9": 1206.3518, "epoch": 0.55, "inbatch_neg_score": 0.0834, "inbatch_pos_score": 0.75, "learning_rate": 2.427777777777778e-05, "loss": 3.3939, "norm_diff": 0.096, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1761.0015, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0837, "query_norm": 1.2872, "queue_k_norm": 1.3797, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.8482, "sent_len_1": 66.7928, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.2713, "stdk": 0.0487, "stdq": 0.0455, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 56300 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.379, "doc_norm": 1.3874, "encoder_q-embeddings": 1046.3315, "encoder_q-layer.0": 669.3562, "encoder_q-layer.1": 706.1417, "encoder_q-layer.10": 1180.5022, "encoder_q-layer.11": 2529.8591, "encoder_q-layer.2": 796.3808, "encoder_q-layer.3": 834.2219, "encoder_q-layer.4": 880.3005, "encoder_q-layer.5": 891.0825, "encoder_q-layer.6": 939.0714, "encoder_q-layer.7": 1064.8579, "encoder_q-layer.8": 1270.8439, "encoder_q-layer.9": 1122.1797, "epoch": 0.55, "inbatch_neg_score": 0.0857, "inbatch_pos_score": 0.7456, "learning_rate": 2.4222222222222224e-05, "loss": 3.379, "norm_diff": 0.1087, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1723.8029, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.086, "query_norm": 1.2787, "queue_k_norm": 1.377, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.729, "sent_len_1": 66.6555, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4775, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 56400 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3856, "doc_norm": 1.3747, "encoder_q-embeddings": 945.2195, "encoder_q-layer.0": 626.7598, "encoder_q-layer.1": 667.1107, "encoder_q-layer.10": 1174.8569, "encoder_q-layer.11": 2610.0728, "encoder_q-layer.2": 730.299, "encoder_q-layer.3": 777.1066, "encoder_q-layer.4": 824.853, "encoder_q-layer.5": 845.3652, "encoder_q-layer.6": 949.3774, "encoder_q-layer.7": 1076.2847, "encoder_q-layer.8": 1272.1074, "encoder_q-layer.9": 1172.2356, "epoch": 0.55, "inbatch_neg_score": 0.0904, "inbatch_pos_score": 0.7109, "learning_rate": 2.4166666666666667e-05, "loss": 3.3856, "norm_diff": 0.1163, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1708.8425, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0887, "query_norm": 1.2584, "queue_k_norm": 1.3778, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6628, "sent_len_1": 66.8033, "sent_max_len_0": 128.0, "sent_max_len_1": 189.19, "stdk": 0.0484, "stdq": 0.0442, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 56500 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.3894, "doc_norm": 1.374, "encoder_q-embeddings": 939.4247, "encoder_q-layer.0": 613.9175, "encoder_q-layer.1": 649.4704, "encoder_q-layer.10": 1058.8992, "encoder_q-layer.11": 2529.322, "encoder_q-layer.2": 712.2346, "encoder_q-layer.3": 734.6417, "encoder_q-layer.4": 759.9347, "encoder_q-layer.5": 821.1094, "encoder_q-layer.6": 905.8118, "encoder_q-layer.7": 1057.3999, "encoder_q-layer.8": 1176.5763, "encoder_q-layer.9": 1087.4607, "epoch": 0.55, "inbatch_neg_score": 0.085, "inbatch_pos_score": 0.7598, "learning_rate": 2.4111111111111113e-05, "loss": 3.3894, "norm_diff": 0.1062, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1643.1674, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0839, "query_norm": 1.2678, "queue_k_norm": 1.3793, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.8084, "sent_len_1": 66.9434, "sent_max_len_0": 127.9975, "sent_max_len_1": 191.25, "stdk": 0.0484, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 56600 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.3956, "doc_norm": 1.3799, "encoder_q-embeddings": 974.7713, "encoder_q-layer.0": 686.5199, "encoder_q-layer.1": 736.369, "encoder_q-layer.10": 1175.6177, "encoder_q-layer.11": 2748.9399, "encoder_q-layer.2": 827.8372, "encoder_q-layer.3": 875.5425, "encoder_q-layer.4": 918.7949, "encoder_q-layer.5": 977.3126, "encoder_q-layer.6": 1066.202, "encoder_q-layer.7": 1173.507, "encoder_q-layer.8": 1336.0076, "encoder_q-layer.9": 1207.0737, "epoch": 0.55, "inbatch_neg_score": 0.0876, "inbatch_pos_score": 0.7598, "learning_rate": 2.4055555555555555e-05, "loss": 3.3956, "norm_diff": 0.0851, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1817.4202, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0877, "query_norm": 1.2948, "queue_k_norm": 1.3795, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5156, "sent_len_1": 66.7352, "sent_max_len_0": 128.0, "sent_max_len_1": 190.995, "stdk": 0.0486, "stdq": 0.0455, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 56700 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3735, "doc_norm": 1.3786, "encoder_q-embeddings": 899.1491, "encoder_q-layer.0": 608.7337, "encoder_q-layer.1": 623.179, "encoder_q-layer.10": 1126.9198, "encoder_q-layer.11": 2525.5356, "encoder_q-layer.2": 697.9664, "encoder_q-layer.3": 725.7744, "encoder_q-layer.4": 790.02, "encoder_q-layer.5": 805.1044, "encoder_q-layer.6": 915.7582, "encoder_q-layer.7": 1043.9067, "encoder_q-layer.8": 1226.4041, "encoder_q-layer.9": 1108.8243, "epoch": 0.55, "inbatch_neg_score": 0.0859, "inbatch_pos_score": 0.7388, "learning_rate": 2.4e-05, "loss": 3.3735, "norm_diff": 0.102, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1646.7855, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.086, "query_norm": 1.2766, "queue_k_norm": 1.3806, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6287, "sent_len_1": 66.6672, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.5425, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 56800 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.3838, "doc_norm": 1.3732, "encoder_q-embeddings": 1086.7983, "encoder_q-layer.0": 738.0082, "encoder_q-layer.1": 800.5006, "encoder_q-layer.10": 1285.9823, "encoder_q-layer.11": 2704.3972, "encoder_q-layer.2": 921.7812, "encoder_q-layer.3": 960.2267, "encoder_q-layer.4": 1119.2096, "encoder_q-layer.5": 1074.6768, "encoder_q-layer.6": 1210.0447, "encoder_q-layer.7": 1287.4642, "encoder_q-layer.8": 1413.8899, "encoder_q-layer.9": 1216.0645, "epoch": 0.56, "inbatch_neg_score": 0.0827, "inbatch_pos_score": 0.7422, "learning_rate": 2.3944444444444443e-05, "loss": 3.3838, "norm_diff": 0.0928, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1923.0013, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0826, "query_norm": 1.2804, "queue_k_norm": 1.3788, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.496, "sent_len_1": 66.9308, "sent_max_len_0": 127.9925, "sent_max_len_1": 191.255, "stdk": 0.0484, "stdq": 0.0449, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 56900 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.3762, "doc_norm": 1.3739, "encoder_q-embeddings": 1962.3936, "encoder_q-layer.0": 1295.9668, "encoder_q-layer.1": 1369.3391, "encoder_q-layer.10": 2354.9131, "encoder_q-layer.11": 5153.5928, "encoder_q-layer.2": 1542.321, "encoder_q-layer.3": 1552.5215, "encoder_q-layer.4": 1725.4135, "encoder_q-layer.5": 1808.7118, "encoder_q-layer.6": 1942.9688, "encoder_q-layer.7": 2248.5342, "encoder_q-layer.8": 2521.5715, "encoder_q-layer.9": 2347.9795, "epoch": 0.56, "inbatch_neg_score": 0.0853, "inbatch_pos_score": 0.7495, "learning_rate": 2.3888888888888892e-05, "loss": 3.3762, "norm_diff": 0.0958, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3489.1719, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0854, "query_norm": 1.2781, "queue_k_norm": 1.3785, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5106, "sent_len_1": 66.5559, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2975, "stdk": 0.0484, "stdq": 0.0448, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57000 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.3869, "doc_norm": 1.381, "encoder_q-embeddings": 2371.3438, "encoder_q-layer.0": 1589.5627, "encoder_q-layer.1": 1751.3784, "encoder_q-layer.10": 2442.4404, "encoder_q-layer.11": 5350.0049, "encoder_q-layer.2": 1988.9092, "encoder_q-layer.3": 2055.4189, "encoder_q-layer.4": 2102.9211, "encoder_q-layer.5": 2225.6497, "encoder_q-layer.6": 2512.0078, "encoder_q-layer.7": 2572.1316, "encoder_q-layer.8": 3143.1509, "encoder_q-layer.9": 2654.196, "epoch": 0.56, "inbatch_neg_score": 0.0847, "inbatch_pos_score": 0.749, "learning_rate": 2.3833333333333334e-05, "loss": 3.3869, "norm_diff": 0.0806, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3920.3405, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0848, "query_norm": 1.3004, "queue_k_norm": 1.3797, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4961, "sent_len_1": 66.7958, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1712, "stdk": 0.0487, "stdq": 0.0454, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 57100 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.3903, "doc_norm": 1.3797, "encoder_q-embeddings": 1057.2806, "encoder_q-layer.0": 696.3287, "encoder_q-layer.1": 707.9968, "encoder_q-layer.10": 1131.2599, "encoder_q-layer.11": 2566.6025, "encoder_q-layer.2": 785.0435, "encoder_q-layer.3": 820.3646, "encoder_q-layer.4": 917.6716, "encoder_q-layer.5": 938.6765, "encoder_q-layer.6": 1083.7147, "encoder_q-layer.7": 1134.1486, "encoder_q-layer.8": 1278.204, "encoder_q-layer.9": 1116.2924, "epoch": 0.56, "inbatch_neg_score": 0.0839, "inbatch_pos_score": 0.7646, "learning_rate": 2.377777777777778e-05, "loss": 3.3903, "norm_diff": 0.074, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1747.1057, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.084, "query_norm": 1.3057, "queue_k_norm": 1.3806, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4125, "sent_len_1": 66.7526, "sent_max_len_0": 127.98, "sent_max_len_1": 189.1312, "stdk": 0.0486, "stdq": 0.0456, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 57200 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3665, "doc_norm": 1.3822, "encoder_q-embeddings": 1081.9927, "encoder_q-layer.0": 715.5071, "encoder_q-layer.1": 755.5483, "encoder_q-layer.10": 1166.6669, "encoder_q-layer.11": 2772.0854, "encoder_q-layer.2": 865.4344, "encoder_q-layer.3": 899.8166, "encoder_q-layer.4": 969.3923, "encoder_q-layer.5": 1023.6101, "encoder_q-layer.6": 1140.5315, "encoder_q-layer.7": 1140.3806, "encoder_q-layer.8": 1221.3323, "encoder_q-layer.9": 1121.1948, "epoch": 0.56, "inbatch_neg_score": 0.0845, "inbatch_pos_score": 0.7271, "learning_rate": 2.3722222222222222e-05, "loss": 3.3665, "norm_diff": 0.1199, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1834.3309, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0848, "query_norm": 1.2624, "queue_k_norm": 1.3802, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7197, "sent_len_1": 66.9787, "sent_max_len_0": 128.0, "sent_max_len_1": 191.3363, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 57300 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.3763, "doc_norm": 1.38, "encoder_q-embeddings": 994.0143, "encoder_q-layer.0": 646.9039, "encoder_q-layer.1": 673.2778, "encoder_q-layer.10": 1174.7668, "encoder_q-layer.11": 2571.1819, "encoder_q-layer.2": 747.3581, "encoder_q-layer.3": 783.9277, "encoder_q-layer.4": 828.1932, "encoder_q-layer.5": 896.507, "encoder_q-layer.6": 984.8661, "encoder_q-layer.7": 1110.0029, "encoder_q-layer.8": 1220.4052, "encoder_q-layer.9": 1155.0911, "epoch": 0.56, "inbatch_neg_score": 0.0891, "inbatch_pos_score": 0.7632, "learning_rate": 2.3666666666666668e-05, "loss": 3.3763, "norm_diff": 0.091, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1706.3337, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0886, "query_norm": 1.2889, "queue_k_norm": 1.3817, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.697, "sent_len_1": 66.6873, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.375, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 57400 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3845, "doc_norm": 1.3789, "encoder_q-embeddings": 1058.6711, "encoder_q-layer.0": 755.1993, "encoder_q-layer.1": 779.8328, "encoder_q-layer.10": 1172.115, "encoder_q-layer.11": 2653.3569, "encoder_q-layer.2": 883.3621, "encoder_q-layer.3": 882.602, "encoder_q-layer.4": 928.616, "encoder_q-layer.5": 974.799, "encoder_q-layer.6": 1067.3185, "encoder_q-layer.7": 1136.5176, "encoder_q-layer.8": 1368.9198, "encoder_q-layer.9": 1207.1504, "epoch": 0.56, "inbatch_neg_score": 0.0959, "inbatch_pos_score": 0.7549, "learning_rate": 2.361111111111111e-05, "loss": 3.3845, "norm_diff": 0.0739, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1830.3615, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0956, "query_norm": 1.305, "queue_k_norm": 1.3795, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4953, "sent_len_1": 66.7094, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.9437, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 57500 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3809, "doc_norm": 1.3755, "encoder_q-embeddings": 1021.4893, "encoder_q-layer.0": 682.1094, "encoder_q-layer.1": 685.4216, "encoder_q-layer.10": 1310.9548, "encoder_q-layer.11": 2624.3252, "encoder_q-layer.2": 765.8889, "encoder_q-layer.3": 788.7877, "encoder_q-layer.4": 833.8892, "encoder_q-layer.5": 922.5793, "encoder_q-layer.6": 1041.3235, "encoder_q-layer.7": 1124.8658, "encoder_q-layer.8": 1394.3265, "encoder_q-layer.9": 1234.5061, "epoch": 0.56, "inbatch_neg_score": 0.0933, "inbatch_pos_score": 0.7651, "learning_rate": 2.3555555555555556e-05, "loss": 3.3809, "norm_diff": 0.0492, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1774.9762, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0932, "query_norm": 1.3262, "queue_k_norm": 1.3804, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7343, "sent_len_1": 66.4364, "sent_max_len_0": 127.9862, "sent_max_len_1": 188.6463, "stdk": 0.0484, "stdq": 0.0456, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 57600 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.3853, "doc_norm": 1.3828, "encoder_q-embeddings": 1335.8582, "encoder_q-layer.0": 946.3566, "encoder_q-layer.1": 1059.9348, "encoder_q-layer.10": 1111.9979, "encoder_q-layer.11": 2547.1377, "encoder_q-layer.2": 1226.5729, "encoder_q-layer.3": 1207.8323, "encoder_q-layer.4": 1287.2579, "encoder_q-layer.5": 1334.7773, "encoder_q-layer.6": 1291.6549, "encoder_q-layer.7": 1274.016, "encoder_q-layer.8": 1357.3663, "encoder_q-layer.9": 1116.5125, "epoch": 0.56, "inbatch_neg_score": 0.0972, "inbatch_pos_score": 0.7725, "learning_rate": 2.35e-05, "loss": 3.3853, "norm_diff": 0.0694, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2036.3515, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0974, "query_norm": 1.3134, "queue_k_norm": 1.3804, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6071, "sent_len_1": 66.4041, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3587, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 57700 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3936, "doc_norm": 1.381, "encoder_q-embeddings": 924.8439, "encoder_q-layer.0": 592.0831, "encoder_q-layer.1": 631.0623, "encoder_q-layer.10": 1182.6849, "encoder_q-layer.11": 2683.321, "encoder_q-layer.2": 709.795, "encoder_q-layer.3": 728.2387, "encoder_q-layer.4": 791.893, "encoder_q-layer.5": 854.9319, "encoder_q-layer.6": 971.7505, "encoder_q-layer.7": 1072.4053, "encoder_q-layer.8": 1296.4922, "encoder_q-layer.9": 1184.8013, "epoch": 0.56, "inbatch_neg_score": 0.1049, "inbatch_pos_score": 0.7617, "learning_rate": 2.3444444444444448e-05, "loss": 3.3936, "norm_diff": 0.07, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1741.3907, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1037, "query_norm": 1.311, "queue_k_norm": 1.3811, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6084, "sent_len_1": 66.7169, "sent_max_len_0": 128.0, "sent_max_len_1": 187.3113, "stdk": 0.0486, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 57800 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.3673, "doc_norm": 1.3815, "encoder_q-embeddings": 1161.8737, "encoder_q-layer.0": 782.6772, "encoder_q-layer.1": 832.2999, "encoder_q-layer.10": 1114.198, "encoder_q-layer.11": 2530.1282, "encoder_q-layer.2": 924.5948, "encoder_q-layer.3": 978.2185, "encoder_q-layer.4": 1005.2292, "encoder_q-layer.5": 1008.0126, "encoder_q-layer.6": 1073.9421, "encoder_q-layer.7": 1201.269, "encoder_q-layer.8": 1322.1743, "encoder_q-layer.9": 1168.9177, "epoch": 0.57, "inbatch_neg_score": 0.1108, "inbatch_pos_score": 0.7856, "learning_rate": 2.338888888888889e-05, "loss": 3.3673, "norm_diff": 0.0484, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1830.509, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1109, "query_norm": 1.3331, "queue_k_norm": 1.3806, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.714, "sent_len_1": 66.6877, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2962, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57900 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.3851, "doc_norm": 1.3817, "encoder_q-embeddings": 1305.5675, "encoder_q-layer.0": 921.8568, "encoder_q-layer.1": 936.7321, "encoder_q-layer.10": 1093.2933, "encoder_q-layer.11": 2451.5208, "encoder_q-layer.2": 1087.4565, "encoder_q-layer.3": 1158.7177, "encoder_q-layer.4": 1183.8959, "encoder_q-layer.5": 1175.9701, "encoder_q-layer.6": 1202.877, "encoder_q-layer.7": 1334.6262, "encoder_q-layer.8": 1402.9673, "encoder_q-layer.9": 1146.4003, "epoch": 0.57, "inbatch_neg_score": 0.117, "inbatch_pos_score": 0.8145, "learning_rate": 2.3333333333333336e-05, "loss": 3.3851, "norm_diff": 0.0564, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1976.5409, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1172, "query_norm": 1.3261, "queue_k_norm": 1.3803, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6347, "sent_len_1": 66.7927, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2713, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58000 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3824, "doc_norm": 1.3765, "encoder_q-embeddings": 1164.7228, "encoder_q-layer.0": 790.9449, "encoder_q-layer.1": 785.4178, "encoder_q-layer.10": 1083.9298, "encoder_q-layer.11": 2500.6978, "encoder_q-layer.2": 877.6296, "encoder_q-layer.3": 890.5405, "encoder_q-layer.4": 957.4327, "encoder_q-layer.5": 930.5691, "encoder_q-layer.6": 975.5358, "encoder_q-layer.7": 1082.3765, "encoder_q-layer.8": 1267.1648, "encoder_q-layer.9": 1069.4316, "epoch": 0.57, "inbatch_neg_score": 0.1229, "inbatch_pos_score": 0.7539, "learning_rate": 2.3277777777777778e-05, "loss": 3.3824, "norm_diff": 0.0786, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1770.3189, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1219, "query_norm": 1.2979, "queue_k_norm": 1.3847, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5802, "sent_len_1": 66.6792, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6463, "stdk": 0.0483, "stdq": 0.0438, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 58100 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3852, "doc_norm": 1.3839, "encoder_q-embeddings": 1927.3145, "encoder_q-layer.0": 1376.7325, "encoder_q-layer.1": 1487.4351, "encoder_q-layer.10": 1157.2355, "encoder_q-layer.11": 2597.438, "encoder_q-layer.2": 1752.3633, "encoder_q-layer.3": 1813.8231, "encoder_q-layer.4": 1797.2134, "encoder_q-layer.5": 1823.0891, "encoder_q-layer.6": 1669.8069, "encoder_q-layer.7": 1469.9575, "encoder_q-layer.8": 1433.7905, "encoder_q-layer.9": 1149.4358, "epoch": 0.57, "inbatch_neg_score": 0.1255, "inbatch_pos_score": 0.7993, "learning_rate": 2.3222222222222224e-05, "loss": 3.3852, "norm_diff": 0.0359, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2522.7051, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.125, "query_norm": 1.3484, "queue_k_norm": 1.3853, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5527, "sent_len_1": 66.6381, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2725, "stdk": 0.0486, "stdq": 0.0456, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 58200 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.377, "doc_norm": 1.3855, "encoder_q-embeddings": 1065.6907, "encoder_q-layer.0": 766.0022, "encoder_q-layer.1": 797.8522, "encoder_q-layer.10": 1174.1785, "encoder_q-layer.11": 2573.4468, "encoder_q-layer.2": 895.8181, "encoder_q-layer.3": 877.0427, "encoder_q-layer.4": 900.9222, "encoder_q-layer.5": 888.6691, "encoder_q-layer.6": 1038.2766, "encoder_q-layer.7": 1067.3937, "encoder_q-layer.8": 1204.0995, "encoder_q-layer.9": 1079.5212, "epoch": 0.57, "inbatch_neg_score": 0.1287, "inbatch_pos_score": 0.8008, "learning_rate": 2.3166666666666666e-05, "loss": 3.377, "norm_diff": 0.0736, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1740.9465, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1278, "query_norm": 1.3118, "queue_k_norm": 1.3884, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7549, "sent_len_1": 66.884, "sent_max_len_0": 127.995, "sent_max_len_1": 191.5325, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 58300 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3626, "doc_norm": 1.3903, "encoder_q-embeddings": 1041.891, "encoder_q-layer.0": 678.5523, "encoder_q-layer.1": 706.7351, "encoder_q-layer.10": 1151.7946, "encoder_q-layer.11": 2617.2822, "encoder_q-layer.2": 812.0208, "encoder_q-layer.3": 812.2729, "encoder_q-layer.4": 838.5059, "encoder_q-layer.5": 878.1033, "encoder_q-layer.6": 1010.5458, "encoder_q-layer.7": 1098.8728, "encoder_q-layer.8": 1301.5461, "encoder_q-layer.9": 1127.7926, "epoch": 0.57, "inbatch_neg_score": 0.1241, "inbatch_pos_score": 0.7944, "learning_rate": 2.3111111111111112e-05, "loss": 3.3626, "norm_diff": 0.0691, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1759.9309, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1235, "query_norm": 1.3212, "queue_k_norm": 1.3863, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.893, "sent_len_1": 66.8859, "sent_max_len_0": 127.9875, "sent_max_len_1": 189.2075, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 58400 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.3786, "doc_norm": 1.3906, "encoder_q-embeddings": 929.5902, "encoder_q-layer.0": 597.9015, "encoder_q-layer.1": 615.6706, "encoder_q-layer.10": 1228.1411, "encoder_q-layer.11": 2722.0469, "encoder_q-layer.2": 671.9987, "encoder_q-layer.3": 692.0413, "encoder_q-layer.4": 794.5731, "encoder_q-layer.5": 796.8109, "encoder_q-layer.6": 912.0125, "encoder_q-layer.7": 1071.4552, "encoder_q-layer.8": 1312.0928, "encoder_q-layer.9": 1175.6415, "epoch": 0.57, "inbatch_neg_score": 0.1171, "inbatch_pos_score": 0.8047, "learning_rate": 2.3055555555555558e-05, "loss": 3.3786, "norm_diff": 0.0979, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1724.1749, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1177, "query_norm": 1.2927, "queue_k_norm": 1.3877, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7349, "sent_len_1": 67.028, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.075, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 58500 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3971, "doc_norm": 1.389, "encoder_q-embeddings": 1074.9641, "encoder_q-layer.0": 761.787, "encoder_q-layer.1": 819.3359, "encoder_q-layer.10": 1340.0643, "encoder_q-layer.11": 2808.6123, "encoder_q-layer.2": 948.446, "encoder_q-layer.3": 981.7411, "encoder_q-layer.4": 1016.1876, "encoder_q-layer.5": 1044.0981, "encoder_q-layer.6": 1127.1802, "encoder_q-layer.7": 1225.6677, "encoder_q-layer.8": 1591.1626, "encoder_q-layer.9": 1335.4482, "epoch": 0.57, "inbatch_neg_score": 0.1179, "inbatch_pos_score": 0.7666, "learning_rate": 2.3000000000000003e-05, "loss": 3.3971, "norm_diff": 0.1115, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1918.7527, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1172, "query_norm": 1.2775, "queue_k_norm": 1.3876, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6583, "sent_len_1": 66.7267, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5913, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 58600 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3868, "doc_norm": 1.3852, "encoder_q-embeddings": 1942.6744, "encoder_q-layer.0": 1483.2693, "encoder_q-layer.1": 1616.3575, "encoder_q-layer.10": 1188.9033, "encoder_q-layer.11": 2553.4087, "encoder_q-layer.2": 1773.5104, "encoder_q-layer.3": 1717.8267, "encoder_q-layer.4": 1809.1827, "encoder_q-layer.5": 1572.8348, "encoder_q-layer.6": 1683.46, "encoder_q-layer.7": 1712.6451, "encoder_q-layer.8": 1613.3695, "encoder_q-layer.9": 1190.9772, "epoch": 0.57, "inbatch_neg_score": 0.112, "inbatch_pos_score": 0.7822, "learning_rate": 2.2944444444444446e-05, "loss": 3.3868, "norm_diff": 0.0878, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2559.3598, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1135, "query_norm": 1.2974, "queue_k_norm": 1.3869, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5009, "sent_len_1": 66.5728, "sent_max_len_0": 127.9762, "sent_max_len_1": 186.4762, "stdk": 0.0485, "stdq": 0.0454, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58700 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.4087, "doc_norm": 1.3812, "encoder_q-embeddings": 1082.6176, "encoder_q-layer.0": 746.1931, "encoder_q-layer.1": 819.5388, "encoder_q-layer.10": 1130.118, "encoder_q-layer.11": 2669.0669, "encoder_q-layer.2": 912.1027, "encoder_q-layer.3": 924.6357, "encoder_q-layer.4": 979.6169, "encoder_q-layer.5": 1041.9486, "encoder_q-layer.6": 1057.4404, "encoder_q-layer.7": 1156.9214, "encoder_q-layer.8": 1342.1395, "encoder_q-layer.9": 1171.6989, "epoch": 0.57, "inbatch_neg_score": 0.1083, "inbatch_pos_score": 0.7515, "learning_rate": 2.288888888888889e-05, "loss": 3.4087, "norm_diff": 0.1145, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1815.2627, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.109, "query_norm": 1.2667, "queue_k_norm": 1.3895, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5287, "sent_len_1": 66.6251, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4087, "stdk": 0.0483, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 58800 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3817, "doc_norm": 1.3946, "encoder_q-embeddings": 1011.7985, "encoder_q-layer.0": 692.0404, "encoder_q-layer.1": 712.2053, "encoder_q-layer.10": 1202.771, "encoder_q-layer.11": 2616.0608, "encoder_q-layer.2": 807.3071, "encoder_q-layer.3": 822.6078, "encoder_q-layer.4": 882.9758, "encoder_q-layer.5": 908.2574, "encoder_q-layer.6": 1033.1495, "encoder_q-layer.7": 1121.0791, "encoder_q-layer.8": 1318.0677, "encoder_q-layer.9": 1171.5697, "epoch": 0.58, "inbatch_neg_score": 0.1033, "inbatch_pos_score": 0.7729, "learning_rate": 2.2833333333333334e-05, "loss": 3.3817, "norm_diff": 0.1131, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1763.3688, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1044, "query_norm": 1.2816, "queue_k_norm": 1.3914, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5238, "sent_len_1": 66.7037, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3425, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 58900 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3607, "doc_norm": 1.3909, "encoder_q-embeddings": 1024.2625, "encoder_q-layer.0": 680.7972, "encoder_q-layer.1": 736.2529, "encoder_q-layer.10": 1188.3041, "encoder_q-layer.11": 2758.0686, "encoder_q-layer.2": 845.6302, "encoder_q-layer.3": 840.0203, "encoder_q-layer.4": 910.6882, "encoder_q-layer.5": 887.9819, "encoder_q-layer.6": 971.3682, "encoder_q-layer.7": 1108.4369, "encoder_q-layer.8": 1329.9222, "encoder_q-layer.9": 1210.9866, "epoch": 0.58, "inbatch_neg_score": 0.1038, "inbatch_pos_score": 0.7803, "learning_rate": 2.277777777777778e-05, "loss": 3.3607, "norm_diff": 0.0972, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1825.5607, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.104, "query_norm": 1.2937, "queue_k_norm": 1.3917, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6025, "sent_len_1": 66.7683, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8625, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 59000 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.3846, "doc_norm": 1.3866, "encoder_q-embeddings": 1031.1975, "encoder_q-layer.0": 647.9096, "encoder_q-layer.1": 672.9594, "encoder_q-layer.10": 1181.8099, "encoder_q-layer.11": 2826.4961, "encoder_q-layer.2": 738.3811, "encoder_q-layer.3": 795.5809, "encoder_q-layer.4": 812.7181, "encoder_q-layer.5": 857.0234, "encoder_q-layer.6": 950.4486, "encoder_q-layer.7": 1051.7192, "encoder_q-layer.8": 1264.5428, "encoder_q-layer.9": 1153.1051, "epoch": 0.58, "inbatch_neg_score": 0.1015, "inbatch_pos_score": 0.7603, "learning_rate": 2.2722222222222222e-05, "loss": 3.3846, "norm_diff": 0.1126, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1755.7672, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1016, "query_norm": 1.274, "queue_k_norm": 1.3918, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.9167, "sent_len_1": 66.8968, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9412, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 59100 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.3708, "doc_norm": 1.3908, "encoder_q-embeddings": 2140.6458, "encoder_q-layer.0": 1456.8394, "encoder_q-layer.1": 1525.4882, "encoder_q-layer.10": 2217.375, "encoder_q-layer.11": 5215.1543, "encoder_q-layer.2": 1725.8904, "encoder_q-layer.3": 1760.6246, "encoder_q-layer.4": 1880.8782, "encoder_q-layer.5": 1877.5815, "encoder_q-layer.6": 1955.4603, "encoder_q-layer.7": 2257.1091, "encoder_q-layer.8": 2397.1641, "encoder_q-layer.9": 2209.3872, "epoch": 0.58, "inbatch_neg_score": 0.0961, "inbatch_pos_score": 0.7656, "learning_rate": 2.2666666666666668e-05, "loss": 3.3708, "norm_diff": 0.1146, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3525.1397, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0969, "query_norm": 1.2761, "queue_k_norm": 1.3897, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5933, "sent_len_1": 66.7614, "sent_max_len_0": 128.0, "sent_max_len_1": 189.24, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 59200 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.3762, "doc_norm": 1.3956, "encoder_q-embeddings": 1903.3208, "encoder_q-layer.0": 1221.5342, "encoder_q-layer.1": 1273.7548, "encoder_q-layer.10": 2342.7202, "encoder_q-layer.11": 4922.4648, "encoder_q-layer.2": 1409.3046, "encoder_q-layer.3": 1441.374, "encoder_q-layer.4": 1544.4263, "encoder_q-layer.5": 1546.5297, "encoder_q-layer.6": 1766.6824, "encoder_q-layer.7": 1999.9315, "encoder_q-layer.8": 2393.8496, "encoder_q-layer.9": 2269.8174, "epoch": 0.58, "inbatch_neg_score": 0.0931, "inbatch_pos_score": 0.7607, "learning_rate": 2.2611111111111113e-05, "loss": 3.3762, "norm_diff": 0.1147, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3271.5909, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0935, "query_norm": 1.2809, "queue_k_norm": 1.3885, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5693, "sent_len_1": 66.9095, "sent_max_len_0": 127.9938, "sent_max_len_1": 191.62, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 59300 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3753, "doc_norm": 1.3869, "encoder_q-embeddings": 2417.9316, "encoder_q-layer.0": 1661.0355, "encoder_q-layer.1": 1798.5107, "encoder_q-layer.10": 2337.9409, "encoder_q-layer.11": 5295.1899, "encoder_q-layer.2": 2054.0845, "encoder_q-layer.3": 2254.2715, "encoder_q-layer.4": 2384.4819, "encoder_q-layer.5": 2502.6487, "encoder_q-layer.6": 2747.752, "encoder_q-layer.7": 2867.9824, "encoder_q-layer.8": 2956.251, "encoder_q-layer.9": 2387.3176, "epoch": 0.58, "inbatch_neg_score": 0.093, "inbatch_pos_score": 0.748, "learning_rate": 2.255555555555556e-05, "loss": 3.3753, "norm_diff": 0.1162, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4030.0633, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0925, "query_norm": 1.2707, "queue_k_norm": 1.3887, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4913, "sent_len_1": 66.979, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2262, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 59400 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.3871, "doc_norm": 1.3867, "encoder_q-embeddings": 1913.0162, "encoder_q-layer.0": 1277.489, "encoder_q-layer.1": 1356.275, "encoder_q-layer.10": 2346.5247, "encoder_q-layer.11": 5064.5532, "encoder_q-layer.2": 1549.0292, "encoder_q-layer.3": 1621.3998, "encoder_q-layer.4": 1617.9702, "encoder_q-layer.5": 1608.1572, "encoder_q-layer.6": 1902.4973, "encoder_q-layer.7": 2010.1177, "encoder_q-layer.8": 2279.1855, "encoder_q-layer.9": 2116.6353, "epoch": 0.58, "inbatch_neg_score": 0.0859, "inbatch_pos_score": 0.7261, "learning_rate": 2.25e-05, "loss": 3.3871, "norm_diff": 0.1344, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3316.0023, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0864, "query_norm": 1.2523, "queue_k_norm": 1.3866, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6003, "sent_len_1": 66.902, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3775, "stdk": 0.0486, "stdq": 0.0441, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 59500 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3741, "doc_norm": 1.3786, "encoder_q-embeddings": 2168.6101, "encoder_q-layer.0": 1461.1133, "encoder_q-layer.1": 1509.3176, "encoder_q-layer.10": 2368.0581, "encoder_q-layer.11": 5301.0703, "encoder_q-layer.2": 1727.5106, "encoder_q-layer.3": 1740.7269, "encoder_q-layer.4": 1898.2577, "encoder_q-layer.5": 1970.7722, "encoder_q-layer.6": 2088.6431, "encoder_q-layer.7": 2375.855, "encoder_q-layer.8": 2647.6614, "encoder_q-layer.9": 2302.4104, "epoch": 0.58, "inbatch_neg_score": 0.087, "inbatch_pos_score": 0.7378, "learning_rate": 2.2444444444444447e-05, "loss": 3.3741, "norm_diff": 0.1045, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3681.7094, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.087, "query_norm": 1.2741, "queue_k_norm": 1.3871, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7019, "sent_len_1": 66.9216, "sent_max_len_0": 128.0, "sent_max_len_1": 189.305, "stdk": 0.0483, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 59600 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3755, "doc_norm": 1.3878, "encoder_q-embeddings": 2117.5312, "encoder_q-layer.0": 1389.3833, "encoder_q-layer.1": 1463.7789, "encoder_q-layer.10": 2394.4062, "encoder_q-layer.11": 5431.8193, "encoder_q-layer.2": 1627.7351, "encoder_q-layer.3": 1666.7533, "encoder_q-layer.4": 1756.1357, "encoder_q-layer.5": 1839.1799, "encoder_q-layer.6": 2219.063, "encoder_q-layer.7": 2409.313, "encoder_q-layer.8": 2524.4849, "encoder_q-layer.9": 2365.3787, "epoch": 0.58, "inbatch_neg_score": 0.0861, "inbatch_pos_score": 0.7383, "learning_rate": 2.238888888888889e-05, "loss": 3.3755, "norm_diff": 0.1175, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3555.2187, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0851, "query_norm": 1.2703, "queue_k_norm": 1.3874, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.595, "sent_len_1": 66.8681, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8713, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 59700 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.3783, "doc_norm": 1.3819, "encoder_q-embeddings": 2319.1428, "encoder_q-layer.0": 1558.7225, "encoder_q-layer.1": 1729.5255, "encoder_q-layer.10": 2469.436, "encoder_q-layer.11": 5450.8208, "encoder_q-layer.2": 1972.8438, "encoder_q-layer.3": 1912.4572, "encoder_q-layer.4": 1932.3403, "encoder_q-layer.5": 1897.9547, "encoder_q-layer.6": 2062.2581, "encoder_q-layer.7": 2317.4509, "encoder_q-layer.8": 2740.5415, "encoder_q-layer.9": 2500.8582, "epoch": 0.58, "inbatch_neg_score": 0.0819, "inbatch_pos_score": 0.7437, "learning_rate": 2.2333333333333335e-05, "loss": 3.3783, "norm_diff": 0.1074, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3771.9561, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0815, "query_norm": 1.2745, "queue_k_norm": 1.3853, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6451, "sent_len_1": 66.8921, "sent_max_len_0": 127.9938, "sent_max_len_1": 190.5925, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 59800 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3746, "doc_norm": 1.3887, "encoder_q-embeddings": 1940.1641, "encoder_q-layer.0": 1222.3105, "encoder_q-layer.1": 1294.8816, "encoder_q-layer.10": 2350.5276, "encoder_q-layer.11": 5246.8955, "encoder_q-layer.2": 1427.1653, "encoder_q-layer.3": 1499.1147, "encoder_q-layer.4": 1635.5758, "encoder_q-layer.5": 1649.17, "encoder_q-layer.6": 1854.4995, "encoder_q-layer.7": 2051.4048, "encoder_q-layer.8": 2546.5303, "encoder_q-layer.9": 2305.3088, "epoch": 0.58, "inbatch_neg_score": 0.0811, "inbatch_pos_score": 0.7314, "learning_rate": 2.2277777777777778e-05, "loss": 3.3746, "norm_diff": 0.1311, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3443.8191, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0819, "query_norm": 1.2576, "queue_k_norm": 1.3874, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.788, "sent_len_1": 67.0392, "sent_max_len_0": 128.0, "sent_max_len_1": 190.535, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 59900 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3779, "doc_norm": 1.3885, "encoder_q-embeddings": 2102.8367, "encoder_q-layer.0": 1463.1483, "encoder_q-layer.1": 1544.5406, "encoder_q-layer.10": 2375.5947, "encoder_q-layer.11": 5291.0894, "encoder_q-layer.2": 1742.0682, "encoder_q-layer.3": 1857.1116, "encoder_q-layer.4": 1952.4507, "encoder_q-layer.5": 1948.3723, "encoder_q-layer.6": 2073.2451, "encoder_q-layer.7": 2367.2458, "encoder_q-layer.8": 2931.0881, "encoder_q-layer.9": 2436.0627, "epoch": 0.59, "inbatch_neg_score": 0.079, "inbatch_pos_score": 0.7178, "learning_rate": 2.2222222222222223e-05, "loss": 3.3779, "norm_diff": 0.1333, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3708.7167, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0792, "query_norm": 1.2552, "queue_k_norm": 1.3857, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5494, "sent_len_1": 66.9911, "sent_max_len_0": 128.0, "sent_max_len_1": 189.975, "stdk": 0.0488, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 60000 }, { "dev_runtime": 27.3393, "dev_samples_per_second": 2.341, "dev_steps_per_second": 0.037, "epoch": 0.59, "step": 60000, "test_accuracy": 93.83544921875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.35345810651779175, "test_doc_norm": 1.3657957315444946, "test_inbatch_neg_score": 0.44765934348106384, "test_inbatch_pos_score": 1.3737914562225342, "test_loss": 0.35345810651779175, "test_loss_align": 1.0454767942428589, "test_loss_unif": 3.9643054008483887, "test_loss_unif_q@queue": 3.9643054008483887, "test_norm_diff": 0.014161448925733566, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.08598949015140533, "test_query_norm": 1.37770414352417, "test_queue_k_norm": 1.3854156732559204, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04254869744181633, "test_stdq": 0.042300187051296234, "test_stdqueue_k": 0.048759203404188156, "test_stdqueue_q": 0.0 }, { "dev_runtime": 27.3393, "dev_samples_per_second": 2.341, "dev_steps_per_second": 0.037, "epoch": 0.59, "eval_beir-arguana_ndcg@10": 0.36529, "eval_beir-arguana_recall@10": 0.61807, "eval_beir-arguana_recall@100": 0.90043, "eval_beir-arguana_recall@20": 0.74324, "eval_beir-avg_ndcg@10": 0.38618516666666663, "eval_beir-avg_recall@10": 0.4517696666666667, "eval_beir-avg_recall@100": 0.6254039999999998, "eval_beir-avg_recall@20": 0.50787175, "eval_beir-cqadupstack_ndcg@10": 0.26345166666666664, "eval_beir-cqadupstack_recall@10": 0.35669666666666666, "eval_beir-cqadupstack_recall@100": 0.5868300000000001, "eval_beir-cqadupstack_recall@20": 0.4226574999999999, "eval_beir-fiqa_ndcg@10": 0.24586, "eval_beir-fiqa_recall@10": 0.30363, "eval_beir-fiqa_recall@100": 0.55258, "eval_beir-fiqa_recall@20": 0.37594, "eval_beir-nfcorpus_ndcg@10": 0.29147, "eval_beir-nfcorpus_recall@10": 0.14604, "eval_beir-nfcorpus_recall@100": 0.2747, "eval_beir-nfcorpus_recall@20": 0.17495, "eval_beir-nq_ndcg@10": 0.28756, "eval_beir-nq_recall@10": 0.46833, "eval_beir-nq_recall@100": 0.78614, "eval_beir-nq_recall@20": 0.57595, "eval_beir-quora_ndcg@10": 0.79999, "eval_beir-quora_recall@10": 0.8985, "eval_beir-quora_recall@100": 0.97976, "eval_beir-quora_recall@20": 0.93628, "eval_beir-scidocs_ndcg@10": 0.14867, "eval_beir-scidocs_recall@10": 0.15613, "eval_beir-scidocs_recall@100": 0.36093, "eval_beir-scidocs_recall@20": 0.21258, "eval_beir-scifact_ndcg@10": 0.63566, "eval_beir-scifact_recall@10": 0.78789, "eval_beir-scifact_recall@100": 0.896, "eval_beir-scifact_recall@20": 0.83522, "eval_beir-trec-covid_ndcg@10": 0.60923, "eval_beir-trec-covid_recall@10": 0.636, "eval_beir-trec-covid_recall@100": 0.4698, "eval_beir-trec-covid_recall@20": 0.597, "eval_beir-webis-touche2020_ndcg@10": 0.21467, "eval_beir-webis-touche2020_recall@10": 0.14641, "eval_beir-webis-touche2020_recall@100": 0.44687, "eval_beir-webis-touche2020_recall@20": 0.2049, "eval_senteval-avg_sts": 0.7664503536381, "eval_senteval-sickr_spearman": 0.7302446451713595, "eval_senteval-stsb_spearman": 0.8026560621048404, "step": 60000, "test_accuracy": 93.83544921875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.35345810651779175, "test_doc_norm": 1.3657957315444946, "test_inbatch_neg_score": 0.44765934348106384, "test_inbatch_pos_score": 1.3737914562225342, "test_loss": 0.35345810651779175, "test_loss_align": 1.0454767942428589, "test_loss_unif": 3.9643054008483887, "test_loss_unif_q@queue": 3.9643054008483887, "test_norm_diff": 0.014161448925733566, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.08598949015140533, "test_query_norm": 1.37770414352417, "test_queue_k_norm": 1.3854156732559204, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04254869744181633, "test_stdq": 0.042300187051296234, "test_stdqueue_k": 0.048759203404188156, "test_stdqueue_q": 0.0 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.338, "doc_norm": 1.3853, "encoder_q-embeddings": 2034.1738, "encoder_q-layer.0": 1316.171, "encoder_q-layer.1": 1417.2864, "encoder_q-layer.10": 2292.5093, "encoder_q-layer.11": 5195.6543, "encoder_q-layer.2": 1606.8715, "encoder_q-layer.3": 1698.9664, "encoder_q-layer.4": 1750.2253, "encoder_q-layer.5": 1815.6469, "encoder_q-layer.6": 1999.0409, "encoder_q-layer.7": 2213.5786, "encoder_q-layer.8": 2646.1494, "encoder_q-layer.9": 2439.574, "epoch": 0.59, "inbatch_neg_score": 0.0816, "inbatch_pos_score": 0.7583, "learning_rate": 2.216666666666667e-05, "loss": 3.338, "norm_diff": 0.0906, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3535.0093, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0818, "query_norm": 1.2947, "queue_k_norm": 1.3851, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6648, "sent_len_1": 66.8709, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9787, "stdk": 0.0487, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 60100 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.3668, "doc_norm": 1.3836, "encoder_q-embeddings": 3009.6758, "encoder_q-layer.0": 2078.8679, "encoder_q-layer.1": 2291.0247, "encoder_q-layer.10": 2624.9055, "encoder_q-layer.11": 5281.3735, "encoder_q-layer.2": 2550.1763, "encoder_q-layer.3": 2540.0513, "encoder_q-layer.4": 2660.5608, "encoder_q-layer.5": 2668.6702, "encoder_q-layer.6": 2639.9666, "encoder_q-layer.7": 2689.9724, "encoder_q-layer.8": 3131.1565, "encoder_q-layer.9": 2409.2991, "epoch": 0.59, "inbatch_neg_score": 0.0843, "inbatch_pos_score": 0.7485, "learning_rate": 2.211111111111111e-05, "loss": 3.3668, "norm_diff": 0.1009, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4362.0584, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0845, "query_norm": 1.2827, "queue_k_norm": 1.3836, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.763, "sent_len_1": 66.7235, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0712, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60200 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.3555, "doc_norm": 1.3847, "encoder_q-embeddings": 5978.5786, "encoder_q-layer.0": 4649.2988, "encoder_q-layer.1": 5092.7422, "encoder_q-layer.10": 2301.8525, "encoder_q-layer.11": 5222.5557, "encoder_q-layer.2": 5848.9629, "encoder_q-layer.3": 5387.0488, "encoder_q-layer.4": 4886.1196, "encoder_q-layer.5": 4889.2974, "encoder_q-layer.6": 4551.0781, "encoder_q-layer.7": 3734.9802, "encoder_q-layer.8": 2787.4587, "encoder_q-layer.9": 2166.6589, "epoch": 0.59, "inbatch_neg_score": 0.0843, "inbatch_pos_score": 0.7402, "learning_rate": 2.2055555555555557e-05, "loss": 3.3555, "norm_diff": 0.1179, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6971.764, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0844, "query_norm": 1.2668, "queue_k_norm": 1.3844, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7652, "sent_len_1": 66.7006, "sent_max_len_0": 127.9925, "sent_max_len_1": 189.5888, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 60300 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.3735, "doc_norm": 1.3826, "encoder_q-embeddings": 2167.8188, "encoder_q-layer.0": 1560.5657, "encoder_q-layer.1": 1584.0876, "encoder_q-layer.10": 2209.4873, "encoder_q-layer.11": 5044.5674, "encoder_q-layer.2": 1932.2649, "encoder_q-layer.3": 1799.4196, "encoder_q-layer.4": 1969.0565, "encoder_q-layer.5": 1889.7393, "encoder_q-layer.6": 2066.8293, "encoder_q-layer.7": 2220.9592, "encoder_q-layer.8": 2614.9351, "encoder_q-layer.9": 2161.7666, "epoch": 0.59, "inbatch_neg_score": 0.0831, "inbatch_pos_score": 0.7471, "learning_rate": 2.2000000000000003e-05, "loss": 3.3735, "norm_diff": 0.1028, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3547.3916, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0833, "query_norm": 1.2797, "queue_k_norm": 1.3825, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6664, "sent_len_1": 66.829, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8187, "stdk": 0.0486, "stdq": 0.0445, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60400 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.3625, "doc_norm": 1.3847, "encoder_q-embeddings": 1881.5022, "encoder_q-layer.0": 1245.3344, "encoder_q-layer.1": 1327.7751, "encoder_q-layer.10": 2288.8076, "encoder_q-layer.11": 5166.6797, "encoder_q-layer.2": 1479.3245, "encoder_q-layer.3": 1548.6365, "encoder_q-layer.4": 1618.4943, "encoder_q-layer.5": 1650.9401, "encoder_q-layer.6": 1846.7888, "encoder_q-layer.7": 2086.8125, "encoder_q-layer.8": 2475.4172, "encoder_q-layer.9": 2290.3159, "epoch": 0.59, "inbatch_neg_score": 0.0846, "inbatch_pos_score": 0.7607, "learning_rate": 2.1944444444444445e-05, "loss": 3.3625, "norm_diff": 0.085, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3357.9593, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0851, "query_norm": 1.2997, "queue_k_norm": 1.3836, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7786, "sent_len_1": 67.0679, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8137, "stdk": 0.0487, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60500 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.3699, "doc_norm": 1.3832, "encoder_q-embeddings": 2901.293, "encoder_q-layer.0": 2072.177, "encoder_q-layer.1": 2326.261, "encoder_q-layer.10": 2402.7913, "encoder_q-layer.11": 5258.2397, "encoder_q-layer.2": 2751.9089, "encoder_q-layer.3": 2993.8652, "encoder_q-layer.4": 3437.2393, "encoder_q-layer.5": 3546.1697, "encoder_q-layer.6": 3743.4905, "encoder_q-layer.7": 3478.2351, "encoder_q-layer.8": 3229.8352, "encoder_q-layer.9": 2372.6804, "epoch": 0.59, "inbatch_neg_score": 0.0849, "inbatch_pos_score": 0.749, "learning_rate": 2.188888888888889e-05, "loss": 3.3699, "norm_diff": 0.0857, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4723.9662, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0855, "query_norm": 1.2975, "queue_k_norm": 1.3831, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5091, "sent_len_1": 66.8138, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.3913, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60600 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.3755, "doc_norm": 1.3752, "encoder_q-embeddings": 2264.6729, "encoder_q-layer.0": 1517.8098, "encoder_q-layer.1": 1602.9539, "encoder_q-layer.10": 2227.4729, "encoder_q-layer.11": 5130.3086, "encoder_q-layer.2": 1802.9957, "encoder_q-layer.3": 1876.9636, "encoder_q-layer.4": 1968.6864, "encoder_q-layer.5": 2077.5056, "encoder_q-layer.6": 2195.2666, "encoder_q-layer.7": 2183.751, "encoder_q-layer.8": 2429.9756, "encoder_q-layer.9": 2282.1509, "epoch": 0.59, "inbatch_neg_score": 0.0891, "inbatch_pos_score": 0.7451, "learning_rate": 2.1833333333333333e-05, "loss": 3.3755, "norm_diff": 0.0706, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3600.8963, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0885, "query_norm": 1.3046, "queue_k_norm": 1.3826, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4446, "sent_len_1": 66.6987, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.4925, "stdk": 0.0483, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60700 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.3601, "doc_norm": 1.3898, "encoder_q-embeddings": 1786.9536, "encoder_q-layer.0": 1175.7798, "encoder_q-layer.1": 1216.4414, "encoder_q-layer.10": 2261.6846, "encoder_q-layer.11": 4935.3403, "encoder_q-layer.2": 1358.7496, "encoder_q-layer.3": 1402.3706, "encoder_q-layer.4": 1482.9417, "encoder_q-layer.5": 1572.6316, "encoder_q-layer.6": 1796.4882, "encoder_q-layer.7": 2099.2986, "encoder_q-layer.8": 2462.3484, "encoder_q-layer.9": 2153.7754, "epoch": 0.59, "inbatch_neg_score": 0.0921, "inbatch_pos_score": 0.7715, "learning_rate": 2.177777777777778e-05, "loss": 3.3601, "norm_diff": 0.0847, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3262.3795, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0921, "query_norm": 1.3051, "queue_k_norm": 1.3835, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5791, "sent_len_1": 66.8075, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.7537, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60800 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.3871, "doc_norm": 1.3916, "encoder_q-embeddings": 2199.3164, "encoder_q-layer.0": 1462.0157, "encoder_q-layer.1": 1568.8071, "encoder_q-layer.10": 2469.2856, "encoder_q-layer.11": 5344.6753, "encoder_q-layer.2": 1813.384, "encoder_q-layer.3": 1893.1968, "encoder_q-layer.4": 2129.7468, "encoder_q-layer.5": 2181.228, "encoder_q-layer.6": 2356.4329, "encoder_q-layer.7": 2438.6133, "encoder_q-layer.8": 2601.7385, "encoder_q-layer.9": 2409.5476, "epoch": 0.59, "inbatch_neg_score": 0.0972, "inbatch_pos_score": 0.7402, "learning_rate": 2.1722222222222225e-05, "loss": 3.3871, "norm_diff": 0.0971, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3788.4893, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0971, "query_norm": 1.2945, "queue_k_norm": 1.3826, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5471, "sent_len_1": 66.7943, "sent_max_len_0": 127.9988, "sent_max_len_1": 191.1387, "stdk": 0.0489, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60900 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.3674, "doc_norm": 1.3815, "encoder_q-embeddings": 1888.4163, "encoder_q-layer.0": 1237.7388, "encoder_q-layer.1": 1346.9454, "encoder_q-layer.10": 2567.1807, "encoder_q-layer.11": 5212.1211, "encoder_q-layer.2": 1514.1565, "encoder_q-layer.3": 1553.0331, "encoder_q-layer.4": 1642.0479, "encoder_q-layer.5": 1779.8849, "encoder_q-layer.6": 2004.9592, "encoder_q-layer.7": 2259.2327, "encoder_q-layer.8": 2464.5317, "encoder_q-layer.9": 2270.8835, "epoch": 0.6, "inbatch_neg_score": 0.0985, "inbatch_pos_score": 0.7646, "learning_rate": 2.1666666666666667e-05, "loss": 3.3674, "norm_diff": 0.087, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3470.4806, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0985, "query_norm": 1.2946, "queue_k_norm": 1.3831, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.448, "sent_len_1": 66.7117, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6163, "stdk": 0.0485, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61000 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.3715, "doc_norm": 1.3881, "encoder_q-embeddings": 4642.3667, "encoder_q-layer.0": 3377.7415, "encoder_q-layer.1": 4151.1841, "encoder_q-layer.10": 3118.4634, "encoder_q-layer.11": 5732.3428, "encoder_q-layer.2": 4794.771, "encoder_q-layer.3": 5269.0034, "encoder_q-layer.4": 5718.7783, "encoder_q-layer.5": 6179.7842, "encoder_q-layer.6": 5874.0332, "encoder_q-layer.7": 5568.7075, "encoder_q-layer.8": 4325.0991, "encoder_q-layer.9": 2876.3672, "epoch": 0.6, "inbatch_neg_score": 0.1002, "inbatch_pos_score": 0.7607, "learning_rate": 2.1611111111111113e-05, "loss": 3.3715, "norm_diff": 0.0773, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7183.1477, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1002, "query_norm": 1.3108, "queue_k_norm": 1.3849, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6553, "sent_len_1": 66.8338, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1612, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 61100 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3641, "doc_norm": 1.3818, "encoder_q-embeddings": 3612.3782, "encoder_q-layer.0": 2391.6853, "encoder_q-layer.1": 2496.2456, "encoder_q-layer.10": 4715.3945, "encoder_q-layer.11": 10411.0, "encoder_q-layer.2": 2775.9487, "encoder_q-layer.3": 2912.5432, "encoder_q-layer.4": 3003.0422, "encoder_q-layer.5": 3146.936, "encoder_q-layer.6": 3462.6685, "encoder_q-layer.7": 3995.3372, "encoder_q-layer.8": 5208.8179, "encoder_q-layer.9": 4582.0161, "epoch": 0.6, "inbatch_neg_score": 0.1063, "inbatch_pos_score": 0.7822, "learning_rate": 2.1555555555555555e-05, "loss": 3.3641, "norm_diff": 0.0558, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6718.7162, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1052, "query_norm": 1.326, "queue_k_norm": 1.3826, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6892, "sent_len_1": 66.6845, "sent_max_len_0": 127.9875, "sent_max_len_1": 189.6337, "stdk": 0.0485, "stdq": 0.0459, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61200 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3771, "doc_norm": 1.3891, "encoder_q-embeddings": 4768.103, "encoder_q-layer.0": 3359.5552, "encoder_q-layer.1": 3623.3745, "encoder_q-layer.10": 4528.5156, "encoder_q-layer.11": 10522.9297, "encoder_q-layer.2": 4380.8784, "encoder_q-layer.3": 4694.4824, "encoder_q-layer.4": 5212.7173, "encoder_q-layer.5": 5608.5327, "encoder_q-layer.6": 5602.0522, "encoder_q-layer.7": 5794.0176, "encoder_q-layer.8": 5795.1162, "encoder_q-layer.9": 4648.1729, "epoch": 0.6, "inbatch_neg_score": 0.1103, "inbatch_pos_score": 0.7651, "learning_rate": 2.15e-05, "loss": 3.3771, "norm_diff": 0.1, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8129.1314, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1102, "query_norm": 1.289, "queue_k_norm": 1.3844, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5216, "sent_len_1": 66.5983, "sent_max_len_0": 128.0, "sent_max_len_1": 189.845, "stdk": 0.0488, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61300 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.3795, "doc_norm": 1.393, "encoder_q-embeddings": 1951.4199, "encoder_q-layer.0": 1296.8438, "encoder_q-layer.1": 1374.5612, "encoder_q-layer.10": 2688.6973, "encoder_q-layer.11": 5156.1172, "encoder_q-layer.2": 1581.1494, "encoder_q-layer.3": 1625.2058, "encoder_q-layer.4": 1740.6255, "encoder_q-layer.5": 1830.3136, "encoder_q-layer.6": 2141.0151, "encoder_q-layer.7": 2458.7561, "encoder_q-layer.8": 2698.9836, "encoder_q-layer.9": 2385.8037, "epoch": 0.6, "inbatch_neg_score": 0.1048, "inbatch_pos_score": 0.8091, "learning_rate": 2.1444444444444443e-05, "loss": 3.3795, "norm_diff": 0.0715, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3561.9693, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1052, "query_norm": 1.3215, "queue_k_norm": 1.3869, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5326, "sent_len_1": 66.8937, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2788, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 61400 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.3592, "doc_norm": 1.3879, "encoder_q-embeddings": 1875.132, "encoder_q-layer.0": 1252.4661, "encoder_q-layer.1": 1300.1003, "encoder_q-layer.10": 2585.082, "encoder_q-layer.11": 5295.1763, "encoder_q-layer.2": 1431.8992, "encoder_q-layer.3": 1471.0277, "encoder_q-layer.4": 1567.6212, "encoder_q-layer.5": 1656.8754, "encoder_q-layer.6": 1833.9357, "encoder_q-layer.7": 2158.2581, "encoder_q-layer.8": 2733.4043, "encoder_q-layer.9": 2403.429, "epoch": 0.6, "inbatch_neg_score": 0.1085, "inbatch_pos_score": 0.7969, "learning_rate": 2.138888888888889e-05, "loss": 3.3592, "norm_diff": 0.1022, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3455.7168, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1088, "query_norm": 1.2857, "queue_k_norm": 1.3862, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7105, "sent_len_1": 66.8192, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7475, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 61500 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.363, "doc_norm": 1.3838, "encoder_q-embeddings": 1946.8385, "encoder_q-layer.0": 1319.546, "encoder_q-layer.1": 1372.1041, "encoder_q-layer.10": 2284.3352, "encoder_q-layer.11": 5122.9517, "encoder_q-layer.2": 1557.8009, "encoder_q-layer.3": 1602.7922, "encoder_q-layer.4": 1731.8992, "encoder_q-layer.5": 1766.8413, "encoder_q-layer.6": 1934.3179, "encoder_q-layer.7": 2091.3098, "encoder_q-layer.8": 2554.2302, "encoder_q-layer.9": 2228.6882, "epoch": 0.6, "inbatch_neg_score": 0.1126, "inbatch_pos_score": 0.7837, "learning_rate": 2.1333333333333335e-05, "loss": 3.363, "norm_diff": 0.0903, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3439.3141, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1116, "query_norm": 1.2935, "queue_k_norm": 1.3852, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7178, "sent_len_1": 66.7348, "sent_max_len_0": 127.9875, "sent_max_len_1": 188.58, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61600 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.3536, "doc_norm": 1.3872, "encoder_q-embeddings": 2059.5154, "encoder_q-layer.0": 1341.783, "encoder_q-layer.1": 1454.3193, "encoder_q-layer.10": 2326.708, "encoder_q-layer.11": 5339.021, "encoder_q-layer.2": 1675.5363, "encoder_q-layer.3": 1754.0518, "encoder_q-layer.4": 1903.8827, "encoder_q-layer.5": 1901.6383, "encoder_q-layer.6": 2124.3054, "encoder_q-layer.7": 2322.5803, "encoder_q-layer.8": 2590.1514, "encoder_q-layer.9": 2352.1646, "epoch": 0.6, "inbatch_neg_score": 0.1081, "inbatch_pos_score": 0.7759, "learning_rate": 2.127777777777778e-05, "loss": 3.3536, "norm_diff": 0.1018, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3599.2011, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1085, "query_norm": 1.2854, "queue_k_norm": 1.3894, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.8778, "sent_len_1": 67.0944, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.6325, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 61700 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.3655, "doc_norm": 1.3835, "encoder_q-embeddings": 1854.9568, "encoder_q-layer.0": 1215.5361, "encoder_q-layer.1": 1253.0493, "encoder_q-layer.10": 2339.6619, "encoder_q-layer.11": 5481.3096, "encoder_q-layer.2": 1403.9341, "encoder_q-layer.3": 1513.0692, "encoder_q-layer.4": 1636.3029, "encoder_q-layer.5": 1731.5507, "encoder_q-layer.6": 1882.236, "encoder_q-layer.7": 2210.7126, "encoder_q-layer.8": 2613.7947, "encoder_q-layer.9": 2345.834, "epoch": 0.6, "inbatch_neg_score": 0.1037, "inbatch_pos_score": 0.7534, "learning_rate": 2.1222222222222223e-05, "loss": 3.3655, "norm_diff": 0.1121, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3486.1907, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1031, "query_norm": 1.2714, "queue_k_norm": 1.3854, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6105, "sent_len_1": 66.9747, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.4963, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61800 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.3764, "doc_norm": 1.3869, "encoder_q-embeddings": 1948.1589, "encoder_q-layer.0": 1335.4509, "encoder_q-layer.1": 1425.1886, "encoder_q-layer.10": 2144.6973, "encoder_q-layer.11": 5187.1528, "encoder_q-layer.2": 1620.1785, "encoder_q-layer.3": 1754.0074, "encoder_q-layer.4": 1931.731, "encoder_q-layer.5": 1946.3048, "encoder_q-layer.6": 2249.2793, "encoder_q-layer.7": 2237.6228, "encoder_q-layer.8": 2467.3298, "encoder_q-layer.9": 2127.4307, "epoch": 0.6, "inbatch_neg_score": 0.1026, "inbatch_pos_score": 0.7622, "learning_rate": 2.116666666666667e-05, "loss": 3.3764, "norm_diff": 0.1249, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3530.6245, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.103, "query_norm": 1.262, "queue_k_norm": 1.3867, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.615, "sent_len_1": 66.9366, "sent_max_len_0": 127.9975, "sent_max_len_1": 186.74, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 61900 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.367, "doc_norm": 1.392, "encoder_q-embeddings": 4959.3403, "encoder_q-layer.0": 3976.822, "encoder_q-layer.1": 4045.0759, "encoder_q-layer.10": 2526.5911, "encoder_q-layer.11": 5876.5391, "encoder_q-layer.2": 4644.8081, "encoder_q-layer.3": 4980.6855, "encoder_q-layer.4": 4547.7661, "encoder_q-layer.5": 3746.6316, "encoder_q-layer.6": 3469.228, "encoder_q-layer.7": 3455.0864, "encoder_q-layer.8": 2989.5078, "encoder_q-layer.9": 2508.6338, "epoch": 0.61, "inbatch_neg_score": 0.101, "inbatch_pos_score": 0.7495, "learning_rate": 2.111111111111111e-05, "loss": 3.367, "norm_diff": 0.1086, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6027.4948, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1017, "query_norm": 1.2833, "queue_k_norm": 1.3872, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4927, "sent_len_1": 66.9283, "sent_max_len_0": 127.9938, "sent_max_len_1": 190.975, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 62000 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3555, "doc_norm": 1.3886, "encoder_q-embeddings": 2172.874, "encoder_q-layer.0": 1415.8691, "encoder_q-layer.1": 1506.5664, "encoder_q-layer.10": 2451.0959, "encoder_q-layer.11": 5482.062, "encoder_q-layer.2": 1693.3406, "encoder_q-layer.3": 1762.5519, "encoder_q-layer.4": 1858.8307, "encoder_q-layer.5": 1957.1653, "encoder_q-layer.6": 2170.0127, "encoder_q-layer.7": 2443.4624, "encoder_q-layer.8": 2815.9128, "encoder_q-layer.9": 2391.4062, "epoch": 0.61, "inbatch_neg_score": 0.0977, "inbatch_pos_score": 0.7671, "learning_rate": 2.1055555555555556e-05, "loss": 3.3555, "norm_diff": 0.1074, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3751.9555, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0977, "query_norm": 1.2812, "queue_k_norm": 1.3882, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5582, "sent_len_1": 67.0902, "sent_max_len_0": 127.995, "sent_max_len_1": 191.6225, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 62100 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.3684, "doc_norm": 1.3884, "encoder_q-embeddings": 2216.8313, "encoder_q-layer.0": 1495.7136, "encoder_q-layer.1": 1483.5006, "encoder_q-layer.10": 2208.6499, "encoder_q-layer.11": 5567.7173, "encoder_q-layer.2": 1709.0197, "encoder_q-layer.3": 1772.6982, "encoder_q-layer.4": 1795.6615, "encoder_q-layer.5": 1862.7058, "encoder_q-layer.6": 2066.5886, "encoder_q-layer.7": 2230.8411, "encoder_q-layer.8": 2526.2524, "encoder_q-layer.9": 2170.7842, "epoch": 0.61, "inbatch_neg_score": 0.097, "inbatch_pos_score": 0.77, "learning_rate": 2.1e-05, "loss": 3.3684, "norm_diff": 0.1045, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3674.0088, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.097, "query_norm": 1.2839, "queue_k_norm": 1.3871, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.441, "sent_len_1": 66.6429, "sent_max_len_0": 127.995, "sent_max_len_1": 191.135, "stdk": 0.0487, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 62200 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.354, "doc_norm": 1.3889, "encoder_q-embeddings": 1948.4193, "encoder_q-layer.0": 1286.9106, "encoder_q-layer.1": 1358.6936, "encoder_q-layer.10": 2350.865, "encoder_q-layer.11": 5419.5464, "encoder_q-layer.2": 1552.5967, "encoder_q-layer.3": 1648.9005, "encoder_q-layer.4": 1772.6427, "encoder_q-layer.5": 1763.4282, "encoder_q-layer.6": 1951.9862, "encoder_q-layer.7": 2247.1453, "encoder_q-layer.8": 2517.4424, "encoder_q-layer.9": 2259.1377, "epoch": 0.61, "inbatch_neg_score": 0.0954, "inbatch_pos_score": 0.7588, "learning_rate": 2.0944444444444445e-05, "loss": 3.354, "norm_diff": 0.1199, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3508.0041, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0954, "query_norm": 1.269, "queue_k_norm": 1.388, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6707, "sent_len_1": 66.7455, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0525, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 62300 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.3686, "doc_norm": 1.3845, "encoder_q-embeddings": 2324.6675, "encoder_q-layer.0": 1624.2269, "encoder_q-layer.1": 1750.8517, "encoder_q-layer.10": 2338.5408, "encoder_q-layer.11": 5499.666, "encoder_q-layer.2": 1802.6733, "encoder_q-layer.3": 1732.0151, "encoder_q-layer.4": 1780.5283, "encoder_q-layer.5": 1898.6674, "encoder_q-layer.6": 1916.014, "encoder_q-layer.7": 2116.7026, "encoder_q-layer.8": 2517.7361, "encoder_q-layer.9": 2299.0325, "epoch": 0.61, "inbatch_neg_score": 0.095, "inbatch_pos_score": 0.748, "learning_rate": 2.088888888888889e-05, "loss": 3.3686, "norm_diff": 0.1108, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3615.5914, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0944, "query_norm": 1.2737, "queue_k_norm": 1.3892, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6783, "sent_len_1": 66.9081, "sent_max_len_0": 128.0, "sent_max_len_1": 191.125, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 62400 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.3666, "doc_norm": 1.3833, "encoder_q-embeddings": 1846.3673, "encoder_q-layer.0": 1226.4653, "encoder_q-layer.1": 1279.3802, "encoder_q-layer.10": 2521.9253, "encoder_q-layer.11": 5420.9229, "encoder_q-layer.2": 1462.3922, "encoder_q-layer.3": 1491.554, "encoder_q-layer.4": 1573.661, "encoder_q-layer.5": 1671.0985, "encoder_q-layer.6": 1898.3545, "encoder_q-layer.7": 2252.3503, "encoder_q-layer.8": 2660.4783, "encoder_q-layer.9": 2324.6887, "epoch": 0.61, "inbatch_neg_score": 0.0901, "inbatch_pos_score": 0.7417, "learning_rate": 2.0833333333333336e-05, "loss": 3.3666, "norm_diff": 0.1183, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3479.1698, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.09, "query_norm": 1.2649, "queue_k_norm": 1.3893, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5416, "sent_len_1": 66.7764, "sent_max_len_0": 127.9875, "sent_max_len_1": 186.4387, "stdk": 0.0485, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 62500 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3727, "doc_norm": 1.3849, "encoder_q-embeddings": 1962.6448, "encoder_q-layer.0": 1254.7045, "encoder_q-layer.1": 1325.9004, "encoder_q-layer.10": 2495.2905, "encoder_q-layer.11": 5260.4634, "encoder_q-layer.2": 1453.7819, "encoder_q-layer.3": 1492.9994, "encoder_q-layer.4": 1660.34, "encoder_q-layer.5": 1734.9209, "encoder_q-layer.6": 2062.8259, "encoder_q-layer.7": 2217.0527, "encoder_q-layer.8": 2661.3508, "encoder_q-layer.9": 2376.0347, "epoch": 0.61, "inbatch_neg_score": 0.0921, "inbatch_pos_score": 0.7451, "learning_rate": 2.077777777777778e-05, "loss": 3.3727, "norm_diff": 0.0982, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3510.3298, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0905, "query_norm": 1.2867, "queue_k_norm": 1.3863, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5817, "sent_len_1": 66.6474, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9038, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 62600 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3579, "doc_norm": 1.3865, "encoder_q-embeddings": 3044.2085, "encoder_q-layer.0": 2253.3679, "encoder_q-layer.1": 2417.1653, "encoder_q-layer.10": 2336.4902, "encoder_q-layer.11": 5501.3516, "encoder_q-layer.2": 2679.7837, "encoder_q-layer.3": 2613.8955, "encoder_q-layer.4": 2772.3145, "encoder_q-layer.5": 2665.123, "encoder_q-layer.6": 2816.3867, "encoder_q-layer.7": 2648.4692, "encoder_q-layer.8": 2772.0574, "encoder_q-layer.9": 2237.8567, "epoch": 0.61, "inbatch_neg_score": 0.0881, "inbatch_pos_score": 0.749, "learning_rate": 2.0722222222222224e-05, "loss": 3.3579, "norm_diff": 0.1155, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4363.8347, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0875, "query_norm": 1.2709, "queue_k_norm": 1.3881, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.8649, "sent_len_1": 66.719, "sent_max_len_0": 127.9938, "sent_max_len_1": 188.9025, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 62700 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.3799, "doc_norm": 1.3905, "encoder_q-embeddings": 1952.4568, "encoder_q-layer.0": 1264.3534, "encoder_q-layer.1": 1352.3154, "encoder_q-layer.10": 2225.498, "encoder_q-layer.11": 5201.4683, "encoder_q-layer.2": 1575.0763, "encoder_q-layer.3": 1602.6571, "encoder_q-layer.4": 1724.9856, "encoder_q-layer.5": 1801.4916, "encoder_q-layer.6": 2001.219, "encoder_q-layer.7": 2248.9202, "encoder_q-layer.8": 2411.251, "encoder_q-layer.9": 2156.8569, "epoch": 0.61, "inbatch_neg_score": 0.0886, "inbatch_pos_score": 0.7656, "learning_rate": 2.0666666666666666e-05, "loss": 3.3799, "norm_diff": 0.1173, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3389.6849, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0887, "query_norm": 1.2732, "queue_k_norm": 1.3862, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5347, "sent_len_1": 66.7244, "sent_max_len_0": 127.99, "sent_max_len_1": 190.07, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 62800 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.3588, "doc_norm": 1.3819, "encoder_q-embeddings": 1712.2484, "encoder_q-layer.0": 1224.8665, "encoder_q-layer.1": 1268.5907, "encoder_q-layer.10": 2230.2239, "encoder_q-layer.11": 5169.3081, "encoder_q-layer.2": 1393.9535, "encoder_q-layer.3": 1416.002, "encoder_q-layer.4": 1509.9941, "encoder_q-layer.5": 1605.0641, "encoder_q-layer.6": 1832.8358, "encoder_q-layer.7": 2008.6311, "encoder_q-layer.8": 2339.1663, "encoder_q-layer.9": 2159.2058, "epoch": 0.61, "inbatch_neg_score": 0.0865, "inbatch_pos_score": 0.752, "learning_rate": 2.0611111111111112e-05, "loss": 3.3588, "norm_diff": 0.1208, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3248.7738, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0864, "query_norm": 1.261, "queue_k_norm": 1.3873, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5166, "sent_len_1": 66.6547, "sent_max_len_0": 128.0, "sent_max_len_1": 189.64, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 62900 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.358, "doc_norm": 1.3876, "encoder_q-embeddings": 1843.1937, "encoder_q-layer.0": 1250.3038, "encoder_q-layer.1": 1287.3557, "encoder_q-layer.10": 2305.6736, "encoder_q-layer.11": 5248.3501, "encoder_q-layer.2": 1404.2584, "encoder_q-layer.3": 1414.3857, "encoder_q-layer.4": 1471.3628, "encoder_q-layer.5": 1533.7976, "encoder_q-layer.6": 1708.0548, "encoder_q-layer.7": 1924.2368, "encoder_q-layer.8": 2302.4397, "encoder_q-layer.9": 2078.6167, "epoch": 0.62, "inbatch_neg_score": 0.0864, "inbatch_pos_score": 0.7583, "learning_rate": 2.0555555555555555e-05, "loss": 3.358, "norm_diff": 0.1168, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3346.2777, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0867, "query_norm": 1.2708, "queue_k_norm": 1.3857, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6308, "sent_len_1": 66.9406, "sent_max_len_0": 127.9963, "sent_max_len_1": 187.8587, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 63000 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.347, "doc_norm": 1.375, "encoder_q-embeddings": 1918.5957, "encoder_q-layer.0": 1262.9268, "encoder_q-layer.1": 1348.3584, "encoder_q-layer.10": 2496.9417, "encoder_q-layer.11": 5329.5557, "encoder_q-layer.2": 1505.2433, "encoder_q-layer.3": 1586.8506, "encoder_q-layer.4": 1705.3125, "encoder_q-layer.5": 1689.2352, "encoder_q-layer.6": 1912.5795, "encoder_q-layer.7": 2197.616, "encoder_q-layer.8": 2708.2068, "encoder_q-layer.9": 2440.6675, "epoch": 0.62, "inbatch_neg_score": 0.0882, "inbatch_pos_score": 0.7231, "learning_rate": 2.05e-05, "loss": 3.347, "norm_diff": 0.1056, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3512.5027, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0881, "query_norm": 1.2694, "queue_k_norm": 1.3868, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7329, "sent_len_1": 67.072, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6738, "stdk": 0.0483, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 63100 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3492, "doc_norm": 1.3849, "encoder_q-embeddings": 2467.4629, "encoder_q-layer.0": 1662.5442, "encoder_q-layer.1": 1778.4363, "encoder_q-layer.10": 2645.728, "encoder_q-layer.11": 5559.8896, "encoder_q-layer.2": 1964.7549, "encoder_q-layer.3": 1999.3324, "encoder_q-layer.4": 2094.5425, "encoder_q-layer.5": 2115.2368, "encoder_q-layer.6": 2285.74, "encoder_q-layer.7": 2501.4219, "encoder_q-layer.8": 2635.146, "encoder_q-layer.9": 2458.948, "epoch": 0.62, "inbatch_neg_score": 0.088, "inbatch_pos_score": 0.7373, "learning_rate": 2.0444444444444446e-05, "loss": 3.3492, "norm_diff": 0.1083, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3869.8956, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0885, "query_norm": 1.2766, "queue_k_norm": 1.3873, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4791, "sent_len_1": 67.0787, "sent_max_len_0": 127.9988, "sent_max_len_1": 188.3988, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 63200 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.3739, "doc_norm": 1.3779, "encoder_q-embeddings": 1861.9014, "encoder_q-layer.0": 1176.8237, "encoder_q-layer.1": 1251.4354, "encoder_q-layer.10": 2213.6516, "encoder_q-layer.11": 5192.2119, "encoder_q-layer.2": 1384.9647, "encoder_q-layer.3": 1418.0613, "encoder_q-layer.4": 1516.2169, "encoder_q-layer.5": 1648.2524, "encoder_q-layer.6": 1823.5984, "encoder_q-layer.7": 2139.4949, "encoder_q-layer.8": 2456.0103, "encoder_q-layer.9": 2169.8511, "epoch": 0.62, "inbatch_neg_score": 0.0892, "inbatch_pos_score": 0.748, "learning_rate": 2.0388888888888892e-05, "loss": 3.3739, "norm_diff": 0.1116, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3343.431, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0887, "query_norm": 1.2663, "queue_k_norm": 1.3875, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5044, "sent_len_1": 66.9982, "sent_max_len_0": 127.9988, "sent_max_len_1": 188.8575, "stdk": 0.0484, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 63300 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.3497, "doc_norm": 1.3818, "encoder_q-embeddings": 6848.4282, "encoder_q-layer.0": 4624.7842, "encoder_q-layer.1": 5026.9619, "encoder_q-layer.10": 4443.5386, "encoder_q-layer.11": 10534.8457, "encoder_q-layer.2": 5780.3838, "encoder_q-layer.3": 6375.7686, "encoder_q-layer.4": 6543.4062, "encoder_q-layer.5": 6552.3145, "encoder_q-layer.6": 6497.8257, "encoder_q-layer.7": 6536.0122, "encoder_q-layer.8": 6490.7266, "encoder_q-layer.9": 4444.3335, "epoch": 0.62, "inbatch_neg_score": 0.0921, "inbatch_pos_score": 0.7544, "learning_rate": 2.0333333333333334e-05, "loss": 3.3497, "norm_diff": 0.0936, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9604.1593, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0927, "query_norm": 1.2881, "queue_k_norm": 1.3874, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5768, "sent_len_1": 67.0223, "sent_max_len_0": 127.9988, "sent_max_len_1": 191.5112, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 63400 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3541, "doc_norm": 1.3834, "encoder_q-embeddings": 4078.4595, "encoder_q-layer.0": 2761.863, "encoder_q-layer.1": 2905.6731, "encoder_q-layer.10": 4403.9326, "encoder_q-layer.11": 10145.0391, "encoder_q-layer.2": 3402.0635, "encoder_q-layer.3": 3552.1733, "encoder_q-layer.4": 3731.0818, "encoder_q-layer.5": 3663.4863, "encoder_q-layer.6": 4016.3708, "encoder_q-layer.7": 4376.167, "encoder_q-layer.8": 4954.3472, "encoder_q-layer.9": 4542.2515, "epoch": 0.62, "inbatch_neg_score": 0.0954, "inbatch_pos_score": 0.7554, "learning_rate": 2.027777777777778e-05, "loss": 3.3541, "norm_diff": 0.1001, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7001.3783, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0954, "query_norm": 1.2833, "queue_k_norm": 1.3851, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6304, "sent_len_1": 66.5502, "sent_max_len_0": 128.0, "sent_max_len_1": 189.175, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 63500 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3591, "doc_norm": 1.3817, "encoder_q-embeddings": 3891.8467, "encoder_q-layer.0": 2507.6042, "encoder_q-layer.1": 2543.0793, "encoder_q-layer.10": 4610.7422, "encoder_q-layer.11": 10412.0215, "encoder_q-layer.2": 2859.1528, "encoder_q-layer.3": 3001.9526, "encoder_q-layer.4": 3199.2605, "encoder_q-layer.5": 3207.4951, "encoder_q-layer.6": 3700.2124, "encoder_q-layer.7": 4266.9414, "encoder_q-layer.8": 4913.7676, "encoder_q-layer.9": 4426.4595, "epoch": 0.62, "inbatch_neg_score": 0.0932, "inbatch_pos_score": 0.769, "learning_rate": 2.0222222222222222e-05, "loss": 3.3591, "norm_diff": 0.0777, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6827.587, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0939, "query_norm": 1.304, "queue_k_norm": 1.3855, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5001, "sent_len_1": 66.8115, "sent_max_len_0": 127.985, "sent_max_len_1": 188.9613, "stdk": 0.0485, "stdq": 0.0455, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 63600 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.343, "doc_norm": 1.3885, "encoder_q-embeddings": 4278.3198, "encoder_q-layer.0": 2823.4885, "encoder_q-layer.1": 2981.2563, "encoder_q-layer.10": 4787.3545, "encoder_q-layer.11": 10570.834, "encoder_q-layer.2": 3339.0981, "encoder_q-layer.3": 3472.4138, "encoder_q-layer.4": 3709.0884, "encoder_q-layer.5": 3936.4363, "encoder_q-layer.6": 4326.3721, "encoder_q-layer.7": 4723.9155, "encoder_q-layer.8": 5419.7861, "encoder_q-layer.9": 4902.0864, "epoch": 0.62, "inbatch_neg_score": 0.098, "inbatch_pos_score": 0.752, "learning_rate": 2.0166666666666668e-05, "loss": 3.343, "norm_diff": 0.0922, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7273.1855, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.098, "query_norm": 1.2963, "queue_k_norm": 1.3852, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4727, "sent_len_1": 66.6966, "sent_max_len_0": 127.9963, "sent_max_len_1": 185.515, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 63700 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.3498, "doc_norm": 1.3945, "encoder_q-embeddings": 3926.6638, "encoder_q-layer.0": 2652.499, "encoder_q-layer.1": 2753.5889, "encoder_q-layer.10": 4671.5469, "encoder_q-layer.11": 11099.6738, "encoder_q-layer.2": 3085.0063, "encoder_q-layer.3": 3221.4641, "encoder_q-layer.4": 3433.147, "encoder_q-layer.5": 3642.2097, "encoder_q-layer.6": 3990.3257, "encoder_q-layer.7": 4400.2974, "encoder_q-layer.8": 5182.9414, "encoder_q-layer.9": 4766.6309, "epoch": 0.62, "inbatch_neg_score": 0.1008, "inbatch_pos_score": 0.7642, "learning_rate": 2.011111111111111e-05, "loss": 3.3498, "norm_diff": 0.0849, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7251.8609, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1013, "query_norm": 1.3097, "queue_k_norm": 1.3878, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5582, "sent_len_1": 67.0202, "sent_max_len_0": 127.9975, "sent_max_len_1": 192.52, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 63800 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.34, "doc_norm": 1.3791, "encoder_q-embeddings": 3771.2866, "encoder_q-layer.0": 2475.5967, "encoder_q-layer.1": 2664.4475, "encoder_q-layer.10": 4656.1265, "encoder_q-layer.11": 10447.7393, "encoder_q-layer.2": 2906.2383, "encoder_q-layer.3": 3003.4609, "encoder_q-layer.4": 3380.8691, "encoder_q-layer.5": 3353.8447, "encoder_q-layer.6": 3855.4739, "encoder_q-layer.7": 4649.2153, "encoder_q-layer.8": 5186.9932, "encoder_q-layer.9": 4810.189, "epoch": 0.62, "inbatch_neg_score": 0.1112, "inbatch_pos_score": 0.7646, "learning_rate": 2.0055555555555556e-05, "loss": 3.34, "norm_diff": 0.0529, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6877.9099, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1097, "query_norm": 1.3262, "queue_k_norm": 1.3875, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6229, "sent_len_1": 67.0678, "sent_max_len_0": 128.0, "sent_max_len_1": 190.25, "stdk": 0.0484, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 63900 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.3596, "doc_norm": 1.3854, "encoder_q-embeddings": 4148.9282, "encoder_q-layer.0": 2738.7932, "encoder_q-layer.1": 2845.3157, "encoder_q-layer.10": 4703.1494, "encoder_q-layer.11": 10566.8828, "encoder_q-layer.2": 3313.7478, "encoder_q-layer.3": 3492.1565, "encoder_q-layer.4": 3888.9722, "encoder_q-layer.5": 4150.2969, "encoder_q-layer.6": 4739.5835, "encoder_q-layer.7": 5207.9722, "encoder_q-layer.8": 5454.4824, "encoder_q-layer.9": 4745.6118, "epoch": 0.62, "inbatch_neg_score": 0.1126, "inbatch_pos_score": 0.7969, "learning_rate": 2e-05, "loss": 3.3596, "norm_diff": 0.0617, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7377.354, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1125, "query_norm": 1.3237, "queue_k_norm": 1.3891, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.2732, "sent_len_1": 66.9118, "sent_max_len_0": 127.9988, "sent_max_len_1": 188.4963, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 64000 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.3691, "doc_norm": 1.3854, "encoder_q-embeddings": 4604.2759, "encoder_q-layer.0": 3267.7722, "encoder_q-layer.1": 3367.5317, "encoder_q-layer.10": 4945.6641, "encoder_q-layer.11": 11214.3916, "encoder_q-layer.2": 3982.0237, "encoder_q-layer.3": 4270.1724, "encoder_q-layer.4": 4852.6836, "encoder_q-layer.5": 4849.5269, "encoder_q-layer.6": 5283.54, "encoder_q-layer.7": 5525.6094, "encoder_q-layer.8": 5842.644, "encoder_q-layer.9": 4871.0557, "epoch": 0.63, "inbatch_neg_score": 0.1138, "inbatch_pos_score": 0.7627, "learning_rate": 1.9944444444444447e-05, "loss": 3.3691, "norm_diff": 0.0595, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8087.801, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.114, "query_norm": 1.3259, "queue_k_norm": 1.3892, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4212, "sent_len_1": 66.8619, "sent_max_len_0": 127.9925, "sent_max_len_1": 190.2875, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 64100 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.3576, "doc_norm": 1.3934, "encoder_q-embeddings": 3677.3521, "encoder_q-layer.0": 2618.314, "encoder_q-layer.1": 2823.0515, "encoder_q-layer.10": 4258.0859, "encoder_q-layer.11": 9943.3164, "encoder_q-layer.2": 3263.4512, "encoder_q-layer.3": 3362.96, "encoder_q-layer.4": 3555.5078, "encoder_q-layer.5": 3711.4297, "encoder_q-layer.6": 4070.0312, "encoder_q-layer.7": 4384.5938, "encoder_q-layer.8": 4681.9595, "encoder_q-layer.9": 4351.0225, "epoch": 0.63, "inbatch_neg_score": 0.1217, "inbatch_pos_score": 0.8042, "learning_rate": 1.988888888888889e-05, "loss": 3.3576, "norm_diff": 0.065, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6748.4621, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1219, "query_norm": 1.3284, "queue_k_norm": 1.389, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5299, "sent_len_1": 66.6636, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8988, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 64200 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.3562, "doc_norm": 1.3805, "encoder_q-embeddings": 4123.6318, "encoder_q-layer.0": 2784.5615, "encoder_q-layer.1": 3057.3149, "encoder_q-layer.10": 5354.6396, "encoder_q-layer.11": 10535.5879, "encoder_q-layer.2": 3360.5515, "encoder_q-layer.3": 3523.5405, "encoder_q-layer.4": 3762.0198, "encoder_q-layer.5": 3969.6125, "encoder_q-layer.6": 4302.5503, "encoder_q-layer.7": 4621.8545, "encoder_q-layer.8": 5437.4243, "encoder_q-layer.9": 5097.2314, "epoch": 0.63, "inbatch_neg_score": 0.1288, "inbatch_pos_score": 0.7803, "learning_rate": 1.9833333333333335e-05, "loss": 3.3562, "norm_diff": 0.0344, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7211.3732, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1287, "query_norm": 1.3461, "queue_k_norm": 1.3931, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5021, "sent_len_1": 66.8472, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6262, "stdk": 0.0484, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 64300 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3692, "doc_norm": 1.3945, "encoder_q-embeddings": 3997.7803, "encoder_q-layer.0": 2706.9434, "encoder_q-layer.1": 2791.5535, "encoder_q-layer.10": 4845.4995, "encoder_q-layer.11": 10319.5977, "encoder_q-layer.2": 3116.4089, "encoder_q-layer.3": 3214.3435, "encoder_q-layer.4": 3521.1958, "encoder_q-layer.5": 3457.2458, "encoder_q-layer.6": 3826.696, "encoder_q-layer.7": 4215.3823, "encoder_q-layer.8": 5115.4634, "encoder_q-layer.9": 4539.3423, "epoch": 0.63, "inbatch_neg_score": 0.1322, "inbatch_pos_score": 0.8037, "learning_rate": 1.9777777777777778e-05, "loss": 3.3692, "norm_diff": 0.057, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6904.8073, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1315, "query_norm": 1.3375, "queue_k_norm": 1.3904, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5851, "sent_len_1": 66.7211, "sent_max_len_0": 128.0, "sent_max_len_1": 190.195, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 64400 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.3458, "doc_norm": 1.3914, "encoder_q-embeddings": 3429.0647, "encoder_q-layer.0": 2251.2986, "encoder_q-layer.1": 2368.123, "encoder_q-layer.10": 4731.9604, "encoder_q-layer.11": 10344.9502, "encoder_q-layer.2": 2610.0142, "encoder_q-layer.3": 2679.7532, "encoder_q-layer.4": 2920.4915, "encoder_q-layer.5": 3016.6555, "encoder_q-layer.6": 3466.3235, "encoder_q-layer.7": 3855.1118, "encoder_q-layer.8": 4704.8774, "encoder_q-layer.9": 4456.0664, "epoch": 0.63, "inbatch_neg_score": 0.1337, "inbatch_pos_score": 0.8384, "learning_rate": 1.9722222222222224e-05, "loss": 3.3458, "norm_diff": 0.0447, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6530.2841, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1342, "query_norm": 1.3467, "queue_k_norm": 1.3934, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6748, "sent_len_1": 66.7766, "sent_max_len_0": 127.9975, "sent_max_len_1": 191.3, "stdk": 0.0488, "stdq": 0.0457, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 64500 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.3464, "doc_norm": 1.3894, "encoder_q-embeddings": 6017.3564, "encoder_q-layer.0": 4254.6689, "encoder_q-layer.1": 4932.7158, "encoder_q-layer.10": 4625.9487, "encoder_q-layer.11": 10491.7285, "encoder_q-layer.2": 6133.9917, "encoder_q-layer.3": 6392.4492, "encoder_q-layer.4": 6620.1167, "encoder_q-layer.5": 6668.0918, "encoder_q-layer.6": 6063.6406, "encoder_q-layer.7": 6514.7812, "encoder_q-layer.8": 6557.791, "encoder_q-layer.9": 4566.8853, "epoch": 0.63, "inbatch_neg_score": 0.1402, "inbatch_pos_score": 0.8188, "learning_rate": 1.9666666666666666e-05, "loss": 3.3464, "norm_diff": 0.0527, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9551.9238, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1398, "query_norm": 1.3368, "queue_k_norm": 1.3939, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6853, "sent_len_1": 66.899, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5288, "stdk": 0.0486, "stdq": 0.0455, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 64600 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3591, "doc_norm": 1.3879, "encoder_q-embeddings": 3770.8477, "encoder_q-layer.0": 2430.3755, "encoder_q-layer.1": 2634.6128, "encoder_q-layer.10": 5057.5498, "encoder_q-layer.11": 11407.0557, "encoder_q-layer.2": 2949.4053, "encoder_q-layer.3": 2995.5398, "encoder_q-layer.4": 3171.5637, "encoder_q-layer.5": 3392.0122, "encoder_q-layer.6": 4012.2144, "encoder_q-layer.7": 4705.564, "encoder_q-layer.8": 5503.6968, "encoder_q-layer.9": 5056.1123, "epoch": 0.63, "inbatch_neg_score": 0.1384, "inbatch_pos_score": 0.8018, "learning_rate": 1.9611111111111115e-05, "loss": 3.3591, "norm_diff": 0.0868, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7288.7822, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1398, "query_norm": 1.301, "queue_k_norm": 1.3947, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5734, "sent_len_1": 66.6836, "sent_max_len_0": 128.0, "sent_max_len_1": 191.63, "stdk": 0.0486, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 64700 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3647, "doc_norm": 1.3904, "encoder_q-embeddings": 3676.1934, "encoder_q-layer.0": 2451.2935, "encoder_q-layer.1": 2589.5747, "encoder_q-layer.10": 5209.4971, "encoder_q-layer.11": 10890.2305, "encoder_q-layer.2": 2898.5952, "encoder_q-layer.3": 3002.4128, "encoder_q-layer.4": 3333.7729, "encoder_q-layer.5": 3355.3486, "encoder_q-layer.6": 3666.2307, "encoder_q-layer.7": 4225.6455, "encoder_q-layer.8": 5055.168, "encoder_q-layer.9": 4578.7793, "epoch": 0.63, "inbatch_neg_score": 0.1361, "inbatch_pos_score": 0.7959, "learning_rate": 1.9555555555555557e-05, "loss": 3.3647, "norm_diff": 0.0865, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6823.0252, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1362, "query_norm": 1.3039, "queue_k_norm": 1.3955, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4354, "sent_len_1": 66.555, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8075, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 64800 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3773, "doc_norm": 1.3954, "encoder_q-embeddings": 4162.5371, "encoder_q-layer.0": 2702.3184, "encoder_q-layer.1": 2824.5742, "encoder_q-layer.10": 4773.7949, "encoder_q-layer.11": 11032.4932, "encoder_q-layer.2": 3160.4634, "encoder_q-layer.3": 3340.9304, "encoder_q-layer.4": 3407.0898, "encoder_q-layer.5": 3492.7751, "encoder_q-layer.6": 3954.6279, "encoder_q-layer.7": 4443.8101, "encoder_q-layer.8": 4966.418, "encoder_q-layer.9": 4631.062, "epoch": 0.63, "inbatch_neg_score": 0.1313, "inbatch_pos_score": 0.7856, "learning_rate": 1.9500000000000003e-05, "loss": 3.3773, "norm_diff": 0.0861, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7132.8433, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.131, "query_norm": 1.3093, "queue_k_norm": 1.396, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4871, "sent_len_1": 66.8679, "sent_max_len_0": 128.0, "sent_max_len_1": 191.3063, "stdk": 0.0487, "stdq": 0.0454, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 64900 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.3485, "doc_norm": 1.3869, "encoder_q-embeddings": 4225.1543, "encoder_q-layer.0": 2879.7542, "encoder_q-layer.1": 3047.1035, "encoder_q-layer.10": 4786.5605, "encoder_q-layer.11": 11268.5898, "encoder_q-layer.2": 3559.4839, "encoder_q-layer.3": 3807.3132, "encoder_q-layer.4": 4137.6094, "encoder_q-layer.5": 4571.7095, "encoder_q-layer.6": 4906.2729, "encoder_q-layer.7": 5307.3877, "encoder_q-layer.8": 5698.2778, "encoder_q-layer.9": 4878.166, "epoch": 0.63, "inbatch_neg_score": 0.1276, "inbatch_pos_score": 0.7988, "learning_rate": 1.9444444444444445e-05, "loss": 3.3485, "norm_diff": 0.0795, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7556.2228, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1274, "query_norm": 1.3074, "queue_k_norm": 1.3954, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7491, "sent_len_1": 66.8022, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.7113, "stdk": 0.0484, "stdq": 0.0455, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 65000 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3544, "doc_norm": 1.3981, "encoder_q-embeddings": 3969.7727, "encoder_q-layer.0": 2562.2898, "encoder_q-layer.1": 2767.1467, "encoder_q-layer.10": 4820.0327, "encoder_q-layer.11": 11178.8135, "encoder_q-layer.2": 3160.6902, "encoder_q-layer.3": 3253.0027, "encoder_q-layer.4": 3446.4736, "encoder_q-layer.5": 3548.698, "encoder_q-layer.6": 4033.5093, "encoder_q-layer.7": 4674.2803, "encoder_q-layer.8": 5248.2056, "encoder_q-layer.9": 4701.9058, "epoch": 0.64, "inbatch_neg_score": 0.122, "inbatch_pos_score": 0.7793, "learning_rate": 1.938888888888889e-05, "loss": 3.3544, "norm_diff": 0.1093, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7095.2409, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1228, "query_norm": 1.2888, "queue_k_norm": 1.3977, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4806, "sent_len_1": 66.8505, "sent_max_len_0": 127.9862, "sent_max_len_1": 190.6987, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 65100 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3627, "doc_norm": 1.3984, "encoder_q-embeddings": 4120.3936, "encoder_q-layer.0": 2729.3069, "encoder_q-layer.1": 2973.4863, "encoder_q-layer.10": 4701.7031, "encoder_q-layer.11": 10534.04, "encoder_q-layer.2": 3272.1133, "encoder_q-layer.3": 3462.0322, "encoder_q-layer.4": 3717.6838, "encoder_q-layer.5": 3765.0203, "encoder_q-layer.6": 4220.4712, "encoder_q-layer.7": 4605.3066, "encoder_q-layer.8": 5231.2114, "encoder_q-layer.9": 4588.6846, "epoch": 0.64, "inbatch_neg_score": 0.1194, "inbatch_pos_score": 0.7759, "learning_rate": 1.9333333333333333e-05, "loss": 3.3627, "norm_diff": 0.1185, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7184.8618, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1201, "query_norm": 1.2799, "queue_k_norm": 1.3979, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.502, "sent_len_1": 66.7191, "sent_max_len_0": 128.0, "sent_max_len_1": 188.465, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 65200 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.3765, "doc_norm": 1.3931, "encoder_q-embeddings": 5793.4141, "encoder_q-layer.0": 4087.4773, "encoder_q-layer.1": 4112.2231, "encoder_q-layer.10": 5161.936, "encoder_q-layer.11": 10996.293, "encoder_q-layer.2": 5055.7368, "encoder_q-layer.3": 5811.8853, "encoder_q-layer.4": 6144.5352, "encoder_q-layer.5": 6173.8755, "encoder_q-layer.6": 6471.3462, "encoder_q-layer.7": 6624.8755, "encoder_q-layer.8": 6063.5645, "encoder_q-layer.9": 4781.7568, "epoch": 0.64, "inbatch_neg_score": 0.117, "inbatch_pos_score": 0.7715, "learning_rate": 1.927777777777778e-05, "loss": 3.3765, "norm_diff": 0.106, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9079.5719, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1168, "query_norm": 1.2871, "queue_k_norm": 1.3958, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.3858, "sent_len_1": 66.4375, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9762, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 65300 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.3475, "doc_norm": 1.3982, "encoder_q-embeddings": 7687.207, "encoder_q-layer.0": 5366.0898, "encoder_q-layer.1": 5379.5317, "encoder_q-layer.10": 8748.1875, "encoder_q-layer.11": 21610.5605, "encoder_q-layer.2": 6186.9468, "encoder_q-layer.3": 6513.021, "encoder_q-layer.4": 6566.0137, "encoder_q-layer.5": 6771.6431, "encoder_q-layer.6": 7611.8086, "encoder_q-layer.7": 8286.1631, "encoder_q-layer.8": 9763.5996, "encoder_q-layer.9": 8674.8008, "epoch": 0.64, "inbatch_neg_score": 0.1147, "inbatch_pos_score": 0.7979, "learning_rate": 1.922222222222222e-05, "loss": 3.3475, "norm_diff": 0.1116, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13696.0985, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1143, "query_norm": 1.2866, "queue_k_norm": 1.3954, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6167, "sent_len_1": 66.7885, "sent_max_len_0": 127.995, "sent_max_len_1": 190.5962, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 65400 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.3526, "doc_norm": 1.3996, "encoder_q-embeddings": 3721.5598, "encoder_q-layer.0": 2490.4104, "encoder_q-layer.1": 2772.7698, "encoder_q-layer.10": 5032.6401, "encoder_q-layer.11": 10576.374, "encoder_q-layer.2": 3117.9038, "encoder_q-layer.3": 3195.4436, "encoder_q-layer.4": 3235.0342, "encoder_q-layer.5": 3243.4155, "encoder_q-layer.6": 3743.3276, "encoder_q-layer.7": 4108.9155, "encoder_q-layer.8": 5333.6709, "encoder_q-layer.9": 4884.5747, "epoch": 0.64, "inbatch_neg_score": 0.1084, "inbatch_pos_score": 0.7778, "learning_rate": 1.9166666666666667e-05, "loss": 3.3526, "norm_diff": 0.153, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6877.0167, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1092, "query_norm": 1.2466, "queue_k_norm": 1.3967, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7609, "sent_len_1": 66.8013, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1113, "stdk": 0.0489, "stdq": 0.0439, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 65500 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3659, "doc_norm": 1.3919, "encoder_q-embeddings": 3536.4441, "encoder_q-layer.0": 2553.1814, "encoder_q-layer.1": 2599.7141, "encoder_q-layer.10": 4827.0664, "encoder_q-layer.11": 10784.1992, "encoder_q-layer.2": 2923.9783, "encoder_q-layer.3": 3028.5464, "encoder_q-layer.4": 3154.7993, "encoder_q-layer.5": 3341.613, "encoder_q-layer.6": 3685.9695, "encoder_q-layer.7": 4276.1655, "encoder_q-layer.8": 5126.3037, "encoder_q-layer.9": 4789.1802, "epoch": 0.64, "inbatch_neg_score": 0.109, "inbatch_pos_score": 0.7563, "learning_rate": 1.9111111111111113e-05, "loss": 3.3659, "norm_diff": 0.1401, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6937.4673, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1099, "query_norm": 1.2518, "queue_k_norm": 1.3984, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4656, "sent_len_1": 66.9822, "sent_max_len_0": 128.0, "sent_max_len_1": 190.105, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 65600 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.3401, "doc_norm": 1.4015, "encoder_q-embeddings": 7271.9126, "encoder_q-layer.0": 5165.6855, "encoder_q-layer.1": 5492.6567, "encoder_q-layer.10": 4511.4023, "encoder_q-layer.11": 9904.252, "encoder_q-layer.2": 6157.9941, "encoder_q-layer.3": 6377.4116, "encoder_q-layer.4": 6744.2173, "encoder_q-layer.5": 6527.9873, "encoder_q-layer.6": 7431.2114, "encoder_q-layer.7": 7063.4072, "encoder_q-layer.8": 6106.8169, "encoder_q-layer.9": 4641.0596, "epoch": 0.64, "inbatch_neg_score": 0.108, "inbatch_pos_score": 0.7974, "learning_rate": 1.905555555555556e-05, "loss": 3.3401, "norm_diff": 0.1028, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9792.2343, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1083, "query_norm": 1.2987, "queue_k_norm": 1.3978, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4811, "sent_len_1": 66.9146, "sent_max_len_0": 128.0, "sent_max_len_1": 190.655, "stdk": 0.0489, "stdq": 0.0457, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 65700 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3534, "doc_norm": 1.3991, "encoder_q-embeddings": 2226.9934, "encoder_q-layer.0": 1538.4314, "encoder_q-layer.1": 1678.439, "encoder_q-layer.10": 2259.8792, "encoder_q-layer.11": 5179.9819, "encoder_q-layer.2": 1881.2319, "encoder_q-layer.3": 1974.0822, "encoder_q-layer.4": 2119.8198, "encoder_q-layer.5": 2321.5137, "encoder_q-layer.6": 2479.3613, "encoder_q-layer.7": 2557.114, "encoder_q-layer.8": 2577.9539, "encoder_q-layer.9": 2262.5391, "epoch": 0.64, "inbatch_neg_score": 0.1006, "inbatch_pos_score": 0.7705, "learning_rate": 1.9e-05, "loss": 3.3534, "norm_diff": 0.1223, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3744.9338, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1011, "query_norm": 1.2768, "queue_k_norm": 1.3955, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5988, "sent_len_1": 66.8231, "sent_max_len_0": 127.98, "sent_max_len_1": 189.4888, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 65800 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3478, "doc_norm": 1.3964, "encoder_q-embeddings": 2610.801, "encoder_q-layer.0": 1803.2754, "encoder_q-layer.1": 1910.8695, "encoder_q-layer.10": 2520.1399, "encoder_q-layer.11": 5237.8008, "encoder_q-layer.2": 2183.7363, "encoder_q-layer.3": 2372.7739, "encoder_q-layer.4": 2421.7793, "encoder_q-layer.5": 2587.9558, "encoder_q-layer.6": 2889.8015, "encoder_q-layer.7": 3077.4141, "encoder_q-layer.8": 3113.8013, "encoder_q-layer.9": 2403.6758, "epoch": 0.64, "inbatch_neg_score": 0.0996, "inbatch_pos_score": 0.769, "learning_rate": 1.8944444444444447e-05, "loss": 3.3478, "norm_diff": 0.1238, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4187.9434, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0989, "query_norm": 1.2727, "queue_k_norm": 1.3971, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5437, "sent_len_1": 66.7918, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0925, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 65900 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.3499, "doc_norm": 1.3965, "encoder_q-embeddings": 2046.2375, "encoder_q-layer.0": 1317.9193, "encoder_q-layer.1": 1448.501, "encoder_q-layer.10": 2685.1082, "encoder_q-layer.11": 5602.7471, "encoder_q-layer.2": 1622.9138, "encoder_q-layer.3": 1666.5558, "encoder_q-layer.4": 1738.203, "encoder_q-layer.5": 1846.2487, "encoder_q-layer.6": 1974.5065, "encoder_q-layer.7": 2231.4443, "encoder_q-layer.8": 2658.4304, "encoder_q-layer.9": 2443.1265, "epoch": 0.64, "inbatch_neg_score": 0.0964, "inbatch_pos_score": 0.771, "learning_rate": 1.888888888888889e-05, "loss": 3.3499, "norm_diff": 0.1091, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3614.5415, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0956, "query_norm": 1.2875, "queue_k_norm": 1.394, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.3913, "sent_len_1": 66.5588, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1662, "stdk": 0.0488, "stdq": 0.0456, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66000 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.3533, "doc_norm": 1.3946, "encoder_q-embeddings": 2207.5034, "encoder_q-layer.0": 1450.6392, "encoder_q-layer.1": 1655.6178, "encoder_q-layer.10": 2334.5886, "encoder_q-layer.11": 5272.4556, "encoder_q-layer.2": 1918.8409, "encoder_q-layer.3": 1976.7654, "encoder_q-layer.4": 2073.0354, "encoder_q-layer.5": 2071.8152, "encoder_q-layer.6": 2261.4304, "encoder_q-layer.7": 2396.5486, "encoder_q-layer.8": 2448.3369, "encoder_q-layer.9": 2205.1497, "epoch": 0.65, "inbatch_neg_score": 0.0962, "inbatch_pos_score": 0.7808, "learning_rate": 1.8833333333333335e-05, "loss": 3.3533, "norm_diff": 0.1091, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3663.9092, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0967, "query_norm": 1.2855, "queue_k_norm": 1.3943, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.85, "sent_len_1": 66.8394, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9812, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 66100 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.3459, "doc_norm": 1.3989, "encoder_q-embeddings": 2477.7986, "encoder_q-layer.0": 1795.8162, "encoder_q-layer.1": 1812.2631, "encoder_q-layer.10": 2522.1152, "encoder_q-layer.11": 5162.6396, "encoder_q-layer.2": 2171.3037, "encoder_q-layer.3": 2102.0889, "encoder_q-layer.4": 2405.2649, "encoder_q-layer.5": 2408.2236, "encoder_q-layer.6": 2521.0442, "encoder_q-layer.7": 2694.7354, "encoder_q-layer.8": 2753.5051, "encoder_q-layer.9": 2379.0088, "epoch": 0.65, "inbatch_neg_score": 0.0915, "inbatch_pos_score": 0.7612, "learning_rate": 1.8777777777777777e-05, "loss": 3.3459, "norm_diff": 0.1401, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3940.7699, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0918, "query_norm": 1.2588, "queue_k_norm": 1.3936, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6926, "sent_len_1": 66.7322, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8275, "stdk": 0.049, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 66200 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3507, "doc_norm": 1.3939, "encoder_q-embeddings": 2133.5447, "encoder_q-layer.0": 1490.1339, "encoder_q-layer.1": 1549.8979, "encoder_q-layer.10": 2296.9055, "encoder_q-layer.11": 5558.5205, "encoder_q-layer.2": 1799.2119, "encoder_q-layer.3": 1947.0048, "encoder_q-layer.4": 2052.3379, "encoder_q-layer.5": 2047.6967, "encoder_q-layer.6": 2198.6182, "encoder_q-layer.7": 2272.4443, "encoder_q-layer.8": 2520.5181, "encoder_q-layer.9": 2246.3328, "epoch": 0.65, "inbatch_neg_score": 0.0945, "inbatch_pos_score": 0.7549, "learning_rate": 1.8722222222222223e-05, "loss": 3.3507, "norm_diff": 0.1156, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3722.6017, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.095, "query_norm": 1.2783, "queue_k_norm": 1.3921, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6977, "sent_len_1": 66.8161, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2388, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66300 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.3646, "doc_norm": 1.3937, "encoder_q-embeddings": 2074.2571, "encoder_q-layer.0": 1334.1678, "encoder_q-layer.1": 1408.0344, "encoder_q-layer.10": 2512.5574, "encoder_q-layer.11": 5371.4561, "encoder_q-layer.2": 1639.5365, "encoder_q-layer.3": 1686.5774, "encoder_q-layer.4": 1822.9003, "encoder_q-layer.5": 1909.4316, "encoder_q-layer.6": 2125.7634, "encoder_q-layer.7": 2318.7803, "encoder_q-layer.8": 2686.95, "encoder_q-layer.9": 2404.4519, "epoch": 0.65, "inbatch_neg_score": 0.0902, "inbatch_pos_score": 0.7368, "learning_rate": 1.866666666666667e-05, "loss": 3.3646, "norm_diff": 0.1393, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3624.8986, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0903, "query_norm": 1.2544, "queue_k_norm": 1.3931, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5237, "sent_len_1": 66.7714, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.2512, "stdk": 0.0488, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 66400 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3398, "doc_norm": 1.3942, "encoder_q-embeddings": 1856.99, "encoder_q-layer.0": 1191.1971, "encoder_q-layer.1": 1265.3525, "encoder_q-layer.10": 2545.4678, "encoder_q-layer.11": 5110.6299, "encoder_q-layer.2": 1403.2645, "encoder_q-layer.3": 1458.3038, "encoder_q-layer.4": 1574.0859, "encoder_q-layer.5": 1636.8417, "encoder_q-layer.6": 1818.7958, "encoder_q-layer.7": 2199.272, "encoder_q-layer.8": 2490.5054, "encoder_q-layer.9": 2342.9446, "epoch": 0.65, "inbatch_neg_score": 0.0849, "inbatch_pos_score": 0.7402, "learning_rate": 1.861111111111111e-05, "loss": 3.3398, "norm_diff": 0.1271, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3379.3346, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0854, "query_norm": 1.267, "queue_k_norm": 1.3927, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.721, "sent_len_1": 66.8273, "sent_max_len_0": 127.975, "sent_max_len_1": 189.815, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 66500 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3402, "doc_norm": 1.388, "encoder_q-embeddings": 2299.4602, "encoder_q-layer.0": 1544.8284, "encoder_q-layer.1": 1590.5464, "encoder_q-layer.10": 2392.3955, "encoder_q-layer.11": 5213.519, "encoder_q-layer.2": 1816.7179, "encoder_q-layer.3": 1808.594, "encoder_q-layer.4": 1919.6823, "encoder_q-layer.5": 1920.9218, "encoder_q-layer.6": 2151.5767, "encoder_q-layer.7": 2439.4907, "encoder_q-layer.8": 2712.4475, "encoder_q-layer.9": 2388.5056, "epoch": 0.65, "inbatch_neg_score": 0.0886, "inbatch_pos_score": 0.7607, "learning_rate": 1.8555555555555557e-05, "loss": 3.3402, "norm_diff": 0.0868, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3679.7969, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0887, "query_norm": 1.3012, "queue_k_norm": 1.3924, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6465, "sent_len_1": 66.6199, "sent_max_len_0": 128.0, "sent_max_len_1": 189.97, "stdk": 0.0486, "stdq": 0.0458, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 66600 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.3649, "doc_norm": 1.3963, "encoder_q-embeddings": 2332.1365, "encoder_q-layer.0": 1552.2441, "encoder_q-layer.1": 1695.7542, "encoder_q-layer.10": 2389.6602, "encoder_q-layer.11": 5207.9312, "encoder_q-layer.2": 1892.823, "encoder_q-layer.3": 1959.5648, "encoder_q-layer.4": 2047.781, "encoder_q-layer.5": 2197.5701, "encoder_q-layer.6": 2407.9971, "encoder_q-layer.7": 2539.5627, "encoder_q-layer.8": 2822.9346, "encoder_q-layer.9": 2317.9062, "epoch": 0.65, "inbatch_neg_score": 0.0879, "inbatch_pos_score": 0.7578, "learning_rate": 1.85e-05, "loss": 3.3649, "norm_diff": 0.1424, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3761.6256, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0882, "query_norm": 1.2539, "queue_k_norm": 1.3899, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4196, "sent_len_1": 66.6202, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2562, "stdk": 0.049, "stdq": 0.0442, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66700 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.3513, "doc_norm": 1.3919, "encoder_q-embeddings": 2640.3254, "encoder_q-layer.0": 1874.005, "encoder_q-layer.1": 1887.0391, "encoder_q-layer.10": 2334.8389, "encoder_q-layer.11": 5091.6592, "encoder_q-layer.2": 2105.2734, "encoder_q-layer.3": 2063.5938, "encoder_q-layer.4": 2516.9678, "encoder_q-layer.5": 2329.5781, "encoder_q-layer.6": 2538.6738, "encoder_q-layer.7": 2687.1414, "encoder_q-layer.8": 2870.2224, "encoder_q-layer.9": 2367.6272, "epoch": 0.65, "inbatch_neg_score": 0.0902, "inbatch_pos_score": 0.7729, "learning_rate": 1.8444444444444445e-05, "loss": 3.3513, "norm_diff": 0.1012, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3982.2248, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0909, "query_norm": 1.2906, "queue_k_norm": 1.3893, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5156, "sent_len_1": 66.78, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.7562, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66800 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.3501, "doc_norm": 1.3939, "encoder_q-embeddings": 2188.2947, "encoder_q-layer.0": 1540.2295, "encoder_q-layer.1": 1623.972, "encoder_q-layer.10": 2272.7227, "encoder_q-layer.11": 5110.2212, "encoder_q-layer.2": 1770.8741, "encoder_q-layer.3": 1801.7876, "encoder_q-layer.4": 1979.1123, "encoder_q-layer.5": 1847.754, "encoder_q-layer.6": 2037.796, "encoder_q-layer.7": 2164.6641, "encoder_q-layer.8": 2543.343, "encoder_q-layer.9": 2225.4536, "epoch": 0.65, "inbatch_neg_score": 0.0895, "inbatch_pos_score": 0.7715, "learning_rate": 1.838888888888889e-05, "loss": 3.3501, "norm_diff": 0.1147, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3554.087, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0908, "query_norm": 1.2791, "queue_k_norm": 1.3902, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7467, "sent_len_1": 66.6261, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.9038, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 66900 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.3636, "doc_norm": 1.3896, "encoder_q-embeddings": 1784.866, "encoder_q-layer.0": 1151.1694, "encoder_q-layer.1": 1224.9539, "encoder_q-layer.10": 2389.4717, "encoder_q-layer.11": 5133.335, "encoder_q-layer.2": 1340.1136, "encoder_q-layer.3": 1381.1865, "encoder_q-layer.4": 1464.8123, "encoder_q-layer.5": 1553.5968, "encoder_q-layer.6": 1824.9985, "encoder_q-layer.7": 2168.5811, "encoder_q-layer.8": 2478.9814, "encoder_q-layer.9": 2322.8909, "epoch": 0.65, "inbatch_neg_score": 0.0842, "inbatch_pos_score": 0.748, "learning_rate": 1.8333333333333333e-05, "loss": 3.3636, "norm_diff": 0.1051, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3311.5519, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0843, "query_norm": 1.2846, "queue_k_norm": 1.3888, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5005, "sent_len_1": 66.9245, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9462, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67000 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3699, "doc_norm": 1.3914, "encoder_q-embeddings": 1804.666, "encoder_q-layer.0": 1209.6207, "encoder_q-layer.1": 1291.4633, "encoder_q-layer.10": 2439.5728, "encoder_q-layer.11": 5648.9487, "encoder_q-layer.2": 1407.973, "encoder_q-layer.3": 1457.7495, "encoder_q-layer.4": 1570.7544, "encoder_q-layer.5": 1585.1089, "encoder_q-layer.6": 1754.329, "encoder_q-layer.7": 2060.7246, "encoder_q-layer.8": 2586.9229, "encoder_q-layer.9": 2333.8499, "epoch": 0.66, "inbatch_neg_score": 0.0822, "inbatch_pos_score": 0.7402, "learning_rate": 1.827777777777778e-05, "loss": 3.3699, "norm_diff": 0.1481, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3488.2616, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0828, "query_norm": 1.2433, "queue_k_norm": 1.3899, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.3868, "sent_len_1": 66.7121, "sent_max_len_0": 127.9838, "sent_max_len_1": 188.9487, "stdk": 0.0489, "stdq": 0.0437, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 67100 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.357, "doc_norm": 1.3805, "encoder_q-embeddings": 1964.5272, "encoder_q-layer.0": 1281.0222, "encoder_q-layer.1": 1427.33, "encoder_q-layer.10": 2544.7188, "encoder_q-layer.11": 5629.6523, "encoder_q-layer.2": 1634.1151, "encoder_q-layer.3": 1709.079, "encoder_q-layer.4": 1910.3966, "encoder_q-layer.5": 1958.0502, "encoder_q-layer.6": 2221.1289, "encoder_q-layer.7": 2420.0518, "encoder_q-layer.8": 2877.4189, "encoder_q-layer.9": 2459.8381, "epoch": 0.66, "inbatch_neg_score": 0.0846, "inbatch_pos_score": 0.729, "learning_rate": 1.8222222222222224e-05, "loss": 3.357, "norm_diff": 0.1193, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3732.3888, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0844, "query_norm": 1.2612, "queue_k_norm": 1.3886, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.492, "sent_len_1": 66.6234, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1637, "stdk": 0.0484, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67200 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.3308, "doc_norm": 1.3975, "encoder_q-embeddings": 2070.3682, "encoder_q-layer.0": 1373.9231, "encoder_q-layer.1": 1445.2917, "encoder_q-layer.10": 2265.7981, "encoder_q-layer.11": 5119.3428, "encoder_q-layer.2": 1596.269, "encoder_q-layer.3": 1657.5457, "encoder_q-layer.4": 1841.1129, "encoder_q-layer.5": 1892.9529, "encoder_q-layer.6": 2020.874, "encoder_q-layer.7": 2188.374, "encoder_q-layer.8": 2525.9893, "encoder_q-layer.9": 2217.3076, "epoch": 0.66, "inbatch_neg_score": 0.0819, "inbatch_pos_score": 0.7539, "learning_rate": 1.8166666666666667e-05, "loss": 3.3308, "norm_diff": 0.1225, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3485.7841, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0827, "query_norm": 1.275, "queue_k_norm": 1.3893, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6913, "sent_len_1": 66.8869, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2488, "stdk": 0.0491, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 67300 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.363, "doc_norm": 1.389, "encoder_q-embeddings": 2026.197, "encoder_q-layer.0": 1367.1262, "encoder_q-layer.1": 1454.3145, "encoder_q-layer.10": 2284.1782, "encoder_q-layer.11": 5090.9551, "encoder_q-layer.2": 1643.3813, "encoder_q-layer.3": 1818.0878, "encoder_q-layer.4": 1793.7014, "encoder_q-layer.5": 1892.5245, "encoder_q-layer.6": 2093.2097, "encoder_q-layer.7": 2383.0457, "encoder_q-layer.8": 2646.3433, "encoder_q-layer.9": 2203.2561, "epoch": 0.66, "inbatch_neg_score": 0.0852, "inbatch_pos_score": 0.7603, "learning_rate": 1.8111111111111112e-05, "loss": 3.363, "norm_diff": 0.1145, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3508.7472, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0847, "query_norm": 1.2746, "queue_k_norm": 1.3868, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7106, "sent_len_1": 67.0759, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4075, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67400 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.3521, "doc_norm": 1.3832, "encoder_q-embeddings": 2087.9929, "encoder_q-layer.0": 1407.2649, "encoder_q-layer.1": 1494.7313, "encoder_q-layer.10": 2305.8176, "encoder_q-layer.11": 5275.0923, "encoder_q-layer.2": 1741.5103, "encoder_q-layer.3": 1848.7882, "encoder_q-layer.4": 2059.0269, "encoder_q-layer.5": 2029.3256, "encoder_q-layer.6": 2194.6794, "encoder_q-layer.7": 2341.8784, "encoder_q-layer.8": 2671.3457, "encoder_q-layer.9": 2338.3945, "epoch": 0.66, "inbatch_neg_score": 0.0833, "inbatch_pos_score": 0.7231, "learning_rate": 1.8055555555555555e-05, "loss": 3.3521, "norm_diff": 0.1007, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3660.016, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0834, "query_norm": 1.2825, "queue_k_norm": 1.388, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7318, "sent_len_1": 67.0212, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1975, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 67500 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.3594, "doc_norm": 1.3909, "encoder_q-embeddings": 1952.7802, "encoder_q-layer.0": 1251.9373, "encoder_q-layer.1": 1316.1721, "encoder_q-layer.10": 2418.4839, "encoder_q-layer.11": 5247.7749, "encoder_q-layer.2": 1434.3777, "encoder_q-layer.3": 1509.3335, "encoder_q-layer.4": 1624.0515, "encoder_q-layer.5": 1690.0602, "encoder_q-layer.6": 1887.1613, "encoder_q-layer.7": 2186.936, "encoder_q-layer.8": 2615.4546, "encoder_q-layer.9": 2294.2849, "epoch": 0.66, "inbatch_neg_score": 0.0924, "inbatch_pos_score": 0.7578, "learning_rate": 1.8e-05, "loss": 3.3594, "norm_diff": 0.1183, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3471.7142, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0919, "query_norm": 1.2726, "queue_k_norm": 1.3864, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4352, "sent_len_1": 66.7501, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.7975, "stdk": 0.0489, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67600 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.3463, "doc_norm": 1.3848, "encoder_q-embeddings": 2107.4031, "encoder_q-layer.0": 1436.537, "encoder_q-layer.1": 1519.9878, "encoder_q-layer.10": 2449.5876, "encoder_q-layer.11": 5169.8042, "encoder_q-layer.2": 1694.5356, "encoder_q-layer.3": 1721.5439, "encoder_q-layer.4": 1811.3899, "encoder_q-layer.5": 1745.6031, "encoder_q-layer.6": 1915.1029, "encoder_q-layer.7": 2156.126, "encoder_q-layer.8": 2504.053, "encoder_q-layer.9": 2306.1626, "epoch": 0.66, "inbatch_neg_score": 0.0903, "inbatch_pos_score": 0.7393, "learning_rate": 1.7944444444444443e-05, "loss": 3.3463, "norm_diff": 0.0846, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3521.4503, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0905, "query_norm": 1.3002, "queue_k_norm": 1.3879, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.3445, "sent_len_1": 66.8838, "sent_max_len_0": 128.0, "sent_max_len_1": 188.96, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 67700 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.341, "doc_norm": 1.3878, "encoder_q-embeddings": 3912.9417, "encoder_q-layer.0": 2650.3992, "encoder_q-layer.1": 2752.2432, "encoder_q-layer.10": 4726.978, "encoder_q-layer.11": 10625.8076, "encoder_q-layer.2": 3130.2708, "encoder_q-layer.3": 3220.6313, "encoder_q-layer.4": 3278.3853, "encoder_q-layer.5": 3311.2576, "encoder_q-layer.6": 3710.9587, "encoder_q-layer.7": 4186.4814, "encoder_q-layer.8": 5001.2778, "encoder_q-layer.9": 4607.145, "epoch": 0.66, "inbatch_neg_score": 0.0935, "inbatch_pos_score": 0.7568, "learning_rate": 1.788888888888889e-05, "loss": 3.341, "norm_diff": 0.0795, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6863.7832, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0924, "query_norm": 1.3083, "queue_k_norm": 1.3872, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5613, "sent_len_1": 66.3966, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.275, "stdk": 0.0487, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67800 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.3511, "doc_norm": 1.3839, "encoder_q-embeddings": 4185.2354, "encoder_q-layer.0": 2869.9333, "encoder_q-layer.1": 3110.5503, "encoder_q-layer.10": 4934.6226, "encoder_q-layer.11": 10207.4482, "encoder_q-layer.2": 3539.3149, "encoder_q-layer.3": 3708.4507, "encoder_q-layer.4": 4005.6897, "encoder_q-layer.5": 4038.1006, "encoder_q-layer.6": 4299.623, "encoder_q-layer.7": 4613.9888, "encoder_q-layer.8": 4943.5225, "encoder_q-layer.9": 4450.98, "epoch": 0.66, "inbatch_neg_score": 0.0977, "inbatch_pos_score": 0.7612, "learning_rate": 1.7833333333333334e-05, "loss": 3.3511, "norm_diff": 0.0715, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7120.8701, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.097, "query_norm": 1.3124, "queue_k_norm": 1.3862, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7943, "sent_len_1": 66.4849, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7063, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67900 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.3277, "doc_norm": 1.3819, "encoder_q-embeddings": 3593.2966, "encoder_q-layer.0": 2387.1787, "encoder_q-layer.1": 2492.863, "encoder_q-layer.10": 4686.978, "encoder_q-layer.11": 10603.3604, "encoder_q-layer.2": 2830.0728, "encoder_q-layer.3": 2900.3381, "encoder_q-layer.4": 3058.291, "encoder_q-layer.5": 3167.0161, "encoder_q-layer.6": 3575.6033, "encoder_q-layer.7": 3871.6328, "encoder_q-layer.8": 4648.519, "encoder_q-layer.9": 4388.8188, "epoch": 0.66, "inbatch_neg_score": 0.1056, "inbatch_pos_score": 0.7681, "learning_rate": 1.777777777777778e-05, "loss": 3.3277, "norm_diff": 0.0724, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6668.3808, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1053, "query_norm": 1.3095, "queue_k_norm": 1.3869, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6386, "sent_len_1": 66.6614, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.3787, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68000 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3468, "doc_norm": 1.3882, "encoder_q-embeddings": 4942.5679, "encoder_q-layer.0": 3305.9788, "encoder_q-layer.1": 3401.5032, "encoder_q-layer.10": 4663.541, "encoder_q-layer.11": 10606.7305, "encoder_q-layer.2": 3965.6611, "encoder_q-layer.3": 4291.8223, "encoder_q-layer.4": 4524.6914, "encoder_q-layer.5": 4355.0737, "encoder_q-layer.6": 4581.3774, "encoder_q-layer.7": 4788.3467, "encoder_q-layer.8": 5384.6802, "encoder_q-layer.9": 4606.4746, "epoch": 0.66, "inbatch_neg_score": 0.1112, "inbatch_pos_score": 0.772, "learning_rate": 1.7722222222222222e-05, "loss": 3.3468, "norm_diff": 0.0728, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7643.1478, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1107, "query_norm": 1.3155, "queue_k_norm": 1.3869, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.3143, "sent_len_1": 66.7091, "sent_max_len_0": 128.0, "sent_max_len_1": 189.25, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68100 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3487, "doc_norm": 1.3825, "encoder_q-embeddings": 2859.4563, "encoder_q-layer.0": 1995.4148, "encoder_q-layer.1": 2175.5408, "encoder_q-layer.10": 2320.9644, "encoder_q-layer.11": 5288.7402, "encoder_q-layer.2": 2499.6094, "encoder_q-layer.3": 2590.8413, "encoder_q-layer.4": 2918.8008, "encoder_q-layer.5": 2889.8223, "encoder_q-layer.6": 2855.9978, "encoder_q-layer.7": 2489.6467, "encoder_q-layer.8": 2833.0991, "encoder_q-layer.9": 2294.9824, "epoch": 0.67, "inbatch_neg_score": 0.1152, "inbatch_pos_score": 0.7622, "learning_rate": 1.7666666666666668e-05, "loss": 3.3487, "norm_diff": 0.0628, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4282.5087, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1146, "query_norm": 1.3197, "queue_k_norm": 1.3874, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4318, "sent_len_1": 66.8508, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5275, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68200 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.329, "doc_norm": 1.3903, "encoder_q-embeddings": 3490.998, "encoder_q-layer.0": 2496.147, "encoder_q-layer.1": 2636.0996, "encoder_q-layer.10": 2295.8879, "encoder_q-layer.11": 5432.3794, "encoder_q-layer.2": 3109.5762, "encoder_q-layer.3": 3508.7988, "encoder_q-layer.4": 3883.9783, "encoder_q-layer.5": 3391.5952, "encoder_q-layer.6": 3352.5256, "encoder_q-layer.7": 3214.3655, "encoder_q-layer.8": 3151.3123, "encoder_q-layer.9": 2321.0583, "epoch": 0.67, "inbatch_neg_score": 0.1202, "inbatch_pos_score": 0.8066, "learning_rate": 1.761111111111111e-05, "loss": 3.329, "norm_diff": 0.0587, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5056.3139, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1205, "query_norm": 1.3316, "queue_k_norm": 1.3887, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6518, "sent_len_1": 66.6586, "sent_max_len_0": 127.9975, "sent_max_len_1": 187.7488, "stdk": 0.0488, "stdq": 0.0456, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68300 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.3538, "doc_norm": 1.3992, "encoder_q-embeddings": 2272.5776, "encoder_q-layer.0": 1524.5859, "encoder_q-layer.1": 1653.9384, "encoder_q-layer.10": 2467.575, "encoder_q-layer.11": 5397.2866, "encoder_q-layer.2": 1875.0326, "encoder_q-layer.3": 1945.9813, "encoder_q-layer.4": 2024.8405, "encoder_q-layer.5": 2089.4297, "encoder_q-layer.6": 2351.916, "encoder_q-layer.7": 2654.0049, "encoder_q-layer.8": 2739.5481, "encoder_q-layer.9": 2378.5295, "epoch": 0.67, "inbatch_neg_score": 0.1211, "inbatch_pos_score": 0.7935, "learning_rate": 1.7555555555555556e-05, "loss": 3.3538, "norm_diff": 0.082, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3837.313, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.12, "query_norm": 1.3172, "queue_k_norm": 1.3905, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7219, "sent_len_1": 66.7626, "sent_max_len_0": 127.9938, "sent_max_len_1": 188.3913, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 68400 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3473, "doc_norm": 1.3882, "encoder_q-embeddings": 2004.9735, "encoder_q-layer.0": 1351.7587, "encoder_q-layer.1": 1423.7401, "encoder_q-layer.10": 2382.1072, "encoder_q-layer.11": 5300.3496, "encoder_q-layer.2": 1571.5553, "encoder_q-layer.3": 1638.9584, "encoder_q-layer.4": 1784.1592, "encoder_q-layer.5": 1847.0382, "encoder_q-layer.6": 2034.9736, "encoder_q-layer.7": 2205.2971, "encoder_q-layer.8": 2538.8987, "encoder_q-layer.9": 2303.0784, "epoch": 0.67, "inbatch_neg_score": 0.1234, "inbatch_pos_score": 0.7681, "learning_rate": 1.75e-05, "loss": 3.3473, "norm_diff": 0.0803, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3511.7949, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1232, "query_norm": 1.3079, "queue_k_norm": 1.3917, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.3361, "sent_len_1": 66.889, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2988, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 68500 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.3294, "doc_norm": 1.3969, "encoder_q-embeddings": 1913.5483, "encoder_q-layer.0": 1264.5276, "encoder_q-layer.1": 1374.3066, "encoder_q-layer.10": 2461.5586, "encoder_q-layer.11": 5187.9893, "encoder_q-layer.2": 1576.8503, "encoder_q-layer.3": 1652.1241, "encoder_q-layer.4": 1851.5186, "encoder_q-layer.5": 1928.4487, "encoder_q-layer.6": 2059.0908, "encoder_q-layer.7": 2391.9602, "encoder_q-layer.8": 2674.5981, "encoder_q-layer.9": 2386.0864, "epoch": 0.67, "inbatch_neg_score": 0.1225, "inbatch_pos_score": 0.8125, "learning_rate": 1.7444444444444448e-05, "loss": 3.3294, "norm_diff": 0.0922, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3575.9062, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1229, "query_norm": 1.3046, "queue_k_norm": 1.3911, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.3811, "sent_len_1": 66.7658, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0488, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68600 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3486, "doc_norm": 1.3945, "encoder_q-embeddings": 1956.8519, "encoder_q-layer.0": 1312.0867, "encoder_q-layer.1": 1398.2479, "encoder_q-layer.10": 2295.4919, "encoder_q-layer.11": 5309.6514, "encoder_q-layer.2": 1599.2053, "encoder_q-layer.3": 1641.8079, "encoder_q-layer.4": 1832.5936, "encoder_q-layer.5": 1782.6754, "encoder_q-layer.6": 1939.6289, "encoder_q-layer.7": 2091.4575, "encoder_q-layer.8": 2438.1704, "encoder_q-layer.9": 2205.5278, "epoch": 0.67, "inbatch_neg_score": 0.1181, "inbatch_pos_score": 0.7671, "learning_rate": 1.738888888888889e-05, "loss": 3.3486, "norm_diff": 0.1107, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3493.1297, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1181, "query_norm": 1.2839, "queue_k_norm": 1.3928, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6179, "sent_len_1": 66.4504, "sent_max_len_0": 127.9975, "sent_max_len_1": 188.9425, "stdk": 0.0489, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 68700 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3381, "doc_norm": 1.3978, "encoder_q-embeddings": 2923.3147, "encoder_q-layer.0": 2041.7386, "encoder_q-layer.1": 1861.6652, "encoder_q-layer.10": 2204.1482, "encoder_q-layer.11": 5185.0186, "encoder_q-layer.2": 2043.0369, "encoder_q-layer.3": 2160.6614, "encoder_q-layer.4": 2261.634, "encoder_q-layer.5": 2254.0789, "encoder_q-layer.6": 2386.8059, "encoder_q-layer.7": 2410.4912, "encoder_q-layer.8": 2556.3962, "encoder_q-layer.9": 2227.5437, "epoch": 0.67, "inbatch_neg_score": 0.1167, "inbatch_pos_score": 0.7842, "learning_rate": 1.7333333333333336e-05, "loss": 3.3381, "norm_diff": 0.1058, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3980.3518, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1172, "query_norm": 1.2921, "queue_k_norm": 1.3931, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5127, "sent_len_1": 66.6833, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.135, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 68800 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.342, "doc_norm": 1.3989, "encoder_q-embeddings": 2003.5176, "encoder_q-layer.0": 1377.1354, "encoder_q-layer.1": 1435.918, "encoder_q-layer.10": 2238.959, "encoder_q-layer.11": 4943.813, "encoder_q-layer.2": 1589.4521, "encoder_q-layer.3": 1619.67, "encoder_q-layer.4": 1792.6414, "encoder_q-layer.5": 1817.8535, "encoder_q-layer.6": 2010.1993, "encoder_q-layer.7": 2091.9219, "encoder_q-layer.8": 2381.4248, "encoder_q-layer.9": 2193.8579, "epoch": 0.67, "inbatch_neg_score": 0.1217, "inbatch_pos_score": 0.8066, "learning_rate": 1.7277777777777778e-05, "loss": 3.342, "norm_diff": 0.1106, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3395.7012, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1212, "query_norm": 1.2882, "queue_k_norm": 1.3958, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.65, "sent_len_1": 66.9023, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5575, "stdk": 0.049, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 68900 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.317, "doc_norm": 1.4005, "encoder_q-embeddings": 1997.8928, "encoder_q-layer.0": 1369.8842, "encoder_q-layer.1": 1539.6688, "encoder_q-layer.10": 2182.6382, "encoder_q-layer.11": 5078.5415, "encoder_q-layer.2": 1742.3309, "encoder_q-layer.3": 1853.6135, "encoder_q-layer.4": 2031.2345, "encoder_q-layer.5": 2044.6516, "encoder_q-layer.6": 2268.3928, "encoder_q-layer.7": 2404.489, "encoder_q-layer.8": 2539.1753, "encoder_q-layer.9": 2265.0776, "epoch": 0.67, "inbatch_neg_score": 0.1155, "inbatch_pos_score": 0.7964, "learning_rate": 1.7222222222222224e-05, "loss": 3.317, "norm_diff": 0.1219, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3568.7559, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1155, "query_norm": 1.2786, "queue_k_norm": 1.3937, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.8775, "sent_len_1": 67.0072, "sent_max_len_0": 128.0, "sent_max_len_1": 188.29, "stdk": 0.049, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 69000 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.3353, "doc_norm": 1.3907, "encoder_q-embeddings": 1958.4075, "encoder_q-layer.0": 1310.1843, "encoder_q-layer.1": 1347.4297, "encoder_q-layer.10": 2295.1675, "encoder_q-layer.11": 5127.854, "encoder_q-layer.2": 1473.2806, "encoder_q-layer.3": 1527.0994, "encoder_q-layer.4": 1606.5192, "encoder_q-layer.5": 1684.9746, "encoder_q-layer.6": 1836.765, "encoder_q-layer.7": 2055.9971, "encoder_q-layer.8": 2542.6829, "encoder_q-layer.9": 2329.2893, "epoch": 0.67, "inbatch_neg_score": 0.1168, "inbatch_pos_score": 0.7837, "learning_rate": 1.7166666666666666e-05, "loss": 3.3353, "norm_diff": 0.1122, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3435.9342, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1165, "query_norm": 1.2785, "queue_k_norm": 1.395, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5175, "sent_len_1": 66.8866, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.905, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 69100 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3448, "doc_norm": 1.3867, "encoder_q-embeddings": 1781.2972, "encoder_q-layer.0": 1191.2567, "encoder_q-layer.1": 1261.3656, "encoder_q-layer.10": 2379.0566, "encoder_q-layer.11": 5727.5996, "encoder_q-layer.2": 1420.5988, "encoder_q-layer.3": 1463.9216, "encoder_q-layer.4": 1518.4282, "encoder_q-layer.5": 1649.1077, "encoder_q-layer.6": 1816.3939, "encoder_q-layer.7": 2021.7437, "encoder_q-layer.8": 2478.0344, "encoder_q-layer.9": 2299.4375, "epoch": 0.68, "inbatch_neg_score": 0.1116, "inbatch_pos_score": 0.7666, "learning_rate": 1.7111111111111112e-05, "loss": 3.3448, "norm_diff": 0.1177, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3441.4386, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1107, "query_norm": 1.269, "queue_k_norm": 1.3943, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4907, "sent_len_1": 66.5969, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5525, "stdk": 0.0485, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 69200 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3293, "doc_norm": 1.3912, "encoder_q-embeddings": 1875.6261, "encoder_q-layer.0": 1226.5361, "encoder_q-layer.1": 1368.4644, "encoder_q-layer.10": 2484.77, "encoder_q-layer.11": 5131.1079, "encoder_q-layer.2": 1492.5192, "encoder_q-layer.3": 1528.0377, "encoder_q-layer.4": 1596.8795, "encoder_q-layer.5": 1685.9794, "encoder_q-layer.6": 1953.2104, "encoder_q-layer.7": 2254.9253, "encoder_q-layer.8": 2621.5481, "encoder_q-layer.9": 2332.1377, "epoch": 0.68, "inbatch_neg_score": 0.107, "inbatch_pos_score": 0.769, "learning_rate": 1.7055555555555554e-05, "loss": 3.3293, "norm_diff": 0.1161, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3457.3009, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1061, "query_norm": 1.2751, "queue_k_norm": 1.3948, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6814, "sent_len_1": 66.9374, "sent_max_len_0": 128.0, "sent_max_len_1": 189.975, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 69300 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.3198, "doc_norm": 1.4008, "encoder_q-embeddings": 1995.2955, "encoder_q-layer.0": 1277.4565, "encoder_q-layer.1": 1383.562, "encoder_q-layer.10": 2319.269, "encoder_q-layer.11": 5392.1772, "encoder_q-layer.2": 1540.0997, "encoder_q-layer.3": 1610.8176, "encoder_q-layer.4": 1717.3606, "encoder_q-layer.5": 1821.6481, "encoder_q-layer.6": 2019.932, "encoder_q-layer.7": 2251.1052, "encoder_q-layer.8": 2514.8618, "encoder_q-layer.9": 2228.4194, "epoch": 0.68, "inbatch_neg_score": 0.1031, "inbatch_pos_score": 0.7944, "learning_rate": 1.7000000000000003e-05, "loss": 3.3198, "norm_diff": 0.1112, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3553.4132, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1041, "query_norm": 1.2895, "queue_k_norm": 1.3947, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.712, "sent_len_1": 66.7088, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2663, "stdk": 0.049, "stdq": 0.0455, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 69400 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.3327, "doc_norm": 1.3977, "encoder_q-embeddings": 2615.8496, "encoder_q-layer.0": 1739.6616, "encoder_q-layer.1": 1967.6582, "encoder_q-layer.10": 2267.1216, "encoder_q-layer.11": 5144.9058, "encoder_q-layer.2": 2365.5125, "encoder_q-layer.3": 2501.3445, "encoder_q-layer.4": 2538.749, "encoder_q-layer.5": 2491.9668, "encoder_q-layer.6": 2579.9614, "encoder_q-layer.7": 2643.1504, "encoder_q-layer.8": 2782.6631, "encoder_q-layer.9": 2269.6438, "epoch": 0.68, "inbatch_neg_score": 0.1025, "inbatch_pos_score": 0.7622, "learning_rate": 1.6944444444444446e-05, "loss": 3.3327, "norm_diff": 0.1151, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4057.251, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.103, "query_norm": 1.2826, "queue_k_norm": 1.395, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7037, "sent_len_1": 66.7459, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2275, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 69500 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.3259, "doc_norm": 1.3893, "encoder_q-embeddings": 2187.9558, "encoder_q-layer.0": 1476.4524, "encoder_q-layer.1": 1519.6149, "encoder_q-layer.10": 2487.0945, "encoder_q-layer.11": 5419.8169, "encoder_q-layer.2": 1695.6301, "encoder_q-layer.3": 1762.5454, "encoder_q-layer.4": 1852.9404, "encoder_q-layer.5": 1933.1913, "encoder_q-layer.6": 2144.8225, "encoder_q-layer.7": 2342.2371, "encoder_q-layer.8": 2603.1755, "encoder_q-layer.9": 2387.208, "epoch": 0.68, "inbatch_neg_score": 0.0988, "inbatch_pos_score": 0.7612, "learning_rate": 1.688888888888889e-05, "loss": 3.3259, "norm_diff": 0.1188, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3701.1814, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1006, "query_norm": 1.2705, "queue_k_norm": 1.3945, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6697, "sent_len_1": 66.8059, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0412, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 69600 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.3307, "doc_norm": 1.3896, "encoder_q-embeddings": 1924.1337, "encoder_q-layer.0": 1295.8087, "encoder_q-layer.1": 1378.6252, "encoder_q-layer.10": 2245.2908, "encoder_q-layer.11": 5259.5059, "encoder_q-layer.2": 1527.2327, "encoder_q-layer.3": 1567.2444, "encoder_q-layer.4": 1666.7795, "encoder_q-layer.5": 1734.5845, "encoder_q-layer.6": 1972.8781, "encoder_q-layer.7": 2216.2129, "encoder_q-layer.8": 2475.1135, "encoder_q-layer.9": 2298.3604, "epoch": 0.68, "inbatch_neg_score": 0.1004, "inbatch_pos_score": 0.7749, "learning_rate": 1.6833333333333334e-05, "loss": 3.3307, "norm_diff": 0.1113, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3451.4998, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1008, "query_norm": 1.2783, "queue_k_norm": 1.3954, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7249, "sent_len_1": 66.65, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9263, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 69700 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3228, "doc_norm": 1.3963, "encoder_q-embeddings": 1963.0405, "encoder_q-layer.0": 1404.229, "encoder_q-layer.1": 1452.5742, "encoder_q-layer.10": 2390.4238, "encoder_q-layer.11": 5016.1436, "encoder_q-layer.2": 1691.9493, "encoder_q-layer.3": 1777.2155, "encoder_q-layer.4": 1982.08, "encoder_q-layer.5": 1983.8739, "encoder_q-layer.6": 2119.8699, "encoder_q-layer.7": 2233.0173, "encoder_q-layer.8": 2524.843, "encoder_q-layer.9": 2179.5576, "epoch": 0.68, "inbatch_neg_score": 0.0996, "inbatch_pos_score": 0.7607, "learning_rate": 1.677777777777778e-05, "loss": 3.3228, "norm_diff": 0.1291, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3498.1879, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0991, "query_norm": 1.2672, "queue_k_norm": 1.3956, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5018, "sent_len_1": 66.6966, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.8688, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 69800 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.3194, "doc_norm": 1.3884, "encoder_q-embeddings": 3953.9558, "encoder_q-layer.0": 2736.8538, "encoder_q-layer.1": 2722.4204, "encoder_q-layer.10": 2299.3506, "encoder_q-layer.11": 5260.6699, "encoder_q-layer.2": 3026.1726, "encoder_q-layer.3": 3256.8689, "encoder_q-layer.4": 3152.1194, "encoder_q-layer.5": 2930.5913, "encoder_q-layer.6": 2609.5754, "encoder_q-layer.7": 2865.0562, "encoder_q-layer.8": 2677.2805, "encoder_q-layer.9": 2267.8259, "epoch": 0.68, "inbatch_neg_score": 0.0992, "inbatch_pos_score": 0.7783, "learning_rate": 1.6722222222222222e-05, "loss": 3.3194, "norm_diff": 0.1231, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4781.7041, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.099, "query_norm": 1.2652, "queue_k_norm": 1.3941, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7963, "sent_len_1": 66.7761, "sent_max_len_0": 127.9887, "sent_max_len_1": 190.1163, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 69900 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.3076, "doc_norm": 1.3965, "encoder_q-embeddings": 2173.271, "encoder_q-layer.0": 1536.5691, "encoder_q-layer.1": 1632.396, "encoder_q-layer.10": 2296.5312, "encoder_q-layer.11": 4881.6265, "encoder_q-layer.2": 1896.1161, "encoder_q-layer.3": 1927.4392, "encoder_q-layer.4": 2033.3049, "encoder_q-layer.5": 2073.9153, "encoder_q-layer.6": 2228.1372, "encoder_q-layer.7": 2366.7747, "encoder_q-layer.8": 2540.4426, "encoder_q-layer.9": 2270.8081, "epoch": 0.68, "inbatch_neg_score": 0.0935, "inbatch_pos_score": 0.7773, "learning_rate": 1.6666666666666667e-05, "loss": 3.3076, "norm_diff": 0.1176, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3565.3481, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0942, "query_norm": 1.2789, "queue_k_norm": 1.395, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6942, "sent_len_1": 66.7865, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6225, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 70000 }, { "dev_runtime": 27.8515, "dev_samples_per_second": 2.298, "dev_steps_per_second": 0.036, "epoch": 0.68, "step": 70000, "test_accuracy": 94.04296875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.34075427055358887, "test_doc_norm": 1.3685663938522339, "test_inbatch_neg_score": 0.4555176794528961, "test_inbatch_pos_score": 1.3934296369552612, "test_loss": 0.34075427055358887, "test_loss_align": 1.016262173652649, "test_loss_unif": 3.960996389389038, "test_loss_unif_q@queue": 3.960996389389038, "test_norm_diff": 0.013060055673122406, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.09636950492858887, "test_query_norm": 1.3787803649902344, "test_queue_k_norm": 1.3954826593399048, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04259835183620453, "test_stdq": 0.04275026172399521, "test_stdqueue_k": 0.048995643854141235, "test_stdqueue_q": 0.0 }, { "dev_runtime": 27.8515, "dev_samples_per_second": 2.298, "dev_steps_per_second": 0.036, "epoch": 0.68, "eval_beir-arguana_ndcg@10": 0.36229, "eval_beir-arguana_recall@10": 0.61095, "eval_beir-arguana_recall@100": 0.89687, "eval_beir-arguana_recall@20": 0.73542, "eval_beir-avg_ndcg@10": 0.3824545, "eval_beir-avg_recall@10": 0.45144141666666665, "eval_beir-avg_recall@100": 0.6294208333333333, "eval_beir-avg_recall@20": 0.5077976666666666, "eval_beir-cqadupstack_ndcg@10": 0.270445, "eval_beir-cqadupstack_recall@10": 0.36397416666666665, "eval_beir-cqadupstack_recall@100": 0.5954583333333333, "eval_beir-cqadupstack_recall@20": 0.43299666666666664, "eval_beir-fiqa_ndcg@10": 0.24718, "eval_beir-fiqa_recall@10": 0.30774, "eval_beir-fiqa_recall@100": 0.57684, "eval_beir-fiqa_recall@20": 0.38012, "eval_beir-nfcorpus_ndcg@10": 0.29602, "eval_beir-nfcorpus_recall@10": 0.14706, "eval_beir-nfcorpus_recall@100": 0.27741, "eval_beir-nfcorpus_recall@20": 0.17702, "eval_beir-nq_ndcg@10": 0.28421, "eval_beir-nq_recall@10": 0.47011, "eval_beir-nq_recall@100": 0.7906, "eval_beir-nq_recall@20": 0.58514, "eval_beir-quora_ndcg@10": 0.80941, "eval_beir-quora_recall@10": 0.90467, "eval_beir-quora_recall@100": 0.98205, "eval_beir-quora_recall@20": 0.9407, "eval_beir-scidocs_ndcg@10": 0.15464, "eval_beir-scidocs_recall@10": 0.16068, "eval_beir-scidocs_recall@100": 0.36585, "eval_beir-scidocs_recall@20": 0.21692, "eval_beir-scifact_ndcg@10": 0.63288, "eval_beir-scifact_recall@10": 0.78556, "eval_beir-scifact_recall@100": 0.90656, "eval_beir-scifact_recall@20": 0.83289, "eval_beir-trec-covid_ndcg@10": 0.57306, "eval_beir-trec-covid_recall@10": 0.626, "eval_beir-trec-covid_recall@100": 0.4584, "eval_beir-trec-covid_recall@20": 0.584, "eval_beir-webis-touche2020_ndcg@10": 0.19441, "eval_beir-webis-touche2020_recall@10": 0.13767, "eval_beir-webis-touche2020_recall@100": 0.44417, "eval_beir-webis-touche2020_recall@20": 0.19277, "eval_senteval-avg_sts": 0.7619422862923666, "eval_senteval-sickr_spearman": 0.7229528995687352, "eval_senteval-stsb_spearman": 0.800931673015998, "step": 70000, "test_accuracy": 94.04296875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.34075427055358887, "test_doc_norm": 1.3685663938522339, "test_inbatch_neg_score": 0.4555176794528961, "test_inbatch_pos_score": 1.3934296369552612, "test_loss": 0.34075427055358887, "test_loss_align": 1.016262173652649, "test_loss_unif": 3.960996389389038, "test_loss_unif_q@queue": 3.960996389389038, "test_norm_diff": 0.013060055673122406, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.09636950492858887, "test_query_norm": 1.3787803649902344, "test_queue_k_norm": 1.3954826593399048, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04259835183620453, "test_stdq": 0.04275026172399521, "test_stdqueue_k": 0.048995643854141235, "test_stdqueue_q": 0.0 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3337, "doc_norm": 1.3932, "encoder_q-embeddings": 2444.7136, "encoder_q-layer.0": 1744.1493, "encoder_q-layer.1": 1799.2677, "encoder_q-layer.10": 2518.2351, "encoder_q-layer.11": 5212.6665, "encoder_q-layer.2": 1955.8281, "encoder_q-layer.3": 2053.8958, "encoder_q-layer.4": 2131.218, "encoder_q-layer.5": 1987.2709, "encoder_q-layer.6": 2153.3108, "encoder_q-layer.7": 2168.8547, "encoder_q-layer.8": 2473.9302, "encoder_q-layer.9": 2204.3474, "epoch": 0.68, "inbatch_neg_score": 0.0943, "inbatch_pos_score": 0.7617, "learning_rate": 1.661111111111111e-05, "loss": 3.3337, "norm_diff": 0.1344, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3757.81, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0929, "query_norm": 1.2588, "queue_k_norm": 1.3942, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6248, "sent_len_1": 66.9061, "sent_max_len_0": 127.9925, "sent_max_len_1": 190.3438, "stdk": 0.0488, "stdq": 0.0446, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 70100 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.3407, "doc_norm": 1.3978, "encoder_q-embeddings": 3592.4858, "encoder_q-layer.0": 2405.3518, "encoder_q-layer.1": 2556.2424, "encoder_q-layer.10": 4880.3037, "encoder_q-layer.11": 10858.3242, "encoder_q-layer.2": 2832.9333, "encoder_q-layer.3": 2926.3743, "encoder_q-layer.4": 3203.6125, "encoder_q-layer.5": 3341.1794, "encoder_q-layer.6": 3765.3384, "encoder_q-layer.7": 4294.5371, "encoder_q-layer.8": 5045.563, "encoder_q-layer.9": 4656.0005, "epoch": 0.69, "inbatch_neg_score": 0.0913, "inbatch_pos_score": 0.7568, "learning_rate": 1.655555555555556e-05, "loss": 3.3407, "norm_diff": 0.1353, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6969.5171, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0909, "query_norm": 1.2625, "queue_k_norm": 1.3941, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5815, "sent_len_1": 66.819, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5788, "stdk": 0.049, "stdq": 0.0447, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 70200 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.3416, "doc_norm": 1.3871, "encoder_q-embeddings": 2206.2764, "encoder_q-layer.0": 1608.0131, "encoder_q-layer.1": 1762.6018, "encoder_q-layer.10": 2685.7864, "encoder_q-layer.11": 5365.0767, "encoder_q-layer.2": 2019.9825, "encoder_q-layer.3": 2109.5027, "encoder_q-layer.4": 2238.8279, "encoder_q-layer.5": 2140.0481, "encoder_q-layer.6": 2236.2927, "encoder_q-layer.7": 2380.9128, "encoder_q-layer.8": 2703.2502, "encoder_q-layer.9": 2546.9158, "epoch": 0.69, "inbatch_neg_score": 0.0878, "inbatch_pos_score": 0.7241, "learning_rate": 1.65e-05, "loss": 3.3416, "norm_diff": 0.1195, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3824.4116, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0882, "query_norm": 1.2676, "queue_k_norm": 1.3908, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4857, "sent_len_1": 66.8968, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.5575, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 70300 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.3342, "doc_norm": 1.3937, "encoder_q-embeddings": 1836.5426, "encoder_q-layer.0": 1200.671, "encoder_q-layer.1": 1250.4282, "encoder_q-layer.10": 2204.9465, "encoder_q-layer.11": 4882.4399, "encoder_q-layer.2": 1392.9158, "encoder_q-layer.3": 1489.5027, "encoder_q-layer.4": 1541.0856, "encoder_q-layer.5": 1617.115, "encoder_q-layer.6": 1797.3572, "encoder_q-layer.7": 2026.8649, "encoder_q-layer.8": 2351.6575, "encoder_q-layer.9": 2159.2354, "epoch": 0.69, "inbatch_neg_score": 0.0892, "inbatch_pos_score": 0.7759, "learning_rate": 1.6444444444444447e-05, "loss": 3.3342, "norm_diff": 0.1175, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3259.6079, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0889, "query_norm": 1.2762, "queue_k_norm": 1.3927, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5572, "sent_len_1": 66.6094, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.0025, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 70400 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.319, "doc_norm": 1.3954, "encoder_q-embeddings": 1905.027, "encoder_q-layer.0": 1237.0464, "encoder_q-layer.1": 1312.5149, "encoder_q-layer.10": 2401.4824, "encoder_q-layer.11": 5509.6553, "encoder_q-layer.2": 1441.6497, "encoder_q-layer.3": 1511.4375, "encoder_q-layer.4": 1638.858, "encoder_q-layer.5": 1737.3418, "encoder_q-layer.6": 2021.7524, "encoder_q-layer.7": 2366.572, "encoder_q-layer.8": 2823.6123, "encoder_q-layer.9": 2417.8394, "epoch": 0.69, "inbatch_neg_score": 0.0871, "inbatch_pos_score": 0.7466, "learning_rate": 1.638888888888889e-05, "loss": 3.319, "norm_diff": 0.155, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3593.2123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0879, "query_norm": 1.2403, "queue_k_norm": 1.3939, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6178, "sent_len_1": 66.989, "sent_max_len_0": 128.0, "sent_max_len_1": 192.9437, "stdk": 0.049, "stdq": 0.0438, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 70500 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 3.3245, "doc_norm": 1.3933, "encoder_q-embeddings": 1634.9685, "encoder_q-layer.0": 1143.7799, "encoder_q-layer.1": 1222.6899, "encoder_q-layer.10": 2248.3279, "encoder_q-layer.11": 5045.3384, "encoder_q-layer.2": 1277.4963, "encoder_q-layer.3": 1310.4438, "encoder_q-layer.4": 1430.3209, "encoder_q-layer.5": 1457.8678, "encoder_q-layer.6": 1698.8715, "encoder_q-layer.7": 1920.3431, "encoder_q-layer.8": 2273.936, "encoder_q-layer.9": 2113.1863, "epoch": 0.69, "inbatch_neg_score": 0.0882, "inbatch_pos_score": 0.7915, "learning_rate": 1.6333333333333335e-05, "loss": 3.3245, "norm_diff": 0.1206, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3197.0475, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0897, "query_norm": 1.2728, "queue_k_norm": 1.3917, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7156, "sent_len_1": 66.8785, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1413, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 70600 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.3295, "doc_norm": 1.3853, "encoder_q-embeddings": 2035.0876, "encoder_q-layer.0": 1338.1353, "encoder_q-layer.1": 1440.3335, "encoder_q-layer.10": 2316.3684, "encoder_q-layer.11": 5287.7275, "encoder_q-layer.2": 1630.525, "encoder_q-layer.3": 1717.9713, "encoder_q-layer.4": 1829.673, "encoder_q-layer.5": 1901.4851, "encoder_q-layer.6": 2116.1262, "encoder_q-layer.7": 2314.5732, "encoder_q-layer.8": 2651.6357, "encoder_q-layer.9": 2347.3049, "epoch": 0.69, "inbatch_neg_score": 0.0876, "inbatch_pos_score": 0.7568, "learning_rate": 1.6277777777777777e-05, "loss": 3.3295, "norm_diff": 0.1144, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3575.2187, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.088, "query_norm": 1.2709, "queue_k_norm": 1.3913, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5494, "sent_len_1": 66.7179, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 70700 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.3436, "doc_norm": 1.3886, "encoder_q-embeddings": 1932.0773, "encoder_q-layer.0": 1302.1, "encoder_q-layer.1": 1384.9265, "encoder_q-layer.10": 2348.2288, "encoder_q-layer.11": 5247.7168, "encoder_q-layer.2": 1544.4637, "encoder_q-layer.3": 1602.3961, "encoder_q-layer.4": 1712.3076, "encoder_q-layer.5": 1721.7534, "encoder_q-layer.6": 1743.3373, "encoder_q-layer.7": 1830.9226, "encoder_q-layer.8": 2297.4099, "encoder_q-layer.9": 2114.5032, "epoch": 0.69, "inbatch_neg_score": 0.0847, "inbatch_pos_score": 0.7637, "learning_rate": 1.6222222222222223e-05, "loss": 3.3436, "norm_diff": 0.1188, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3366.9095, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0847, "query_norm": 1.2698, "queue_k_norm": 1.3894, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6444, "sent_len_1": 66.5949, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7988, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 70800 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.353, "doc_norm": 1.3942, "encoder_q-embeddings": 1945.9672, "encoder_q-layer.0": 1279.2358, "encoder_q-layer.1": 1309.5537, "encoder_q-layer.10": 2504.9783, "encoder_q-layer.11": 5355.1089, "encoder_q-layer.2": 1461.5259, "encoder_q-layer.3": 1550.548, "encoder_q-layer.4": 1703.0282, "encoder_q-layer.5": 1708.7114, "encoder_q-layer.6": 1939.1714, "encoder_q-layer.7": 2176.9976, "encoder_q-layer.8": 2533.2305, "encoder_q-layer.9": 2291.8523, "epoch": 0.69, "inbatch_neg_score": 0.0834, "inbatch_pos_score": 0.75, "learning_rate": 1.6166666666666665e-05, "loss": 3.353, "norm_diff": 0.1373, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3486.9357, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0828, "query_norm": 1.2569, "queue_k_norm": 1.3882, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5159, "sent_len_1": 66.9103, "sent_max_len_0": 128.0, "sent_max_len_1": 191.5675, "stdk": 0.049, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 70900 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.3375, "doc_norm": 1.3967, "encoder_q-embeddings": 1906.0122, "encoder_q-layer.0": 1280.2025, "encoder_q-layer.1": 1352.8461, "encoder_q-layer.10": 2205.4514, "encoder_q-layer.11": 5031.4458, "encoder_q-layer.2": 1550.8198, "encoder_q-layer.3": 1678.8351, "encoder_q-layer.4": 1752.0165, "encoder_q-layer.5": 1797.4436, "encoder_q-layer.6": 1905.2133, "encoder_q-layer.7": 2121.1287, "encoder_q-layer.8": 2369.7817, "encoder_q-layer.9": 2218.3511, "epoch": 0.69, "inbatch_neg_score": 0.0861, "inbatch_pos_score": 0.7456, "learning_rate": 1.6111111111111115e-05, "loss": 3.3375, "norm_diff": 0.131, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3391.9848, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0848, "query_norm": 1.2657, "queue_k_norm": 1.3904, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6103, "sent_len_1": 66.757, "sent_max_len_0": 127.99, "sent_max_len_1": 189.695, "stdk": 0.049, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 71000 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.3349, "doc_norm": 1.3854, "encoder_q-embeddings": 2232.71, "encoder_q-layer.0": 1497.616, "encoder_q-layer.1": 1631.3729, "encoder_q-layer.10": 2392.1479, "encoder_q-layer.11": 5327.624, "encoder_q-layer.2": 1854.8423, "encoder_q-layer.3": 1924.1399, "encoder_q-layer.4": 2101.0444, "encoder_q-layer.5": 2137.2979, "encoder_q-layer.6": 2299.8467, "encoder_q-layer.7": 2405.0415, "encoder_q-layer.8": 2607.9395, "encoder_q-layer.9": 2305.1287, "epoch": 0.69, "inbatch_neg_score": 0.0872, "inbatch_pos_score": 0.7339, "learning_rate": 1.6055555555555557e-05, "loss": 3.3349, "norm_diff": 0.1318, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3725.5893, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0867, "query_norm": 1.2536, "queue_k_norm": 1.3884, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5091, "sent_len_1": 66.4426, "sent_max_len_0": 127.9925, "sent_max_len_1": 189.745, "stdk": 0.0487, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 71100 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.3289, "doc_norm": 1.3894, "encoder_q-embeddings": 1968.2676, "encoder_q-layer.0": 1297.4755, "encoder_q-layer.1": 1361.4639, "encoder_q-layer.10": 2429.4985, "encoder_q-layer.11": 5279.4805, "encoder_q-layer.2": 1520.6691, "encoder_q-layer.3": 1590.1647, "encoder_q-layer.4": 1702.8912, "encoder_q-layer.5": 1693.1694, "encoder_q-layer.6": 1973.9628, "encoder_q-layer.7": 2115.1733, "encoder_q-layer.8": 2526.0046, "encoder_q-layer.9": 2291.8328, "epoch": 0.7, "inbatch_neg_score": 0.0839, "inbatch_pos_score": 0.7573, "learning_rate": 1.6000000000000003e-05, "loss": 3.3289, "norm_diff": 0.1096, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3489.8842, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.085, "query_norm": 1.2798, "queue_k_norm": 1.3887, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.3437, "sent_len_1": 66.7767, "sent_max_len_0": 128.0, "sent_max_len_1": 188.51, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 71200 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3345, "doc_norm": 1.3815, "encoder_q-embeddings": 2203.365, "encoder_q-layer.0": 1460.0607, "encoder_q-layer.1": 1632.1392, "encoder_q-layer.10": 2407.2593, "encoder_q-layer.11": 5061.6201, "encoder_q-layer.2": 1831.173, "encoder_q-layer.3": 1896.2776, "encoder_q-layer.4": 2060.3435, "encoder_q-layer.5": 2124.9531, "encoder_q-layer.6": 2297.7073, "encoder_q-layer.7": 2478.8879, "encoder_q-layer.8": 2686.0776, "encoder_q-layer.9": 2232.231, "epoch": 0.7, "inbatch_neg_score": 0.0882, "inbatch_pos_score": 0.7671, "learning_rate": 1.5944444444444445e-05, "loss": 3.3345, "norm_diff": 0.1055, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3685.0767, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.087, "query_norm": 1.276, "queue_k_norm": 1.3876, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5361, "sent_len_1": 66.6536, "sent_max_len_0": 127.995, "sent_max_len_1": 190.4112, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 71300 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.322, "doc_norm": 1.3899, "encoder_q-embeddings": 1921.4545, "encoder_q-layer.0": 1292.3916, "encoder_q-layer.1": 1336.3041, "encoder_q-layer.10": 2307.7959, "encoder_q-layer.11": 5148.3823, "encoder_q-layer.2": 1506.8049, "encoder_q-layer.3": 1543.7865, "encoder_q-layer.4": 1661.1841, "encoder_q-layer.5": 1698.7419, "encoder_q-layer.6": 1861.3119, "encoder_q-layer.7": 2153.1299, "encoder_q-layer.8": 2390.1152, "encoder_q-layer.9": 2201.9609, "epoch": 0.7, "inbatch_neg_score": 0.0841, "inbatch_pos_score": 0.7412, "learning_rate": 1.588888888888889e-05, "loss": 3.322, "norm_diff": 0.1356, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3379.6179, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.084, "query_norm": 1.2543, "queue_k_norm": 1.3873, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6102, "sent_len_1": 66.7452, "sent_max_len_0": 128.0, "sent_max_len_1": 190.865, "stdk": 0.0489, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 71400 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.3297, "doc_norm": 1.3881, "encoder_q-embeddings": 1961.3765, "encoder_q-layer.0": 1266.1118, "encoder_q-layer.1": 1286.9829, "encoder_q-layer.10": 2421.8489, "encoder_q-layer.11": 5334.3516, "encoder_q-layer.2": 1444.5549, "encoder_q-layer.3": 1489.1354, "encoder_q-layer.4": 1570.8954, "encoder_q-layer.5": 1655.9175, "encoder_q-layer.6": 1972.9323, "encoder_q-layer.7": 2184.7485, "encoder_q-layer.8": 2578.7095, "encoder_q-layer.9": 2297.189, "epoch": 0.7, "inbatch_neg_score": 0.088, "inbatch_pos_score": 0.7729, "learning_rate": 1.5833333333333333e-05, "loss": 3.3297, "norm_diff": 0.1027, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3470.0221, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0869, "query_norm": 1.2854, "queue_k_norm": 1.3884, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5747, "sent_len_1": 66.9588, "sent_max_len_0": 127.9737, "sent_max_len_1": 190.7562, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 71500 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.3255, "doc_norm": 1.3903, "encoder_q-embeddings": 2130.2136, "encoder_q-layer.0": 1432.9656, "encoder_q-layer.1": 1561.5886, "encoder_q-layer.10": 2502.7034, "encoder_q-layer.11": 5325.0649, "encoder_q-layer.2": 1766.5715, "encoder_q-layer.3": 1853.6509, "encoder_q-layer.4": 1943.4419, "encoder_q-layer.5": 2041.0859, "encoder_q-layer.6": 2242.7195, "encoder_q-layer.7": 2444.1621, "encoder_q-layer.8": 2789.1836, "encoder_q-layer.9": 2423.6956, "epoch": 0.7, "inbatch_neg_score": 0.088, "inbatch_pos_score": 0.7329, "learning_rate": 1.577777777777778e-05, "loss": 3.3255, "norm_diff": 0.1163, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3749.4616, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0884, "query_norm": 1.2739, "queue_k_norm": 1.3873, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5013, "sent_len_1": 66.6579, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7688, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 71600 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.3222, "doc_norm": 1.3902, "encoder_q-embeddings": 2082.9121, "encoder_q-layer.0": 1466.6356, "encoder_q-layer.1": 1574.9221, "encoder_q-layer.10": 2657.1392, "encoder_q-layer.11": 5303.624, "encoder_q-layer.2": 1787.5186, "encoder_q-layer.3": 1841.4172, "encoder_q-layer.4": 1951.1632, "encoder_q-layer.5": 2061.1658, "encoder_q-layer.6": 2260.2852, "encoder_q-layer.7": 2286.657, "encoder_q-layer.8": 2751.658, "encoder_q-layer.9": 2510.4949, "epoch": 0.7, "inbatch_neg_score": 0.0906, "inbatch_pos_score": 0.7705, "learning_rate": 1.5722222222222225e-05, "loss": 3.3222, "norm_diff": 0.122, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3673.22, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0907, "query_norm": 1.2683, "queue_k_norm": 1.3865, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7367, "sent_len_1": 66.8335, "sent_max_len_0": 127.995, "sent_max_len_1": 187.3237, "stdk": 0.0489, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71700 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.3004, "doc_norm": 1.3881, "encoder_q-embeddings": 2395.8247, "encoder_q-layer.0": 1689.1257, "encoder_q-layer.1": 1841.3965, "encoder_q-layer.10": 2441.1201, "encoder_q-layer.11": 5267.2646, "encoder_q-layer.2": 2098.6799, "encoder_q-layer.3": 2228.8914, "encoder_q-layer.4": 2496.5103, "encoder_q-layer.5": 2593.123, "encoder_q-layer.6": 2917.8843, "encoder_q-layer.7": 3155.0857, "encoder_q-layer.8": 2793.374, "encoder_q-layer.9": 2367.5432, "epoch": 0.7, "inbatch_neg_score": 0.0869, "inbatch_pos_score": 0.7612, "learning_rate": 1.5666666666666667e-05, "loss": 3.3004, "norm_diff": 0.0925, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4054.6055, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0873, "query_norm": 1.2955, "queue_k_norm": 1.3874, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5703, "sent_len_1": 66.6324, "sent_max_len_0": 128.0, "sent_max_len_1": 189.06, "stdk": 0.0488, "stdq": 0.0456, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 71800 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.3446, "doc_norm": 1.3924, "encoder_q-embeddings": 2444.1038, "encoder_q-layer.0": 1779.8091, "encoder_q-layer.1": 1983.5837, "encoder_q-layer.10": 2375.2966, "encoder_q-layer.11": 5355.0933, "encoder_q-layer.2": 2168.3318, "encoder_q-layer.3": 2123.4043, "encoder_q-layer.4": 2364.3701, "encoder_q-layer.5": 2343.5981, "encoder_q-layer.6": 2479.49, "encoder_q-layer.7": 2374.3269, "encoder_q-layer.8": 2776.7803, "encoder_q-layer.9": 2335.7231, "epoch": 0.7, "inbatch_neg_score": 0.0925, "inbatch_pos_score": 0.7471, "learning_rate": 1.5611111111111113e-05, "loss": 3.3446, "norm_diff": 0.1209, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3931.4323, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0921, "query_norm": 1.2716, "queue_k_norm": 1.3889, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4811, "sent_len_1": 66.8448, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6675, "stdk": 0.0489, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 71900 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.3485, "doc_norm": 1.3811, "encoder_q-embeddings": 2152.3862, "encoder_q-layer.0": 1436.1018, "encoder_q-layer.1": 1558.1897, "encoder_q-layer.10": 2448.533, "encoder_q-layer.11": 5467.8896, "encoder_q-layer.2": 1774.7188, "encoder_q-layer.3": 1835.0522, "encoder_q-layer.4": 2069.3811, "encoder_q-layer.5": 2019.3289, "encoder_q-layer.6": 2140.7241, "encoder_q-layer.7": 2276.4026, "encoder_q-layer.8": 2619.3381, "encoder_q-layer.9": 2302.75, "epoch": 0.7, "inbatch_neg_score": 0.0919, "inbatch_pos_score": 0.751, "learning_rate": 1.5555555555555555e-05, "loss": 3.3485, "norm_diff": 0.1022, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3704.7065, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0924, "query_norm": 1.2789, "queue_k_norm": 1.387, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.516, "sent_len_1": 66.819, "sent_max_len_0": 128.0, "sent_max_len_1": 191.005, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 72000 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 3.3092, "doc_norm": 1.3851, "encoder_q-embeddings": 2018.2844, "encoder_q-layer.0": 1335.7766, "encoder_q-layer.1": 1402.8507, "encoder_q-layer.10": 2285.8696, "encoder_q-layer.11": 5023.6855, "encoder_q-layer.2": 1611.1699, "encoder_q-layer.3": 1577.9561, "encoder_q-layer.4": 1712.7316, "encoder_q-layer.5": 1739.0283, "encoder_q-layer.6": 1960.0312, "encoder_q-layer.7": 2184.627, "encoder_q-layer.8": 2573.7209, "encoder_q-layer.9": 2262.9045, "epoch": 0.7, "inbatch_neg_score": 0.0922, "inbatch_pos_score": 0.7964, "learning_rate": 1.55e-05, "loss": 3.3092, "norm_diff": 0.1011, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3460.3622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0925, "query_norm": 1.284, "queue_k_norm": 1.3889, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.659, "sent_len_1": 66.7305, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.915, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 72100 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.3333, "doc_norm": 1.3924, "encoder_q-embeddings": 2413.6145, "encoder_q-layer.0": 1658.2087, "encoder_q-layer.1": 1821.8373, "encoder_q-layer.10": 2314.4568, "encoder_q-layer.11": 5300.1211, "encoder_q-layer.2": 2001.1632, "encoder_q-layer.3": 2026.5425, "encoder_q-layer.4": 2168.3608, "encoder_q-layer.5": 2234.835, "encoder_q-layer.6": 2339.3892, "encoder_q-layer.7": 2487.2693, "encoder_q-layer.8": 2677.5288, "encoder_q-layer.9": 2363.7029, "epoch": 0.7, "inbatch_neg_score": 0.0969, "inbatch_pos_score": 0.7773, "learning_rate": 1.5444444444444446e-05, "loss": 3.3333, "norm_diff": 0.0974, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3859.8493, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0967, "query_norm": 1.295, "queue_k_norm": 1.389, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7712, "sent_len_1": 66.5657, "sent_max_len_0": 127.99, "sent_max_len_1": 187.8425, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 72200 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.3343, "doc_norm": 1.3845, "encoder_q-embeddings": 3812.0505, "encoder_q-layer.0": 2608.9299, "encoder_q-layer.1": 2762.6541, "encoder_q-layer.10": 4527.1372, "encoder_q-layer.11": 10154.2275, "encoder_q-layer.2": 3157.1997, "encoder_q-layer.3": 3231.8481, "encoder_q-layer.4": 3536.7451, "encoder_q-layer.5": 3561.5447, "encoder_q-layer.6": 4061.5093, "encoder_q-layer.7": 4388.1455, "encoder_q-layer.8": 4897.3442, "encoder_q-layer.9": 4601.7285, "epoch": 0.71, "inbatch_neg_score": 0.0975, "inbatch_pos_score": 0.7642, "learning_rate": 1.538888888888889e-05, "loss": 3.3343, "norm_diff": 0.0851, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6810.3114, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0976, "query_norm": 1.2994, "queue_k_norm": 1.3868, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6251, "sent_len_1": 66.7184, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6425, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72300 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.3189, "doc_norm": 1.3829, "encoder_q-embeddings": 4682.9829, "encoder_q-layer.0": 3357.0723, "encoder_q-layer.1": 3571.0681, "encoder_q-layer.10": 4807.4004, "encoder_q-layer.11": 10920.2695, "encoder_q-layer.2": 4213.8237, "encoder_q-layer.3": 4301.0488, "encoder_q-layer.4": 4576.0024, "encoder_q-layer.5": 4515.9336, "encoder_q-layer.6": 4482.0586, "encoder_q-layer.7": 4855.6948, "encoder_q-layer.8": 5451.9062, "encoder_q-layer.9": 4711.3647, "epoch": 0.71, "inbatch_neg_score": 0.1018, "inbatch_pos_score": 0.7725, "learning_rate": 1.5333333333333334e-05, "loss": 3.3189, "norm_diff": 0.0918, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7836.9244, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1016, "query_norm": 1.2911, "queue_k_norm": 1.39, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4985, "sent_len_1": 66.8378, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1375, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 72400 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.3334, "doc_norm": 1.3945, "encoder_q-embeddings": 4060.9739, "encoder_q-layer.0": 2696.356, "encoder_q-layer.1": 2856.0583, "encoder_q-layer.10": 4927.3037, "encoder_q-layer.11": 10627.959, "encoder_q-layer.2": 3240.2537, "encoder_q-layer.3": 3383.9468, "encoder_q-layer.4": 3718.2104, "encoder_q-layer.5": 3831.0042, "encoder_q-layer.6": 4253.6338, "encoder_q-layer.7": 4913.9976, "encoder_q-layer.8": 5235.9321, "encoder_q-layer.9": 4654.124, "epoch": 0.71, "inbatch_neg_score": 0.1071, "inbatch_pos_score": 0.7871, "learning_rate": 1.527777777777778e-05, "loss": 3.3334, "norm_diff": 0.071, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7234.4623, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1067, "query_norm": 1.3235, "queue_k_norm": 1.3887, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5245, "sent_len_1": 66.7811, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8887, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 72500 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.3166, "doc_norm": 1.3864, "encoder_q-embeddings": 4596.5488, "encoder_q-layer.0": 3025.9004, "encoder_q-layer.1": 3130.9407, "encoder_q-layer.10": 4980.228, "encoder_q-layer.11": 10488.6914, "encoder_q-layer.2": 3462.9912, "encoder_q-layer.3": 3596.7744, "encoder_q-layer.4": 3912.5903, "encoder_q-layer.5": 4027.7605, "encoder_q-layer.6": 4388.0596, "encoder_q-layer.7": 5038.0518, "encoder_q-layer.8": 5610.9541, "encoder_q-layer.9": 4758.1172, "epoch": 0.71, "inbatch_neg_score": 0.1118, "inbatch_pos_score": 0.7607, "learning_rate": 1.5222222222222224e-05, "loss": 3.3166, "norm_diff": 0.0803, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7503.4927, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1113, "query_norm": 1.3061, "queue_k_norm": 1.3899, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6928, "sent_len_1": 66.58, "sent_max_len_0": 127.9988, "sent_max_len_1": 191.2463, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 72600 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.3167, "doc_norm": 1.3898, "encoder_q-embeddings": 3621.4712, "encoder_q-layer.0": 2442.3372, "encoder_q-layer.1": 2523.7981, "encoder_q-layer.10": 4400.9033, "encoder_q-layer.11": 10048.7334, "encoder_q-layer.2": 2774.3276, "encoder_q-layer.3": 2836.8201, "encoder_q-layer.4": 3029.8406, "encoder_q-layer.5": 3101.2683, "encoder_q-layer.6": 3562.3259, "encoder_q-layer.7": 3991.7559, "encoder_q-layer.8": 4710.8228, "encoder_q-layer.9": 4296.8291, "epoch": 0.71, "inbatch_neg_score": 0.1191, "inbatch_pos_score": 0.7852, "learning_rate": 1.5166666666666668e-05, "loss": 3.3167, "norm_diff": 0.0661, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6436.4236, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1177, "query_norm": 1.3237, "queue_k_norm": 1.3917, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5896, "sent_len_1": 66.8472, "sent_max_len_0": 128.0, "sent_max_len_1": 190.665, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 72700 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.3004, "doc_norm": 1.3961, "encoder_q-embeddings": 4167.9746, "encoder_q-layer.0": 2803.7803, "encoder_q-layer.1": 2977.5869, "encoder_q-layer.10": 4651.1841, "encoder_q-layer.11": 10581.542, "encoder_q-layer.2": 3275.938, "encoder_q-layer.3": 3361.688, "encoder_q-layer.4": 3797.4016, "encoder_q-layer.5": 3847.6113, "encoder_q-layer.6": 4136.9985, "encoder_q-layer.7": 4440.8091, "encoder_q-layer.8": 5174.5, "encoder_q-layer.9": 4591.1851, "epoch": 0.71, "inbatch_neg_score": 0.1228, "inbatch_pos_score": 0.7778, "learning_rate": 1.5111111111111112e-05, "loss": 3.3004, "norm_diff": 0.0699, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7214.6741, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1213, "query_norm": 1.3262, "queue_k_norm": 1.3916, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6373, "sent_len_1": 66.7075, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.4275, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 72800 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.3372, "doc_norm": 1.3946, "encoder_q-embeddings": 3581.6106, "encoder_q-layer.0": 2368.0583, "encoder_q-layer.1": 2540.9744, "encoder_q-layer.10": 4642.5063, "encoder_q-layer.11": 10876.9404, "encoder_q-layer.2": 2822.2815, "encoder_q-layer.3": 2885.124, "encoder_q-layer.4": 3110.9263, "encoder_q-layer.5": 3121.2646, "encoder_q-layer.6": 3695.3374, "encoder_q-layer.7": 4174.9507, "encoder_q-layer.8": 5166.5747, "encoder_q-layer.9": 4708.0161, "epoch": 0.71, "inbatch_neg_score": 0.1244, "inbatch_pos_score": 0.7954, "learning_rate": 1.5055555555555556e-05, "loss": 3.3372, "norm_diff": 0.0826, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6809.9985, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1246, "query_norm": 1.312, "queue_k_norm": 1.3929, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.463, "sent_len_1": 66.6256, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.2088, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 72900 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3419, "doc_norm": 1.3865, "encoder_q-embeddings": 3796.0051, "encoder_q-layer.0": 2515.2512, "encoder_q-layer.1": 2652.0747, "encoder_q-layer.10": 5716.2666, "encoder_q-layer.11": 10976.7764, "encoder_q-layer.2": 2976.6638, "encoder_q-layer.3": 3101.4624, "encoder_q-layer.4": 3330.3403, "encoder_q-layer.5": 3554.709, "encoder_q-layer.6": 3942.4922, "encoder_q-layer.7": 4945.4932, "encoder_q-layer.8": 5646.3149, "encoder_q-layer.9": 4931.2002, "epoch": 0.71, "inbatch_neg_score": 0.1299, "inbatch_pos_score": 0.7676, "learning_rate": 1.5e-05, "loss": 3.3419, "norm_diff": 0.0916, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7246.6036, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.13, "query_norm": 1.2949, "queue_k_norm": 1.3916, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6575, "sent_len_1": 66.702, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.0362, "stdk": 0.0486, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 73000 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.3283, "doc_norm": 1.3864, "encoder_q-embeddings": 4201.3984, "encoder_q-layer.0": 2950.4685, "encoder_q-layer.1": 3302.4229, "encoder_q-layer.10": 4596.7773, "encoder_q-layer.11": 10488.2451, "encoder_q-layer.2": 3697.0063, "encoder_q-layer.3": 3566.4895, "encoder_q-layer.4": 3762.885, "encoder_q-layer.5": 3769.5476, "encoder_q-layer.6": 4368.0024, "encoder_q-layer.7": 4809.6611, "encoder_q-layer.8": 5357.7427, "encoder_q-layer.9": 4544.1084, "epoch": 0.71, "inbatch_neg_score": 0.1331, "inbatch_pos_score": 0.77, "learning_rate": 1.4944444444444444e-05, "loss": 3.3283, "norm_diff": 0.0884, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7288.4343, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1331, "query_norm": 1.298, "queue_k_norm": 1.3945, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6643, "sent_len_1": 66.6419, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0488, "stdk": 0.0485, "stdq": 0.0444, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 73100 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3163, "doc_norm": 1.3985, "encoder_q-embeddings": 3724.1067, "encoder_q-layer.0": 2470.8269, "encoder_q-layer.1": 2689.2683, "encoder_q-layer.10": 4640.8057, "encoder_q-layer.11": 10713.5352, "encoder_q-layer.2": 3099.8787, "encoder_q-layer.3": 3178.6765, "encoder_q-layer.4": 3485.3513, "encoder_q-layer.5": 3596.6265, "encoder_q-layer.6": 4019.5347, "encoder_q-layer.7": 4313.3384, "encoder_q-layer.8": 4843.8594, "encoder_q-layer.9": 4597.4863, "epoch": 0.71, "inbatch_neg_score": 0.1407, "inbatch_pos_score": 0.8154, "learning_rate": 1.4888888888888888e-05, "loss": 3.3163, "norm_diff": 0.0714, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6944.8718, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1406, "query_norm": 1.3271, "queue_k_norm": 1.3961, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6214, "sent_len_1": 66.6362, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6325, "stdk": 0.0489, "stdq": 0.0454, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 73200 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.3201, "doc_norm": 1.4015, "encoder_q-embeddings": 4701.8643, "encoder_q-layer.0": 3100.5103, "encoder_q-layer.1": 3314.5657, "encoder_q-layer.10": 5068.1255, "encoder_q-layer.11": 10974.3711, "encoder_q-layer.2": 3873.115, "encoder_q-layer.3": 3931.4939, "encoder_q-layer.4": 4225.876, "encoder_q-layer.5": 4453.3027, "encoder_q-layer.6": 4716.687, "encoder_q-layer.7": 5040.1309, "encoder_q-layer.8": 5543.6523, "encoder_q-layer.9": 4869.9824, "epoch": 0.72, "inbatch_neg_score": 0.1401, "inbatch_pos_score": 0.8257, "learning_rate": 1.4833333333333336e-05, "loss": 3.3201, "norm_diff": 0.059, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7756.4697, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1392, "query_norm": 1.3425, "queue_k_norm": 1.3944, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4186, "sent_len_1": 66.4855, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.0125, "stdk": 0.049, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 73300 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.3253, "doc_norm": 1.3977, "encoder_q-embeddings": 3496.2578, "encoder_q-layer.0": 2469.7561, "encoder_q-layer.1": 2643.4167, "encoder_q-layer.10": 4425.7295, "encoder_q-layer.11": 10224.0176, "encoder_q-layer.2": 2872.4338, "encoder_q-layer.3": 2920.3623, "encoder_q-layer.4": 3081.8811, "encoder_q-layer.5": 3113.2322, "encoder_q-layer.6": 3479.6555, "encoder_q-layer.7": 3789.3071, "encoder_q-layer.8": 4694.146, "encoder_q-layer.9": 4231.1528, "epoch": 0.72, "inbatch_neg_score": 0.138, "inbatch_pos_score": 0.8457, "learning_rate": 1.477777777777778e-05, "loss": 3.3253, "norm_diff": 0.0852, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6440.3782, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1381, "query_norm": 1.3125, "queue_k_norm": 1.3979, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5709, "sent_len_1": 66.8958, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8313, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 73400 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3094, "doc_norm": 1.3955, "encoder_q-embeddings": 4009.5308, "encoder_q-layer.0": 2574.2571, "encoder_q-layer.1": 2715.843, "encoder_q-layer.10": 4789.6133, "encoder_q-layer.11": 10783.043, "encoder_q-layer.2": 3034.3315, "encoder_q-layer.3": 3374.811, "encoder_q-layer.4": 3662.7756, "encoder_q-layer.5": 3688.6006, "encoder_q-layer.6": 3965.2295, "encoder_q-layer.7": 4368.8809, "encoder_q-layer.8": 5352.4053, "encoder_q-layer.9": 4826.9756, "epoch": 0.72, "inbatch_neg_score": 0.1398, "inbatch_pos_score": 0.8115, "learning_rate": 1.4722222222222224e-05, "loss": 3.3094, "norm_diff": 0.0819, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7109.1127, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1385, "query_norm": 1.3136, "queue_k_norm": 1.3977, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7137, "sent_len_1": 66.8414, "sent_max_len_0": 127.9875, "sent_max_len_1": 189.77, "stdk": 0.0487, "stdq": 0.0454, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 73500 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.3368, "doc_norm": 1.3998, "encoder_q-embeddings": 3746.9136, "encoder_q-layer.0": 2530.1833, "encoder_q-layer.1": 2729.4211, "encoder_q-layer.10": 4743.3389, "encoder_q-layer.11": 10805.998, "encoder_q-layer.2": 3138.1125, "encoder_q-layer.3": 3281.1104, "encoder_q-layer.4": 3526.6335, "encoder_q-layer.5": 3881.8311, "encoder_q-layer.6": 4275.8643, "encoder_q-layer.7": 4854.4761, "encoder_q-layer.8": 5713.3086, "encoder_q-layer.9": 4715.3525, "epoch": 0.72, "inbatch_neg_score": 0.1325, "inbatch_pos_score": 0.8145, "learning_rate": 1.4666666666666668e-05, "loss": 3.3368, "norm_diff": 0.103, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7152.3991, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1323, "query_norm": 1.2968, "queue_k_norm": 1.3995, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4848, "sent_len_1": 66.8118, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8975, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 73600 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.3424, "doc_norm": 1.3981, "encoder_q-embeddings": 6973.8687, "encoder_q-layer.0": 4951.9761, "encoder_q-layer.1": 5123.7559, "encoder_q-layer.10": 5013.957, "encoder_q-layer.11": 11481.377, "encoder_q-layer.2": 5876.165, "encoder_q-layer.3": 6250.9722, "encoder_q-layer.4": 6560.3267, "encoder_q-layer.5": 6908.5435, "encoder_q-layer.6": 6920.5078, "encoder_q-layer.7": 6040.5625, "encoder_q-layer.8": 6297.3838, "encoder_q-layer.9": 4814.2935, "epoch": 0.72, "inbatch_neg_score": 0.1377, "inbatch_pos_score": 0.7842, "learning_rate": 1.4611111111111112e-05, "loss": 3.3424, "norm_diff": 0.1071, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9846.1511, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1366, "query_norm": 1.2911, "queue_k_norm": 1.4009, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7741, "sent_len_1": 66.8904, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.4038, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 73700 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.3122, "doc_norm": 1.4052, "encoder_q-embeddings": 5053.4277, "encoder_q-layer.0": 3543.3206, "encoder_q-layer.1": 3538.4836, "encoder_q-layer.10": 4809.9785, "encoder_q-layer.11": 10609.5781, "encoder_q-layer.2": 3734.2844, "encoder_q-layer.3": 3884.1216, "encoder_q-layer.4": 4171.9644, "encoder_q-layer.5": 3990.9087, "encoder_q-layer.6": 4318.0552, "encoder_q-layer.7": 4484.334, "encoder_q-layer.8": 5033.5669, "encoder_q-layer.9": 4517.7598, "epoch": 0.72, "inbatch_neg_score": 0.1326, "inbatch_pos_score": 0.832, "learning_rate": 1.4555555555555556e-05, "loss": 3.3122, "norm_diff": 0.0942, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7522.926, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1338, "query_norm": 1.311, "queue_k_norm": 1.403, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6432, "sent_len_1": 66.5724, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2675, "stdk": 0.049, "stdq": 0.0457, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 73800 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.327, "doc_norm": 1.3959, "encoder_q-embeddings": 3916.4204, "encoder_q-layer.0": 2658.8547, "encoder_q-layer.1": 2772.7319, "encoder_q-layer.10": 4841.168, "encoder_q-layer.11": 10769.3037, "encoder_q-layer.2": 3039.9644, "encoder_q-layer.3": 3101.3613, "encoder_q-layer.4": 3468.1055, "encoder_q-layer.5": 3582.21, "encoder_q-layer.6": 4112.7549, "encoder_q-layer.7": 4497.5049, "encoder_q-layer.8": 5489.3608, "encoder_q-layer.9": 4724.4341, "epoch": 0.72, "inbatch_neg_score": 0.1339, "inbatch_pos_score": 0.7769, "learning_rate": 1.45e-05, "loss": 3.327, "norm_diff": 0.1155, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7222.3921, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1338, "query_norm": 1.2804, "queue_k_norm": 1.4026, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6618, "sent_len_1": 66.6813, "sent_max_len_0": 127.9988, "sent_max_len_1": 187.6337, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 73900 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.3202, "doc_norm": 1.4054, "encoder_q-embeddings": 4408.0459, "encoder_q-layer.0": 3057.614, "encoder_q-layer.1": 3328.6143, "encoder_q-layer.10": 4982.2754, "encoder_q-layer.11": 10581.5479, "encoder_q-layer.2": 3842.967, "encoder_q-layer.3": 4099.1963, "encoder_q-layer.4": 4364.5156, "encoder_q-layer.5": 4349.8945, "encoder_q-layer.6": 4792.6895, "encoder_q-layer.7": 4853.938, "encoder_q-layer.8": 5231.3691, "encoder_q-layer.9": 4687.3813, "epoch": 0.72, "inbatch_neg_score": 0.128, "inbatch_pos_score": 0.7808, "learning_rate": 1.4444444444444444e-05, "loss": 3.3202, "norm_diff": 0.1219, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7516.0658, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1284, "query_norm": 1.2835, "queue_k_norm": 1.4006, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5685, "sent_len_1": 67.1017, "sent_max_len_0": 128.0, "sent_max_len_1": 191.3625, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 74000 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.3106, "doc_norm": 1.4061, "encoder_q-embeddings": 4880.1357, "encoder_q-layer.0": 3357.9626, "encoder_q-layer.1": 3864.4009, "encoder_q-layer.10": 4759.8721, "encoder_q-layer.11": 11939.166, "encoder_q-layer.2": 4453.2939, "encoder_q-layer.3": 4555.6079, "encoder_q-layer.4": 4890.0518, "encoder_q-layer.5": 5083.6216, "encoder_q-layer.6": 4821.6924, "encoder_q-layer.7": 5619.3779, "encoder_q-layer.8": 5483.9292, "encoder_q-layer.9": 4899.1572, "epoch": 0.72, "inbatch_neg_score": 0.1254, "inbatch_pos_score": 0.7905, "learning_rate": 1.438888888888889e-05, "loss": 3.3106, "norm_diff": 0.1189, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8157.989, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1246, "query_norm": 1.2873, "queue_k_norm": 1.4003, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6504, "sent_len_1": 66.8007, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2837, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 74100 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.2948, "doc_norm": 1.4047, "encoder_q-embeddings": 3772.5615, "encoder_q-layer.0": 2485.6543, "encoder_q-layer.1": 2697.4866, "encoder_q-layer.10": 4663.2305, "encoder_q-layer.11": 10392.4834, "encoder_q-layer.2": 2986.6423, "encoder_q-layer.3": 3092.8484, "encoder_q-layer.4": 3407.9695, "encoder_q-layer.5": 3386.7625, "encoder_q-layer.6": 3685.166, "encoder_q-layer.7": 4235.166, "encoder_q-layer.8": 5105.1416, "encoder_q-layer.9": 4576.9346, "epoch": 0.72, "inbatch_neg_score": 0.1226, "inbatch_pos_score": 0.7905, "learning_rate": 1.4333333333333334e-05, "loss": 3.2948, "norm_diff": 0.1132, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6857.8191, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1236, "query_norm": 1.2915, "queue_k_norm": 1.4016, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7242, "sent_len_1": 67.085, "sent_max_len_0": 128.0, "sent_max_len_1": 189.985, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 74200 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.3267, "doc_norm": 1.402, "encoder_q-embeddings": 7525.0879, "encoder_q-layer.0": 5124.8213, "encoder_q-layer.1": 5525.0107, "encoder_q-layer.10": 10047.4502, "encoder_q-layer.11": 23134.4551, "encoder_q-layer.2": 6146.228, "encoder_q-layer.3": 6267.0146, "encoder_q-layer.4": 6612.2515, "encoder_q-layer.5": 7006.749, "encoder_q-layer.6": 8058.8213, "encoder_q-layer.7": 9545.3867, "encoder_q-layer.8": 11195.4746, "encoder_q-layer.9": 9751.1885, "epoch": 0.73, "inbatch_neg_score": 0.1161, "inbatch_pos_score": 0.7759, "learning_rate": 1.427777777777778e-05, "loss": 3.3267, "norm_diff": 0.1515, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14676.5333, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1166, "query_norm": 1.2505, "queue_k_norm": 1.4017, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4992, "sent_len_1": 66.3473, "sent_max_len_0": 128.0, "sent_max_len_1": 187.985, "stdk": 0.0489, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 74300 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.3307, "doc_norm": 1.4065, "encoder_q-embeddings": 7460.7676, "encoder_q-layer.0": 5078.2915, "encoder_q-layer.1": 5313.7827, "encoder_q-layer.10": 10666.5918, "encoder_q-layer.11": 20670.2812, "encoder_q-layer.2": 5929.6436, "encoder_q-layer.3": 6112.4766, "encoder_q-layer.4": 6601.4092, "encoder_q-layer.5": 7134.5967, "encoder_q-layer.6": 8374.9619, "encoder_q-layer.7": 9473.9707, "encoder_q-layer.8": 10678.8262, "encoder_q-layer.9": 9682.6973, "epoch": 0.73, "inbatch_neg_score": 0.1124, "inbatch_pos_score": 0.7954, "learning_rate": 1.4222222222222224e-05, "loss": 3.3307, "norm_diff": 0.1296, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14073.7775, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1124, "query_norm": 1.2769, "queue_k_norm": 1.4004, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6873, "sent_len_1": 66.8734, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1113, "stdk": 0.0491, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 74400 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.3025, "doc_norm": 1.3988, "encoder_q-embeddings": 7755.4932, "encoder_q-layer.0": 5274.7373, "encoder_q-layer.1": 5383.6631, "encoder_q-layer.10": 9630.9043, "encoder_q-layer.11": 21579.4219, "encoder_q-layer.2": 5993.3945, "encoder_q-layer.3": 6467.8174, "encoder_q-layer.4": 7136.542, "encoder_q-layer.5": 7099.9014, "encoder_q-layer.6": 7787.1733, "encoder_q-layer.7": 8985.9824, "encoder_q-layer.8": 10904.0498, "encoder_q-layer.9": 9676.2559, "epoch": 0.73, "inbatch_neg_score": 0.1126, "inbatch_pos_score": 0.7812, "learning_rate": 1.4166666666666668e-05, "loss": 3.3025, "norm_diff": 0.126, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14211.7635, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1129, "query_norm": 1.2728, "queue_k_norm": 1.4002, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7983, "sent_len_1": 66.973, "sent_max_len_0": 128.0, "sent_max_len_1": 191.6875, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 74500 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.3143, "doc_norm": 1.3963, "encoder_q-embeddings": 7438.7998, "encoder_q-layer.0": 4839.1704, "encoder_q-layer.1": 5098.3062, "encoder_q-layer.10": 9357.6006, "encoder_q-layer.11": 20988.2793, "encoder_q-layer.2": 5779.9043, "encoder_q-layer.3": 5929.7603, "encoder_q-layer.4": 6177.729, "encoder_q-layer.5": 6604.5693, "encoder_q-layer.6": 7836.998, "encoder_q-layer.7": 8353.3955, "encoder_q-layer.8": 9824.5107, "encoder_q-layer.9": 8849.6035, "epoch": 0.73, "inbatch_neg_score": 0.1147, "inbatch_pos_score": 0.7783, "learning_rate": 1.4111111111111112e-05, "loss": 3.3143, "norm_diff": 0.1211, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13613.3176, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1144, "query_norm": 1.2752, "queue_k_norm": 1.4011, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6071, "sent_len_1": 66.6893, "sent_max_len_0": 127.9887, "sent_max_len_1": 189.2, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 74600 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.3376, "doc_norm": 1.4092, "encoder_q-embeddings": 8549.8604, "encoder_q-layer.0": 5876.375, "encoder_q-layer.1": 6339.4443, "encoder_q-layer.10": 9743.6982, "encoder_q-layer.11": 22617.2285, "encoder_q-layer.2": 7219.3945, "encoder_q-layer.3": 7910.5781, "encoder_q-layer.4": 8828.7266, "encoder_q-layer.5": 8808.6191, "encoder_q-layer.6": 9484.0146, "encoder_q-layer.7": 10405.7842, "encoder_q-layer.8": 10382.6299, "encoder_q-layer.9": 9345.2598, "epoch": 0.73, "inbatch_neg_score": 0.1095, "inbatch_pos_score": 0.7847, "learning_rate": 1.4055555555555556e-05, "loss": 3.3376, "norm_diff": 0.1381, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15295.3342, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1096, "query_norm": 1.2711, "queue_k_norm": 1.3999, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4416, "sent_len_1": 66.5776, "sent_max_len_0": 128.0, "sent_max_len_1": 190.365, "stdk": 0.0492, "stdq": 0.0447, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 74700 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3119, "doc_norm": 1.3889, "encoder_q-embeddings": 9003.5801, "encoder_q-layer.0": 5990.7764, "encoder_q-layer.1": 6580.5371, "encoder_q-layer.10": 8918.1172, "encoder_q-layer.11": 21330.5156, "encoder_q-layer.2": 7118.8774, "encoder_q-layer.3": 7520.4541, "encoder_q-layer.4": 8123.8301, "encoder_q-layer.5": 8253.0381, "encoder_q-layer.6": 8903.0234, "encoder_q-layer.7": 9392.8389, "encoder_q-layer.8": 10122.3096, "encoder_q-layer.9": 9022.3828, "epoch": 0.73, "inbatch_neg_score": 0.108, "inbatch_pos_score": 0.7661, "learning_rate": 1.4000000000000001e-05, "loss": 3.3119, "norm_diff": 0.1246, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14636.279, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.109, "query_norm": 1.2642, "queue_k_norm": 1.399, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5315, "sent_len_1": 66.7932, "sent_max_len_0": 127.995, "sent_max_len_1": 189.2212, "stdk": 0.0485, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 74800 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.3135, "doc_norm": 1.406, "encoder_q-embeddings": 7116.8584, "encoder_q-layer.0": 4709.5225, "encoder_q-layer.1": 4936.9917, "encoder_q-layer.10": 9770.4971, "encoder_q-layer.11": 21659.1387, "encoder_q-layer.2": 5544.0317, "encoder_q-layer.3": 5751.6108, "encoder_q-layer.4": 6181.4653, "encoder_q-layer.5": 6794.2778, "encoder_q-layer.6": 7506.5303, "encoder_q-layer.7": 8764.7158, "encoder_q-layer.8": 10527.2061, "encoder_q-layer.9": 9435.4668, "epoch": 0.73, "inbatch_neg_score": 0.1089, "inbatch_pos_score": 0.8086, "learning_rate": 1.3944444444444446e-05, "loss": 3.3135, "norm_diff": 0.1175, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13904.0102, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1086, "query_norm": 1.2885, "queue_k_norm": 1.3979, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6848, "sent_len_1": 67.0763, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0337, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 74900 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3232, "doc_norm": 1.3943, "encoder_q-embeddings": 17828.0449, "encoder_q-layer.0": 12386.4326, "encoder_q-layer.1": 15604.6807, "encoder_q-layer.10": 9325.126, "encoder_q-layer.11": 21497.459, "encoder_q-layer.2": 17236.5391, "encoder_q-layer.3": 16322.5518, "encoder_q-layer.4": 16423.459, "encoder_q-layer.5": 18862.8125, "encoder_q-layer.6": 17394.9082, "encoder_q-layer.7": 16518.418, "encoder_q-layer.8": 12305.248, "encoder_q-layer.9": 9768.2676, "epoch": 0.73, "inbatch_neg_score": 0.107, "inbatch_pos_score": 0.7612, "learning_rate": 1.388888888888889e-05, "loss": 3.3232, "norm_diff": 0.133, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 23376.9815, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.1066, "query_norm": 1.2614, "queue_k_norm": 1.3994, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6021, "sent_len_1": 66.8878, "sent_max_len_0": 128.0, "sent_max_len_1": 189.485, "stdk": 0.0487, "stdq": 0.0445, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 75000 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.324, "doc_norm": 1.3973, "encoder_q-embeddings": 8018.688, "encoder_q-layer.0": 5156.6162, "encoder_q-layer.1": 5522.6997, "encoder_q-layer.10": 10068.2041, "encoder_q-layer.11": 22299.8047, "encoder_q-layer.2": 6279.6968, "encoder_q-layer.3": 6478.3135, "encoder_q-layer.4": 6896.5952, "encoder_q-layer.5": 7413.3848, "encoder_q-layer.6": 8487.4922, "encoder_q-layer.7": 9357.0273, "encoder_q-layer.8": 10708.5645, "encoder_q-layer.9": 9362.1641, "epoch": 0.73, "inbatch_neg_score": 0.104, "inbatch_pos_score": 0.7544, "learning_rate": 1.3833333333333334e-05, "loss": 3.324, "norm_diff": 0.1261, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14695.5664, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1047, "query_norm": 1.2713, "queue_k_norm": 1.3983, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.397, "sent_len_1": 66.495, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8738, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75100 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3096, "doc_norm": 1.3978, "encoder_q-embeddings": 8079.873, "encoder_q-layer.0": 5459.8501, "encoder_q-layer.1": 5804.3462, "encoder_q-layer.10": 9652.4307, "encoder_q-layer.11": 21254.4434, "encoder_q-layer.2": 6827.7925, "encoder_q-layer.3": 6822.7705, "encoder_q-layer.4": 7395.3501, "encoder_q-layer.5": 7939.833, "encoder_q-layer.6": 8233.1074, "encoder_q-layer.7": 8675.376, "encoder_q-layer.8": 9845.8125, "encoder_q-layer.9": 9027.3066, "epoch": 0.73, "inbatch_neg_score": 0.0985, "inbatch_pos_score": 0.7734, "learning_rate": 1.3777777777777778e-05, "loss": 3.3096, "norm_diff": 0.1349, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14178.3382, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0999, "query_norm": 1.2629, "queue_k_norm": 1.3965, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6952, "sent_len_1": 66.876, "sent_max_len_0": 127.9875, "sent_max_len_1": 188.3762, "stdk": 0.0489, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75200 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.3131, "doc_norm": 1.3942, "encoder_q-embeddings": 13898.876, "encoder_q-layer.0": 9090.6143, "encoder_q-layer.1": 10664.4883, "encoder_q-layer.10": 9416.5566, "encoder_q-layer.11": 21252.3965, "encoder_q-layer.2": 12124.5811, "encoder_q-layer.3": 11834.4355, "encoder_q-layer.4": 13058.4707, "encoder_q-layer.5": 11578.3682, "encoder_q-layer.6": 12856.5693, "encoder_q-layer.7": 14393.6504, "encoder_q-layer.8": 13647.9258, "encoder_q-layer.9": 9272.0381, "epoch": 0.74, "inbatch_neg_score": 0.1022, "inbatch_pos_score": 0.7686, "learning_rate": 1.3722222222222222e-05, "loss": 3.3131, "norm_diff": 0.1275, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 19182.3105, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.1025, "query_norm": 1.2667, "queue_k_norm": 1.3952, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6682, "sent_len_1": 66.9011, "sent_max_len_0": 127.9925, "sent_max_len_1": 190.7525, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75300 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3173, "doc_norm": 1.3972, "encoder_q-embeddings": 12999.5371, "encoder_q-layer.0": 10009.2998, "encoder_q-layer.1": 9468.4199, "encoder_q-layer.10": 9923.8311, "encoder_q-layer.11": 21121.4961, "encoder_q-layer.2": 12306.2539, "encoder_q-layer.3": 13674.4346, "encoder_q-layer.4": 13932.7549, "encoder_q-layer.5": 12144.4961, "encoder_q-layer.6": 11546.3311, "encoder_q-layer.7": 12035.0762, "encoder_q-layer.8": 12100.9551, "encoder_q-layer.9": 9389.8086, "epoch": 0.74, "inbatch_neg_score": 0.1018, "inbatch_pos_score": 0.7695, "learning_rate": 1.3666666666666666e-05, "loss": 3.3173, "norm_diff": 0.1268, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 19115.8173, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.1023, "query_norm": 1.2704, "queue_k_norm": 1.3981, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4808, "sent_len_1": 66.8496, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.5213, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 75400 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3193, "doc_norm": 1.3994, "encoder_q-embeddings": 14094.2383, "encoder_q-layer.0": 11886.6162, "encoder_q-layer.1": 11110.0479, "encoder_q-layer.10": 9947.5342, "encoder_q-layer.11": 22141.1523, "encoder_q-layer.2": 11123.1406, "encoder_q-layer.3": 10550.2109, "encoder_q-layer.4": 10594.584, "encoder_q-layer.5": 9479.125, "encoder_q-layer.6": 9121.5098, "encoder_q-layer.7": 9472.1582, "encoder_q-layer.8": 10973.0947, "encoder_q-layer.9": 10365.8584, "epoch": 0.74, "inbatch_neg_score": 0.1017, "inbatch_pos_score": 0.7637, "learning_rate": 1.3611111111111111e-05, "loss": 3.3193, "norm_diff": 0.1177, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 18357.3912, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.1011, "query_norm": 1.2818, "queue_k_norm": 1.3972, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6289, "sent_len_1": 66.7053, "sent_max_len_0": 127.9975, "sent_max_len_1": 192.0437, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 75500 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.3211, "doc_norm": 1.3971, "encoder_q-embeddings": 7292.0786, "encoder_q-layer.0": 4928.165, "encoder_q-layer.1": 5190.6572, "encoder_q-layer.10": 9782.4229, "encoder_q-layer.11": 20760.2227, "encoder_q-layer.2": 5962.0601, "encoder_q-layer.3": 6257.2139, "encoder_q-layer.4": 6424.1855, "encoder_q-layer.5": 6845.5562, "encoder_q-layer.6": 7886.6953, "encoder_q-layer.7": 9412.1787, "encoder_q-layer.8": 10276.5625, "encoder_q-layer.9": 9409.9229, "epoch": 0.74, "inbatch_neg_score": 0.0956, "inbatch_pos_score": 0.7856, "learning_rate": 1.3555555555555557e-05, "loss": 3.3211, "norm_diff": 0.1191, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13804.5821, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0965, "query_norm": 1.278, "queue_k_norm": 1.3953, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6036, "sent_len_1": 66.8028, "sent_max_len_0": 128.0, "sent_max_len_1": 189.155, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75600 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.3289, "doc_norm": 1.3908, "encoder_q-embeddings": 7838.5146, "encoder_q-layer.0": 5192.4312, "encoder_q-layer.1": 5643.4204, "encoder_q-layer.10": 9691.5977, "encoder_q-layer.11": 21003.7949, "encoder_q-layer.2": 6515.2549, "encoder_q-layer.3": 6941.6514, "encoder_q-layer.4": 7303.834, "encoder_q-layer.5": 7508.1406, "encoder_q-layer.6": 8456.5518, "encoder_q-layer.7": 9149.2393, "encoder_q-layer.8": 10118.5254, "encoder_q-layer.9": 9020.4434, "epoch": 0.74, "inbatch_neg_score": 0.0992, "inbatch_pos_score": 0.7754, "learning_rate": 1.3500000000000001e-05, "loss": 3.3289, "norm_diff": 0.1254, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14156.2352, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0996, "query_norm": 1.2653, "queue_k_norm": 1.3969, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5792, "sent_len_1": 66.7304, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9787, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 75700 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.3047, "doc_norm": 1.3917, "encoder_q-embeddings": 9482.8242, "encoder_q-layer.0": 6205.1895, "encoder_q-layer.1": 6715.1533, "encoder_q-layer.10": 9207.3984, "encoder_q-layer.11": 20961.1504, "encoder_q-layer.2": 7616.9854, "encoder_q-layer.3": 8175.3296, "encoder_q-layer.4": 8852.8066, "encoder_q-layer.5": 9197.959, "encoder_q-layer.6": 9968.9668, "encoder_q-layer.7": 10550.4023, "encoder_q-layer.8": 11189.2725, "encoder_q-layer.9": 9312.0703, "epoch": 0.74, "inbatch_neg_score": 0.0978, "inbatch_pos_score": 0.7554, "learning_rate": 1.3444444444444445e-05, "loss": 3.3047, "norm_diff": 0.1316, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15611.4979, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0973, "query_norm": 1.2601, "queue_k_norm": 1.3949, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.8006, "sent_len_1": 67.1103, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4625, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75800 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.3181, "doc_norm": 1.3888, "encoder_q-embeddings": 7290.7314, "encoder_q-layer.0": 4817.5801, "encoder_q-layer.1": 5089.0854, "encoder_q-layer.10": 9130.1699, "encoder_q-layer.11": 20080.543, "encoder_q-layer.2": 5791.6484, "encoder_q-layer.3": 6127.0059, "encoder_q-layer.4": 6765.3413, "encoder_q-layer.5": 6993.2446, "encoder_q-layer.6": 8191.3359, "encoder_q-layer.7": 8472.8408, "encoder_q-layer.8": 9800.7168, "encoder_q-layer.9": 8838.334, "epoch": 0.74, "inbatch_neg_score": 0.1006, "inbatch_pos_score": 0.7827, "learning_rate": 1.338888888888889e-05, "loss": 3.3181, "norm_diff": 0.1069, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13413.1733, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1006, "query_norm": 1.2819, "queue_k_norm": 1.395, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7971, "sent_len_1": 66.8, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5012, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75900 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.2863, "doc_norm": 1.3971, "encoder_q-embeddings": 7788.1606, "encoder_q-layer.0": 4929.7837, "encoder_q-layer.1": 5263.3721, "encoder_q-layer.10": 9905.0098, "encoder_q-layer.11": 21527.373, "encoder_q-layer.2": 5923.126, "encoder_q-layer.3": 6173.7769, "encoder_q-layer.4": 6435.3994, "encoder_q-layer.5": 6514.8486, "encoder_q-layer.6": 7423.8545, "encoder_q-layer.7": 8488.2871, "encoder_q-layer.8": 10247.2676, "encoder_q-layer.9": 9218.9834, "epoch": 0.74, "inbatch_neg_score": 0.0981, "inbatch_pos_score": 0.7588, "learning_rate": 1.3333333333333333e-05, "loss": 3.2863, "norm_diff": 0.1309, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14082.465, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0982, "query_norm": 1.2662, "queue_k_norm": 1.3939, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6943, "sent_len_1": 66.7589, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.8338, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 76000 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.3298, "doc_norm": 1.3975, "encoder_q-embeddings": 7547.3633, "encoder_q-layer.0": 4834.8608, "encoder_q-layer.1": 5157.6973, "encoder_q-layer.10": 9480.1748, "encoder_q-layer.11": 20401.7539, "encoder_q-layer.2": 6032.0898, "encoder_q-layer.3": 6237.7881, "encoder_q-layer.4": 6502.1372, "encoder_q-layer.5": 6809.1182, "encoder_q-layer.6": 7256.6108, "encoder_q-layer.7": 8111.374, "encoder_q-layer.8": 9923.7891, "encoder_q-layer.9": 9316.5156, "epoch": 0.74, "inbatch_neg_score": 0.0974, "inbatch_pos_score": 0.7783, "learning_rate": 1.3277777777777777e-05, "loss": 3.3298, "norm_diff": 0.1239, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13549.6652, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0976, "query_norm": 1.2737, "queue_k_norm": 1.3943, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6079, "sent_len_1": 66.8096, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8063, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 76100 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.312, "doc_norm": 1.3842, "encoder_q-embeddings": 8106.2695, "encoder_q-layer.0": 5310.542, "encoder_q-layer.1": 5589.5732, "encoder_q-layer.10": 9439.7012, "encoder_q-layer.11": 21516.3555, "encoder_q-layer.2": 6154.3975, "encoder_q-layer.3": 6525.7246, "encoder_q-layer.4": 7126.9683, "encoder_q-layer.5": 6984.2124, "encoder_q-layer.6": 7805.3984, "encoder_q-layer.7": 9722.1416, "encoder_q-layer.8": 10669.293, "encoder_q-layer.9": 9598.418, "epoch": 0.74, "inbatch_neg_score": 0.0943, "inbatch_pos_score": 0.7583, "learning_rate": 1.3222222222222221e-05, "loss": 3.312, "norm_diff": 0.1173, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14405.0614, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.094, "query_norm": 1.2669, "queue_k_norm": 1.3944, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7101, "sent_len_1": 66.7957, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0863, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 76200 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.3317, "doc_norm": 1.3985, "encoder_q-embeddings": 15448.5781, "encoder_q-layer.0": 9976.9043, "encoder_q-layer.1": 10467.2695, "encoder_q-layer.10": 20477.3496, "encoder_q-layer.11": 43861.1406, "encoder_q-layer.2": 11542.25, "encoder_q-layer.3": 12268.3359, "encoder_q-layer.4": 12978.1475, "encoder_q-layer.5": 13619.123, "encoder_q-layer.6": 14897.6602, "encoder_q-layer.7": 16610.5332, "encoder_q-layer.8": 19746.6543, "encoder_q-layer.9": 18330.8301, "epoch": 0.74, "inbatch_neg_score": 0.094, "inbatch_pos_score": 0.7524, "learning_rate": 1.3166666666666665e-05, "loss": 3.3317, "norm_diff": 0.1418, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27931.7796, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.094, "query_norm": 1.2566, "queue_k_norm": 1.3938, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6466, "sent_len_1": 66.7883, "sent_max_len_0": 128.0, "sent_max_len_1": 191.445, "stdk": 0.049, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 76300 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.3208, "doc_norm": 1.3926, "encoder_q-embeddings": 15741.7109, "encoder_q-layer.0": 10341.249, "encoder_q-layer.1": 11032.0254, "encoder_q-layer.10": 19497.0586, "encoder_q-layer.11": 43496.1797, "encoder_q-layer.2": 12270.3066, "encoder_q-layer.3": 12518.0234, "encoder_q-layer.4": 13763.0889, "encoder_q-layer.5": 14388.3037, "encoder_q-layer.6": 16135.3535, "encoder_q-layer.7": 17370.5977, "encoder_q-layer.8": 21359.1484, "encoder_q-layer.9": 19305.7773, "epoch": 0.75, "inbatch_neg_score": 0.0957, "inbatch_pos_score": 0.771, "learning_rate": 1.3111111111111113e-05, "loss": 3.3208, "norm_diff": 0.1211, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28407.2761, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.0953, "query_norm": 1.2715, "queue_k_norm": 1.395, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5385, "sent_len_1": 66.9466, "sent_max_len_0": 127.9988, "sent_max_len_1": 191.4638, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 76400 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3134, "doc_norm": 1.3955, "encoder_q-embeddings": 8898.6807, "encoder_q-layer.0": 6022.8286, "encoder_q-layer.1": 6273.0225, "encoder_q-layer.10": 9659.7822, "encoder_q-layer.11": 21022.7227, "encoder_q-layer.2": 7239.4731, "encoder_q-layer.3": 7490.7227, "encoder_q-layer.4": 7854.6328, "encoder_q-layer.5": 8012.624, "encoder_q-layer.6": 9790.79, "encoder_q-layer.7": 9727.4609, "encoder_q-layer.8": 10949.8584, "encoder_q-layer.9": 9109.2441, "epoch": 0.75, "inbatch_neg_score": 0.0958, "inbatch_pos_score": 0.7705, "learning_rate": 1.3055555555555557e-05, "loss": 3.3134, "norm_diff": 0.1269, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14842.1025, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0947, "query_norm": 1.2686, "queue_k_norm": 1.392, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6571, "sent_len_1": 66.7917, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.4363, "stdk": 0.0489, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 76500 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.2854, "doc_norm": 1.3881, "encoder_q-embeddings": 3911.627, "encoder_q-layer.0": 2650.4543, "encoder_q-layer.1": 2686.1895, "encoder_q-layer.10": 4558.5776, "encoder_q-layer.11": 10285.4229, "encoder_q-layer.2": 3021.926, "encoder_q-layer.3": 3123.7546, "encoder_q-layer.4": 3368.1426, "encoder_q-layer.5": 3461.7859, "encoder_q-layer.6": 3934.033, "encoder_q-layer.7": 4387.2871, "encoder_q-layer.8": 4843.7139, "encoder_q-layer.9": 4413.0059, "epoch": 0.75, "inbatch_neg_score": 0.0946, "inbatch_pos_score": 0.7539, "learning_rate": 1.3000000000000001e-05, "loss": 3.2854, "norm_diff": 0.1149, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6869.9608, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0948, "query_norm": 1.2732, "queue_k_norm": 1.3947, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.981, "sent_len_1": 66.7919, "sent_max_len_0": 128.0, "sent_max_len_1": 189.775, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 76600 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.3103, "doc_norm": 1.3947, "encoder_q-embeddings": 3886.7341, "encoder_q-layer.0": 2548.0037, "encoder_q-layer.1": 2730.5093, "encoder_q-layer.10": 5074.042, "encoder_q-layer.11": 10681.7236, "encoder_q-layer.2": 3084.9463, "encoder_q-layer.3": 3180.2871, "encoder_q-layer.4": 3460.5354, "encoder_q-layer.5": 3524.9817, "encoder_q-layer.6": 4042.9233, "encoder_q-layer.7": 4570.1914, "encoder_q-layer.8": 4905.396, "encoder_q-layer.9": 4580.3169, "epoch": 0.75, "inbatch_neg_score": 0.0941, "inbatch_pos_score": 0.7573, "learning_rate": 1.2944444444444445e-05, "loss": 3.3103, "norm_diff": 0.1263, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7065.6121, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0941, "query_norm": 1.2685, "queue_k_norm": 1.3926, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7769, "sent_len_1": 66.8515, "sent_max_len_0": 128.0, "sent_max_len_1": 188.085, "stdk": 0.0489, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 76700 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.3188, "doc_norm": 1.3962, "encoder_q-embeddings": 2704.9487, "encoder_q-layer.0": 1746.9583, "encoder_q-layer.1": 1906.5863, "encoder_q-layer.10": 2309.0027, "encoder_q-layer.11": 5252.4614, "encoder_q-layer.2": 2130.197, "encoder_q-layer.3": 2211.3801, "encoder_q-layer.4": 2427.8132, "encoder_q-layer.5": 2801.26, "encoder_q-layer.6": 2984.218, "encoder_q-layer.7": 2770.8176, "encoder_q-layer.8": 2640.9617, "encoder_q-layer.9": 2339.2141, "epoch": 0.75, "inbatch_neg_score": 0.0931, "inbatch_pos_score": 0.7754, "learning_rate": 1.2888888888888889e-05, "loss": 3.3188, "norm_diff": 0.1139, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4102.2905, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0932, "query_norm": 1.2823, "queue_k_norm": 1.3934, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6081, "sent_len_1": 66.7567, "sent_max_len_0": 128.0, "sent_max_len_1": 187.96, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 76800 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.3269, "doc_norm": 1.4, "encoder_q-embeddings": 2540.4875, "encoder_q-layer.0": 1726.5098, "encoder_q-layer.1": 1824.0134, "encoder_q-layer.10": 2619.3875, "encoder_q-layer.11": 5627.5112, "encoder_q-layer.2": 2178.8723, "encoder_q-layer.3": 2341.4919, "encoder_q-layer.4": 2466.4675, "encoder_q-layer.5": 2499.2405, "encoder_q-layer.6": 2803.0134, "encoder_q-layer.7": 2980.7546, "encoder_q-layer.8": 3040.3098, "encoder_q-layer.9": 2499.189, "epoch": 0.75, "inbatch_neg_score": 0.0889, "inbatch_pos_score": 0.7495, "learning_rate": 1.2833333333333333e-05, "loss": 3.3269, "norm_diff": 0.1386, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4241.0024, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.089, "query_norm": 1.2614, "queue_k_norm": 1.3934, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.696, "sent_len_1": 66.6969, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.2763, "stdk": 0.0491, "stdq": 0.0445, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 76900 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.3067, "doc_norm": 1.401, "encoder_q-embeddings": 1960.5571, "encoder_q-layer.0": 1291.9939, "encoder_q-layer.1": 1378.4333, "encoder_q-layer.10": 2736.967, "encoder_q-layer.11": 5397.2583, "encoder_q-layer.2": 1600.0221, "encoder_q-layer.3": 1687.4052, "encoder_q-layer.4": 1862.5431, "encoder_q-layer.5": 1899.1025, "encoder_q-layer.6": 2068.4097, "encoder_q-layer.7": 2484.8538, "encoder_q-layer.8": 2980.2827, "encoder_q-layer.9": 2554.2229, "epoch": 0.75, "inbatch_neg_score": 0.0921, "inbatch_pos_score": 0.7617, "learning_rate": 1.2777777777777777e-05, "loss": 3.3067, "norm_diff": 0.1213, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3631.6628, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0923, "query_norm": 1.2796, "queue_k_norm": 1.3934, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5429, "sent_len_1": 67.009, "sent_max_len_0": 128.0, "sent_max_len_1": 189.725, "stdk": 0.0491, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 77000 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3049, "doc_norm": 1.3978, "encoder_q-embeddings": 2418.1841, "encoder_q-layer.0": 1679.9602, "encoder_q-layer.1": 1723.6417, "encoder_q-layer.10": 2543.0354, "encoder_q-layer.11": 5222.3213, "encoder_q-layer.2": 1913.4105, "encoder_q-layer.3": 2038.3326, "encoder_q-layer.4": 2162.1206, "encoder_q-layer.5": 2187.4265, "encoder_q-layer.6": 2325.4185, "encoder_q-layer.7": 2334.6836, "encoder_q-layer.8": 2570.5896, "encoder_q-layer.9": 2271.7742, "epoch": 0.75, "inbatch_neg_score": 0.0934, "inbatch_pos_score": 0.7715, "learning_rate": 1.2722222222222221e-05, "loss": 3.3049, "norm_diff": 0.1115, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3788.9062, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0934, "query_norm": 1.2863, "queue_k_norm": 1.392, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6348, "sent_len_1": 66.761, "sent_max_len_0": 127.9963, "sent_max_len_1": 189.1225, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77100 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.3051, "doc_norm": 1.3922, "encoder_q-embeddings": 1786.7778, "encoder_q-layer.0": 1194.0385, "encoder_q-layer.1": 1252.3241, "encoder_q-layer.10": 2291.2017, "encoder_q-layer.11": 5031.0806, "encoder_q-layer.2": 1390.8555, "encoder_q-layer.3": 1448.9993, "encoder_q-layer.4": 1561.4071, "encoder_q-layer.5": 1591.8137, "encoder_q-layer.6": 1834.7178, "encoder_q-layer.7": 2011.485, "encoder_q-layer.8": 2391.2417, "encoder_q-layer.9": 2193.4219, "epoch": 0.75, "inbatch_neg_score": 0.0949, "inbatch_pos_score": 0.7871, "learning_rate": 1.2666666666666668e-05, "loss": 3.3051, "norm_diff": 0.1085, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3250.0023, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0947, "query_norm": 1.2838, "queue_k_norm": 1.3929, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5966, "sent_len_1": 66.7072, "sent_max_len_0": 127.9887, "sent_max_len_1": 189.4638, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 77200 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3097, "doc_norm": 1.3885, "encoder_q-embeddings": 2317.54, "encoder_q-layer.0": 1478.6332, "encoder_q-layer.1": 1619.184, "encoder_q-layer.10": 2633.3313, "encoder_q-layer.11": 5660.6978, "encoder_q-layer.2": 1889.9077, "encoder_q-layer.3": 1949.0704, "encoder_q-layer.4": 2140.5312, "encoder_q-layer.5": 2203.447, "encoder_q-layer.6": 2310.76, "encoder_q-layer.7": 2499.0779, "encoder_q-layer.8": 2917.3586, "encoder_q-layer.9": 2533.1062, "epoch": 0.75, "inbatch_neg_score": 0.0916, "inbatch_pos_score": 0.7354, "learning_rate": 1.2611111111111113e-05, "loss": 3.3097, "norm_diff": 0.1233, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3967.7226, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0914, "query_norm": 1.2652, "queue_k_norm": 1.3924, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6083, "sent_len_1": 66.6789, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.6438, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 77300 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.3157, "doc_norm": 1.3966, "encoder_q-embeddings": 2118.2673, "encoder_q-layer.0": 1463.2125, "encoder_q-layer.1": 1548.8488, "encoder_q-layer.10": 2350.8508, "encoder_q-layer.11": 5402.0234, "encoder_q-layer.2": 1728.2557, "encoder_q-layer.3": 1827.2424, "encoder_q-layer.4": 1957.2096, "encoder_q-layer.5": 1985.8737, "encoder_q-layer.6": 2182.5569, "encoder_q-layer.7": 2470.2947, "encoder_q-layer.8": 2699.2153, "encoder_q-layer.9": 2401.8042, "epoch": 0.76, "inbatch_neg_score": 0.0995, "inbatch_pos_score": 0.7739, "learning_rate": 1.2555555555555557e-05, "loss": 3.3157, "norm_diff": 0.1009, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3730.7146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0999, "query_norm": 1.2957, "queue_k_norm": 1.3923, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4327, "sent_len_1": 66.6854, "sent_max_len_0": 127.9975, "sent_max_len_1": 191.4725, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 77400 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.3268, "doc_norm": 1.3919, "encoder_q-embeddings": 1863.2651, "encoder_q-layer.0": 1250.4218, "encoder_q-layer.1": 1361.918, "encoder_q-layer.10": 2277.1509, "encoder_q-layer.11": 5217.9419, "encoder_q-layer.2": 1511.2047, "encoder_q-layer.3": 1557.5098, "encoder_q-layer.4": 1637.0181, "encoder_q-layer.5": 1728.4463, "encoder_q-layer.6": 1944.035, "encoder_q-layer.7": 2123.8081, "encoder_q-layer.8": 2541.3547, "encoder_q-layer.9": 2345.0007, "epoch": 0.76, "inbatch_neg_score": 0.1015, "inbatch_pos_score": 0.7695, "learning_rate": 1.25e-05, "loss": 3.3268, "norm_diff": 0.0866, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3431.54, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1002, "query_norm": 1.3054, "queue_k_norm": 1.3947, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4226, "sent_len_1": 66.9559, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.8675, "stdk": 0.0488, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 77500 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.2922, "doc_norm": 1.3966, "encoder_q-embeddings": 2148.4268, "encoder_q-layer.0": 1424.3998, "encoder_q-layer.1": 1550.347, "encoder_q-layer.10": 2211.4504, "encoder_q-layer.11": 5166.0493, "encoder_q-layer.2": 1851.4188, "encoder_q-layer.3": 1855.8633, "encoder_q-layer.4": 2045.741, "encoder_q-layer.5": 2184.7961, "encoder_q-layer.6": 2426.2869, "encoder_q-layer.7": 2569.1482, "encoder_q-layer.8": 2728.166, "encoder_q-layer.9": 2259.7266, "epoch": 0.76, "inbatch_neg_score": 0.1014, "inbatch_pos_score": 0.811, "learning_rate": 1.2444444444444445e-05, "loss": 3.2922, "norm_diff": 0.0935, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3687.7124, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1013, "query_norm": 1.303, "queue_k_norm": 1.3931, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7065, "sent_len_1": 66.7245, "sent_max_len_0": 127.9988, "sent_max_len_1": 191.0488, "stdk": 0.049, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 77600 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.3104, "doc_norm": 1.3899, "encoder_q-embeddings": 2092.4375, "encoder_q-layer.0": 1393.4155, "encoder_q-layer.1": 1507.7968, "encoder_q-layer.10": 2358.5247, "encoder_q-layer.11": 5152.1929, "encoder_q-layer.2": 1755.5376, "encoder_q-layer.3": 1904.067, "encoder_q-layer.4": 2007.4314, "encoder_q-layer.5": 1986.5171, "encoder_q-layer.6": 2089.4143, "encoder_q-layer.7": 2313.4641, "encoder_q-layer.8": 2597.5999, "encoder_q-layer.9": 2299.6733, "epoch": 0.76, "inbatch_neg_score": 0.1059, "inbatch_pos_score": 0.7896, "learning_rate": 1.238888888888889e-05, "loss": 3.3104, "norm_diff": 0.0835, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3633.506, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1055, "query_norm": 1.3065, "queue_k_norm": 1.3917, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6305, "sent_len_1": 66.5359, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.0175, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77700 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.3103, "doc_norm": 1.3909, "encoder_q-embeddings": 1904.7361, "encoder_q-layer.0": 1235.6519, "encoder_q-layer.1": 1272.6146, "encoder_q-layer.10": 2338.4858, "encoder_q-layer.11": 5240.7744, "encoder_q-layer.2": 1462.9161, "encoder_q-layer.3": 1543.8422, "encoder_q-layer.4": 1687.3542, "encoder_q-layer.5": 1702.0604, "encoder_q-layer.6": 1921.2488, "encoder_q-layer.7": 2187.7095, "encoder_q-layer.8": 2457.886, "encoder_q-layer.9": 2311.6121, "epoch": 0.76, "inbatch_neg_score": 0.1073, "inbatch_pos_score": 0.7803, "learning_rate": 1.2333333333333334e-05, "loss": 3.3103, "norm_diff": 0.0966, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3453.6065, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1074, "query_norm": 1.2943, "queue_k_norm": 1.3928, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.659, "sent_len_1": 66.7862, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5312, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 77800 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.293, "doc_norm": 1.3902, "encoder_q-embeddings": 1846.1482, "encoder_q-layer.0": 1211.7611, "encoder_q-layer.1": 1285.7771, "encoder_q-layer.10": 2285.9272, "encoder_q-layer.11": 5229.4902, "encoder_q-layer.2": 1442.8672, "encoder_q-layer.3": 1482.282, "encoder_q-layer.4": 1508.9437, "encoder_q-layer.5": 1563.083, "encoder_q-layer.6": 1741.9089, "encoder_q-layer.7": 1939.8293, "encoder_q-layer.8": 2374.7295, "encoder_q-layer.9": 2182.073, "epoch": 0.76, "inbatch_neg_score": 0.1081, "inbatch_pos_score": 0.8027, "learning_rate": 1.2277777777777778e-05, "loss": 3.293, "norm_diff": 0.0913, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3346.0513, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1083, "query_norm": 1.2989, "queue_k_norm": 1.3941, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6783, "sent_len_1": 66.6478, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9288, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 77900 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.3076, "doc_norm": 1.3859, "encoder_q-embeddings": 1985.6665, "encoder_q-layer.0": 1344.6024, "encoder_q-layer.1": 1470.433, "encoder_q-layer.10": 2446.8552, "encoder_q-layer.11": 5102.5264, "encoder_q-layer.2": 1667.2297, "encoder_q-layer.3": 1712.7908, "encoder_q-layer.4": 1781.3961, "encoder_q-layer.5": 2025.4053, "encoder_q-layer.6": 2129.5806, "encoder_q-layer.7": 2380.4883, "encoder_q-layer.8": 2649.4648, "encoder_q-layer.9": 2331.1921, "epoch": 0.76, "inbatch_neg_score": 0.1096, "inbatch_pos_score": 0.7803, "learning_rate": 1.2222222222222222e-05, "loss": 3.3076, "norm_diff": 0.0773, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3543.9898, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1102, "query_norm": 1.3087, "queue_k_norm": 1.3926, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5506, "sent_len_1": 66.8061, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3587, "stdk": 0.0486, "stdq": 0.0455, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 78000 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.2951, "doc_norm": 1.3933, "encoder_q-embeddings": 1995.6531, "encoder_q-layer.0": 1347.1025, "encoder_q-layer.1": 1479.4192, "encoder_q-layer.10": 2374.4553, "encoder_q-layer.11": 5129.0879, "encoder_q-layer.2": 1716.623, "encoder_q-layer.3": 1734.4283, "encoder_q-layer.4": 1873.333, "encoder_q-layer.5": 1909.6681, "encoder_q-layer.6": 2113.6858, "encoder_q-layer.7": 2358.8137, "encoder_q-layer.8": 2708.186, "encoder_q-layer.9": 2323.0811, "epoch": 0.76, "inbatch_neg_score": 0.1136, "inbatch_pos_score": 0.7808, "learning_rate": 1.2166666666666668e-05, "loss": 3.2951, "norm_diff": 0.104, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3591.9068, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1113, "query_norm": 1.2893, "queue_k_norm": 1.3943, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6657, "sent_len_1": 66.8211, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.0387, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 78100 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.3118, "doc_norm": 1.3933, "encoder_q-embeddings": 1864.3179, "encoder_q-layer.0": 1243.8026, "encoder_q-layer.1": 1314.8308, "encoder_q-layer.10": 2278.1189, "encoder_q-layer.11": 5164.2661, "encoder_q-layer.2": 1448.4871, "encoder_q-layer.3": 1510.3411, "encoder_q-layer.4": 1584.658, "encoder_q-layer.5": 1642.8313, "encoder_q-layer.6": 1865.3829, "encoder_q-layer.7": 2123.1909, "encoder_q-layer.8": 2520.8423, "encoder_q-layer.9": 2255.0703, "epoch": 0.76, "inbatch_neg_score": 0.1157, "inbatch_pos_score": 0.793, "learning_rate": 1.2111111111111112e-05, "loss": 3.3118, "norm_diff": 0.1068, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3388.9654, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1149, "query_norm": 1.2864, "queue_k_norm": 1.3944, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.755, "sent_len_1": 66.764, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1312, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 78200 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.3011, "doc_norm": 1.3977, "encoder_q-embeddings": 1774.597, "encoder_q-layer.0": 1219.8771, "encoder_q-layer.1": 1279.1666, "encoder_q-layer.10": 2308.3835, "encoder_q-layer.11": 5368.4619, "encoder_q-layer.2": 1402.9849, "encoder_q-layer.3": 1452.7864, "encoder_q-layer.4": 1572.6887, "encoder_q-layer.5": 1551.6973, "encoder_q-layer.6": 1879.6218, "encoder_q-layer.7": 2017.0613, "encoder_q-layer.8": 2365.145, "encoder_q-layer.9": 2155.1404, "epoch": 0.76, "inbatch_neg_score": 0.118, "inbatch_pos_score": 0.8247, "learning_rate": 1.2055555555555556e-05, "loss": 3.3011, "norm_diff": 0.0943, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3302.2437, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1187, "query_norm": 1.3034, "queue_k_norm": 1.3956, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5991, "sent_len_1": 66.6856, "sent_max_len_0": 127.9975, "sent_max_len_1": 186.8862, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 78300 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.2845, "doc_norm": 1.4026, "encoder_q-embeddings": 2004.7789, "encoder_q-layer.0": 1311.3977, "encoder_q-layer.1": 1309.4875, "encoder_q-layer.10": 2375.7498, "encoder_q-layer.11": 5596.2222, "encoder_q-layer.2": 1467.1973, "encoder_q-layer.3": 1479.8154, "encoder_q-layer.4": 1611.1982, "encoder_q-layer.5": 1707.452, "encoder_q-layer.6": 1981.8405, "encoder_q-layer.7": 2099.6475, "encoder_q-layer.8": 2399.6929, "encoder_q-layer.9": 2307.9365, "epoch": 0.77, "inbatch_neg_score": 0.1137, "inbatch_pos_score": 0.7896, "learning_rate": 1.2e-05, "loss": 3.2845, "norm_diff": 0.1123, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3492.703, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1137, "query_norm": 1.2903, "queue_k_norm": 1.3982, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6524, "sent_len_1": 66.8552, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7225, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 78400 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.3218, "doc_norm": 1.3995, "encoder_q-embeddings": 2027.2145, "encoder_q-layer.0": 1512.9504, "encoder_q-layer.1": 1710.6396, "encoder_q-layer.10": 2330.5176, "encoder_q-layer.11": 5086.8638, "encoder_q-layer.2": 2136.0359, "encoder_q-layer.3": 2342.0559, "encoder_q-layer.4": 2350.115, "encoder_q-layer.5": 1491.8931, "encoder_q-layer.6": 1774.7845, "encoder_q-layer.7": 2014.4004, "encoder_q-layer.8": 2286.7104, "encoder_q-layer.9": 2146.2056, "epoch": 0.77, "inbatch_neg_score": 0.113, "inbatch_pos_score": 0.7759, "learning_rate": 1.1944444444444446e-05, "loss": 3.3218, "norm_diff": 0.1274, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3575.1991, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1137, "query_norm": 1.2722, "queue_k_norm": 1.3965, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5686, "sent_len_1": 66.7722, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6687, "stdk": 0.0491, "stdq": 0.0445, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 78500 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.31, "doc_norm": 1.3999, "encoder_q-embeddings": 1909.3929, "encoder_q-layer.0": 1308.5933, "encoder_q-layer.1": 1412.7777, "encoder_q-layer.10": 2298.8452, "encoder_q-layer.11": 5522.9526, "encoder_q-layer.2": 1630.3405, "encoder_q-layer.3": 1705.3383, "encoder_q-layer.4": 1811.7122, "encoder_q-layer.5": 1802.2471, "encoder_q-layer.6": 2003.8788, "encoder_q-layer.7": 2245.0039, "encoder_q-layer.8": 2626.2195, "encoder_q-layer.9": 2331.4482, "epoch": 0.77, "inbatch_neg_score": 0.1146, "inbatch_pos_score": 0.7837, "learning_rate": 1.188888888888889e-05, "loss": 3.31, "norm_diff": 0.1106, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3616.2615, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1146, "query_norm": 1.2893, "queue_k_norm": 1.3974, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.352, "sent_len_1": 66.6565, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6712, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 78600 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3216, "doc_norm": 1.3952, "encoder_q-embeddings": 1854.2438, "encoder_q-layer.0": 1221.656, "encoder_q-layer.1": 1275.4298, "encoder_q-layer.10": 2529.9429, "encoder_q-layer.11": 5486.7305, "encoder_q-layer.2": 1447.4286, "encoder_q-layer.3": 1509.2405, "encoder_q-layer.4": 1547.4071, "encoder_q-layer.5": 1618.8962, "encoder_q-layer.6": 1858.6389, "encoder_q-layer.7": 2336.511, "encoder_q-layer.8": 2636.5298, "encoder_q-layer.9": 2375.3062, "epoch": 0.77, "inbatch_neg_score": 0.1125, "inbatch_pos_score": 0.7822, "learning_rate": 1.1833333333333334e-05, "loss": 3.3216, "norm_diff": 0.1123, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3530.9624, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1127, "query_norm": 1.2829, "queue_k_norm": 1.397, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5004, "sent_len_1": 66.9106, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9913, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 78700 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.2818, "doc_norm": 1.4029, "encoder_q-embeddings": 3723.8455, "encoder_q-layer.0": 2516.1453, "encoder_q-layer.1": 2559.5339, "encoder_q-layer.10": 4723.0215, "encoder_q-layer.11": 10491.9053, "encoder_q-layer.2": 2803.4756, "encoder_q-layer.3": 2847.8435, "encoder_q-layer.4": 2986.8281, "encoder_q-layer.5": 3151.8269, "encoder_q-layer.6": 3471.5088, "encoder_q-layer.7": 4048.6167, "encoder_q-layer.8": 4570.4604, "encoder_q-layer.9": 4371.2339, "epoch": 0.77, "inbatch_neg_score": 0.1092, "inbatch_pos_score": 0.8037, "learning_rate": 1.1777777777777778e-05, "loss": 3.2818, "norm_diff": 0.1185, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6623.6962, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1097, "query_norm": 1.2844, "queue_k_norm": 1.3965, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.8132, "sent_len_1": 66.8267, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6962, "stdk": 0.0492, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 78800 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3002, "doc_norm": 1.3977, "encoder_q-embeddings": 3821.9431, "encoder_q-layer.0": 2627.5183, "encoder_q-layer.1": 2734.9019, "encoder_q-layer.10": 5059.9287, "encoder_q-layer.11": 10816.459, "encoder_q-layer.2": 3143.1797, "encoder_q-layer.3": 3169.5005, "encoder_q-layer.4": 3331.7957, "encoder_q-layer.5": 3364.8386, "encoder_q-layer.6": 3872.252, "encoder_q-layer.7": 4334.1387, "encoder_q-layer.8": 4793.3418, "encoder_q-layer.9": 4505.2378, "epoch": 0.77, "inbatch_neg_score": 0.1112, "inbatch_pos_score": 0.7583, "learning_rate": 1.1722222222222224e-05, "loss": 3.3002, "norm_diff": 0.1399, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7048.4779, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1105, "query_norm": 1.2578, "queue_k_norm": 1.3987, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6422, "sent_len_1": 66.9126, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.485, "stdk": 0.0489, "stdq": 0.0442, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 78900 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.2961, "doc_norm": 1.4036, "encoder_q-embeddings": 4550.2729, "encoder_q-layer.0": 3241.4929, "encoder_q-layer.1": 3642.7168, "encoder_q-layer.10": 4651.7578, "encoder_q-layer.11": 10624.7275, "encoder_q-layer.2": 4545.6831, "encoder_q-layer.3": 4694.2334, "encoder_q-layer.4": 4747.0288, "encoder_q-layer.5": 4538.8081, "encoder_q-layer.6": 4883.978, "encoder_q-layer.7": 5208.1694, "encoder_q-layer.8": 5312.0088, "encoder_q-layer.9": 4534.7852, "epoch": 0.77, "inbatch_neg_score": 0.1083, "inbatch_pos_score": 0.793, "learning_rate": 1.1666666666666668e-05, "loss": 3.2961, "norm_diff": 0.1275, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7873.628, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1089, "query_norm": 1.2761, "queue_k_norm": 1.399, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6146, "sent_len_1": 66.9506, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.6413, "stdk": 0.0491, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 79000 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.3244, "doc_norm": 1.3942, "encoder_q-embeddings": 6591.085, "encoder_q-layer.0": 4311.9238, "encoder_q-layer.1": 4235.9146, "encoder_q-layer.10": 4543.1055, "encoder_q-layer.11": 10214.9854, "encoder_q-layer.2": 4807.9072, "encoder_q-layer.3": 4814.9282, "encoder_q-layer.4": 5254.0557, "encoder_q-layer.5": 5167.8965, "encoder_q-layer.6": 5005.394, "encoder_q-layer.7": 4908.332, "encoder_q-layer.8": 5659.3945, "encoder_q-layer.9": 4693.3442, "epoch": 0.77, "inbatch_neg_score": 0.1082, "inbatch_pos_score": 0.7803, "learning_rate": 1.1611111111111112e-05, "loss": 3.3244, "norm_diff": 0.1192, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8548.1013, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1083, "query_norm": 1.275, "queue_k_norm": 1.3967, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5023, "sent_len_1": 66.7256, "sent_max_len_0": 127.9838, "sent_max_len_1": 189.2425, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 79100 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3138, "doc_norm": 1.3966, "encoder_q-embeddings": 3677.1316, "encoder_q-layer.0": 2510.1084, "encoder_q-layer.1": 2600.5906, "encoder_q-layer.10": 4741.2734, "encoder_q-layer.11": 11213.8535, "encoder_q-layer.2": 2904.7278, "encoder_q-layer.3": 3027.0054, "encoder_q-layer.4": 3242.8655, "encoder_q-layer.5": 3217.9219, "encoder_q-layer.6": 3713.2683, "encoder_q-layer.7": 4209.5703, "encoder_q-layer.8": 4861.6943, "encoder_q-layer.9": 4597.7397, "epoch": 0.77, "inbatch_neg_score": 0.109, "inbatch_pos_score": 0.7886, "learning_rate": 1.1555555555555556e-05, "loss": 3.3138, "norm_diff": 0.1054, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6970.1043, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1093, "query_norm": 1.2912, "queue_k_norm": 1.399, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7391, "sent_len_1": 66.6715, "sent_max_len_0": 127.99, "sent_max_len_1": 189.785, "stdk": 0.0489, "stdq": 0.0456, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 79200 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.2944, "doc_norm": 1.3971, "encoder_q-embeddings": 3973.9846, "encoder_q-layer.0": 2752.4695, "encoder_q-layer.1": 2921.2078, "encoder_q-layer.10": 4910.5444, "encoder_q-layer.11": 10666.541, "encoder_q-layer.2": 3444.3333, "encoder_q-layer.3": 3643.2856, "encoder_q-layer.4": 3946.1748, "encoder_q-layer.5": 4025.1204, "encoder_q-layer.6": 5005.3716, "encoder_q-layer.7": 4990.6577, "encoder_q-layer.8": 5556.4736, "encoder_q-layer.9": 4791.1831, "epoch": 0.77, "inbatch_neg_score": 0.1042, "inbatch_pos_score": 0.7812, "learning_rate": 1.1500000000000002e-05, "loss": 3.2944, "norm_diff": 0.1152, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7394.0053, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1046, "query_norm": 1.282, "queue_k_norm": 1.3988, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5778, "sent_len_1": 66.8875, "sent_max_len_0": 128.0, "sent_max_len_1": 190.245, "stdk": 0.0489, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 79300 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.2894, "doc_norm": 1.3942, "encoder_q-embeddings": 5402.8389, "encoder_q-layer.0": 3842.4661, "encoder_q-layer.1": 4541.79, "encoder_q-layer.10": 4530.7676, "encoder_q-layer.11": 10525.791, "encoder_q-layer.2": 5294.9907, "encoder_q-layer.3": 5356.5586, "encoder_q-layer.4": 5956.0117, "encoder_q-layer.5": 5845.7607, "encoder_q-layer.6": 6477.5552, "encoder_q-layer.7": 5579.604, "encoder_q-layer.8": 5353.8789, "encoder_q-layer.9": 4439.6274, "epoch": 0.78, "inbatch_neg_score": 0.1018, "inbatch_pos_score": 0.7676, "learning_rate": 1.1444444444444446e-05, "loss": 3.2894, "norm_diff": 0.1295, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8632.0409, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.102, "query_norm": 1.2647, "queue_k_norm": 1.3975, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5943, "sent_len_1": 66.8715, "sent_max_len_0": 127.9963, "sent_max_len_1": 193.2475, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 79400 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3163, "doc_norm": 1.39, "encoder_q-embeddings": 3716.7937, "encoder_q-layer.0": 2446.5229, "encoder_q-layer.1": 2573.293, "encoder_q-layer.10": 4984.2627, "encoder_q-layer.11": 10448.0742, "encoder_q-layer.2": 2810.1321, "encoder_q-layer.3": 2946.8608, "encoder_q-layer.4": 3269.3967, "encoder_q-layer.5": 3272.9778, "encoder_q-layer.6": 3790.1191, "encoder_q-layer.7": 4351.5312, "encoder_q-layer.8": 5183.5903, "encoder_q-layer.9": 4760.4351, "epoch": 0.78, "inbatch_neg_score": 0.102, "inbatch_pos_score": 0.7603, "learning_rate": 1.138888888888889e-05, "loss": 3.3163, "norm_diff": 0.1309, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6856.9387, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1011, "query_norm": 1.259, "queue_k_norm": 1.3969, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7436, "sent_len_1": 66.953, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0662, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 79500 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3353, "doc_norm": 1.4007, "encoder_q-embeddings": 4937.2021, "encoder_q-layer.0": 3407.374, "encoder_q-layer.1": 3687.3423, "encoder_q-layer.10": 4649.0674, "encoder_q-layer.11": 10634.4014, "encoder_q-layer.2": 3788.9873, "encoder_q-layer.3": 3788.3958, "encoder_q-layer.4": 4224.5386, "encoder_q-layer.5": 3933.8225, "encoder_q-layer.6": 4193.7832, "encoder_q-layer.7": 4542.9971, "encoder_q-layer.8": 5381.7554, "encoder_q-layer.9": 4597.4409, "epoch": 0.78, "inbatch_neg_score": 0.0992, "inbatch_pos_score": 0.7642, "learning_rate": 1.1333333333333334e-05, "loss": 3.3353, "norm_diff": 0.1534, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7673.4907, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0992, "query_norm": 1.2473, "queue_k_norm": 1.3983, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.54, "sent_len_1": 66.9176, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3775, "stdk": 0.049, "stdq": 0.0444, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 79600 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3061, "doc_norm": 1.3981, "encoder_q-embeddings": 3882.4595, "encoder_q-layer.0": 2474.7722, "encoder_q-layer.1": 2604.0464, "encoder_q-layer.10": 5304.8535, "encoder_q-layer.11": 11138.7393, "encoder_q-layer.2": 2928.3315, "encoder_q-layer.3": 2976.0835, "encoder_q-layer.4": 3180.5989, "encoder_q-layer.5": 3417.0718, "encoder_q-layer.6": 3871.6667, "encoder_q-layer.7": 4277.6235, "encoder_q-layer.8": 5154.4546, "encoder_q-layer.9": 4845.6899, "epoch": 0.78, "inbatch_neg_score": 0.0971, "inbatch_pos_score": 0.7656, "learning_rate": 1.127777777777778e-05, "loss": 3.3061, "norm_diff": 0.1183, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7143.4181, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0978, "query_norm": 1.2798, "queue_k_norm": 1.3982, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5979, "sent_len_1": 66.8493, "sent_max_len_0": 127.995, "sent_max_len_1": 191.185, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 79700 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.3042, "doc_norm": 1.4068, "encoder_q-embeddings": 3695.1392, "encoder_q-layer.0": 2416.1316, "encoder_q-layer.1": 2622.1196, "encoder_q-layer.10": 4936.1636, "encoder_q-layer.11": 10443.8477, "encoder_q-layer.2": 2885.6843, "encoder_q-layer.3": 3046.9736, "encoder_q-layer.4": 3088.178, "encoder_q-layer.5": 3230.5776, "encoder_q-layer.6": 3626.7991, "encoder_q-layer.7": 4251.4199, "encoder_q-layer.8": 5018.6641, "encoder_q-layer.9": 4459.875, "epoch": 0.78, "inbatch_neg_score": 0.0982, "inbatch_pos_score": 0.7852, "learning_rate": 1.1222222222222224e-05, "loss": 3.3042, "norm_diff": 0.1406, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6699.4795, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.098, "query_norm": 1.2662, "queue_k_norm": 1.3968, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7176, "sent_len_1": 67.0064, "sent_max_len_0": 127.9988, "sent_max_len_1": 191.2375, "stdk": 0.0493, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 79800 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.3069, "doc_norm": 1.394, "encoder_q-embeddings": 4172.1416, "encoder_q-layer.0": 2827.793, "encoder_q-layer.1": 3002.9026, "encoder_q-layer.10": 4949.0122, "encoder_q-layer.11": 10475.7529, "encoder_q-layer.2": 3405.1953, "encoder_q-layer.3": 3718.8188, "encoder_q-layer.4": 3914.0806, "encoder_q-layer.5": 4054.7507, "encoder_q-layer.6": 4476.0396, "encoder_q-layer.7": 4559.7832, "encoder_q-layer.8": 5092.7549, "encoder_q-layer.9": 4530.1558, "epoch": 0.78, "inbatch_neg_score": 0.0988, "inbatch_pos_score": 0.7861, "learning_rate": 1.1166666666666668e-05, "loss": 3.3069, "norm_diff": 0.1158, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7270.6272, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0982, "query_norm": 1.2781, "queue_k_norm": 1.3975, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6282, "sent_len_1": 66.7984, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.6625, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 79900 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.2929, "doc_norm": 1.3996, "encoder_q-embeddings": 3872.7729, "encoder_q-layer.0": 2641.9893, "encoder_q-layer.1": 2778.1501, "encoder_q-layer.10": 4857.855, "encoder_q-layer.11": 11047.876, "encoder_q-layer.2": 3122.1943, "encoder_q-layer.3": 3169.1641, "encoder_q-layer.4": 3398.6885, "encoder_q-layer.5": 3491.1016, "encoder_q-layer.6": 3975.3584, "encoder_q-layer.7": 4300.5698, "encoder_q-layer.8": 4864.0811, "encoder_q-layer.9": 4685.5845, "epoch": 0.78, "inbatch_neg_score": 0.0959, "inbatch_pos_score": 0.7529, "learning_rate": 1.1111111111111112e-05, "loss": 3.2929, "norm_diff": 0.1473, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7052.6021, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0952, "query_norm": 1.2523, "queue_k_norm": 1.3947, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.715, "sent_len_1": 66.7877, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0987, "stdk": 0.0491, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80000 }, { "dev_runtime": 26.8318, "dev_samples_per_second": 2.385, "dev_steps_per_second": 0.037, "epoch": 0.78, "step": 80000, "test_accuracy": 93.9453125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3460271656513214, "test_doc_norm": 1.3735435009002686, "test_inbatch_neg_score": 0.46360117197036743, "test_inbatch_pos_score": 1.3986196517944336, "test_loss": 0.3460271656513214, "test_loss_align": 0.9855124950408936, "test_loss_unif": 3.960263729095459, "test_loss_unif_q@queue": 3.960263967514038, "test_norm_diff": 0.011211354285478592, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.09888259321451187, "test_query_norm": 1.3658552169799805, "test_queue_k_norm": 1.394828200340271, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042774498462677, "test_stdq": 0.042275719344615936, "test_stdqueue_k": 0.048986129462718964, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.8318, "dev_samples_per_second": 2.385, "dev_steps_per_second": 0.037, "epoch": 0.78, "eval_beir-arguana_ndcg@10": 0.37596, "eval_beir-arguana_recall@10": 0.62447, "eval_beir-arguana_recall@100": 0.91607, "eval_beir-arguana_recall@20": 0.7596, "eval_beir-avg_ndcg@10": 0.38200708333333333, "eval_beir-avg_recall@10": 0.45456591666666657, "eval_beir-avg_recall@100": 0.6306932500000001, "eval_beir-avg_recall@20": 0.5114526666666667, "eval_beir-cqadupstack_ndcg@10": 0.27804083333333335, "eval_beir-cqadupstack_recall@10": 0.3748391666666668, "eval_beir-cqadupstack_recall@100": 0.5980525, "eval_beir-cqadupstack_recall@20": 0.44061666666666666, "eval_beir-fiqa_ndcg@10": 0.25133, "eval_beir-fiqa_recall@10": 0.31767, "eval_beir-fiqa_recall@100": 0.5728, "eval_beir-fiqa_recall@20": 0.39025, "eval_beir-nfcorpus_ndcg@10": 0.29313, "eval_beir-nfcorpus_recall@10": 0.14416, "eval_beir-nfcorpus_recall@100": 0.27322, "eval_beir-nfcorpus_recall@20": 0.17501, "eval_beir-nq_ndcg@10": 0.28841, "eval_beir-nq_recall@10": 0.46958, "eval_beir-nq_recall@100": 0.79983, "eval_beir-nq_recall@20": 0.58355, "eval_beir-quora_ndcg@10": 0.8096, "eval_beir-quora_recall@10": 0.90714, "eval_beir-quora_recall@100": 0.98238, "eval_beir-quora_recall@20": 0.94376, "eval_beir-scidocs_ndcg@10": 0.15519, "eval_beir-scidocs_recall@10": 0.16168, "eval_beir-scidocs_recall@100": 0.36968, "eval_beir-scidocs_recall@20": 0.21992, "eval_beir-scifact_ndcg@10": 0.62397, "eval_beir-scifact_recall@10": 0.79789, "eval_beir-scifact_recall@100": 0.90322, "eval_beir-scifact_recall@20": 0.83522, "eval_beir-trec-covid_ndcg@10": 0.5596, "eval_beir-trec-covid_recall@10": 0.62, "eval_beir-trec-covid_recall@100": 0.4556, "eval_beir-trec-covid_recall@20": 0.583, "eval_beir-webis-touche2020_ndcg@10": 0.18484, "eval_beir-webis-touche2020_recall@10": 0.12823, "eval_beir-webis-touche2020_recall@100": 0.43608, "eval_beir-webis-touche2020_recall@20": 0.1836, "eval_senteval-avg_sts": 0.7657605322842325, "eval_senteval-sickr_spearman": 0.731630342447849, "eval_senteval-stsb_spearman": 0.799890722120616, "step": 80000, "test_accuracy": 93.9453125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3460271656513214, "test_doc_norm": 1.3735435009002686, "test_inbatch_neg_score": 0.46360117197036743, "test_inbatch_pos_score": 1.3986196517944336, "test_loss": 0.3460271656513214, "test_loss_align": 0.9855124950408936, "test_loss_unif": 3.960263729095459, "test_loss_unif_q@queue": 3.960263967514038, "test_norm_diff": 0.011211354285478592, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.09888259321451187, "test_query_norm": 1.3658552169799805, "test_queue_k_norm": 1.394828200340271, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042774498462677, "test_stdq": 0.042275719344615936, "test_stdqueue_k": 0.048986129462718964, "test_stdqueue_q": 0.0 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.2979, "doc_norm": 1.396, "encoder_q-embeddings": 3503.7234, "encoder_q-layer.0": 2400.7275, "encoder_q-layer.1": 2494.5964, "encoder_q-layer.10": 4800.2681, "encoder_q-layer.11": 9986.5674, "encoder_q-layer.2": 2809.0559, "encoder_q-layer.3": 2875.9751, "encoder_q-layer.4": 3085.5217, "encoder_q-layer.5": 3133.9846, "encoder_q-layer.6": 3609.0789, "encoder_q-layer.7": 4061.7432, "encoder_q-layer.8": 4861.938, "encoder_q-layer.9": 4573.3828, "epoch": 0.78, "inbatch_neg_score": 0.0962, "inbatch_pos_score": 0.7739, "learning_rate": 1.1055555555555556e-05, "loss": 3.2979, "norm_diff": 0.1222, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6593.1903, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0959, "query_norm": 1.2738, "queue_k_norm": 1.3944, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7209, "sent_len_1": 66.7931, "sent_max_len_0": 127.9963, "sent_max_len_1": 187.8938, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80100 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.3, "doc_norm": 1.3957, "encoder_q-embeddings": 4067.2134, "encoder_q-layer.0": 2790.791, "encoder_q-layer.1": 3020.2722, "encoder_q-layer.10": 5126.3188, "encoder_q-layer.11": 10786.709, "encoder_q-layer.2": 3364.5234, "encoder_q-layer.3": 3665.2087, "encoder_q-layer.4": 3693.3625, "encoder_q-layer.5": 3729.5552, "encoder_q-layer.6": 4021.3926, "encoder_q-layer.7": 4382.2632, "encoder_q-layer.8": 4944.5068, "encoder_q-layer.9": 4540.6836, "epoch": 0.78, "inbatch_neg_score": 0.0916, "inbatch_pos_score": 0.7739, "learning_rate": 1.1000000000000001e-05, "loss": 3.3, "norm_diff": 0.1308, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7255.0604, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.091, "query_norm": 1.2649, "queue_k_norm": 1.3936, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5905, "sent_len_1": 66.7341, "sent_max_len_0": 127.9963, "sent_max_len_1": 188.92, "stdk": 0.049, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80200 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.3046, "doc_norm": 1.3943, "encoder_q-embeddings": 3895.23, "encoder_q-layer.0": 2583.8621, "encoder_q-layer.1": 2780.7354, "encoder_q-layer.10": 4546.9478, "encoder_q-layer.11": 10841.8359, "encoder_q-layer.2": 3219.446, "encoder_q-layer.3": 3374.6089, "encoder_q-layer.4": 3695.125, "encoder_q-layer.5": 3853.5771, "encoder_q-layer.6": 4046.5884, "encoder_q-layer.7": 4457.7832, "encoder_q-layer.8": 5078.7739, "encoder_q-layer.9": 4477.3599, "epoch": 0.78, "inbatch_neg_score": 0.0933, "inbatch_pos_score": 0.7441, "learning_rate": 1.0944444444444445e-05, "loss": 3.3046, "norm_diff": 0.1284, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7158.8315, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0931, "query_norm": 1.2659, "queue_k_norm": 1.3951, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5389, "sent_len_1": 66.9597, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9187, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80300 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.2866, "doc_norm": 1.3951, "encoder_q-embeddings": 3462.1831, "encoder_q-layer.0": 2320.6899, "encoder_q-layer.1": 2426.6868, "encoder_q-layer.10": 4485.834, "encoder_q-layer.11": 10616.4873, "encoder_q-layer.2": 2778.6631, "encoder_q-layer.3": 2855.5225, "encoder_q-layer.4": 3028.8308, "encoder_q-layer.5": 3160.5747, "encoder_q-layer.6": 3590.7939, "encoder_q-layer.7": 4033.7183, "encoder_q-layer.8": 4644.9204, "encoder_q-layer.9": 4329.6123, "epoch": 0.78, "inbatch_neg_score": 0.096, "inbatch_pos_score": 0.7856, "learning_rate": 1.088888888888889e-05, "loss": 3.2866, "norm_diff": 0.1152, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6514.2227, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.095, "query_norm": 1.28, "queue_k_norm": 1.3962, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.8013, "sent_len_1": 66.8222, "sent_max_len_0": 127.995, "sent_max_len_1": 191.315, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 80400 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.2757, "doc_norm": 1.3945, "encoder_q-embeddings": 4140.1934, "encoder_q-layer.0": 2920.575, "encoder_q-layer.1": 3150.7146, "encoder_q-layer.10": 5252.7197, "encoder_q-layer.11": 11039.7793, "encoder_q-layer.2": 3597.2986, "encoder_q-layer.3": 3880.0942, "encoder_q-layer.4": 4209.5322, "encoder_q-layer.5": 4221.3296, "encoder_q-layer.6": 4791.3159, "encoder_q-layer.7": 4824.8315, "encoder_q-layer.8": 5642.2363, "encoder_q-layer.9": 5051.2495, "epoch": 0.79, "inbatch_neg_score": 0.0919, "inbatch_pos_score": 0.771, "learning_rate": 1.0833333333333334e-05, "loss": 3.2757, "norm_diff": 0.1213, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7607.3413, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0925, "query_norm": 1.2733, "queue_k_norm": 1.3968, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.9619, "sent_len_1": 66.9653, "sent_max_len_0": 127.9875, "sent_max_len_1": 188.8738, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 80500 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.2838, "doc_norm": 1.3848, "encoder_q-embeddings": 3961.8503, "encoder_q-layer.0": 2633.5962, "encoder_q-layer.1": 2834.7646, "encoder_q-layer.10": 4915.335, "encoder_q-layer.11": 10528.5508, "encoder_q-layer.2": 3169.1389, "encoder_q-layer.3": 3267.3962, "encoder_q-layer.4": 3401.2173, "encoder_q-layer.5": 3619.0972, "encoder_q-layer.6": 3846.582, "encoder_q-layer.7": 4160.561, "encoder_q-layer.8": 4690.2959, "encoder_q-layer.9": 4393.6934, "epoch": 0.79, "inbatch_neg_score": 0.0935, "inbatch_pos_score": 0.7656, "learning_rate": 1.0777777777777778e-05, "loss": 3.2838, "norm_diff": 0.1162, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6969.0942, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0941, "query_norm": 1.2686, "queue_k_norm": 1.3952, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6687, "sent_len_1": 66.592, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0225, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 80600 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.2903, "doc_norm": 1.3906, "encoder_q-embeddings": 4455.9004, "encoder_q-layer.0": 2882.7808, "encoder_q-layer.1": 3096.6277, "encoder_q-layer.10": 4742.0356, "encoder_q-layer.11": 10417.6953, "encoder_q-layer.2": 3548.2175, "encoder_q-layer.3": 3654.4453, "encoder_q-layer.4": 3920.688, "encoder_q-layer.5": 3932.74, "encoder_q-layer.6": 4265.9922, "encoder_q-layer.7": 5072.542, "encoder_q-layer.8": 5139.7656, "encoder_q-layer.9": 4424.5601, "epoch": 0.79, "inbatch_neg_score": 0.0906, "inbatch_pos_score": 0.7593, "learning_rate": 1.0722222222222222e-05, "loss": 3.2903, "norm_diff": 0.1416, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7402.9875, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0905, "query_norm": 1.2489, "queue_k_norm": 1.3958, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6604, "sent_len_1": 66.631, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.335, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 80700 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.2935, "doc_norm": 1.4054, "encoder_q-embeddings": 9297.2793, "encoder_q-layer.0": 6203.0913, "encoder_q-layer.1": 6810.543, "encoder_q-layer.10": 9145.0283, "encoder_q-layer.11": 21504.543, "encoder_q-layer.2": 7683.2207, "encoder_q-layer.3": 8047.0894, "encoder_q-layer.4": 8571.7939, "encoder_q-layer.5": 9222.5566, "encoder_q-layer.6": 10031.2666, "encoder_q-layer.7": 9839.7783, "encoder_q-layer.8": 10258.2686, "encoder_q-layer.9": 8873.7939, "epoch": 0.79, "inbatch_neg_score": 0.0925, "inbatch_pos_score": 0.7681, "learning_rate": 1.0666666666666667e-05, "loss": 3.2935, "norm_diff": 0.1399, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15328.6521, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.093, "query_norm": 1.2654, "queue_k_norm": 1.3935, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5992, "sent_len_1": 66.6922, "sent_max_len_0": 127.99, "sent_max_len_1": 187.7675, "stdk": 0.0494, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80800 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.3017, "doc_norm": 1.3892, "encoder_q-embeddings": 8302.8955, "encoder_q-layer.0": 5555.2441, "encoder_q-layer.1": 5852.0454, "encoder_q-layer.10": 9687.4482, "encoder_q-layer.11": 22429.8438, "encoder_q-layer.2": 6690.6562, "encoder_q-layer.3": 6974.8481, "encoder_q-layer.4": 7338.9766, "encoder_q-layer.5": 7474.6855, "encoder_q-layer.6": 8136.2671, "encoder_q-layer.7": 9474.2822, "encoder_q-layer.8": 10797.0605, "encoder_q-layer.9": 9464.1729, "epoch": 0.79, "inbatch_neg_score": 0.0922, "inbatch_pos_score": 0.7471, "learning_rate": 1.0611111111111111e-05, "loss": 3.3017, "norm_diff": 0.1292, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14762.9441, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.092, "query_norm": 1.2601, "queue_k_norm": 1.3923, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5477, "sent_len_1": 66.6917, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2363, "stdk": 0.0488, "stdq": 0.0446, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80900 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3204, "doc_norm": 1.3978, "encoder_q-embeddings": 8087.6616, "encoder_q-layer.0": 5309.4883, "encoder_q-layer.1": 5468.9219, "encoder_q-layer.10": 9869.7959, "encoder_q-layer.11": 21593.0547, "encoder_q-layer.2": 6209.917, "encoder_q-layer.3": 6394.4102, "encoder_q-layer.4": 6982.4316, "encoder_q-layer.5": 7391.5391, "encoder_q-layer.6": 8177.3994, "encoder_q-layer.7": 9511.5645, "encoder_q-layer.8": 10739.293, "encoder_q-layer.9": 9728.6719, "epoch": 0.79, "inbatch_neg_score": 0.0896, "inbatch_pos_score": 0.7485, "learning_rate": 1.0555555555555555e-05, "loss": 3.3204, "norm_diff": 0.1385, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14415.5232, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0902, "query_norm": 1.2593, "queue_k_norm": 1.392, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4745, "sent_len_1": 66.7442, "sent_max_len_0": 127.9875, "sent_max_len_1": 190.0775, "stdk": 0.0491, "stdq": 0.0447, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 81000 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.2952, "doc_norm": 1.3885, "encoder_q-embeddings": 4166.7236, "encoder_q-layer.0": 2951.9966, "encoder_q-layer.1": 2983.55, "encoder_q-layer.10": 4847.5098, "encoder_q-layer.11": 10506.3398, "encoder_q-layer.2": 3419.5574, "encoder_q-layer.3": 3578.9487, "encoder_q-layer.4": 3869.0552, "encoder_q-layer.5": 4025.6543, "encoder_q-layer.6": 4488.9556, "encoder_q-layer.7": 5082.6396, "encoder_q-layer.8": 5174.7861, "encoder_q-layer.9": 4718.9121, "epoch": 0.79, "inbatch_neg_score": 0.0929, "inbatch_pos_score": 0.7773, "learning_rate": 1.05e-05, "loss": 3.2952, "norm_diff": 0.0942, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7214.0384, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0925, "query_norm": 1.2943, "queue_k_norm": 1.3946, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7193, "sent_len_1": 66.6929, "sent_max_len_0": 128.0, "sent_max_len_1": 189.965, "stdk": 0.0488, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81100 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.2933, "doc_norm": 1.3919, "encoder_q-embeddings": 4952.6484, "encoder_q-layer.0": 3601.6099, "encoder_q-layer.1": 3899.5083, "encoder_q-layer.10": 4561.7539, "encoder_q-layer.11": 10297.4717, "encoder_q-layer.2": 4368.2417, "encoder_q-layer.3": 5096.5786, "encoder_q-layer.4": 5426.9575, "encoder_q-layer.5": 5589.543, "encoder_q-layer.6": 6040.355, "encoder_q-layer.7": 6196.2788, "encoder_q-layer.8": 7987.0566, "encoder_q-layer.9": 6524.958, "epoch": 0.79, "inbatch_neg_score": 0.0944, "inbatch_pos_score": 0.7798, "learning_rate": 1.0444444444444445e-05, "loss": 3.2933, "norm_diff": 0.1136, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8749.937, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0944, "query_norm": 1.2783, "queue_k_norm": 1.3934, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7749, "sent_len_1": 66.7794, "sent_max_len_0": 128.0, "sent_max_len_1": 188.97, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 81200 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.3056, "doc_norm": 1.3977, "encoder_q-embeddings": 4469.7197, "encoder_q-layer.0": 3011.7759, "encoder_q-layer.1": 3254.606, "encoder_q-layer.10": 4784.853, "encoder_q-layer.11": 10291.4062, "encoder_q-layer.2": 3741.9001, "encoder_q-layer.3": 4075.6655, "encoder_q-layer.4": 4495.4688, "encoder_q-layer.5": 4907.9194, "encoder_q-layer.6": 5566.334, "encoder_q-layer.7": 5669.9468, "encoder_q-layer.8": 5711.0898, "encoder_q-layer.9": 4537.9453, "epoch": 0.79, "inbatch_neg_score": 0.0938, "inbatch_pos_score": 0.7588, "learning_rate": 1.038888888888889e-05, "loss": 3.3056, "norm_diff": 0.1237, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7744.6894, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0942, "query_norm": 1.2741, "queue_k_norm": 1.3944, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5187, "sent_len_1": 66.5601, "sent_max_len_0": 127.9912, "sent_max_len_1": 188.0225, "stdk": 0.0491, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81300 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.3074, "doc_norm": 1.3943, "encoder_q-embeddings": 4020.4199, "encoder_q-layer.0": 2613.3689, "encoder_q-layer.1": 2734.0576, "encoder_q-layer.10": 4342.2349, "encoder_q-layer.11": 10255.666, "encoder_q-layer.2": 3066.344, "encoder_q-layer.3": 3212.4219, "encoder_q-layer.4": 3512.8489, "encoder_q-layer.5": 3484.8577, "encoder_q-layer.6": 3762.655, "encoder_q-layer.7": 4122.7529, "encoder_q-layer.8": 4799.0127, "encoder_q-layer.9": 4343.6064, "epoch": 0.79, "inbatch_neg_score": 0.0922, "inbatch_pos_score": 0.7671, "learning_rate": 1.0333333333333333e-05, "loss": 3.3074, "norm_diff": 0.1312, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6815.4681, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0927, "query_norm": 1.2632, "queue_k_norm": 1.394, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6955, "sent_len_1": 66.4164, "sent_max_len_0": 128.0, "sent_max_len_1": 189.635, "stdk": 0.049, "stdq": 0.0446, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81400 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.3018, "doc_norm": 1.3918, "encoder_q-embeddings": 4323.5068, "encoder_q-layer.0": 3025.0779, "encoder_q-layer.1": 3229.8716, "encoder_q-layer.10": 4599.6533, "encoder_q-layer.11": 10674.2334, "encoder_q-layer.2": 3746.603, "encoder_q-layer.3": 3850.1343, "encoder_q-layer.4": 3999.5762, "encoder_q-layer.5": 3910.9768, "encoder_q-layer.6": 4247.6118, "encoder_q-layer.7": 4436.3193, "encoder_q-layer.8": 4922.6045, "encoder_q-layer.9": 4427.7368, "epoch": 0.8, "inbatch_neg_score": 0.0929, "inbatch_pos_score": 0.7769, "learning_rate": 1.0277777777777777e-05, "loss": 3.3018, "norm_diff": 0.1179, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7310.0561, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0928, "query_norm": 1.2739, "queue_k_norm": 1.3937, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6511, "sent_len_1": 66.889, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6025, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81500 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.3228, "doc_norm": 1.3973, "encoder_q-embeddings": 3715.3977, "encoder_q-layer.0": 2451.51, "encoder_q-layer.1": 2596.4773, "encoder_q-layer.10": 4724.0557, "encoder_q-layer.11": 10288.9023, "encoder_q-layer.2": 2931.1836, "encoder_q-layer.3": 3023.2715, "encoder_q-layer.4": 3293.0552, "encoder_q-layer.5": 3319.8931, "encoder_q-layer.6": 3729.385, "encoder_q-layer.7": 4273.7583, "encoder_q-layer.8": 4883.9355, "encoder_q-layer.9": 4533.0278, "epoch": 0.8, "inbatch_neg_score": 0.0934, "inbatch_pos_score": 0.769, "learning_rate": 1.0222222222222223e-05, "loss": 3.3228, "norm_diff": 0.1222, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6761.5861, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0942, "query_norm": 1.2751, "queue_k_norm": 1.3936, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7286, "sent_len_1": 66.5994, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2038, "stdk": 0.0491, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 81600 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.2704, "doc_norm": 1.3894, "encoder_q-embeddings": 4862.1562, "encoder_q-layer.0": 3401.718, "encoder_q-layer.1": 3686.6257, "encoder_q-layer.10": 4913.313, "encoder_q-layer.11": 10436.0205, "encoder_q-layer.2": 4449.2529, "encoder_q-layer.3": 5115.5391, "encoder_q-layer.4": 5292.4746, "encoder_q-layer.5": 5053.3662, "encoder_q-layer.6": 5084.9263, "encoder_q-layer.7": 5419.8027, "encoder_q-layer.8": 5535.6099, "encoder_q-layer.9": 4683.3979, "epoch": 0.8, "inbatch_neg_score": 0.0978, "inbatch_pos_score": 0.7705, "learning_rate": 1.0166666666666667e-05, "loss": 3.2704, "norm_diff": 0.1219, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8039.753, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0974, "query_norm": 1.2676, "queue_k_norm": 1.3924, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6559, "sent_len_1": 66.6444, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7562, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 81700 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.2989, "doc_norm": 1.3952, "encoder_q-embeddings": 3943.8508, "encoder_q-layer.0": 2658.6841, "encoder_q-layer.1": 2769.9475, "encoder_q-layer.10": 5025.0762, "encoder_q-layer.11": 11009.5361, "encoder_q-layer.2": 3069.4272, "encoder_q-layer.3": 3196.1111, "encoder_q-layer.4": 3350.6604, "encoder_q-layer.5": 3482.5266, "encoder_q-layer.6": 3947.3662, "encoder_q-layer.7": 4341.9077, "encoder_q-layer.8": 5456.1304, "encoder_q-layer.9": 4728.2837, "epoch": 0.8, "inbatch_neg_score": 0.096, "inbatch_pos_score": 0.7881, "learning_rate": 1.0111111111111111e-05, "loss": 3.2989, "norm_diff": 0.1204, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7147.1609, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0971, "query_norm": 1.2748, "queue_k_norm": 1.3916, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6079, "sent_len_1": 66.7335, "sent_max_len_0": 127.9975, "sent_max_len_1": 187.5062, "stdk": 0.049, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 81800 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.2851, "doc_norm": 1.3981, "encoder_q-embeddings": 4513.4292, "encoder_q-layer.0": 3017.0308, "encoder_q-layer.1": 3268.2017, "encoder_q-layer.10": 4995.2109, "encoder_q-layer.11": 11114.8936, "encoder_q-layer.2": 3771.8105, "encoder_q-layer.3": 3970.4492, "encoder_q-layer.4": 4353.3442, "encoder_q-layer.5": 4508.4189, "encoder_q-layer.6": 5295.769, "encoder_q-layer.7": 5908.0503, "encoder_q-layer.8": 6039.3408, "encoder_q-layer.9": 4974.9731, "epoch": 0.8, "inbatch_neg_score": 0.098, "inbatch_pos_score": 0.7617, "learning_rate": 1.0055555555555555e-05, "loss": 3.2851, "norm_diff": 0.119, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8043.4413, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0974, "query_norm": 1.2792, "queue_k_norm": 1.3955, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5754, "sent_len_1": 66.8925, "sent_max_len_0": 128.0, "sent_max_len_1": 190.025, "stdk": 0.0491, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81900 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.3159, "doc_norm": 1.3824, "encoder_q-embeddings": 3858.0364, "encoder_q-layer.0": 2525.6648, "encoder_q-layer.1": 2727.0205, "encoder_q-layer.10": 5080.4097, "encoder_q-layer.11": 11253.3613, "encoder_q-layer.2": 3099.8564, "encoder_q-layer.3": 3255.292, "encoder_q-layer.4": 3452.2148, "encoder_q-layer.5": 3491.5139, "encoder_q-layer.6": 3945.0825, "encoder_q-layer.7": 4416.1914, "encoder_q-layer.8": 5474.0029, "encoder_q-layer.9": 4926.6211, "epoch": 0.8, "inbatch_neg_score": 0.0978, "inbatch_pos_score": 0.7324, "learning_rate": 1e-05, "loss": 3.3159, "norm_diff": 0.1114, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7338.1652, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0978, "query_norm": 1.271, "queue_k_norm": 1.3922, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5957, "sent_len_1": 66.6478, "sent_max_len_0": 127.9963, "sent_max_len_1": 188.8212, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 82000 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.3157, "doc_norm": 1.3877, "encoder_q-embeddings": 3972.4084, "encoder_q-layer.0": 2620.5671, "encoder_q-layer.1": 2866.0623, "encoder_q-layer.10": 5350.0474, "encoder_q-layer.11": 11117.4258, "encoder_q-layer.2": 3194.8076, "encoder_q-layer.3": 3281.7432, "encoder_q-layer.4": 3517.116, "encoder_q-layer.5": 3675.6521, "encoder_q-layer.6": 4125.3701, "encoder_q-layer.7": 4743.2402, "encoder_q-layer.8": 5477.1108, "encoder_q-layer.9": 5122.6938, "epoch": 0.8, "inbatch_neg_score": 0.0977, "inbatch_pos_score": 0.7544, "learning_rate": 9.944444444444445e-06, "loss": 3.3157, "norm_diff": 0.1038, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7353.7479, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0975, "query_norm": 1.2839, "queue_k_norm": 1.3943, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6425, "sent_len_1": 66.6505, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2388, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 82100 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.3025, "doc_norm": 1.3906, "encoder_q-embeddings": 1838.2999, "encoder_q-layer.0": 1256.181, "encoder_q-layer.1": 1295.1874, "encoder_q-layer.10": 2437.0479, "encoder_q-layer.11": 5471.1665, "encoder_q-layer.2": 1488.325, "encoder_q-layer.3": 1533.0834, "encoder_q-layer.4": 1652.1121, "encoder_q-layer.5": 1682.4363, "encoder_q-layer.6": 1860.1676, "encoder_q-layer.7": 2166.5894, "encoder_q-layer.8": 2427.9189, "encoder_q-layer.9": 2311.7998, "epoch": 0.8, "inbatch_neg_score": 0.1001, "inbatch_pos_score": 0.7559, "learning_rate": 9.888888888888889e-06, "loss": 3.3025, "norm_diff": 0.1121, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3499.1505, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0994, "query_norm": 1.2785, "queue_k_norm": 1.3944, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5444, "sent_len_1": 66.893, "sent_max_len_0": 128.0, "sent_max_len_1": 188.72, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 82200 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.2889, "doc_norm": 1.3941, "encoder_q-embeddings": 1888.5638, "encoder_q-layer.0": 1225.5476, "encoder_q-layer.1": 1301.7357, "encoder_q-layer.10": 2443.6069, "encoder_q-layer.11": 5508.8467, "encoder_q-layer.2": 1473.4124, "encoder_q-layer.3": 1530.7791, "encoder_q-layer.4": 1633.5417, "encoder_q-layer.5": 1641.0532, "encoder_q-layer.6": 1898.6644, "encoder_q-layer.7": 2233.3184, "encoder_q-layer.8": 2653.9626, "encoder_q-layer.9": 2448.1479, "epoch": 0.8, "inbatch_neg_score": 0.0995, "inbatch_pos_score": 0.77, "learning_rate": 9.833333333333333e-06, "loss": 3.2889, "norm_diff": 0.1149, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3616.9803, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1, "query_norm": 1.2792, "queue_k_norm": 1.3925, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.812, "sent_len_1": 66.6238, "sent_max_len_0": 128.0, "sent_max_len_1": 189.575, "stdk": 0.049, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 82300 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.3162, "doc_norm": 1.3959, "encoder_q-embeddings": 2090.3818, "encoder_q-layer.0": 1421.6521, "encoder_q-layer.1": 1523.0795, "encoder_q-layer.10": 2316.0598, "encoder_q-layer.11": 5385.0542, "encoder_q-layer.2": 1809.5745, "encoder_q-layer.3": 1966.4963, "encoder_q-layer.4": 2079.406, "encoder_q-layer.5": 2304.5276, "encoder_q-layer.6": 2505.093, "encoder_q-layer.7": 2744.7686, "encoder_q-layer.8": 2745.8826, "encoder_q-layer.9": 2427.9226, "epoch": 0.8, "inbatch_neg_score": 0.1012, "inbatch_pos_score": 0.7832, "learning_rate": 9.777777777777779e-06, "loss": 3.3162, "norm_diff": 0.1329, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3780.9587, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1014, "query_norm": 1.2629, "queue_k_norm": 1.3958, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6611, "sent_len_1": 66.8511, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6725, "stdk": 0.049, "stdq": 0.0443, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 82400 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.2996, "doc_norm": 1.3929, "encoder_q-embeddings": 1920.1179, "encoder_q-layer.0": 1314.7882, "encoder_q-layer.1": 1341.8218, "encoder_q-layer.10": 2351.6831, "encoder_q-layer.11": 5219.5146, "encoder_q-layer.2": 1532.645, "encoder_q-layer.3": 1625.1324, "encoder_q-layer.4": 1832.5067, "encoder_q-layer.5": 1840.0365, "encoder_q-layer.6": 1977.9128, "encoder_q-layer.7": 2374.8513, "encoder_q-layer.8": 2580.4548, "encoder_q-layer.9": 2219.4844, "epoch": 0.81, "inbatch_neg_score": 0.1055, "inbatch_pos_score": 0.7764, "learning_rate": 9.722222222222223e-06, "loss": 3.2996, "norm_diff": 0.1094, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3492.3619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1042, "query_norm": 1.2836, "queue_k_norm": 1.3922, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5974, "sent_len_1": 66.6201, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1238, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 82500 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3077, "doc_norm": 1.3897, "encoder_q-embeddings": 2135.1826, "encoder_q-layer.0": 1444.1935, "encoder_q-layer.1": 1545.0614, "encoder_q-layer.10": 2268.3506, "encoder_q-layer.11": 5411.9141, "encoder_q-layer.2": 1759.8485, "encoder_q-layer.3": 1781.9518, "encoder_q-layer.4": 1999.0389, "encoder_q-layer.5": 2019.9878, "encoder_q-layer.6": 2148.0269, "encoder_q-layer.7": 2242.5686, "encoder_q-layer.8": 2563.6885, "encoder_q-layer.9": 2289.7615, "epoch": 0.81, "inbatch_neg_score": 0.1059, "inbatch_pos_score": 0.7671, "learning_rate": 9.666666666666667e-06, "loss": 3.3077, "norm_diff": 0.109, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3644.8309, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1061, "query_norm": 1.2807, "queue_k_norm": 1.3922, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4627, "sent_len_1": 66.5792, "sent_max_len_0": 127.995, "sent_max_len_1": 188.7, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 82600 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.29, "doc_norm": 1.3946, "encoder_q-embeddings": 1936.9305, "encoder_q-layer.0": 1297.4381, "encoder_q-layer.1": 1328.834, "encoder_q-layer.10": 2394.7314, "encoder_q-layer.11": 5565.0811, "encoder_q-layer.2": 1487.8088, "encoder_q-layer.3": 1529.2913, "encoder_q-layer.4": 1612.4583, "encoder_q-layer.5": 1672.504, "encoder_q-layer.6": 1890.5212, "encoder_q-layer.7": 2093.8186, "encoder_q-layer.8": 2504.5989, "encoder_q-layer.9": 2271.9634, "epoch": 0.81, "inbatch_neg_score": 0.109, "inbatch_pos_score": 0.7788, "learning_rate": 9.61111111111111e-06, "loss": 3.29, "norm_diff": 0.1114, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3545.9338, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1089, "query_norm": 1.2832, "queue_k_norm": 1.3947, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.8688, "sent_len_1": 66.6494, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9387, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 82700 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.2975, "doc_norm": 1.3907, "encoder_q-embeddings": 1886.6193, "encoder_q-layer.0": 1296.67, "encoder_q-layer.1": 1402.0253, "encoder_q-layer.10": 2574.5894, "encoder_q-layer.11": 5345.062, "encoder_q-layer.2": 1629.6947, "encoder_q-layer.3": 1720.0746, "encoder_q-layer.4": 1855.1901, "encoder_q-layer.5": 1881.4569, "encoder_q-layer.6": 2117.8772, "encoder_q-layer.7": 2336.3037, "encoder_q-layer.8": 2837.3755, "encoder_q-layer.9": 2442.9458, "epoch": 0.81, "inbatch_neg_score": 0.1098, "inbatch_pos_score": 0.7739, "learning_rate": 9.555555555555556e-06, "loss": 3.2975, "norm_diff": 0.1018, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3634.1914, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1099, "query_norm": 1.2889, "queue_k_norm": 1.3933, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6114, "sent_len_1": 66.5603, "sent_max_len_0": 127.9975, "sent_max_len_1": 188.3187, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 82800 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.2843, "doc_norm": 1.4021, "encoder_q-embeddings": 2136.5386, "encoder_q-layer.0": 1522.7056, "encoder_q-layer.1": 1632.0374, "encoder_q-layer.10": 2276.7971, "encoder_q-layer.11": 5184.1675, "encoder_q-layer.2": 1782.4575, "encoder_q-layer.3": 1838.2115, "encoder_q-layer.4": 2017.3218, "encoder_q-layer.5": 1973.7948, "encoder_q-layer.6": 2300.9553, "encoder_q-layer.7": 2337.6191, "encoder_q-layer.8": 2480.1572, "encoder_q-layer.9": 2218.7729, "epoch": 0.81, "inbatch_neg_score": 0.1142, "inbatch_pos_score": 0.8042, "learning_rate": 9.5e-06, "loss": 3.2843, "norm_diff": 0.1088, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3649.3948, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1144, "query_norm": 1.2933, "queue_k_norm": 1.3936, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6984, "sent_len_1": 66.8917, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6687, "stdk": 0.0492, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 82900 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3195, "doc_norm": 1.3975, "encoder_q-embeddings": 2057.4016, "encoder_q-layer.0": 1353.4336, "encoder_q-layer.1": 1442.7175, "encoder_q-layer.10": 2787.5747, "encoder_q-layer.11": 5876.1187, "encoder_q-layer.2": 1569.5988, "encoder_q-layer.3": 1636.6957, "encoder_q-layer.4": 1719.0972, "encoder_q-layer.5": 1771.1351, "encoder_q-layer.6": 1959.8042, "encoder_q-layer.7": 2222.1763, "encoder_q-layer.8": 2780.1951, "encoder_q-layer.9": 2599.7747, "epoch": 0.81, "inbatch_neg_score": 0.1156, "inbatch_pos_score": 0.791, "learning_rate": 9.444444444444445e-06, "loss": 3.3195, "norm_diff": 0.0996, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3736.1225, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.116, "query_norm": 1.2979, "queue_k_norm": 1.3944, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.3866, "sent_len_1": 66.7054, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7988, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 83000 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3005, "doc_norm": 1.3944, "encoder_q-embeddings": 1853.5221, "encoder_q-layer.0": 1263.4086, "encoder_q-layer.1": 1296.5526, "encoder_q-layer.10": 2430.8682, "encoder_q-layer.11": 5672.1445, "encoder_q-layer.2": 1496.2439, "encoder_q-layer.3": 1532.5685, "encoder_q-layer.4": 1625.5745, "encoder_q-layer.5": 1726.2759, "encoder_q-layer.6": 1928.5868, "encoder_q-layer.7": 2131.0935, "encoder_q-layer.8": 2502.3706, "encoder_q-layer.9": 2307.783, "epoch": 0.81, "inbatch_neg_score": 0.1158, "inbatch_pos_score": 0.7651, "learning_rate": 9.388888888888889e-06, "loss": 3.3005, "norm_diff": 0.1188, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3575.2243, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1161, "query_norm": 1.2756, "queue_k_norm": 1.3965, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7, "sent_len_1": 66.9837, "sent_max_len_0": 127.9975, "sent_max_len_1": 188.7675, "stdk": 0.0489, "stdq": 0.0445, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 83100 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.3015, "doc_norm": 1.3979, "encoder_q-embeddings": 2478.551, "encoder_q-layer.0": 1722.8065, "encoder_q-layer.1": 1812.2428, "encoder_q-layer.10": 2464.0911, "encoder_q-layer.11": 5765.6729, "encoder_q-layer.2": 2188.9551, "encoder_q-layer.3": 2367.6997, "encoder_q-layer.4": 2520.7312, "encoder_q-layer.5": 2628.8218, "encoder_q-layer.6": 2736.6667, "encoder_q-layer.7": 2845.6682, "encoder_q-layer.8": 3102.687, "encoder_q-layer.9": 2474.9695, "epoch": 0.81, "inbatch_neg_score": 0.119, "inbatch_pos_score": 0.7769, "learning_rate": 9.333333333333334e-06, "loss": 3.3015, "norm_diff": 0.1123, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4307.9269, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1194, "query_norm": 1.2856, "queue_k_norm": 1.3968, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5577, "sent_len_1": 66.699, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8713, "stdk": 0.049, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 83200 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.295, "doc_norm": 1.4023, "encoder_q-embeddings": 2069.6357, "encoder_q-layer.0": 1414.8621, "encoder_q-layer.1": 1544.8938, "encoder_q-layer.10": 2276.574, "encoder_q-layer.11": 5201.5103, "encoder_q-layer.2": 1739.062, "encoder_q-layer.3": 1828.6344, "encoder_q-layer.4": 1902.1277, "encoder_q-layer.5": 2117.1233, "encoder_q-layer.6": 2167.376, "encoder_q-layer.7": 2312.8794, "encoder_q-layer.8": 2541.0066, "encoder_q-layer.9": 2313.189, "epoch": 0.81, "inbatch_neg_score": 0.1244, "inbatch_pos_score": 0.8032, "learning_rate": 9.277777777777778e-06, "loss": 3.295, "norm_diff": 0.1029, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3628.9407, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.125, "query_norm": 1.2994, "queue_k_norm": 1.3973, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.8119, "sent_len_1": 66.9114, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9013, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 83300 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.2889, "doc_norm": 1.3982, "encoder_q-embeddings": 1860.5148, "encoder_q-layer.0": 1209.3387, "encoder_q-layer.1": 1288.5929, "encoder_q-layer.10": 2280.8555, "encoder_q-layer.11": 5248.1772, "encoder_q-layer.2": 1432.9014, "encoder_q-layer.3": 1517.604, "encoder_q-layer.4": 1611.6105, "encoder_q-layer.5": 1763.0634, "encoder_q-layer.6": 1883.5184, "encoder_q-layer.7": 2173.6899, "encoder_q-layer.8": 2563.4519, "encoder_q-layer.9": 2272.6216, "epoch": 0.81, "inbatch_neg_score": 0.128, "inbatch_pos_score": 0.832, "learning_rate": 9.222222222222222e-06, "loss": 3.2889, "norm_diff": 0.0985, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3452.8437, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1282, "query_norm": 1.2997, "queue_k_norm": 1.3972, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7274, "sent_len_1": 66.8561, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7125, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 83400 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.2995, "doc_norm": 1.3999, "encoder_q-embeddings": 1830.4435, "encoder_q-layer.0": 1186.8315, "encoder_q-layer.1": 1260.7917, "encoder_q-layer.10": 2285.6067, "encoder_q-layer.11": 5004.8945, "encoder_q-layer.2": 1437.2236, "encoder_q-layer.3": 1465.7377, "encoder_q-layer.4": 1543.7665, "encoder_q-layer.5": 1628.8307, "encoder_q-layer.6": 1829.9755, "encoder_q-layer.7": 2035.6031, "encoder_q-layer.8": 2396.3501, "encoder_q-layer.9": 2142.0623, "epoch": 0.82, "inbatch_neg_score": 0.1225, "inbatch_pos_score": 0.8188, "learning_rate": 9.166666666666666e-06, "loss": 3.2995, "norm_diff": 0.1031, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3291.6121, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1237, "query_norm": 1.2968, "queue_k_norm": 1.3995, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6296, "sent_len_1": 66.8788, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1037, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 83500 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3011, "doc_norm": 1.4037, "encoder_q-embeddings": 2042.0718, "encoder_q-layer.0": 1369.9138, "encoder_q-layer.1": 1390.226, "encoder_q-layer.10": 2415.5625, "encoder_q-layer.11": 5344.292, "encoder_q-layer.2": 1548.7825, "encoder_q-layer.3": 1642.4919, "encoder_q-layer.4": 1732.6652, "encoder_q-layer.5": 1750.3739, "encoder_q-layer.6": 1926.0498, "encoder_q-layer.7": 2124.2502, "encoder_q-layer.8": 2536.4131, "encoder_q-layer.9": 2396.843, "epoch": 0.82, "inbatch_neg_score": 0.1265, "inbatch_pos_score": 0.7656, "learning_rate": 9.111111111111112e-06, "loss": 3.3011, "norm_diff": 0.1207, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3572.733, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1257, "query_norm": 1.283, "queue_k_norm": 1.3986, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4705, "sent_len_1": 66.5416, "sent_max_len_0": 127.9963, "sent_max_len_1": 188.5062, "stdk": 0.0491, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 83600 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.2704, "doc_norm": 1.402, "encoder_q-embeddings": 2100.9207, "encoder_q-layer.0": 1417.093, "encoder_q-layer.1": 1494.0642, "encoder_q-layer.10": 2454.6794, "encoder_q-layer.11": 5220.0903, "encoder_q-layer.2": 1668.5984, "encoder_q-layer.3": 1739.4552, "encoder_q-layer.4": 1917.9507, "encoder_q-layer.5": 1961.9845, "encoder_q-layer.6": 2170.6023, "encoder_q-layer.7": 2420.5203, "encoder_q-layer.8": 2659.7654, "encoder_q-layer.9": 2329.6443, "epoch": 0.82, "inbatch_neg_score": 0.1258, "inbatch_pos_score": 0.8345, "learning_rate": 9.055555555555556e-06, "loss": 3.2704, "norm_diff": 0.0922, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3607.8817, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1265, "query_norm": 1.3098, "queue_k_norm": 1.3998, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5452, "sent_len_1": 66.9627, "sent_max_len_0": 127.9938, "sent_max_len_1": 190.0687, "stdk": 0.049, "stdq": 0.0457, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 83700 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.2865, "doc_norm": 1.3996, "encoder_q-embeddings": 1749.6497, "encoder_q-layer.0": 1165.8988, "encoder_q-layer.1": 1213.9436, "encoder_q-layer.10": 2389.4163, "encoder_q-layer.11": 5429.7417, "encoder_q-layer.2": 1370.1801, "encoder_q-layer.3": 1402.0076, "encoder_q-layer.4": 1523.0187, "encoder_q-layer.5": 1594.4615, "encoder_q-layer.6": 1832.5144, "encoder_q-layer.7": 2050.3674, "encoder_q-layer.8": 2487.7256, "encoder_q-layer.9": 2394.3062, "epoch": 0.82, "inbatch_neg_score": 0.1243, "inbatch_pos_score": 0.7939, "learning_rate": 9e-06, "loss": 3.2865, "norm_diff": 0.1137, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3421.8866, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1247, "query_norm": 1.2859, "queue_k_norm": 1.4021, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5481, "sent_len_1": 66.6603, "sent_max_len_0": 127.99, "sent_max_len_1": 187.5962, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 83800 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.3044, "doc_norm": 1.4026, "encoder_q-embeddings": 2275.6558, "encoder_q-layer.0": 1605.0315, "encoder_q-layer.1": 1758.5239, "encoder_q-layer.10": 2261.9917, "encoder_q-layer.11": 5223.0, "encoder_q-layer.2": 2062.0564, "encoder_q-layer.3": 2150.0522, "encoder_q-layer.4": 2321.7952, "encoder_q-layer.5": 2176.5066, "encoder_q-layer.6": 2231.2305, "encoder_q-layer.7": 2457.4673, "encoder_q-layer.8": 2590.6409, "encoder_q-layer.9": 2209.2363, "epoch": 0.82, "inbatch_neg_score": 0.1269, "inbatch_pos_score": 0.8237, "learning_rate": 8.944444444444444e-06, "loss": 3.3044, "norm_diff": 0.1098, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3808.7428, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1261, "query_norm": 1.2928, "queue_k_norm": 1.4013, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4874, "sent_len_1": 66.7969, "sent_max_len_0": 127.995, "sent_max_len_1": 190.3137, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 83900 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.2887, "doc_norm": 1.4003, "encoder_q-embeddings": 2684.5239, "encoder_q-layer.0": 1847.1864, "encoder_q-layer.1": 1907.4686, "encoder_q-layer.10": 2505.6887, "encoder_q-layer.11": 5183.292, "encoder_q-layer.2": 2148.5337, "encoder_q-layer.3": 2135.8384, "encoder_q-layer.4": 2243.0886, "encoder_q-layer.5": 2176.6553, "encoder_q-layer.6": 2197.4436, "encoder_q-layer.7": 2308.3364, "encoder_q-layer.8": 2490.5742, "encoder_q-layer.9": 2266.5688, "epoch": 0.82, "inbatch_neg_score": 0.1247, "inbatch_pos_score": 0.8262, "learning_rate": 8.88888888888889e-06, "loss": 3.2887, "norm_diff": 0.0995, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3888.9147, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1241, "query_norm": 1.3008, "queue_k_norm": 1.4006, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5678, "sent_len_1": 66.5417, "sent_max_len_0": 127.99, "sent_max_len_1": 189.6138, "stdk": 0.0489, "stdq": 0.0457, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84000 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.2967, "doc_norm": 1.4059, "encoder_q-embeddings": 1739.1996, "encoder_q-layer.0": 1148.9602, "encoder_q-layer.1": 1228.859, "encoder_q-layer.10": 2421.8235, "encoder_q-layer.11": 5438.166, "encoder_q-layer.2": 1380.8658, "encoder_q-layer.3": 1430.2236, "encoder_q-layer.4": 1488.2651, "encoder_q-layer.5": 1569.6688, "encoder_q-layer.6": 1750.1293, "encoder_q-layer.7": 1982.8411, "encoder_q-layer.8": 2420.8989, "encoder_q-layer.9": 2281.4517, "epoch": 0.82, "inbatch_neg_score": 0.1241, "inbatch_pos_score": 0.8105, "learning_rate": 8.833333333333334e-06, "loss": 3.2967, "norm_diff": 0.1263, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3426.5311, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1249, "query_norm": 1.2796, "queue_k_norm": 1.4014, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5998, "sent_len_1": 66.8565, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5137, "stdk": 0.0491, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 84100 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.2837, "doc_norm": 1.3992, "encoder_q-embeddings": 3572.7063, "encoder_q-layer.0": 2453.3643, "encoder_q-layer.1": 2667.1531, "encoder_q-layer.10": 4695.8799, "encoder_q-layer.11": 10688.7842, "encoder_q-layer.2": 2962.8433, "encoder_q-layer.3": 3056.1577, "encoder_q-layer.4": 3311.2815, "encoder_q-layer.5": 3410.4102, "encoder_q-layer.6": 3849.0869, "encoder_q-layer.7": 4136.0684, "encoder_q-layer.8": 4707.7749, "encoder_q-layer.9": 4334.3066, "epoch": 0.82, "inbatch_neg_score": 0.1224, "inbatch_pos_score": 0.8145, "learning_rate": 8.777777777777778e-06, "loss": 3.2837, "norm_diff": 0.1115, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6791.9259, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1232, "query_norm": 1.2877, "queue_k_norm": 1.4028, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5845, "sent_len_1": 66.6517, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0563, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 84200 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.3053, "doc_norm": 1.3969, "encoder_q-embeddings": 4099.1055, "encoder_q-layer.0": 2744.6643, "encoder_q-layer.1": 3060.3918, "encoder_q-layer.10": 4970.7788, "encoder_q-layer.11": 11043.9902, "encoder_q-layer.2": 3555.5779, "encoder_q-layer.3": 3778.6016, "encoder_q-layer.4": 4005.6758, "encoder_q-layer.5": 4135.2314, "encoder_q-layer.6": 4278.79, "encoder_q-layer.7": 4690.9551, "encoder_q-layer.8": 5518.4644, "encoder_q-layer.9": 5130.5244, "epoch": 0.82, "inbatch_neg_score": 0.1219, "inbatch_pos_score": 0.791, "learning_rate": 8.722222222222224e-06, "loss": 3.3053, "norm_diff": 0.1237, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7514.2138, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1211, "query_norm": 1.2732, "queue_k_norm": 1.403, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.2661, "sent_len_1": 66.6484, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8462, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 84300 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.302, "doc_norm": 1.3976, "encoder_q-embeddings": 1985.854, "encoder_q-layer.0": 1290.7167, "encoder_q-layer.1": 1418.5724, "encoder_q-layer.10": 2486.7778, "encoder_q-layer.11": 5890.0259, "encoder_q-layer.2": 1593.155, "encoder_q-layer.3": 1659.4052, "encoder_q-layer.4": 1813.2816, "encoder_q-layer.5": 1800.7415, "encoder_q-layer.6": 1940.9958, "encoder_q-layer.7": 2161.0994, "encoder_q-layer.8": 2518.4822, "encoder_q-layer.9": 2401.9434, "epoch": 0.82, "inbatch_neg_score": 0.1204, "inbatch_pos_score": 0.7832, "learning_rate": 8.666666666666668e-06, "loss": 3.302, "norm_diff": 0.1293, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3708.0901, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1207, "query_norm": 1.2683, "queue_k_norm": 1.4007, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6501, "sent_len_1": 66.8017, "sent_max_len_0": 127.995, "sent_max_len_1": 189.8, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84400 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.283, "doc_norm": 1.4046, "encoder_q-embeddings": 1857.0261, "encoder_q-layer.0": 1289.5061, "encoder_q-layer.1": 1334.6873, "encoder_q-layer.10": 2329.0908, "encoder_q-layer.11": 5136.8643, "encoder_q-layer.2": 1451.1992, "encoder_q-layer.3": 1504.9965, "encoder_q-layer.4": 1550.1045, "encoder_q-layer.5": 1615.5033, "encoder_q-layer.6": 1813.1127, "encoder_q-layer.7": 2015.509, "encoder_q-layer.8": 2407.0078, "encoder_q-layer.9": 2213.7197, "epoch": 0.82, "inbatch_neg_score": 0.1181, "inbatch_pos_score": 0.7949, "learning_rate": 8.611111111111112e-06, "loss": 3.283, "norm_diff": 0.1308, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3393.7049, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1176, "query_norm": 1.2738, "queue_k_norm": 1.403, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6765, "sent_len_1": 66.6289, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3713, "stdk": 0.049, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 84500 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.2903, "doc_norm": 1.3999, "encoder_q-embeddings": 2053.6594, "encoder_q-layer.0": 1331.4688, "encoder_q-layer.1": 1399.2546, "encoder_q-layer.10": 2362.8621, "encoder_q-layer.11": 5456.1421, "encoder_q-layer.2": 1544.7679, "encoder_q-layer.3": 1620.6683, "encoder_q-layer.4": 1827.2329, "encoder_q-layer.5": 1821.4348, "encoder_q-layer.6": 2070.9065, "encoder_q-layer.7": 2279.5959, "encoder_q-layer.8": 2639.7246, "encoder_q-layer.9": 2432.9741, "epoch": 0.83, "inbatch_neg_score": 0.1142, "inbatch_pos_score": 0.7886, "learning_rate": 8.555555555555556e-06, "loss": 3.2903, "norm_diff": 0.124, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3614.1189, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1145, "query_norm": 1.2759, "queue_k_norm": 1.402, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4852, "sent_len_1": 66.7517, "sent_max_len_0": 127.985, "sent_max_len_1": 186.8262, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 84600 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.3074, "doc_norm": 1.4015, "encoder_q-embeddings": 1896.152, "encoder_q-layer.0": 1294.3451, "encoder_q-layer.1": 1391.5687, "encoder_q-layer.10": 2243.5002, "encoder_q-layer.11": 5273.7764, "encoder_q-layer.2": 1608.6171, "encoder_q-layer.3": 1678.9783, "encoder_q-layer.4": 1856.9138, "encoder_q-layer.5": 1874.5098, "encoder_q-layer.6": 2068.4011, "encoder_q-layer.7": 2237.1948, "encoder_q-layer.8": 2606.7036, "encoder_q-layer.9": 2256.5278, "epoch": 0.83, "inbatch_neg_score": 0.1161, "inbatch_pos_score": 0.7905, "learning_rate": 8.500000000000002e-06, "loss": 3.3074, "norm_diff": 0.1355, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3572.3486, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1154, "query_norm": 1.266, "queue_k_norm": 1.4012, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5393, "sent_len_1": 66.8638, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.5238, "stdk": 0.0489, "stdq": 0.0446, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84700 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.2833, "doc_norm": 1.4017, "encoder_q-embeddings": 2095.0342, "encoder_q-layer.0": 1401.4672, "encoder_q-layer.1": 1473.5935, "encoder_q-layer.10": 2474.9548, "encoder_q-layer.11": 5408.2124, "encoder_q-layer.2": 1647.166, "encoder_q-layer.3": 1728.3793, "encoder_q-layer.4": 1826.1818, "encoder_q-layer.5": 1837.6422, "encoder_q-layer.6": 2051.2861, "encoder_q-layer.7": 2224.3833, "encoder_q-layer.8": 2594.7329, "encoder_q-layer.9": 2326.4658, "epoch": 0.83, "inbatch_neg_score": 0.1138, "inbatch_pos_score": 0.7744, "learning_rate": 8.444444444444446e-06, "loss": 3.2833, "norm_diff": 0.131, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3597.4543, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.113, "query_norm": 1.2708, "queue_k_norm": 1.4034, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5265, "sent_len_1": 66.8364, "sent_max_len_0": 128.0, "sent_max_len_1": 191.9, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 84800 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.2815, "doc_norm": 1.4111, "encoder_q-embeddings": 1765.2133, "encoder_q-layer.0": 1178.1003, "encoder_q-layer.1": 1276.248, "encoder_q-layer.10": 2274.2461, "encoder_q-layer.11": 5367.5493, "encoder_q-layer.2": 1392.1071, "encoder_q-layer.3": 1449.6997, "encoder_q-layer.4": 1501.1189, "encoder_q-layer.5": 1567.9634, "encoder_q-layer.6": 1738.0874, "encoder_q-layer.7": 2139.8359, "encoder_q-layer.8": 2585.6892, "encoder_q-layer.9": 2322.4456, "epoch": 0.83, "inbatch_neg_score": 0.1112, "inbatch_pos_score": 0.8037, "learning_rate": 8.38888888888889e-06, "loss": 3.2815, "norm_diff": 0.1315, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3387.3135, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1109, "query_norm": 1.2796, "queue_k_norm": 1.4037, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6665, "sent_len_1": 66.7841, "sent_max_len_0": 128.0, "sent_max_len_1": 189.39, "stdk": 0.0493, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 84900 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3039, "doc_norm": 1.4012, "encoder_q-embeddings": 1901.2903, "encoder_q-layer.0": 1225.9491, "encoder_q-layer.1": 1312.8444, "encoder_q-layer.10": 2286.9587, "encoder_q-layer.11": 5455.96, "encoder_q-layer.2": 1504.7018, "encoder_q-layer.3": 1504.7443, "encoder_q-layer.4": 1610.7068, "encoder_q-layer.5": 1668.7832, "encoder_q-layer.6": 1817.095, "encoder_q-layer.7": 2054.9263, "encoder_q-layer.8": 2433.2339, "encoder_q-layer.9": 2205.6914, "epoch": 0.83, "inbatch_neg_score": 0.1117, "inbatch_pos_score": 0.7656, "learning_rate": 8.333333333333334e-06, "loss": 3.3039, "norm_diff": 0.1463, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3478.203, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1115, "query_norm": 1.2548, "queue_k_norm": 1.403, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5345, "sent_len_1": 66.919, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7212, "stdk": 0.0489, "stdq": 0.0444, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 85000 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.29, "doc_norm": 1.4074, "encoder_q-embeddings": 2610.4648, "encoder_q-layer.0": 1897.2467, "encoder_q-layer.1": 1731.3264, "encoder_q-layer.10": 2338.2429, "encoder_q-layer.11": 5437.9712, "encoder_q-layer.2": 1967.3108, "encoder_q-layer.3": 1885.9773, "encoder_q-layer.4": 2006.9773, "encoder_q-layer.5": 2125.5691, "encoder_q-layer.6": 2112.4541, "encoder_q-layer.7": 2368.1851, "encoder_q-layer.8": 2737.4521, "encoder_q-layer.9": 2309.5662, "epoch": 0.83, "inbatch_neg_score": 0.1106, "inbatch_pos_score": 0.7856, "learning_rate": 8.27777777777778e-06, "loss": 3.29, "norm_diff": 0.1367, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3863.6604, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1105, "query_norm": 1.2706, "queue_k_norm": 1.4013, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6101, "sent_len_1": 66.8166, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4775, "stdk": 0.0492, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 85100 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2792, "doc_norm": 1.4031, "encoder_q-embeddings": 1800.4438, "encoder_q-layer.0": 1210.7604, "encoder_q-layer.1": 1313.8013, "encoder_q-layer.10": 2469.5979, "encoder_q-layer.11": 5569.8389, "encoder_q-layer.2": 1425.9084, "encoder_q-layer.3": 1487.5499, "encoder_q-layer.4": 1557.1903, "encoder_q-layer.5": 1608.2913, "encoder_q-layer.6": 1829.8362, "encoder_q-layer.7": 2087.8706, "encoder_q-layer.8": 2515.4641, "encoder_q-layer.9": 2276.8364, "epoch": 0.83, "inbatch_neg_score": 0.109, "inbatch_pos_score": 0.7896, "learning_rate": 8.222222222222223e-06, "loss": 3.2792, "norm_diff": 0.1287, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3443.2654, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1087, "query_norm": 1.2744, "queue_k_norm": 1.4017, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6166, "sent_len_1": 66.874, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6637, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 85200 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.3016, "doc_norm": 1.4062, "encoder_q-embeddings": 1750.6506, "encoder_q-layer.0": 1157.504, "encoder_q-layer.1": 1192.3611, "encoder_q-layer.10": 2340.4685, "encoder_q-layer.11": 5181.3384, "encoder_q-layer.2": 1345.0616, "encoder_q-layer.3": 1389.0054, "encoder_q-layer.4": 1484.5037, "encoder_q-layer.5": 1537.3065, "encoder_q-layer.6": 1718.543, "encoder_q-layer.7": 1972.9429, "encoder_q-layer.8": 2315.4641, "encoder_q-layer.9": 2186.2173, "epoch": 0.83, "inbatch_neg_score": 0.1035, "inbatch_pos_score": 0.8047, "learning_rate": 8.166666666666668e-06, "loss": 3.3016, "norm_diff": 0.1312, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3275.4871, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1039, "query_norm": 1.275, "queue_k_norm": 1.4013, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4583, "sent_len_1": 67.0205, "sent_max_len_0": 127.9887, "sent_max_len_1": 187.6337, "stdk": 0.0492, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 85300 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.2898, "doc_norm": 1.4094, "encoder_q-embeddings": 1792.3662, "encoder_q-layer.0": 1242.2562, "encoder_q-layer.1": 1327.0205, "encoder_q-layer.10": 2314.9939, "encoder_q-layer.11": 5338.3706, "encoder_q-layer.2": 1482.1237, "encoder_q-layer.3": 1513.1165, "encoder_q-layer.4": 1668.1934, "encoder_q-layer.5": 1759.9724, "encoder_q-layer.6": 1967.0416, "encoder_q-layer.7": 2236.8728, "encoder_q-layer.8": 2682.4558, "encoder_q-layer.9": 2304.9744, "epoch": 0.83, "inbatch_neg_score": 0.1052, "inbatch_pos_score": 0.8066, "learning_rate": 8.111111111111112e-06, "loss": 3.2898, "norm_diff": 0.1227, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3489.4594, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1058, "query_norm": 1.2866, "queue_k_norm": 1.3999, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6941, "sent_len_1": 66.7953, "sent_max_len_0": 128.0, "sent_max_len_1": 189.43, "stdk": 0.0493, "stdq": 0.0456, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 85400 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.2788, "doc_norm": 1.4043, "encoder_q-embeddings": 1822.6698, "encoder_q-layer.0": 1181.0714, "encoder_q-layer.1": 1239.7585, "encoder_q-layer.10": 2312.5623, "encoder_q-layer.11": 5188.7935, "encoder_q-layer.2": 1403.6921, "encoder_q-layer.3": 1441.4417, "encoder_q-layer.4": 1554.634, "encoder_q-layer.5": 1669.1437, "encoder_q-layer.6": 1861.8542, "encoder_q-layer.7": 2112.394, "encoder_q-layer.8": 2431.814, "encoder_q-layer.9": 2236.3237, "epoch": 0.83, "inbatch_neg_score": 0.1058, "inbatch_pos_score": 0.7881, "learning_rate": 8.055555555555557e-06, "loss": 3.2788, "norm_diff": 0.1355, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3372.4948, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.106, "query_norm": 1.2688, "queue_k_norm": 1.4007, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4952, "sent_len_1": 66.7999, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7937, "stdk": 0.0491, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 85500 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2919, "doc_norm": 1.3952, "encoder_q-embeddings": 2087.3245, "encoder_q-layer.0": 1465.4655, "encoder_q-layer.1": 1573.2611, "encoder_q-layer.10": 2426.0571, "encoder_q-layer.11": 5783.8472, "encoder_q-layer.2": 1812.9204, "encoder_q-layer.3": 1942.2787, "encoder_q-layer.4": 1990.2561, "encoder_q-layer.5": 2060.0212, "encoder_q-layer.6": 2277.3647, "encoder_q-layer.7": 2306.6921, "encoder_q-layer.8": 2632.9844, "encoder_q-layer.9": 2276.3013, "epoch": 0.84, "inbatch_neg_score": 0.1027, "inbatch_pos_score": 0.7715, "learning_rate": 8.000000000000001e-06, "loss": 3.2919, "norm_diff": 0.1306, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3802.2743, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1031, "query_norm": 1.2646, "queue_k_norm": 1.3994, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7759, "sent_len_1": 66.8636, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0163, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 85600 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.2917, "doc_norm": 1.4022, "encoder_q-embeddings": 1887.1974, "encoder_q-layer.0": 1266.5452, "encoder_q-layer.1": 1349.6199, "encoder_q-layer.10": 2449.5908, "encoder_q-layer.11": 5324.7705, "encoder_q-layer.2": 1501.3768, "encoder_q-layer.3": 1626.1613, "encoder_q-layer.4": 1727.0928, "encoder_q-layer.5": 1811.3458, "encoder_q-layer.6": 1947.8949, "encoder_q-layer.7": 2153.731, "encoder_q-layer.8": 2607.166, "encoder_q-layer.9": 2410.2175, "epoch": 0.84, "inbatch_neg_score": 0.1019, "inbatch_pos_score": 0.7842, "learning_rate": 7.944444444444445e-06, "loss": 3.2917, "norm_diff": 0.1253, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3527.636, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1018, "query_norm": 1.2769, "queue_k_norm": 1.3998, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5931, "sent_len_1": 66.7439, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.6575, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 85700 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.3001, "doc_norm": 1.396, "encoder_q-embeddings": 2114.3633, "encoder_q-layer.0": 1463.0909, "encoder_q-layer.1": 1566.8118, "encoder_q-layer.10": 2415.7036, "encoder_q-layer.11": 5411.7256, "encoder_q-layer.2": 1768.3135, "encoder_q-layer.3": 1917.5975, "encoder_q-layer.4": 2004.9353, "encoder_q-layer.5": 2121.6267, "encoder_q-layer.6": 2445.5857, "encoder_q-layer.7": 2720.0046, "encoder_q-layer.8": 2855.5552, "encoder_q-layer.9": 2410.6326, "epoch": 0.84, "inbatch_neg_score": 0.103, "inbatch_pos_score": 0.7749, "learning_rate": 7.88888888888889e-06, "loss": 3.3001, "norm_diff": 0.1323, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3806.112, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1027, "query_norm": 1.2637, "queue_k_norm": 1.4011, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6734, "sent_len_1": 66.8075, "sent_max_len_0": 128.0, "sent_max_len_1": 187.3325, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 85800 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.309, "doc_norm": 1.4011, "encoder_q-embeddings": 2321.0183, "encoder_q-layer.0": 1574.8879, "encoder_q-layer.1": 1742.8129, "encoder_q-layer.10": 2401.2781, "encoder_q-layer.11": 5373.0161, "encoder_q-layer.2": 2001.6389, "encoder_q-layer.3": 2083.7043, "encoder_q-layer.4": 2360.0266, "encoder_q-layer.5": 2419.6494, "encoder_q-layer.6": 2605.0842, "encoder_q-layer.7": 2591.7278, "encoder_q-layer.8": 2733.3777, "encoder_q-layer.9": 2350.8264, "epoch": 0.84, "inbatch_neg_score": 0.0998, "inbatch_pos_score": 0.7759, "learning_rate": 7.833333333333333e-06, "loss": 3.309, "norm_diff": 0.141, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3975.8241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1, "query_norm": 1.2602, "queue_k_norm": 1.3998, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6014, "sent_len_1": 66.8843, "sent_max_len_0": 128.0, "sent_max_len_1": 191.695, "stdk": 0.049, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 85900 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.3085, "doc_norm": 1.3968, "encoder_q-embeddings": 1810.1711, "encoder_q-layer.0": 1182.3479, "encoder_q-layer.1": 1227.7609, "encoder_q-layer.10": 2403.0166, "encoder_q-layer.11": 5509.0503, "encoder_q-layer.2": 1364.7894, "encoder_q-layer.3": 1410.1628, "encoder_q-layer.4": 1605.2922, "encoder_q-layer.5": 1577.9019, "encoder_q-layer.6": 1756.8676, "encoder_q-layer.7": 1964.0962, "encoder_q-layer.8": 2488.8188, "encoder_q-layer.9": 2311.1355, "epoch": 0.84, "inbatch_neg_score": 0.0994, "inbatch_pos_score": 0.7681, "learning_rate": 7.777777777777777e-06, "loss": 3.3085, "norm_diff": 0.1342, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3474.5265, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0999, "query_norm": 1.2625, "queue_k_norm": 1.3998, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6676, "sent_len_1": 66.894, "sent_max_len_0": 127.9988, "sent_max_len_1": 191.4263, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 86000 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.2989, "doc_norm": 1.4005, "encoder_q-embeddings": 1792.6405, "encoder_q-layer.0": 1166.9109, "encoder_q-layer.1": 1253.7581, "encoder_q-layer.10": 2240.6121, "encoder_q-layer.11": 5311.7285, "encoder_q-layer.2": 1378.5532, "encoder_q-layer.3": 1436.2566, "encoder_q-layer.4": 1538.116, "encoder_q-layer.5": 1538.3727, "encoder_q-layer.6": 1752.0519, "encoder_q-layer.7": 1994.4229, "encoder_q-layer.8": 2325.9177, "encoder_q-layer.9": 2226.1956, "epoch": 0.84, "inbatch_neg_score": 0.102, "inbatch_pos_score": 0.7891, "learning_rate": 7.722222222222223e-06, "loss": 3.2989, "norm_diff": 0.1176, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3350.9124, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1015, "query_norm": 1.2829, "queue_k_norm": 1.3985, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6934, "sent_len_1": 66.7818, "sent_max_len_0": 128.0, "sent_max_len_1": 188.12, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 86100 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.2752, "doc_norm": 1.3905, "encoder_q-embeddings": 1877.6902, "encoder_q-layer.0": 1235.3616, "encoder_q-layer.1": 1313.2842, "encoder_q-layer.10": 2479.2224, "encoder_q-layer.11": 5489.7588, "encoder_q-layer.2": 1480.3248, "encoder_q-layer.3": 1534.6498, "encoder_q-layer.4": 1598.6442, "encoder_q-layer.5": 1644.2538, "encoder_q-layer.6": 1844.5802, "encoder_q-layer.7": 2140.7722, "encoder_q-layer.8": 2607.0273, "encoder_q-layer.9": 2344.4553, "epoch": 0.84, "inbatch_neg_score": 0.0983, "inbatch_pos_score": 0.7461, "learning_rate": 7.666666666666667e-06, "loss": 3.2752, "norm_diff": 0.1261, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3535.8592, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0988, "query_norm": 1.2644, "queue_k_norm": 1.3993, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6888, "sent_len_1": 66.7012, "sent_max_len_0": 127.99, "sent_max_len_1": 189.9638, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 86200 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.2805, "doc_norm": 1.4073, "encoder_q-embeddings": 1826.1295, "encoder_q-layer.0": 1232.2041, "encoder_q-layer.1": 1338.6908, "encoder_q-layer.10": 2373.884, "encoder_q-layer.11": 5546.9932, "encoder_q-layer.2": 1483.2886, "encoder_q-layer.3": 1543.7487, "encoder_q-layer.4": 1674.3163, "encoder_q-layer.5": 1748.9279, "encoder_q-layer.6": 1931.1858, "encoder_q-layer.7": 2157.2471, "encoder_q-layer.8": 2526.3955, "encoder_q-layer.9": 2334.6267, "epoch": 0.84, "inbatch_neg_score": 0.0973, "inbatch_pos_score": 0.7705, "learning_rate": 7.611111111111112e-06, "loss": 3.2805, "norm_diff": 0.147, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3597.5361, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0974, "query_norm": 1.2603, "queue_k_norm": 1.3983, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6472, "sent_len_1": 66.752, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2175, "stdk": 0.0493, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 86300 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.294, "doc_norm": 1.4043, "encoder_q-embeddings": 11108.8359, "encoder_q-layer.0": 8062.374, "encoder_q-layer.1": 7253.0972, "encoder_q-layer.10": 4956.9448, "encoder_q-layer.11": 11259.9277, "encoder_q-layer.2": 7513.9917, "encoder_q-layer.3": 7283.6064, "encoder_q-layer.4": 7894.8638, "encoder_q-layer.5": 6663.8359, "encoder_q-layer.6": 6153.77, "encoder_q-layer.7": 6206.9487, "encoder_q-layer.8": 6309.8682, "encoder_q-layer.9": 4712.1089, "epoch": 0.84, "inbatch_neg_score": 0.1011, "inbatch_pos_score": 0.7935, "learning_rate": 7.555555555555556e-06, "loss": 3.294, "norm_diff": 0.1285, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11706.2072, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1021, "query_norm": 1.2758, "queue_k_norm": 1.4005, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6805, "sent_len_1": 66.6354, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4963, "stdk": 0.0492, "stdq": 0.0452, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 86400 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.2824, "doc_norm": 1.3967, "encoder_q-embeddings": 4041.5996, "encoder_q-layer.0": 2556.4919, "encoder_q-layer.1": 2743.2349, "encoder_q-layer.10": 4683.6431, "encoder_q-layer.11": 10633.1807, "encoder_q-layer.2": 3038.5632, "encoder_q-layer.3": 3250.1194, "encoder_q-layer.4": 3421.6904, "encoder_q-layer.5": 3468.5378, "encoder_q-layer.6": 3907.9219, "encoder_q-layer.7": 4457.5063, "encoder_q-layer.8": 5100.5767, "encoder_q-layer.9": 4618.3569, "epoch": 0.84, "inbatch_neg_score": 0.1018, "inbatch_pos_score": 0.7954, "learning_rate": 7.5e-06, "loss": 3.2824, "norm_diff": 0.1114, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7081.6089, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1009, "query_norm": 1.2853, "queue_k_norm": 1.3964, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4603, "sent_len_1": 66.5661, "sent_max_len_0": 128.0, "sent_max_len_1": 188.41, "stdk": 0.0489, "stdq": 0.0454, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86500 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2952, "doc_norm": 1.3968, "encoder_q-embeddings": 3750.2766, "encoder_q-layer.0": 2646.4299, "encoder_q-layer.1": 2749.7068, "encoder_q-layer.10": 4479.2607, "encoder_q-layer.11": 10150.9043, "encoder_q-layer.2": 3099.2061, "encoder_q-layer.3": 3212.9631, "encoder_q-layer.4": 3500.4314, "encoder_q-layer.5": 3456.0227, "encoder_q-layer.6": 3846.9666, "encoder_q-layer.7": 4217.8101, "encoder_q-layer.8": 4610.4092, "encoder_q-layer.9": 4326.4976, "epoch": 0.85, "inbatch_neg_score": 0.102, "inbatch_pos_score": 0.7754, "learning_rate": 7.444444444444444e-06, "loss": 3.2952, "norm_diff": 0.127, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6787.5143, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1016, "query_norm": 1.2699, "queue_k_norm": 1.3965, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7764, "sent_len_1": 66.6581, "sent_max_len_0": 127.9963, "sent_max_len_1": 188.89, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86600 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.2892, "doc_norm": 1.402, "encoder_q-embeddings": 4374.7617, "encoder_q-layer.0": 3220.4333, "encoder_q-layer.1": 3464.2947, "encoder_q-layer.10": 4524.291, "encoder_q-layer.11": 10699.1689, "encoder_q-layer.2": 4271.8726, "encoder_q-layer.3": 4582.3218, "encoder_q-layer.4": 4465.3486, "encoder_q-layer.5": 4452.5591, "encoder_q-layer.6": 4524.709, "encoder_q-layer.7": 4621.4102, "encoder_q-layer.8": 5331.5244, "encoder_q-layer.9": 4328.7808, "epoch": 0.85, "inbatch_neg_score": 0.0993, "inbatch_pos_score": 0.7817, "learning_rate": 7.38888888888889e-06, "loss": 3.2892, "norm_diff": 0.1389, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7750.0754, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0988, "query_norm": 1.2631, "queue_k_norm": 1.3991, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.631, "sent_len_1": 66.8357, "sent_max_len_0": 128.0, "sent_max_len_1": 186.565, "stdk": 0.0491, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 86700 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 3.2779, "doc_norm": 1.3987, "encoder_q-embeddings": 3403.3586, "encoder_q-layer.0": 2210.6724, "encoder_q-layer.1": 2378.4595, "encoder_q-layer.10": 4627.1553, "encoder_q-layer.11": 10123.2637, "encoder_q-layer.2": 2655.8503, "encoder_q-layer.3": 2770.3323, "encoder_q-layer.4": 2988.0474, "encoder_q-layer.5": 3067.4258, "encoder_q-layer.6": 3543.9692, "encoder_q-layer.7": 4109.2808, "encoder_q-layer.8": 5005.499, "encoder_q-layer.9": 4389.4058, "epoch": 0.85, "inbatch_neg_score": 0.1014, "inbatch_pos_score": 0.8008, "learning_rate": 7.333333333333334e-06, "loss": 3.2779, "norm_diff": 0.1265, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6542.1849, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1014, "query_norm": 1.2722, "queue_k_norm": 1.3988, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.705, "sent_len_1": 67.0354, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5163, "stdk": 0.049, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 86800 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.284, "doc_norm": 1.4032, "encoder_q-embeddings": 3679.2202, "encoder_q-layer.0": 2496.6494, "encoder_q-layer.1": 2632.7119, "encoder_q-layer.10": 4534.1245, "encoder_q-layer.11": 10661.0781, "encoder_q-layer.2": 2967.637, "encoder_q-layer.3": 3141.8611, "encoder_q-layer.4": 3457.3787, "encoder_q-layer.5": 3564.7195, "encoder_q-layer.6": 3813.2871, "encoder_q-layer.7": 4266.0786, "encoder_q-layer.8": 4819.8633, "encoder_q-layer.9": 4423.7832, "epoch": 0.85, "inbatch_neg_score": 0.0998, "inbatch_pos_score": 0.769, "learning_rate": 7.277777777777778e-06, "loss": 3.284, "norm_diff": 0.1495, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6916.7711, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0994, "query_norm": 1.2537, "queue_k_norm": 1.3965, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6243, "sent_len_1": 66.9686, "sent_max_len_0": 128.0, "sent_max_len_1": 187.3063, "stdk": 0.0492, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86900 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.2839, "doc_norm": 1.3997, "encoder_q-embeddings": 4672.811, "encoder_q-layer.0": 2939.9656, "encoder_q-layer.1": 3141.4849, "encoder_q-layer.10": 4839.5693, "encoder_q-layer.11": 11021.0576, "encoder_q-layer.2": 3474.2048, "encoder_q-layer.3": 3544.2927, "encoder_q-layer.4": 3832.6226, "encoder_q-layer.5": 4138.8237, "encoder_q-layer.6": 4675.8872, "encoder_q-layer.7": 4793.6421, "encoder_q-layer.8": 5462.4375, "encoder_q-layer.9": 4876.1865, "epoch": 0.85, "inbatch_neg_score": 0.1015, "inbatch_pos_score": 0.7886, "learning_rate": 7.222222222222222e-06, "loss": 3.2839, "norm_diff": 0.1273, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7585.7544, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1006, "query_norm": 1.2724, "queue_k_norm": 1.3972, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5508, "sent_len_1": 66.7254, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0137, "stdk": 0.049, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 87000 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.2961, "doc_norm": 1.3964, "encoder_q-embeddings": 3671.4614, "encoder_q-layer.0": 2508.7847, "encoder_q-layer.1": 2646.146, "encoder_q-layer.10": 4573.3228, "encoder_q-layer.11": 11323.002, "encoder_q-layer.2": 3016.429, "encoder_q-layer.3": 3240.21, "encoder_q-layer.4": 3606.7048, "encoder_q-layer.5": 3699.6396, "encoder_q-layer.6": 4242.5254, "encoder_q-layer.7": 4520.7871, "encoder_q-layer.8": 5253.4712, "encoder_q-layer.9": 4558.4692, "epoch": 0.85, "inbatch_neg_score": 0.0987, "inbatch_pos_score": 0.7573, "learning_rate": 7.166666666666667e-06, "loss": 3.2961, "norm_diff": 0.1394, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7282.1903, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0988, "query_norm": 1.257, "queue_k_norm": 1.3964, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5828, "sent_len_1": 66.6556, "sent_max_len_0": 128.0, "sent_max_len_1": 186.985, "stdk": 0.049, "stdq": 0.0445, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87100 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.2867, "doc_norm": 1.4012, "encoder_q-embeddings": 3748.707, "encoder_q-layer.0": 2486.9185, "encoder_q-layer.1": 2660.2026, "encoder_q-layer.10": 4493.4814, "encoder_q-layer.11": 10203.249, "encoder_q-layer.2": 2851.9163, "encoder_q-layer.3": 2992.6873, "encoder_q-layer.4": 3077.4502, "encoder_q-layer.5": 3309.4199, "encoder_q-layer.6": 3667.989, "encoder_q-layer.7": 4077.7607, "encoder_q-layer.8": 5096.1509, "encoder_q-layer.9": 4577.8486, "epoch": 0.85, "inbatch_neg_score": 0.0985, "inbatch_pos_score": 0.7896, "learning_rate": 7.111111111111112e-06, "loss": 3.2867, "norm_diff": 0.1258, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6753.8867, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0983, "query_norm": 1.2754, "queue_k_norm": 1.3954, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6909, "sent_len_1": 66.7393, "sent_max_len_0": 128.0, "sent_max_len_1": 191.3338, "stdk": 0.0491, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87200 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.2863, "doc_norm": 1.397, "encoder_q-embeddings": 4216.252, "encoder_q-layer.0": 2777.6584, "encoder_q-layer.1": 3007.0088, "encoder_q-layer.10": 5073.6382, "encoder_q-layer.11": 10499.1953, "encoder_q-layer.2": 3395.9241, "encoder_q-layer.3": 3424.6035, "encoder_q-layer.4": 3525.3286, "encoder_q-layer.5": 3749.7227, "encoder_q-layer.6": 4176.6416, "encoder_q-layer.7": 4450.2671, "encoder_q-layer.8": 5142.4971, "encoder_q-layer.9": 4758.0601, "epoch": 0.85, "inbatch_neg_score": 0.0987, "inbatch_pos_score": 0.7954, "learning_rate": 7.055555555555556e-06, "loss": 3.2863, "norm_diff": 0.0969, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7217.2455, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0995, "query_norm": 1.3001, "queue_k_norm": 1.3966, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.618, "sent_len_1": 66.8761, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0325, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 87300 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.2879, "doc_norm": 1.3927, "encoder_q-embeddings": 3802.1084, "encoder_q-layer.0": 2361.2993, "encoder_q-layer.1": 2549.5713, "encoder_q-layer.10": 4882.7598, "encoder_q-layer.11": 10813.5225, "encoder_q-layer.2": 2853.8186, "encoder_q-layer.3": 2968.4355, "encoder_q-layer.4": 3190.7463, "encoder_q-layer.5": 3238.083, "encoder_q-layer.6": 3819.1853, "encoder_q-layer.7": 4281.521, "encoder_q-layer.8": 5386.0098, "encoder_q-layer.9": 4889.2246, "epoch": 0.85, "inbatch_neg_score": 0.1036, "inbatch_pos_score": 0.7778, "learning_rate": 7.000000000000001e-06, "loss": 3.2879, "norm_diff": 0.1055, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7081.9771, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.103, "query_norm": 1.2872, "queue_k_norm": 1.3956, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5849, "sent_len_1": 66.5803, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5525, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87400 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.2911, "doc_norm": 1.4018, "encoder_q-embeddings": 10002.8447, "encoder_q-layer.0": 7685.4888, "encoder_q-layer.1": 7134.8955, "encoder_q-layer.10": 4686.0981, "encoder_q-layer.11": 9993.7305, "encoder_q-layer.2": 8596.5498, "encoder_q-layer.3": 8593.4561, "encoder_q-layer.4": 8904.3516, "encoder_q-layer.5": 7408.0254, "encoder_q-layer.6": 6665.6846, "encoder_q-layer.7": 6566.4346, "encoder_q-layer.8": 6774.0469, "encoder_q-layer.9": 4701.9165, "epoch": 0.85, "inbatch_neg_score": 0.1039, "inbatch_pos_score": 0.8071, "learning_rate": 6.944444444444445e-06, "loss": 3.2911, "norm_diff": 0.1171, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11809.3916, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1035, "query_norm": 1.2847, "queue_k_norm": 1.3948, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4482, "sent_len_1": 66.7277, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.42, "stdk": 0.0491, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87500 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.2778, "doc_norm": 1.3941, "encoder_q-embeddings": 3850.5745, "encoder_q-layer.0": 2754.3933, "encoder_q-layer.1": 2935.9348, "encoder_q-layer.10": 4761.958, "encoder_q-layer.11": 10529.3652, "encoder_q-layer.2": 3378.0293, "encoder_q-layer.3": 3339.4739, "encoder_q-layer.4": 3510.377, "encoder_q-layer.5": 3512.084, "encoder_q-layer.6": 3725.9495, "encoder_q-layer.7": 4006.1377, "encoder_q-layer.8": 4812.1089, "encoder_q-layer.9": 4420.2124, "epoch": 0.86, "inbatch_neg_score": 0.1014, "inbatch_pos_score": 0.7954, "learning_rate": 6.888888888888889e-06, "loss": 3.2778, "norm_diff": 0.1121, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6956.6521, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1016, "query_norm": 1.282, "queue_k_norm": 1.3956, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.69, "sent_len_1": 66.7264, "sent_max_len_0": 127.995, "sent_max_len_1": 189.2287, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87600 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.2737, "doc_norm": 1.3909, "encoder_q-embeddings": 4190.4209, "encoder_q-layer.0": 2884.5352, "encoder_q-layer.1": 3039.5393, "encoder_q-layer.10": 5290.499, "encoder_q-layer.11": 11510.2979, "encoder_q-layer.2": 3598.1001, "encoder_q-layer.3": 3789.0396, "encoder_q-layer.4": 4110.4712, "encoder_q-layer.5": 4158.2319, "encoder_q-layer.6": 4691.2466, "encoder_q-layer.7": 5018.4307, "encoder_q-layer.8": 5642.23, "encoder_q-layer.9": 5407.875, "epoch": 0.86, "inbatch_neg_score": 0.102, "inbatch_pos_score": 0.7759, "learning_rate": 6.833333333333333e-06, "loss": 3.2737, "norm_diff": 0.1189, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7764.1408, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1032, "query_norm": 1.2721, "queue_k_norm": 1.3971, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6791, "sent_len_1": 66.8117, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.64, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 87700 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.2771, "doc_norm": 1.3973, "encoder_q-embeddings": 3571.1804, "encoder_q-layer.0": 2426.9563, "encoder_q-layer.1": 2593.4731, "encoder_q-layer.10": 5112.7021, "encoder_q-layer.11": 11073.8066, "encoder_q-layer.2": 2965.6311, "encoder_q-layer.3": 3011.6091, "encoder_q-layer.4": 3264.0742, "encoder_q-layer.5": 3537.6472, "encoder_q-layer.6": 3899.5166, "encoder_q-layer.7": 4302.1753, "encoder_q-layer.8": 5003.7607, "encoder_q-layer.9": 4738.3823, "epoch": 0.86, "inbatch_neg_score": 0.103, "inbatch_pos_score": 0.7686, "learning_rate": 6.777777777777779e-06, "loss": 3.2771, "norm_diff": 0.1218, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7007.869, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.103, "query_norm": 1.2756, "queue_k_norm": 1.3978, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.623, "sent_len_1": 66.7837, "sent_max_len_0": 128.0, "sent_max_len_1": 192.0775, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 87800 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.2713, "doc_norm": 1.4022, "encoder_q-embeddings": 4943.8525, "encoder_q-layer.0": 3303.7227, "encoder_q-layer.1": 3437.8525, "encoder_q-layer.10": 5068.9287, "encoder_q-layer.11": 10694.1738, "encoder_q-layer.2": 3920.4724, "encoder_q-layer.3": 4094.8323, "encoder_q-layer.4": 4607.1299, "encoder_q-layer.5": 4615.9722, "encoder_q-layer.6": 5078.0811, "encoder_q-layer.7": 5215.9438, "encoder_q-layer.8": 5668.8989, "encoder_q-layer.9": 4724.1084, "epoch": 0.86, "inbatch_neg_score": 0.1081, "inbatch_pos_score": 0.8237, "learning_rate": 6.722222222222223e-06, "loss": 3.2713, "norm_diff": 0.1029, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7977.2286, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1069, "query_norm": 1.2993, "queue_k_norm": 1.3974, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6202, "sent_len_1": 66.6235, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2962, "stdk": 0.0491, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 87900 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.3025, "doc_norm": 1.3974, "encoder_q-embeddings": 4619.7236, "encoder_q-layer.0": 3221.4766, "encoder_q-layer.1": 3463.4617, "encoder_q-layer.10": 4972.958, "encoder_q-layer.11": 10555.4355, "encoder_q-layer.2": 3926.0547, "encoder_q-layer.3": 4102.2266, "encoder_q-layer.4": 4640.7344, "encoder_q-layer.5": 4983.271, "encoder_q-layer.6": 5615.5376, "encoder_q-layer.7": 5774.3433, "encoder_q-layer.8": 5904.895, "encoder_q-layer.9": 4831.9443, "epoch": 0.86, "inbatch_neg_score": 0.1044, "inbatch_pos_score": 0.7715, "learning_rate": 6.666666666666667e-06, "loss": 3.3025, "norm_diff": 0.1362, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7961.9112, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1045, "query_norm": 1.2612, "queue_k_norm": 1.3978, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5864, "sent_len_1": 66.9471, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0575, "stdk": 0.049, "stdq": 0.0445, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 88000 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.279, "doc_norm": 1.4009, "encoder_q-embeddings": 4177.4819, "encoder_q-layer.0": 2669.8979, "encoder_q-layer.1": 3042.4717, "encoder_q-layer.10": 5038.3013, "encoder_q-layer.11": 10926.7949, "encoder_q-layer.2": 3525.2507, "encoder_q-layer.3": 3640.6416, "encoder_q-layer.4": 4133.771, "encoder_q-layer.5": 4038.3657, "encoder_q-layer.6": 4708.7451, "encoder_q-layer.7": 4480.0654, "encoder_q-layer.8": 5117.5161, "encoder_q-layer.9": 4641.0229, "epoch": 0.86, "inbatch_neg_score": 0.1047, "inbatch_pos_score": 0.7983, "learning_rate": 6.611111111111111e-06, "loss": 3.279, "norm_diff": 0.1307, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7391.6194, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.105, "query_norm": 1.2702, "queue_k_norm": 1.3988, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.515, "sent_len_1": 66.8793, "sent_max_len_0": 127.9912, "sent_max_len_1": 187.085, "stdk": 0.0491, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 88100 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.2763, "doc_norm": 1.4077, "encoder_q-embeddings": 3456.7578, "encoder_q-layer.0": 2326.9946, "encoder_q-layer.1": 2422.4272, "encoder_q-layer.10": 4815.5142, "encoder_q-layer.11": 10347.1299, "encoder_q-layer.2": 2691.5525, "encoder_q-layer.3": 2713.3743, "encoder_q-layer.4": 2882.3457, "encoder_q-layer.5": 3020.4521, "encoder_q-layer.6": 3656.8159, "encoder_q-layer.7": 4075.8066, "encoder_q-layer.8": 4889.334, "encoder_q-layer.9": 4548.1294, "epoch": 0.86, "inbatch_neg_score": 0.1071, "inbatch_pos_score": 0.7949, "learning_rate": 6.555555555555556e-06, "loss": 3.2763, "norm_diff": 0.1287, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6667.9437, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1083, "query_norm": 1.279, "queue_k_norm": 1.3989, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.717, "sent_len_1": 66.8737, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1213, "stdk": 0.0494, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 88200 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.2767, "doc_norm": 1.3993, "encoder_q-embeddings": 4188.0493, "encoder_q-layer.0": 2830.7183, "encoder_q-layer.1": 3036.2981, "encoder_q-layer.10": 4845.7871, "encoder_q-layer.11": 10506.8662, "encoder_q-layer.2": 3503.197, "encoder_q-layer.3": 3617.9185, "encoder_q-layer.4": 3904.0515, "encoder_q-layer.5": 3985.1021, "encoder_q-layer.6": 4353.2134, "encoder_q-layer.7": 4847.6562, "encoder_q-layer.8": 5340.7769, "encoder_q-layer.9": 4603.646, "epoch": 0.86, "inbatch_neg_score": 0.1051, "inbatch_pos_score": 0.7788, "learning_rate": 6.5000000000000004e-06, "loss": 3.2767, "norm_diff": 0.1363, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7420.8135, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1042, "query_norm": 1.263, "queue_k_norm": 1.3995, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7242, "sent_len_1": 66.649, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1, "stdk": 0.049, "stdq": 0.0447, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 88300 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.3054, "doc_norm": 1.403, "encoder_q-embeddings": 6136.7925, "encoder_q-layer.0": 4446.105, "encoder_q-layer.1": 5144.7275, "encoder_q-layer.10": 4690.2046, "encoder_q-layer.11": 10765.0117, "encoder_q-layer.2": 5949.1069, "encoder_q-layer.3": 5921.4683, "encoder_q-layer.4": 6064.626, "encoder_q-layer.5": 6551.313, "encoder_q-layer.6": 7140.0269, "encoder_q-layer.7": 7572.9531, "encoder_q-layer.8": 6092.8438, "encoder_q-layer.9": 4669.5508, "epoch": 0.86, "inbatch_neg_score": 0.1047, "inbatch_pos_score": 0.792, "learning_rate": 6.4444444444444445e-06, "loss": 3.3054, "norm_diff": 0.1289, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9622.524, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1049, "query_norm": 1.2741, "queue_k_norm": 1.3986, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5324, "sent_len_1": 66.7279, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0037, "stdk": 0.0492, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 88400 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.3029, "doc_norm": 1.3972, "encoder_q-embeddings": 6393.687, "encoder_q-layer.0": 4944.5854, "encoder_q-layer.1": 4618.3379, "encoder_q-layer.10": 4593.1108, "encoder_q-layer.11": 10253.6416, "encoder_q-layer.2": 4876.335, "encoder_q-layer.3": 5001.0479, "encoder_q-layer.4": 5286.7095, "encoder_q-layer.5": 4536.7949, "encoder_q-layer.6": 4491.1709, "encoder_q-layer.7": 4864.0171, "encoder_q-layer.8": 5189.6621, "encoder_q-layer.9": 4586.3516, "epoch": 0.86, "inbatch_neg_score": 0.1059, "inbatch_pos_score": 0.7847, "learning_rate": 6.3888888888888885e-06, "loss": 3.3029, "norm_diff": 0.1265, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8329.0506, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1067, "query_norm": 1.2707, "queue_k_norm": 1.3974, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.2995, "sent_len_1": 66.5842, "sent_max_len_0": 128.0, "sent_max_len_1": 190.545, "stdk": 0.049, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 88500 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.2843, "doc_norm": 1.4, "encoder_q-embeddings": 3457.2251, "encoder_q-layer.0": 2357.5981, "encoder_q-layer.1": 2420.0127, "encoder_q-layer.10": 5165.5449, "encoder_q-layer.11": 11202.2109, "encoder_q-layer.2": 2708.5356, "encoder_q-layer.3": 2828.7222, "encoder_q-layer.4": 2944.1597, "encoder_q-layer.5": 3062.9592, "encoder_q-layer.6": 3682.3066, "encoder_q-layer.7": 4210.6992, "encoder_q-layer.8": 5007.7725, "encoder_q-layer.9": 4813.3921, "epoch": 0.87, "inbatch_neg_score": 0.1041, "inbatch_pos_score": 0.7666, "learning_rate": 6.333333333333334e-06, "loss": 3.2843, "norm_diff": 0.1425, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6958.631, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.105, "query_norm": 1.2574, "queue_k_norm": 1.3974, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5415, "sent_len_1": 66.8715, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8625, "stdk": 0.0491, "stdq": 0.0445, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 88600 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.2966, "doc_norm": 1.3948, "encoder_q-embeddings": 3556.6958, "encoder_q-layer.0": 2364.7676, "encoder_q-layer.1": 2477.2454, "encoder_q-layer.10": 4663.9277, "encoder_q-layer.11": 10859.0811, "encoder_q-layer.2": 2726.6724, "encoder_q-layer.3": 2814.3684, "encoder_q-layer.4": 2961.1047, "encoder_q-layer.5": 3196.4407, "encoder_q-layer.6": 3589.2625, "encoder_q-layer.7": 4334.0137, "encoder_q-layer.8": 4939.583, "encoder_q-layer.9": 4436.9385, "epoch": 0.87, "inbatch_neg_score": 0.1083, "inbatch_pos_score": 0.7866, "learning_rate": 6.277777777777778e-06, "loss": 3.2966, "norm_diff": 0.118, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6768.1601, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1075, "query_norm": 1.2768, "queue_k_norm": 1.398, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5284, "sent_len_1": 66.7857, "sent_max_len_0": 127.9975, "sent_max_len_1": 189.0625, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 88700 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.2641, "doc_norm": 1.3956, "encoder_q-embeddings": 3716.6721, "encoder_q-layer.0": 2497.5417, "encoder_q-layer.1": 2686.9446, "encoder_q-layer.10": 5092.3213, "encoder_q-layer.11": 10853.168, "encoder_q-layer.2": 3012.9902, "encoder_q-layer.3": 3095.9033, "encoder_q-layer.4": 3426.7043, "encoder_q-layer.5": 3540.7344, "encoder_q-layer.6": 4023.3391, "encoder_q-layer.7": 4398.6772, "encoder_q-layer.8": 5319.6831, "encoder_q-layer.9": 4819.4731, "epoch": 0.87, "inbatch_neg_score": 0.1092, "inbatch_pos_score": 0.7725, "learning_rate": 6.222222222222222e-06, "loss": 3.2641, "norm_diff": 0.1261, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7149.9585, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1091, "query_norm": 1.2696, "queue_k_norm": 1.3974, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.8193, "sent_len_1": 66.888, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1225, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 88800 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.288, "doc_norm": 1.3929, "encoder_q-embeddings": 4137.5142, "encoder_q-layer.0": 2677.2212, "encoder_q-layer.1": 2895.1023, "encoder_q-layer.10": 4651.2261, "encoder_q-layer.11": 10363.9717, "encoder_q-layer.2": 3271.9575, "encoder_q-layer.3": 3416.6099, "encoder_q-layer.4": 3718.1936, "encoder_q-layer.5": 4026.6941, "encoder_q-layer.6": 4320.2114, "encoder_q-layer.7": 4678.0254, "encoder_q-layer.8": 5248.7979, "encoder_q-layer.9": 4765.8677, "epoch": 0.87, "inbatch_neg_score": 0.1079, "inbatch_pos_score": 0.7803, "learning_rate": 6.166666666666667e-06, "loss": 3.288, "norm_diff": 0.1245, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7270.704, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1067, "query_norm": 1.2685, "queue_k_norm": 1.3981, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5955, "sent_len_1": 66.8215, "sent_max_len_0": 127.995, "sent_max_len_1": 189.1637, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 88900 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.2799, "doc_norm": 1.4033, "encoder_q-embeddings": 3954.3599, "encoder_q-layer.0": 2765.8889, "encoder_q-layer.1": 3018.2476, "encoder_q-layer.10": 5325.5317, "encoder_q-layer.11": 10339.0234, "encoder_q-layer.2": 3381.3296, "encoder_q-layer.3": 3518.0601, "encoder_q-layer.4": 3741.5371, "encoder_q-layer.5": 3814.4609, "encoder_q-layer.6": 4094.3528, "encoder_q-layer.7": 4706.6763, "encoder_q-layer.8": 5152.5776, "encoder_q-layer.9": 4674.0029, "epoch": 0.87, "inbatch_neg_score": 0.1068, "inbatch_pos_score": 0.772, "learning_rate": 6.111111111111111e-06, "loss": 3.2799, "norm_diff": 0.1279, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7138.0702, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1076, "query_norm": 1.2754, "queue_k_norm": 1.3963, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5544, "sent_len_1": 66.7232, "sent_max_len_0": 127.9938, "sent_max_len_1": 191.2337, "stdk": 0.0492, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89000 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 3.2796, "doc_norm": 1.3992, "encoder_q-embeddings": 3967.271, "encoder_q-layer.0": 2755.677, "encoder_q-layer.1": 3007.7566, "encoder_q-layer.10": 5165.9829, "encoder_q-layer.11": 10774.9131, "encoder_q-layer.2": 3487.0193, "encoder_q-layer.3": 3754.1736, "encoder_q-layer.4": 3902.2837, "encoder_q-layer.5": 3891.8955, "encoder_q-layer.6": 4131.606, "encoder_q-layer.7": 4736.9844, "encoder_q-layer.8": 5588.6167, "encoder_q-layer.9": 4811.7134, "epoch": 0.87, "inbatch_neg_score": 0.1038, "inbatch_pos_score": 0.8066, "learning_rate": 6.055555555555556e-06, "loss": 3.2796, "norm_diff": 0.1115, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7321.1447, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1044, "query_norm": 1.2878, "queue_k_norm": 1.3992, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.8891, "sent_len_1": 66.7044, "sent_max_len_0": 127.99, "sent_max_len_1": 189.115, "stdk": 0.0491, "stdq": 0.0456, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 89100 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 3.2896, "doc_norm": 1.4037, "encoder_q-embeddings": 1808.5148, "encoder_q-layer.0": 1180.0032, "encoder_q-layer.1": 1213.1191, "encoder_q-layer.10": 2459.4675, "encoder_q-layer.11": 5126.4727, "encoder_q-layer.2": 1356.8549, "encoder_q-layer.3": 1405.0746, "encoder_q-layer.4": 1508.7161, "encoder_q-layer.5": 1537.6853, "encoder_q-layer.6": 1752.5668, "encoder_q-layer.7": 2048.4353, "encoder_q-layer.8": 2629.8357, "encoder_q-layer.9": 2368.3198, "epoch": 0.87, "inbatch_neg_score": 0.1053, "inbatch_pos_score": 0.8193, "learning_rate": 6e-06, "loss": 3.2896, "norm_diff": 0.1278, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3371.9073, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1058, "query_norm": 1.2758, "queue_k_norm": 1.3964, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5657, "sent_len_1": 66.8813, "sent_max_len_0": 128.0, "sent_max_len_1": 190.415, "stdk": 0.0492, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89200 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.2883, "doc_norm": 1.4054, "encoder_q-embeddings": 1889.7834, "encoder_q-layer.0": 1255.2748, "encoder_q-layer.1": 1325.5336, "encoder_q-layer.10": 2310.2532, "encoder_q-layer.11": 5242.3398, "encoder_q-layer.2": 1519.8566, "encoder_q-layer.3": 1575.4161, "encoder_q-layer.4": 1723.5371, "encoder_q-layer.5": 1865.3098, "encoder_q-layer.6": 2066.1882, "encoder_q-layer.7": 2223.0022, "encoder_q-layer.8": 2636.9353, "encoder_q-layer.9": 2331.4702, "epoch": 0.87, "inbatch_neg_score": 0.1062, "inbatch_pos_score": 0.8037, "learning_rate": 5.944444444444445e-06, "loss": 3.2883, "norm_diff": 0.1213, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3545.2919, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1066, "query_norm": 1.2841, "queue_k_norm": 1.3972, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6258, "sent_len_1": 66.8227, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9712, "stdk": 0.0493, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89300 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.2941, "doc_norm": 1.3931, "encoder_q-embeddings": 1830.7396, "encoder_q-layer.0": 1215.0055, "encoder_q-layer.1": 1299.9011, "encoder_q-layer.10": 2496.3933, "encoder_q-layer.11": 5328.6299, "encoder_q-layer.2": 1480.4729, "encoder_q-layer.3": 1497.2665, "encoder_q-layer.4": 1673.3162, "encoder_q-layer.5": 1718.3274, "encoder_q-layer.6": 1978.5996, "encoder_q-layer.7": 2168.3162, "encoder_q-layer.8": 2530.4656, "encoder_q-layer.9": 2491.2815, "epoch": 0.87, "inbatch_neg_score": 0.1074, "inbatch_pos_score": 0.771, "learning_rate": 5.888888888888889e-06, "loss": 3.2941, "norm_diff": 0.1339, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3467.3885, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1058, "query_norm": 1.2591, "queue_k_norm": 1.3969, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5632, "sent_len_1": 66.9262, "sent_max_len_0": 127.9963, "sent_max_len_1": 188.98, "stdk": 0.0488, "stdq": 0.0445, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 89400 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.2839, "doc_norm": 1.4026, "encoder_q-embeddings": 3024.2959, "encoder_q-layer.0": 2215.9812, "encoder_q-layer.1": 2593.7944, "encoder_q-layer.10": 2436.5759, "encoder_q-layer.11": 5657.0674, "encoder_q-layer.2": 3029.564, "encoder_q-layer.3": 3035.3665, "encoder_q-layer.4": 3488.2009, "encoder_q-layer.5": 3346.6375, "encoder_q-layer.6": 3262.322, "encoder_q-layer.7": 3265.0552, "encoder_q-layer.8": 3090.8232, "encoder_q-layer.9": 2364.7, "epoch": 0.87, "inbatch_neg_score": 0.1041, "inbatch_pos_score": 0.7715, "learning_rate": 5.833333333333334e-06, "loss": 3.2839, "norm_diff": 0.1417, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4825.0511, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.104, "query_norm": 1.2609, "queue_k_norm": 1.3974, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6025, "sent_len_1": 67.0566, "sent_max_len_0": 128.0, "sent_max_len_1": 191.3762, "stdk": 0.0491, "stdq": 0.0447, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 89500 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2823, "doc_norm": 1.4038, "encoder_q-embeddings": 1815.2554, "encoder_q-layer.0": 1163.0641, "encoder_q-layer.1": 1234.826, "encoder_q-layer.10": 2296.6316, "encoder_q-layer.11": 5244.4658, "encoder_q-layer.2": 1381.1385, "encoder_q-layer.3": 1478.4069, "encoder_q-layer.4": 1514.3486, "encoder_q-layer.5": 1596.9781, "encoder_q-layer.6": 1830.843, "encoder_q-layer.7": 2075.2388, "encoder_q-layer.8": 2478.2878, "encoder_q-layer.9": 2288.0183, "epoch": 0.87, "inbatch_neg_score": 0.1049, "inbatch_pos_score": 0.7969, "learning_rate": 5.777777777777778e-06, "loss": 3.2823, "norm_diff": 0.1358, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3416.8844, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1055, "query_norm": 1.2681, "queue_k_norm": 1.4004, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5685, "sent_len_1": 66.8666, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8425, "stdk": 0.0492, "stdq": 0.045, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 89600 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.2645, "doc_norm": 1.3945, "encoder_q-embeddings": 1913.1603, "encoder_q-layer.0": 1223.4896, "encoder_q-layer.1": 1314.5745, "encoder_q-layer.10": 2623.6741, "encoder_q-layer.11": 5481.3433, "encoder_q-layer.2": 1511.3219, "encoder_q-layer.3": 1583.5497, "encoder_q-layer.4": 1672.9, "encoder_q-layer.5": 1737.4834, "encoder_q-layer.6": 1978.912, "encoder_q-layer.7": 2172.5657, "encoder_q-layer.8": 2709.1799, "encoder_q-layer.9": 2441.5278, "epoch": 0.88, "inbatch_neg_score": 0.1069, "inbatch_pos_score": 0.7939, "learning_rate": 5.722222222222223e-06, "loss": 3.2645, "norm_diff": 0.117, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3580.7613, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1072, "query_norm": 1.2775, "queue_k_norm": 1.3993, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.8277, "sent_len_1": 66.9356, "sent_max_len_0": 127.9925, "sent_max_len_1": 191.0288, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 89700 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.2487, "doc_norm": 1.395, "encoder_q-embeddings": 1803.6259, "encoder_q-layer.0": 1232.4043, "encoder_q-layer.1": 1311.4215, "encoder_q-layer.10": 2389.1169, "encoder_q-layer.11": 5475.6069, "encoder_q-layer.2": 1482.7843, "encoder_q-layer.3": 1507.6588, "encoder_q-layer.4": 1640.5557, "encoder_q-layer.5": 1742.1289, "encoder_q-layer.6": 1974.2534, "encoder_q-layer.7": 2190.6406, "encoder_q-layer.8": 2402.115, "encoder_q-layer.9": 2256.5513, "epoch": 0.88, "inbatch_neg_score": 0.1037, "inbatch_pos_score": 0.7773, "learning_rate": 5.666666666666667e-06, "loss": 3.2487, "norm_diff": 0.1179, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3465.4651, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.104, "query_norm": 1.2771, "queue_k_norm": 1.3996, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7457, "sent_len_1": 66.6891, "sent_max_len_0": 127.9813, "sent_max_len_1": 188.9412, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 89800 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.2734, "doc_norm": 1.394, "encoder_q-embeddings": 2188.6826, "encoder_q-layer.0": 1634.8921, "encoder_q-layer.1": 1702.4546, "encoder_q-layer.10": 2404.6042, "encoder_q-layer.11": 5535.8867, "encoder_q-layer.2": 2071.4636, "encoder_q-layer.3": 1930.9324, "encoder_q-layer.4": 1981.5574, "encoder_q-layer.5": 1943.8911, "encoder_q-layer.6": 2092.5417, "encoder_q-layer.7": 2445.324, "encoder_q-layer.8": 2800.1675, "encoder_q-layer.9": 2280.9697, "epoch": 0.88, "inbatch_neg_score": 0.1039, "inbatch_pos_score": 0.7808, "learning_rate": 5.611111111111112e-06, "loss": 3.2734, "norm_diff": 0.1365, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3812.294, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1041, "query_norm": 1.2575, "queue_k_norm": 1.3998, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5466, "sent_len_1": 66.8918, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.75, "stdk": 0.0488, "stdq": 0.0445, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 89900 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.2877, "doc_norm": 1.3952, "encoder_q-embeddings": 1815.8586, "encoder_q-layer.0": 1220.4382, "encoder_q-layer.1": 1293.7249, "encoder_q-layer.10": 2429.6643, "encoder_q-layer.11": 5624.9375, "encoder_q-layer.2": 1525.9553, "encoder_q-layer.3": 1597.1902, "encoder_q-layer.4": 1689.9147, "encoder_q-layer.5": 1764.4453, "encoder_q-layer.6": 1974.5969, "encoder_q-layer.7": 2208.4329, "encoder_q-layer.8": 2543.2559, "encoder_q-layer.9": 2218.6633, "epoch": 0.88, "inbatch_neg_score": 0.1018, "inbatch_pos_score": 0.7588, "learning_rate": 5.555555555555556e-06, "loss": 3.2877, "norm_diff": 0.1339, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3589.8876, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1017, "query_norm": 1.2613, "queue_k_norm": 1.3988, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5208, "sent_len_1": 67.1235, "sent_max_len_0": 127.9975, "sent_max_len_1": 192.2812, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 90000 }, { "dev_runtime": 26.7363, "dev_samples_per_second": 2.394, "dev_steps_per_second": 0.037, "epoch": 0.88, "step": 90000, "test_accuracy": 94.0185546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.34201061725616455, "test_doc_norm": 1.373220443725586, "test_inbatch_neg_score": 0.4779123067855835, "test_inbatch_pos_score": 1.4251104593276978, "test_loss": 0.34201061725616455, "test_loss_align": 0.9784166812896729, "test_loss_unif": 3.956362724304199, "test_loss_unif_q@queue": 3.956362247467041, "test_norm_diff": 0.012098046019673347, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.107520192861557, "test_query_norm": 1.3827805519104004, "test_queue_k_norm": 1.3988728523254395, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04271689057350159, "test_stdq": 0.04282587766647339, "test_stdqueue_k": 0.04912446066737175, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.7363, "dev_samples_per_second": 2.394, "dev_steps_per_second": 0.037, "epoch": 0.88, "eval_beir-arguana_ndcg@10": 0.37429, "eval_beir-arguana_recall@10": 0.62731, "eval_beir-arguana_recall@100": 0.91465, "eval_beir-arguana_recall@20": 0.76245, "eval_beir-avg_ndcg@10": 0.38502525, "eval_beir-avg_recall@10": 0.4547698333333333, "eval_beir-avg_recall@100": 0.6308655833333333, "eval_beir-avg_recall@20": 0.5168036666666667, "eval_beir-cqadupstack_ndcg@10": 0.2801425, "eval_beir-cqadupstack_recall@10": 0.37676833333333337, "eval_beir-cqadupstack_recall@100": 0.6058258333333334, "eval_beir-cqadupstack_recall@20": 0.4450366666666667, "eval_beir-fiqa_ndcg@10": 0.24681, "eval_beir-fiqa_recall@10": 0.30681, "eval_beir-fiqa_recall@100": 0.57177, "eval_beir-fiqa_recall@20": 0.39205, "eval_beir-nfcorpus_ndcg@10": 0.29541, "eval_beir-nfcorpus_recall@10": 0.14371, "eval_beir-nfcorpus_recall@100": 0.27229, "eval_beir-nfcorpus_recall@20": 0.17823, "eval_beir-nq_ndcg@10": 0.29223, "eval_beir-nq_recall@10": 0.4769, "eval_beir-nq_recall@100": 0.79765, "eval_beir-nq_recall@20": 0.60006, "eval_beir-quora_ndcg@10": 0.81105, "eval_beir-quora_recall@10": 0.90817, "eval_beir-quora_recall@100": 0.98276, "eval_beir-quora_recall@20": 0.94249, "eval_beir-scidocs_ndcg@10": 0.15556, "eval_beir-scidocs_recall@10": 0.16303, "eval_beir-scidocs_recall@100": 0.37002, "eval_beir-scidocs_recall@20": 0.22158, "eval_beir-scifact_ndcg@10": 0.62574, "eval_beir-scifact_recall@10": 0.78122, "eval_beir-scifact_recall@100": 0.90656, "eval_beir-scifact_recall@20": 0.83789, "eval_beir-trec-covid_ndcg@10": 0.57137, "eval_beir-trec-covid_recall@10": 0.628, "eval_beir-trec-covid_recall@100": 0.4512, "eval_beir-trec-covid_recall@20": 0.595, "eval_beir-webis-touche2020_ndcg@10": 0.19765, "eval_beir-webis-touche2020_recall@10": 0.13578, "eval_beir-webis-touche2020_recall@100": 0.43593, "eval_beir-webis-touche2020_recall@20": 0.19325, "eval_senteval-avg_sts": 0.7631703131960845, "eval_senteval-sickr_spearman": 0.7281904510250542, "eval_senteval-stsb_spearman": 0.7981501753671149, "step": 90000, "test_accuracy": 94.0185546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.34201061725616455, "test_doc_norm": 1.373220443725586, "test_inbatch_neg_score": 0.4779123067855835, "test_inbatch_pos_score": 1.4251104593276978, "test_loss": 0.34201061725616455, "test_loss_align": 0.9784166812896729, "test_loss_unif": 3.956362724304199, "test_loss_unif_q@queue": 3.956362247467041, "test_norm_diff": 0.012098046019673347, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.107520192861557, "test_query_norm": 1.3827805519104004, "test_queue_k_norm": 1.3988728523254395, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04271689057350159, "test_stdq": 0.04282587766647339, "test_stdqueue_k": 0.04912446066737175, "test_stdqueue_q": 0.0 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.2963, "doc_norm": 1.4083, "encoder_q-embeddings": 2088.9268, "encoder_q-layer.0": 1498.9474, "encoder_q-layer.1": 1567.3336, "encoder_q-layer.10": 2350.2463, "encoder_q-layer.11": 5590.4756, "encoder_q-layer.2": 1779.4172, "encoder_q-layer.3": 1824.0311, "encoder_q-layer.4": 1933.6707, "encoder_q-layer.5": 1876.9196, "encoder_q-layer.6": 2077.6465, "encoder_q-layer.7": 2349.8525, "encoder_q-layer.8": 2647.5061, "encoder_q-layer.9": 2359.3977, "epoch": 0.88, "inbatch_neg_score": 0.1032, "inbatch_pos_score": 0.7798, "learning_rate": 5.500000000000001e-06, "loss": 3.2963, "norm_diff": 0.1458, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3716.555, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1034, "query_norm": 1.2625, "queue_k_norm": 1.3986, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6201, "sent_len_1": 66.6257, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0437, "stdk": 0.0494, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 90100 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.2941, "doc_norm": 1.3969, "encoder_q-embeddings": 1786.7593, "encoder_q-layer.0": 1206.7985, "encoder_q-layer.1": 1252.7307, "encoder_q-layer.10": 2343.4829, "encoder_q-layer.11": 5292.3574, "encoder_q-layer.2": 1409.0614, "encoder_q-layer.3": 1477.7778, "encoder_q-layer.4": 1557.1113, "encoder_q-layer.5": 1624.6772, "encoder_q-layer.6": 1835.3676, "encoder_q-layer.7": 2067.5266, "encoder_q-layer.8": 2473.4976, "encoder_q-layer.9": 2241.9739, "epoch": 0.88, "inbatch_neg_score": 0.1024, "inbatch_pos_score": 0.7749, "learning_rate": 5.444444444444445e-06, "loss": 3.2941, "norm_diff": 0.1301, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3397.8324, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1035, "query_norm": 1.2668, "queue_k_norm": 1.4003, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6324, "sent_len_1": 66.721, "sent_max_len_0": 127.9838, "sent_max_len_1": 189.7962, "stdk": 0.049, "stdq": 0.0449, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 90200 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.2757, "doc_norm": 1.3987, "encoder_q-embeddings": 1843.9413, "encoder_q-layer.0": 1215.399, "encoder_q-layer.1": 1263.1083, "encoder_q-layer.10": 2393.8223, "encoder_q-layer.11": 5130.4102, "encoder_q-layer.2": 1467.3186, "encoder_q-layer.3": 1466.2428, "encoder_q-layer.4": 1569.6608, "encoder_q-layer.5": 1643.6885, "encoder_q-layer.6": 1849.1183, "encoder_q-layer.7": 2133.0405, "encoder_q-layer.8": 2448.5859, "encoder_q-layer.9": 2285.363, "epoch": 0.88, "inbatch_neg_score": 0.1027, "inbatch_pos_score": 0.793, "learning_rate": 5.388888888888889e-06, "loss": 3.2757, "norm_diff": 0.1278, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3383.8923, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1036, "query_norm": 1.2709, "queue_k_norm": 1.3992, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5815, "sent_len_1": 66.6948, "sent_max_len_0": 127.9825, "sent_max_len_1": 188.0, "stdk": 0.049, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 90300 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.2658, "doc_norm": 1.3976, "encoder_q-embeddings": 2138.2041, "encoder_q-layer.0": 1398.4673, "encoder_q-layer.1": 1497.6519, "encoder_q-layer.10": 2600.2366, "encoder_q-layer.11": 5342.269, "encoder_q-layer.2": 1731.2961, "encoder_q-layer.3": 1825.8436, "encoder_q-layer.4": 1915.147, "encoder_q-layer.5": 1959.7354, "encoder_q-layer.6": 2126.1006, "encoder_q-layer.7": 2405.1172, "encoder_q-layer.8": 2692.8247, "encoder_q-layer.9": 2409.7666, "epoch": 0.88, "inbatch_neg_score": 0.1047, "inbatch_pos_score": 0.7646, "learning_rate": 5.333333333333334e-06, "loss": 3.2658, "norm_diff": 0.132, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3716.7431, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1044, "query_norm": 1.2655, "queue_k_norm": 1.3977, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6236, "sent_len_1": 66.9195, "sent_max_len_0": 127.99, "sent_max_len_1": 188.9263, "stdk": 0.049, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 90400 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.271, "doc_norm": 1.4062, "encoder_q-embeddings": 1787.5587, "encoder_q-layer.0": 1191.3783, "encoder_q-layer.1": 1234.4657, "encoder_q-layer.10": 2638.2905, "encoder_q-layer.11": 5624.2974, "encoder_q-layer.2": 1387.0934, "encoder_q-layer.3": 1454.1067, "encoder_q-layer.4": 1554.636, "encoder_q-layer.5": 1634.9705, "encoder_q-layer.6": 1862.882, "encoder_q-layer.7": 2068.9016, "encoder_q-layer.8": 2646.176, "encoder_q-layer.9": 2465.6389, "epoch": 0.88, "inbatch_neg_score": 0.1021, "inbatch_pos_score": 0.7896, "learning_rate": 5.277777777777778e-06, "loss": 3.271, "norm_diff": 0.1427, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3533.8007, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1035, "query_norm": 1.2634, "queue_k_norm": 1.3995, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.601, "sent_len_1": 66.6703, "sent_max_len_0": 128.0, "sent_max_len_1": 188.515, "stdk": 0.0493, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 90500 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.2857, "doc_norm": 1.3962, "encoder_q-embeddings": 1832.537, "encoder_q-layer.0": 1222.2809, "encoder_q-layer.1": 1298.8911, "encoder_q-layer.10": 2564.8643, "encoder_q-layer.11": 5305.5693, "encoder_q-layer.2": 1415.709, "encoder_q-layer.3": 1428.6289, "encoder_q-layer.4": 1557.1298, "encoder_q-layer.5": 1660.3185, "encoder_q-layer.6": 2043.8134, "encoder_q-layer.7": 2321.5354, "encoder_q-layer.8": 2736.0769, "encoder_q-layer.9": 2422.6057, "epoch": 0.88, "inbatch_neg_score": 0.1025, "inbatch_pos_score": 0.7803, "learning_rate": 5.2222222222222226e-06, "loss": 3.2857, "norm_diff": 0.1315, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3521.8954, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1032, "query_norm": 1.2647, "queue_k_norm": 1.3991, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.3167, "sent_len_1": 66.8426, "sent_max_len_0": 127.9988, "sent_max_len_1": 188.825, "stdk": 0.049, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 90600 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 3.2538, "doc_norm": 1.4004, "encoder_q-embeddings": 3189.3701, "encoder_q-layer.0": 2150.1003, "encoder_q-layer.1": 2063.8879, "encoder_q-layer.10": 2212.3594, "encoder_q-layer.11": 5076.3755, "encoder_q-layer.2": 2487.8103, "encoder_q-layer.3": 2465.4548, "encoder_q-layer.4": 2674.8518, "encoder_q-layer.5": 2521.1523, "encoder_q-layer.6": 2476.0505, "encoder_q-layer.7": 2531.8125, "encoder_q-layer.8": 2651.4023, "encoder_q-layer.9": 2240.907, "epoch": 0.89, "inbatch_neg_score": 0.1021, "inbatch_pos_score": 0.8042, "learning_rate": 5.166666666666667e-06, "loss": 3.2538, "norm_diff": 0.1143, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4209.5258, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1025, "query_norm": 1.2862, "queue_k_norm": 1.3969, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5753, "sent_len_1": 66.5242, "sent_max_len_0": 127.985, "sent_max_len_1": 189.025, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 90700 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.2836, "doc_norm": 1.3964, "encoder_q-embeddings": 2867.2778, "encoder_q-layer.0": 2023.3176, "encoder_q-layer.1": 2309.6382, "encoder_q-layer.10": 2412.3733, "encoder_q-layer.11": 5482.4385, "encoder_q-layer.2": 3178.9338, "encoder_q-layer.3": 3150.175, "encoder_q-layer.4": 3289.0044, "encoder_q-layer.5": 3400.7161, "encoder_q-layer.6": 3373.4985, "encoder_q-layer.7": 3671.2412, "encoder_q-layer.8": 3248.0076, "encoder_q-layer.9": 2413.8298, "epoch": 0.89, "inbatch_neg_score": 0.1043, "inbatch_pos_score": 0.7822, "learning_rate": 5.1111111111111115e-06, "loss": 3.2836, "norm_diff": 0.1236, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4763.7367, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1049, "query_norm": 1.2728, "queue_k_norm": 1.399, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.2678, "sent_len_1": 66.531, "sent_max_len_0": 127.9988, "sent_max_len_1": 188.6225, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 90800 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.2689, "doc_norm": 1.3996, "encoder_q-embeddings": 1921.1062, "encoder_q-layer.0": 1277.6475, "encoder_q-layer.1": 1368.5438, "encoder_q-layer.10": 2576.6838, "encoder_q-layer.11": 5643.4736, "encoder_q-layer.2": 1506.9839, "encoder_q-layer.3": 1594.2025, "encoder_q-layer.4": 1776.6592, "encoder_q-layer.5": 1878.7286, "encoder_q-layer.6": 2093.7778, "encoder_q-layer.7": 2342.5513, "encoder_q-layer.8": 2680.72, "encoder_q-layer.9": 2325.9163, "epoch": 0.89, "inbatch_neg_score": 0.1028, "inbatch_pos_score": 0.7764, "learning_rate": 5.0555555555555555e-06, "loss": 3.2689, "norm_diff": 0.1318, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3702.1508, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.103, "query_norm": 1.2679, "queue_k_norm": 1.3994, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.439, "sent_len_1": 66.7016, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5513, "stdk": 0.0491, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 90900 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.2628, "doc_norm": 1.4022, "encoder_q-embeddings": 2233.9719, "encoder_q-layer.0": 1515.6251, "encoder_q-layer.1": 1677.8678, "encoder_q-layer.10": 2389.3315, "encoder_q-layer.11": 5366.1499, "encoder_q-layer.2": 2025.2162, "encoder_q-layer.3": 2091.2585, "encoder_q-layer.4": 2293.24, "encoder_q-layer.5": 2318.2112, "encoder_q-layer.6": 2469.1218, "encoder_q-layer.7": 2432.0823, "encoder_q-layer.8": 2704.2703, "encoder_q-layer.9": 2407.1118, "epoch": 0.89, "inbatch_neg_score": 0.1021, "inbatch_pos_score": 0.7944, "learning_rate": 5e-06, "loss": 3.2628, "norm_diff": 0.1219, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3917.3551, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1024, "query_norm": 1.2803, "queue_k_norm": 1.3985, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7116, "sent_len_1": 67.2252, "sent_max_len_0": 128.0, "sent_max_len_1": 192.27, "stdk": 0.0491, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91000 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.2685, "doc_norm": 1.4062, "encoder_q-embeddings": 1765.4935, "encoder_q-layer.0": 1184.8926, "encoder_q-layer.1": 1273.6958, "encoder_q-layer.10": 2492.5518, "encoder_q-layer.11": 5290.7773, "encoder_q-layer.2": 1403.989, "encoder_q-layer.3": 1472.3672, "encoder_q-layer.4": 1564.079, "encoder_q-layer.5": 1609.5183, "encoder_q-layer.6": 1882.1853, "encoder_q-layer.7": 2155.5251, "encoder_q-layer.8": 2441.0054, "encoder_q-layer.9": 2270.2725, "epoch": 0.89, "inbatch_neg_score": 0.1047, "inbatch_pos_score": 0.7817, "learning_rate": 4.9444444444444444e-06, "loss": 3.2685, "norm_diff": 0.1447, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3452.9745, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1041, "query_norm": 1.2614, "queue_k_norm": 1.3985, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7608, "sent_len_1": 66.8161, "sent_max_len_0": 128.0, "sent_max_len_1": 191.9412, "stdk": 0.0493, "stdq": 0.0447, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91100 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.3116, "doc_norm": 1.406, "encoder_q-embeddings": 3853.5718, "encoder_q-layer.0": 2649.3247, "encoder_q-layer.1": 2783.3398, "encoder_q-layer.10": 5123.3276, "encoder_q-layer.11": 11158.79, "encoder_q-layer.2": 3129.1423, "encoder_q-layer.3": 3208.9841, "encoder_q-layer.4": 3461.0176, "encoder_q-layer.5": 3513.6807, "encoder_q-layer.6": 3846.3765, "encoder_q-layer.7": 4333.3032, "encoder_q-layer.8": 5139.2969, "encoder_q-layer.9": 4677.5171, "epoch": 0.89, "inbatch_neg_score": 0.1024, "inbatch_pos_score": 0.7471, "learning_rate": 4.888888888888889e-06, "loss": 3.3116, "norm_diff": 0.1501, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7256.7863, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1023, "query_norm": 1.2559, "queue_k_norm": 1.3985, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5954, "sent_len_1": 66.7419, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7113, "stdk": 0.0493, "stdq": 0.0445, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91200 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2745, "doc_norm": 1.4026, "encoder_q-embeddings": 4664.5737, "encoder_q-layer.0": 3104.6284, "encoder_q-layer.1": 3421.6653, "encoder_q-layer.10": 4950.6938, "encoder_q-layer.11": 11065.2734, "encoder_q-layer.2": 3891.6289, "encoder_q-layer.3": 4119.5449, "encoder_q-layer.4": 4587.9497, "encoder_q-layer.5": 4293.5381, "encoder_q-layer.6": 4849.0913, "encoder_q-layer.7": 5149.3188, "encoder_q-layer.8": 5533.1616, "encoder_q-layer.9": 4699.772, "epoch": 0.89, "inbatch_neg_score": 0.1029, "inbatch_pos_score": 0.7646, "learning_rate": 4.833333333333333e-06, "loss": 3.2745, "norm_diff": 0.1335, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7873.1031, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1036, "query_norm": 1.2691, "queue_k_norm": 1.3999, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.8145, "sent_len_1": 66.6778, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2325, "stdk": 0.0492, "stdq": 0.045, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 91300 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.2879, "doc_norm": 1.3965, "encoder_q-embeddings": 3656.6699, "encoder_q-layer.0": 2480.7544, "encoder_q-layer.1": 2580.999, "encoder_q-layer.10": 4777.1753, "encoder_q-layer.11": 10590.6924, "encoder_q-layer.2": 2946.5571, "encoder_q-layer.3": 3096.9036, "encoder_q-layer.4": 3204.9888, "encoder_q-layer.5": 3342.1963, "encoder_q-layer.6": 3824.9985, "encoder_q-layer.7": 4303.7612, "encoder_q-layer.8": 5064.8555, "encoder_q-layer.9": 4599.209, "epoch": 0.89, "inbatch_neg_score": 0.1043, "inbatch_pos_score": 0.7949, "learning_rate": 4.777777777777778e-06, "loss": 3.2879, "norm_diff": 0.1127, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6874.554, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1039, "query_norm": 1.2838, "queue_k_norm": 1.3973, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4477, "sent_len_1": 66.9658, "sent_max_len_0": 127.9912, "sent_max_len_1": 190.9187, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91400 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.2891, "doc_norm": 1.4033, "encoder_q-embeddings": 4046.6194, "encoder_q-layer.0": 2690.5701, "encoder_q-layer.1": 2842.9067, "encoder_q-layer.10": 4665.5063, "encoder_q-layer.11": 10880.3184, "encoder_q-layer.2": 3214.4539, "encoder_q-layer.3": 3292.7969, "encoder_q-layer.4": 3460.0735, "encoder_q-layer.5": 3664.6086, "encoder_q-layer.6": 4264.5059, "encoder_q-layer.7": 4633.1621, "encoder_q-layer.8": 5207.8354, "encoder_q-layer.9": 4521.9048, "epoch": 0.89, "inbatch_neg_score": 0.1037, "inbatch_pos_score": 0.7734, "learning_rate": 4.722222222222222e-06, "loss": 3.2891, "norm_diff": 0.1542, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7302.8595, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1041, "query_norm": 1.2491, "queue_k_norm": 1.3978, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5303, "sent_len_1": 66.538, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.04, "stdk": 0.0492, "stdq": 0.0442, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91500 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.2977, "doc_norm": 1.4038, "encoder_q-embeddings": 4438.0376, "encoder_q-layer.0": 2974.96, "encoder_q-layer.1": 3303.3145, "encoder_q-layer.10": 4928.9116, "encoder_q-layer.11": 11267.8799, "encoder_q-layer.2": 3744.9324, "encoder_q-layer.3": 3964.5383, "encoder_q-layer.4": 4268.2368, "encoder_q-layer.5": 4240.2729, "encoder_q-layer.6": 4798.3926, "encoder_q-layer.7": 5040.5181, "encoder_q-layer.8": 5622.104, "encoder_q-layer.9": 4806.2383, "epoch": 0.89, "inbatch_neg_score": 0.1002, "inbatch_pos_score": 0.7715, "learning_rate": 4.666666666666667e-06, "loss": 3.2977, "norm_diff": 0.1411, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7883.0005, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1008, "query_norm": 1.2627, "queue_k_norm": 1.397, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4143, "sent_len_1": 66.7133, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5687, "stdk": 0.0492, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91600 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.2688, "doc_norm": 1.4003, "encoder_q-embeddings": 6986.6089, "encoder_q-layer.0": 5341.9414, "encoder_q-layer.1": 5592.2358, "encoder_q-layer.10": 4814.2129, "encoder_q-layer.11": 10772.5889, "encoder_q-layer.2": 6377.6685, "encoder_q-layer.3": 6691.4624, "encoder_q-layer.4": 7555.7705, "encoder_q-layer.5": 6620.8467, "encoder_q-layer.6": 5927.0991, "encoder_q-layer.7": 6246.833, "encoder_q-layer.8": 6186.3833, "encoder_q-layer.9": 4893.0752, "epoch": 0.9, "inbatch_neg_score": 0.1018, "inbatch_pos_score": 0.7568, "learning_rate": 4.611111111111111e-06, "loss": 3.2688, "norm_diff": 0.1334, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10059.685, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1022, "query_norm": 1.2669, "queue_k_norm": 1.4004, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.645, "sent_len_1": 66.9124, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3013, "stdk": 0.0491, "stdq": 0.0449, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 91700 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.268, "doc_norm": 1.3961, "encoder_q-embeddings": 3738.2144, "encoder_q-layer.0": 2409.2793, "encoder_q-layer.1": 2644.8105, "encoder_q-layer.10": 4627.9917, "encoder_q-layer.11": 10590.3418, "encoder_q-layer.2": 2860.3831, "encoder_q-layer.3": 2961.2324, "encoder_q-layer.4": 3087.4036, "encoder_q-layer.5": 3292.7498, "encoder_q-layer.6": 3872.0552, "encoder_q-layer.7": 4539.4399, "encoder_q-layer.8": 4866.4048, "encoder_q-layer.9": 4460.2188, "epoch": 0.9, "inbatch_neg_score": 0.1048, "inbatch_pos_score": 0.8105, "learning_rate": 4.555555555555556e-06, "loss": 3.268, "norm_diff": 0.1165, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6954.3969, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.104, "query_norm": 1.2796, "queue_k_norm": 1.3993, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4929, "sent_len_1": 66.8035, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1175, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 91800 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.2654, "doc_norm": 1.4047, "encoder_q-embeddings": 3862.7314, "encoder_q-layer.0": 2706.1355, "encoder_q-layer.1": 3054.8596, "encoder_q-layer.10": 4884.9395, "encoder_q-layer.11": 10895.8252, "encoder_q-layer.2": 3493.9844, "encoder_q-layer.3": 3513.6897, "encoder_q-layer.4": 3919.0039, "encoder_q-layer.5": 3861.6255, "encoder_q-layer.6": 4291.0933, "encoder_q-layer.7": 4641.2217, "encoder_q-layer.8": 5415.2485, "encoder_q-layer.9": 4691.4717, "epoch": 0.9, "inbatch_neg_score": 0.1031, "inbatch_pos_score": 0.7773, "learning_rate": 4.5e-06, "loss": 3.2654, "norm_diff": 0.126, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7371.5834, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1024, "query_norm": 1.2787, "queue_k_norm": 1.3985, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6298, "sent_len_1": 66.8056, "sent_max_len_0": 128.0, "sent_max_len_1": 189.49, "stdk": 0.0492, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91900 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.2852, "doc_norm": 1.401, "encoder_q-embeddings": 3534.7913, "encoder_q-layer.0": 2418.7874, "encoder_q-layer.1": 2563.3674, "encoder_q-layer.10": 4891.436, "encoder_q-layer.11": 10994.7188, "encoder_q-layer.2": 2835.4292, "encoder_q-layer.3": 2864.0237, "encoder_q-layer.4": 3109.5422, "encoder_q-layer.5": 3164.1152, "encoder_q-layer.6": 3549.5239, "encoder_q-layer.7": 4161.5049, "encoder_q-layer.8": 4843.0664, "encoder_q-layer.9": 4577.0044, "epoch": 0.9, "inbatch_neg_score": 0.1029, "inbatch_pos_score": 0.7676, "learning_rate": 4.444444444444445e-06, "loss": 3.2852, "norm_diff": 0.1369, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6944.4219, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1019, "query_norm": 1.2641, "queue_k_norm": 1.3978, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.3657, "sent_len_1": 66.8342, "sent_max_len_0": 127.98, "sent_max_len_1": 190.68, "stdk": 0.0491, "stdq": 0.0447, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92000 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.2717, "doc_norm": 1.4013, "encoder_q-embeddings": 3734.3845, "encoder_q-layer.0": 2567.063, "encoder_q-layer.1": 2756.7656, "encoder_q-layer.10": 5155.6899, "encoder_q-layer.11": 11009.7725, "encoder_q-layer.2": 3105.4463, "encoder_q-layer.3": 3173.3608, "encoder_q-layer.4": 3383.5955, "encoder_q-layer.5": 3345.2903, "encoder_q-layer.6": 3691.8601, "encoder_q-layer.7": 4425.437, "encoder_q-layer.8": 5007.9941, "encoder_q-layer.9": 4654.5122, "epoch": 0.9, "inbatch_neg_score": 0.105, "inbatch_pos_score": 0.7705, "learning_rate": 4.388888888888889e-06, "loss": 3.2717, "norm_diff": 0.1328, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7215.5395, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1045, "query_norm": 1.2685, "queue_k_norm": 1.3963, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7221, "sent_len_1": 66.7504, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.2725, "stdk": 0.0491, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92100 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.2752, "doc_norm": 1.4006, "encoder_q-embeddings": 8516.8008, "encoder_q-layer.0": 6586.6401, "encoder_q-layer.1": 5826.1968, "encoder_q-layer.10": 4966.9995, "encoder_q-layer.11": 10876.2041, "encoder_q-layer.2": 5864.0991, "encoder_q-layer.3": 6197.5845, "encoder_q-layer.4": 6094.6396, "encoder_q-layer.5": 5355.1074, "encoder_q-layer.6": 5340.2925, "encoder_q-layer.7": 5961.5225, "encoder_q-layer.8": 5766.2188, "encoder_q-layer.9": 4699.2388, "epoch": 0.9, "inbatch_neg_score": 0.104, "inbatch_pos_score": 0.7876, "learning_rate": 4.333333333333334e-06, "loss": 3.2752, "norm_diff": 0.1205, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10060.773, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1045, "query_norm": 1.2801, "queue_k_norm": 1.3975, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5509, "sent_len_1": 66.7896, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8363, "stdk": 0.0491, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92200 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.2643, "doc_norm": 1.3996, "encoder_q-embeddings": 3632.1121, "encoder_q-layer.0": 2501.2441, "encoder_q-layer.1": 2739.8132, "encoder_q-layer.10": 4838.7964, "encoder_q-layer.11": 10378.3574, "encoder_q-layer.2": 2998.3835, "encoder_q-layer.3": 3041.5996, "encoder_q-layer.4": 3299.4209, "encoder_q-layer.5": 3507.7646, "encoder_q-layer.6": 3915.5688, "encoder_q-layer.7": 4252.0425, "encoder_q-layer.8": 4831.8853, "encoder_q-layer.9": 4607.9839, "epoch": 0.9, "inbatch_neg_score": 0.1051, "inbatch_pos_score": 0.8115, "learning_rate": 4.277777777777778e-06, "loss": 3.2643, "norm_diff": 0.1082, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6879.1447, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1064, "query_norm": 1.2914, "queue_k_norm": 1.3982, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.4198, "sent_len_1": 66.9992, "sent_max_len_0": 128.0, "sent_max_len_1": 190.49, "stdk": 0.0491, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92300 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.2619, "doc_norm": 1.3985, "encoder_q-embeddings": 4074.8142, "encoder_q-layer.0": 2686.9883, "encoder_q-layer.1": 2814.3472, "encoder_q-layer.10": 4879.001, "encoder_q-layer.11": 11453.9785, "encoder_q-layer.2": 3151.573, "encoder_q-layer.3": 3293.1372, "encoder_q-layer.4": 3565.0208, "encoder_q-layer.5": 3672.8525, "encoder_q-layer.6": 4045.6294, "encoder_q-layer.7": 4698.1421, "encoder_q-layer.8": 5500.3687, "encoder_q-layer.9": 4754.3623, "epoch": 0.9, "inbatch_neg_score": 0.1041, "inbatch_pos_score": 0.792, "learning_rate": 4.222222222222223e-06, "loss": 3.2619, "norm_diff": 0.1251, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7386.0472, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1049, "query_norm": 1.2734, "queue_k_norm": 1.3983, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6403, "sent_len_1": 66.7217, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6125, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92400 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.2637, "doc_norm": 1.3961, "encoder_q-embeddings": 3503.0884, "encoder_q-layer.0": 2311.1033, "encoder_q-layer.1": 2406.0356, "encoder_q-layer.10": 4736.4277, "encoder_q-layer.11": 10655.8135, "encoder_q-layer.2": 2722.6116, "encoder_q-layer.3": 2819.5349, "encoder_q-layer.4": 3118.2861, "encoder_q-layer.5": 3258.4631, "encoder_q-layer.6": 3681.3481, "encoder_q-layer.7": 4169.8281, "encoder_q-layer.8": 4879.855, "encoder_q-layer.9": 4589.7339, "epoch": 0.9, "inbatch_neg_score": 0.1085, "inbatch_pos_score": 0.8047, "learning_rate": 4.166666666666667e-06, "loss": 3.2637, "norm_diff": 0.111, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6865.1721, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1075, "query_norm": 1.2852, "queue_k_norm": 1.3979, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7782, "sent_len_1": 66.5891, "sent_max_len_0": 127.9963, "sent_max_len_1": 191.4187, "stdk": 0.0489, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92500 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.2893, "doc_norm": 1.3916, "encoder_q-embeddings": 3419.0337, "encoder_q-layer.0": 2327.8137, "encoder_q-layer.1": 2429.8394, "encoder_q-layer.10": 4881.5137, "encoder_q-layer.11": 10900.085, "encoder_q-layer.2": 2775.2112, "encoder_q-layer.3": 2869.6553, "encoder_q-layer.4": 3153.2361, "encoder_q-layer.5": 3199.3579, "encoder_q-layer.6": 3616.6816, "encoder_q-layer.7": 3996.748, "encoder_q-layer.8": 4902.9053, "encoder_q-layer.9": 4525.0854, "epoch": 0.9, "inbatch_neg_score": 0.1078, "inbatch_pos_score": 0.8066, "learning_rate": 4.111111111111112e-06, "loss": 3.2893, "norm_diff": 0.1171, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6841.8246, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1069, "query_norm": 1.2745, "queue_k_norm": 1.3997, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6797, "sent_len_1": 66.7978, "sent_max_len_0": 127.9963, "sent_max_len_1": 191.6175, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92600 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.2743, "doc_norm": 1.3924, "encoder_q-embeddings": 3676.29, "encoder_q-layer.0": 2510.1687, "encoder_q-layer.1": 2641.3291, "encoder_q-layer.10": 5654.8545, "encoder_q-layer.11": 10646.6426, "encoder_q-layer.2": 2982.0005, "encoder_q-layer.3": 3138.0181, "encoder_q-layer.4": 3393.9729, "encoder_q-layer.5": 3608.2542, "encoder_q-layer.6": 3865.6624, "encoder_q-layer.7": 4152.5811, "encoder_q-layer.8": 4814.6128, "encoder_q-layer.9": 4549.9507, "epoch": 0.91, "inbatch_neg_score": 0.1049, "inbatch_pos_score": 0.7822, "learning_rate": 4.055555555555556e-06, "loss": 3.2743, "norm_diff": 0.1252, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7022.4971, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1056, "query_norm": 1.2673, "queue_k_norm": 1.398, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7083, "sent_len_1": 66.8025, "sent_max_len_0": 128.0, "sent_max_len_1": 186.8812, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92700 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.2642, "doc_norm": 1.398, "encoder_q-embeddings": 3879.6626, "encoder_q-layer.0": 2692.948, "encoder_q-layer.1": 2871.2964, "encoder_q-layer.10": 4993.085, "encoder_q-layer.11": 11183.3174, "encoder_q-layer.2": 3280.6355, "encoder_q-layer.3": 3434.762, "encoder_q-layer.4": 3661.1973, "encoder_q-layer.5": 3857.6707, "encoder_q-layer.6": 4191.9712, "encoder_q-layer.7": 4490.0859, "encoder_q-layer.8": 5338.6387, "encoder_q-layer.9": 4807.6606, "epoch": 0.91, "inbatch_neg_score": 0.1076, "inbatch_pos_score": 0.7549, "learning_rate": 4.000000000000001e-06, "loss": 3.2642, "norm_diff": 0.1354, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7358.1874, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1075, "query_norm": 1.2626, "queue_k_norm": 1.3993, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.693, "sent_len_1": 66.6619, "sent_max_len_0": 127.965, "sent_max_len_1": 188.6788, "stdk": 0.049, "stdq": 0.0446, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92800 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.2816, "doc_norm": 1.398, "encoder_q-embeddings": 3877.1707, "encoder_q-layer.0": 2614.2925, "encoder_q-layer.1": 2657.5186, "encoder_q-layer.10": 4853.6812, "encoder_q-layer.11": 10794.9707, "encoder_q-layer.2": 2953.1196, "encoder_q-layer.3": 3098.4153, "encoder_q-layer.4": 3282.5803, "encoder_q-layer.5": 3569.7817, "encoder_q-layer.6": 4267.311, "encoder_q-layer.7": 4735.5098, "encoder_q-layer.8": 5318.1929, "encoder_q-layer.9": 4606.0264, "epoch": 0.91, "inbatch_neg_score": 0.1086, "inbatch_pos_score": 0.79, "learning_rate": 3.944444444444445e-06, "loss": 3.2816, "norm_diff": 0.1136, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7144.7779, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1091, "query_norm": 1.2844, "queue_k_norm": 1.3978, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.4745, "sent_len_1": 66.8361, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.15, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92900 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.285, "doc_norm": 1.3946, "encoder_q-embeddings": 3840.0415, "encoder_q-layer.0": 2491.0356, "encoder_q-layer.1": 2683.8259, "encoder_q-layer.10": 4979.2856, "encoder_q-layer.11": 10725.457, "encoder_q-layer.2": 2995.4146, "encoder_q-layer.3": 3117.9814, "encoder_q-layer.4": 3429.8113, "encoder_q-layer.5": 3512.8447, "encoder_q-layer.6": 3846.2585, "encoder_q-layer.7": 4176.0615, "encoder_q-layer.8": 5002.5454, "encoder_q-layer.9": 4736.0874, "epoch": 0.91, "inbatch_neg_score": 0.1075, "inbatch_pos_score": 0.7856, "learning_rate": 3.888888888888889e-06, "loss": 3.285, "norm_diff": 0.1184, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7021.8823, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.109, "query_norm": 1.2762, "queue_k_norm": 1.3968, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7071, "sent_len_1": 66.865, "sent_max_len_0": 127.9938, "sent_max_len_1": 191.9425, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93000 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.2691, "doc_norm": 1.398, "encoder_q-embeddings": 3753.7283, "encoder_q-layer.0": 2437.2776, "encoder_q-layer.1": 2544.104, "encoder_q-layer.10": 4832.4077, "encoder_q-layer.11": 11029.2725, "encoder_q-layer.2": 2783.8301, "encoder_q-layer.3": 2903.2256, "encoder_q-layer.4": 3070.2458, "encoder_q-layer.5": 3064.8142, "encoder_q-layer.6": 3522.449, "encoder_q-layer.7": 4051.188, "encoder_q-layer.8": 4808.1143, "encoder_q-layer.9": 4475.4458, "epoch": 0.91, "inbatch_neg_score": 0.1111, "inbatch_pos_score": 0.7739, "learning_rate": 3.833333333333334e-06, "loss": 3.2691, "norm_diff": 0.1304, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7048.2269, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1104, "query_norm": 1.2676, "queue_k_norm": 1.3989, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6158, "sent_len_1": 66.59, "sent_max_len_0": 127.9925, "sent_max_len_1": 188.9925, "stdk": 0.049, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93100 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.2712, "doc_norm": 1.3949, "encoder_q-embeddings": 8628.9824, "encoder_q-layer.0": 5730.3794, "encoder_q-layer.1": 5978.9409, "encoder_q-layer.10": 9798.3604, "encoder_q-layer.11": 22057.1445, "encoder_q-layer.2": 6552.2148, "encoder_q-layer.3": 6712.5518, "encoder_q-layer.4": 7203.3354, "encoder_q-layer.5": 8015.1997, "encoder_q-layer.6": 8473.126, "encoder_q-layer.7": 9220.8643, "encoder_q-layer.8": 10206.9287, "encoder_q-layer.9": 9199.0957, "epoch": 0.91, "inbatch_neg_score": 0.1107, "inbatch_pos_score": 0.7715, "learning_rate": 3.777777777777778e-06, "loss": 3.2712, "norm_diff": 0.1154, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14862.3874, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1099, "query_norm": 1.2795, "queue_k_norm": 1.3985, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4944, "sent_len_1": 66.9404, "sent_max_len_0": 127.9988, "sent_max_len_1": 191.9363, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93200 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2575, "doc_norm": 1.3999, "encoder_q-embeddings": 8297.2227, "encoder_q-layer.0": 5394.1758, "encoder_q-layer.1": 5478.6426, "encoder_q-layer.10": 9870.0166, "encoder_q-layer.11": 21427.0762, "encoder_q-layer.2": 6161.0518, "encoder_q-layer.3": 6432.5488, "encoder_q-layer.4": 6785.5503, "encoder_q-layer.5": 6916.5332, "encoder_q-layer.6": 7639.8867, "encoder_q-layer.7": 8500.4863, "encoder_q-layer.8": 9850.4561, "encoder_q-layer.9": 9285.6211, "epoch": 0.91, "inbatch_neg_score": 0.1115, "inbatch_pos_score": 0.7993, "learning_rate": 3.722222222222222e-06, "loss": 3.2575, "norm_diff": 0.1201, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14124.2338, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1118, "query_norm": 1.2798, "queue_k_norm": 1.3989, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6301, "sent_len_1": 66.8245, "sent_max_len_0": 128.0, "sent_max_len_1": 186.8162, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93300 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.2842, "doc_norm": 1.3905, "encoder_q-embeddings": 10018.0625, "encoder_q-layer.0": 6583.8848, "encoder_q-layer.1": 8045.5107, "encoder_q-layer.10": 10367.3975, "encoder_q-layer.11": 21524.6973, "encoder_q-layer.2": 8819.0391, "encoder_q-layer.3": 8941.2676, "encoder_q-layer.4": 9075.8486, "encoder_q-layer.5": 8770.1738, "encoder_q-layer.6": 9049.498, "encoder_q-layer.7": 9623.8232, "encoder_q-layer.8": 10436.0068, "encoder_q-layer.9": 9338.8838, "epoch": 0.91, "inbatch_neg_score": 0.1094, "inbatch_pos_score": 0.7832, "learning_rate": 3.666666666666667e-06, "loss": 3.2842, "norm_diff": 0.1215, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15845.8186, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1091, "query_norm": 1.269, "queue_k_norm": 1.4, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6778, "sent_len_1": 66.8794, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3638, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93400 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.2806, "doc_norm": 1.4016, "encoder_q-embeddings": 8710.0244, "encoder_q-layer.0": 6296.8447, "encoder_q-layer.1": 7463.3511, "encoder_q-layer.10": 10034.4785, "encoder_q-layer.11": 21811.3945, "encoder_q-layer.2": 10006.2852, "encoder_q-layer.3": 9463.8916, "encoder_q-layer.4": 9852.2354, "encoder_q-layer.5": 8658.4062, "encoder_q-layer.6": 9687.1211, "encoder_q-layer.7": 10665.8457, "encoder_q-layer.8": 11728.9199, "encoder_q-layer.9": 9573.8574, "epoch": 0.91, "inbatch_neg_score": 0.1116, "inbatch_pos_score": 0.8003, "learning_rate": 3.611111111111111e-06, "loss": 3.2806, "norm_diff": 0.1226, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16298.2612, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1113, "query_norm": 1.279, "queue_k_norm": 1.3989, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7124, "sent_len_1": 66.9656, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5163, "stdk": 0.0491, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93500 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.2704, "doc_norm": 1.4053, "encoder_q-embeddings": 11287.2539, "encoder_q-layer.0": 7446.6392, "encoder_q-layer.1": 7503.46, "encoder_q-layer.10": 9533.7139, "encoder_q-layer.11": 21800.7617, "encoder_q-layer.2": 8074.3101, "encoder_q-layer.3": 7751.8071, "encoder_q-layer.4": 8549.376, "encoder_q-layer.5": 8386.3965, "encoder_q-layer.6": 8797.3975, "encoder_q-layer.7": 9054.5068, "encoder_q-layer.8": 10294.2197, "encoder_q-layer.9": 9453.2393, "epoch": 0.91, "inbatch_neg_score": 0.1102, "inbatch_pos_score": 0.7734, "learning_rate": 3.555555555555556e-06, "loss": 3.2704, "norm_diff": 0.1438, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15979.1821, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1108, "query_norm": 1.2614, "queue_k_norm": 1.3988, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7202, "sent_len_1": 66.7372, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.3738, "stdk": 0.0492, "stdq": 0.0447, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93600 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.2716, "doc_norm": 1.3956, "encoder_q-embeddings": 7707.9888, "encoder_q-layer.0": 5102.4888, "encoder_q-layer.1": 5368.7939, "encoder_q-layer.10": 9499.9385, "encoder_q-layer.11": 20888.3418, "encoder_q-layer.2": 5879.6631, "encoder_q-layer.3": 6160.7788, "encoder_q-layer.4": 6399.4824, "encoder_q-layer.5": 6536.665, "encoder_q-layer.6": 7348.6377, "encoder_q-layer.7": 8005.5049, "encoder_q-layer.8": 9610.6768, "encoder_q-layer.9": 8759.0059, "epoch": 0.91, "inbatch_neg_score": 0.1146, "inbatch_pos_score": 0.7705, "learning_rate": 3.5000000000000004e-06, "loss": 3.2716, "norm_diff": 0.1233, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13703.4365, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1141, "query_norm": 1.2723, "queue_k_norm": 1.4, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.609, "sent_len_1": 66.8206, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1687, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93700 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.2845, "doc_norm": 1.4027, "encoder_q-embeddings": 9147.0029, "encoder_q-layer.0": 5991.4692, "encoder_q-layer.1": 6367.7539, "encoder_q-layer.10": 9757.9248, "encoder_q-layer.11": 21431.0312, "encoder_q-layer.2": 7320.3877, "encoder_q-layer.3": 7309.436, "encoder_q-layer.4": 7951.9209, "encoder_q-layer.5": 8698.5127, "encoder_q-layer.6": 9352.2471, "encoder_q-layer.7": 9481.7217, "encoder_q-layer.8": 10380.9463, "encoder_q-layer.9": 9295.082, "epoch": 0.92, "inbatch_neg_score": 0.1127, "inbatch_pos_score": 0.7729, "learning_rate": 3.4444444444444444e-06, "loss": 3.2845, "norm_diff": 0.1405, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15058.8542, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1121, "query_norm": 1.2622, "queue_k_norm": 1.4002, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4325, "sent_len_1": 66.7099, "sent_max_len_0": 127.9938, "sent_max_len_1": 188.9563, "stdk": 0.0491, "stdq": 0.0446, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93800 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.2624, "doc_norm": 1.4057, "encoder_q-embeddings": 7434.5815, "encoder_q-layer.0": 4845.3828, "encoder_q-layer.1": 5220.5889, "encoder_q-layer.10": 10239.374, "encoder_q-layer.11": 21791.3223, "encoder_q-layer.2": 5787.1196, "encoder_q-layer.3": 6107.894, "encoder_q-layer.4": 6537.4561, "encoder_q-layer.5": 6446.8833, "encoder_q-layer.6": 7174.271, "encoder_q-layer.7": 8140.5522, "encoder_q-layer.8": 9757.8428, "encoder_q-layer.9": 9294.918, "epoch": 0.92, "inbatch_neg_score": 0.1112, "inbatch_pos_score": 0.7744, "learning_rate": 3.3888888888888893e-06, "loss": 3.2624, "norm_diff": 0.1498, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14006.7736, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.111, "query_norm": 1.2559, "queue_k_norm": 1.4002, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7123, "sent_len_1": 66.7799, "sent_max_len_0": 128.0, "sent_max_len_1": 189.73, "stdk": 0.0492, "stdq": 0.0444, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93900 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2717, "doc_norm": 1.4, "encoder_q-embeddings": 14292.9121, "encoder_q-layer.0": 9758.6162, "encoder_q-layer.1": 8604.3936, "encoder_q-layer.10": 9758.9111, "encoder_q-layer.11": 21164.6699, "encoder_q-layer.2": 8667.9551, "encoder_q-layer.3": 8325.1582, "encoder_q-layer.4": 8761.6172, "encoder_q-layer.5": 8129.8584, "encoder_q-layer.6": 8559.2373, "encoder_q-layer.7": 9211.9492, "encoder_q-layer.8": 10133.1807, "encoder_q-layer.9": 9036.458, "epoch": 0.92, "inbatch_neg_score": 0.1105, "inbatch_pos_score": 0.7988, "learning_rate": 3.3333333333333333e-06, "loss": 3.2717, "norm_diff": 0.1257, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16845.4717, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.1108, "query_norm": 1.2743, "queue_k_norm": 1.4008, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7958, "sent_len_1": 66.8179, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8212, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94000 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2811, "doc_norm": 1.3974, "encoder_q-embeddings": 7510.8398, "encoder_q-layer.0": 5029.0405, "encoder_q-layer.1": 5407.4624, "encoder_q-layer.10": 10592.2539, "encoder_q-layer.11": 21779.373, "encoder_q-layer.2": 6273.9473, "encoder_q-layer.3": 6610.4673, "encoder_q-layer.4": 6951.7549, "encoder_q-layer.5": 7130.4385, "encoder_q-layer.6": 7896.8813, "encoder_q-layer.7": 8843.5811, "encoder_q-layer.8": 10621.4492, "encoder_q-layer.9": 10067.0986, "epoch": 0.92, "inbatch_neg_score": 0.1115, "inbatch_pos_score": 0.7939, "learning_rate": 3.277777777777778e-06, "loss": 3.2811, "norm_diff": 0.1073, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14371.6737, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1122, "query_norm": 1.29, "queue_k_norm": 1.4013, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.7396, "sent_len_1": 66.7847, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0637, "stdk": 0.0489, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 94100 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.2572, "doc_norm": 1.3931, "encoder_q-embeddings": 7382.6914, "encoder_q-layer.0": 4893.4834, "encoder_q-layer.1": 5105.9634, "encoder_q-layer.10": 9457.6367, "encoder_q-layer.11": 21585.7656, "encoder_q-layer.2": 5690.8799, "encoder_q-layer.3": 6060.5005, "encoder_q-layer.4": 6446.9277, "encoder_q-layer.5": 6808.1968, "encoder_q-layer.6": 7625.1426, "encoder_q-layer.7": 8926.7031, "encoder_q-layer.8": 10171.0869, "encoder_q-layer.9": 9251.4053, "epoch": 0.92, "inbatch_neg_score": 0.113, "inbatch_pos_score": 0.7754, "learning_rate": 3.2222222222222222e-06, "loss": 3.2572, "norm_diff": 0.1223, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13939.9701, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.113, "query_norm": 1.2708, "queue_k_norm": 1.3994, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7516, "sent_len_1": 66.8354, "sent_max_len_0": 127.9975, "sent_max_len_1": 188.9613, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94200 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.2572, "doc_norm": 1.3962, "encoder_q-embeddings": 7861.8345, "encoder_q-layer.0": 5325.729, "encoder_q-layer.1": 5192.5981, "encoder_q-layer.10": 9571.0625, "encoder_q-layer.11": 21968.9141, "encoder_q-layer.2": 5825.2856, "encoder_q-layer.3": 6020.3438, "encoder_q-layer.4": 6494.4546, "encoder_q-layer.5": 6653.8604, "encoder_q-layer.6": 7315.7729, "encoder_q-layer.7": 8103.8984, "encoder_q-layer.8": 9830.1377, "encoder_q-layer.9": 9213.2412, "epoch": 0.92, "inbatch_neg_score": 0.1117, "inbatch_pos_score": 0.7783, "learning_rate": 3.166666666666667e-06, "loss": 3.2572, "norm_diff": 0.1285, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14175.4707, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1122, "query_norm": 1.2677, "queue_k_norm": 1.4012, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5425, "sent_len_1": 66.938, "sent_max_len_0": 128.0, "sent_max_len_1": 189.04, "stdk": 0.0489, "stdq": 0.0447, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94300 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.2706, "doc_norm": 1.4042, "encoder_q-embeddings": 7378.0947, "encoder_q-layer.0": 5166.9702, "encoder_q-layer.1": 5458.3584, "encoder_q-layer.10": 10159.8457, "encoder_q-layer.11": 20920.957, "encoder_q-layer.2": 6154.8628, "encoder_q-layer.3": 6306.8364, "encoder_q-layer.4": 6834.6953, "encoder_q-layer.5": 6908.7349, "encoder_q-layer.6": 7795.1304, "encoder_q-layer.7": 8782.6045, "encoder_q-layer.8": 9723.5781, "encoder_q-layer.9": 9249.4023, "epoch": 0.92, "inbatch_neg_score": 0.1122, "inbatch_pos_score": 0.7734, "learning_rate": 3.111111111111111e-06, "loss": 3.2706, "norm_diff": 0.1469, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13846.2721, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1117, "query_norm": 1.2573, "queue_k_norm": 1.3997, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.774, "sent_len_1": 66.9461, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.2025, "stdk": 0.0491, "stdq": 0.0445, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94400 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.2758, "doc_norm": 1.4023, "encoder_q-embeddings": 7527.6201, "encoder_q-layer.0": 4953.8633, "encoder_q-layer.1": 5135.6133, "encoder_q-layer.10": 9830.8955, "encoder_q-layer.11": 21293.8164, "encoder_q-layer.2": 5664.8706, "encoder_q-layer.3": 5842.3501, "encoder_q-layer.4": 6296.7798, "encoder_q-layer.5": 6729.8174, "encoder_q-layer.6": 7924.1318, "encoder_q-layer.7": 8835.418, "encoder_q-layer.8": 10352.3115, "encoder_q-layer.9": 9452.2881, "epoch": 0.92, "inbatch_neg_score": 0.1118, "inbatch_pos_score": 0.7988, "learning_rate": 3.0555555555555556e-06, "loss": 3.2758, "norm_diff": 0.1306, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14082.0318, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1122, "query_norm": 1.2717, "queue_k_norm": 1.3993, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6722, "sent_len_1": 66.55, "sent_max_len_0": 128.0, "sent_max_len_1": 191.47, "stdk": 0.0491, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94500 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.2853, "doc_norm": 1.399, "encoder_q-embeddings": 8709.2266, "encoder_q-layer.0": 5752.3477, "encoder_q-layer.1": 6164.3521, "encoder_q-layer.10": 9992.8467, "encoder_q-layer.11": 22390.6641, "encoder_q-layer.2": 7118.3379, "encoder_q-layer.3": 7474.0283, "encoder_q-layer.4": 8369.335, "encoder_q-layer.5": 8705.3789, "encoder_q-layer.6": 9500.543, "encoder_q-layer.7": 9729.126, "encoder_q-layer.8": 10954.8525, "encoder_q-layer.9": 9713.0752, "epoch": 0.92, "inbatch_neg_score": 0.1118, "inbatch_pos_score": 0.7715, "learning_rate": 3e-06, "loss": 3.2853, "norm_diff": 0.1366, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15416.2054, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1126, "query_norm": 1.2624, "queue_k_norm": 1.4003, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4783, "sent_len_1": 66.755, "sent_max_len_0": 127.9938, "sent_max_len_1": 190.3288, "stdk": 0.049, "stdq": 0.0447, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94600 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.269, "doc_norm": 1.398, "encoder_q-embeddings": 7405.6509, "encoder_q-layer.0": 4954.5166, "encoder_q-layer.1": 5145.187, "encoder_q-layer.10": 9524.2227, "encoder_q-layer.11": 21524.25, "encoder_q-layer.2": 5880.6279, "encoder_q-layer.3": 6019.1479, "encoder_q-layer.4": 6371.6113, "encoder_q-layer.5": 6573.3188, "encoder_q-layer.6": 7575.9717, "encoder_q-layer.7": 8535.833, "encoder_q-layer.8": 10173.123, "encoder_q-layer.9": 9063.0996, "epoch": 0.92, "inbatch_neg_score": 0.1149, "inbatch_pos_score": 0.8076, "learning_rate": 2.9444444444444445e-06, "loss": 3.269, "norm_diff": 0.1257, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13955.4657, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.114, "query_norm": 1.2723, "queue_k_norm": 1.4008, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6271, "sent_len_1": 66.9055, "sent_max_len_0": 128.0, "sent_max_len_1": 187.1687, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94700 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.2747, "doc_norm": 1.3962, "encoder_q-embeddings": 8024.6929, "encoder_q-layer.0": 5465.9805, "encoder_q-layer.1": 5977.7837, "encoder_q-layer.10": 10223.0342, "encoder_q-layer.11": 21327.3359, "encoder_q-layer.2": 6843.0234, "encoder_q-layer.3": 7250.7651, "encoder_q-layer.4": 8046.3257, "encoder_q-layer.5": 8139.7207, "encoder_q-layer.6": 9178.5898, "encoder_q-layer.7": 10145.4863, "encoder_q-layer.8": 11078.293, "encoder_q-layer.9": 9269.0117, "epoch": 0.93, "inbatch_neg_score": 0.1141, "inbatch_pos_score": 0.7773, "learning_rate": 2.888888888888889e-06, "loss": 3.2747, "norm_diff": 0.1261, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14796.3581, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1131, "query_norm": 1.2701, "queue_k_norm": 1.3992, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5143, "sent_len_1": 66.6976, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.8812, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94800 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.276, "doc_norm": 1.4025, "encoder_q-embeddings": 7261.4741, "encoder_q-layer.0": 4929.8589, "encoder_q-layer.1": 5199.4824, "encoder_q-layer.10": 9269.5322, "encoder_q-layer.11": 20859.1211, "encoder_q-layer.2": 5847.9507, "encoder_q-layer.3": 6025.9614, "encoder_q-layer.4": 6354.5356, "encoder_q-layer.5": 6627.3872, "encoder_q-layer.6": 7431.4683, "encoder_q-layer.7": 8113.6157, "encoder_q-layer.8": 9644.3594, "encoder_q-layer.9": 8862.7188, "epoch": 0.93, "inbatch_neg_score": 0.113, "inbatch_pos_score": 0.8105, "learning_rate": 2.8333333333333335e-06, "loss": 3.276, "norm_diff": 0.1286, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13623.2981, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1127, "query_norm": 1.274, "queue_k_norm": 1.4005, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6813, "sent_len_1": 66.8258, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2875, "stdk": 0.0491, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94900 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.2798, "doc_norm": 1.4073, "encoder_q-embeddings": 7583.4346, "encoder_q-layer.0": 4992.3569, "encoder_q-layer.1": 5434.8271, "encoder_q-layer.10": 9991.8516, "encoder_q-layer.11": 21908.2207, "encoder_q-layer.2": 6150.395, "encoder_q-layer.3": 6180.2798, "encoder_q-layer.4": 6453.0605, "encoder_q-layer.5": 6779.5337, "encoder_q-layer.6": 7865.6323, "encoder_q-layer.7": 8997.9092, "encoder_q-layer.8": 10722.9395, "encoder_q-layer.9": 9766.5996, "epoch": 0.93, "inbatch_neg_score": 0.1113, "inbatch_pos_score": 0.7852, "learning_rate": 2.777777777777778e-06, "loss": 3.2798, "norm_diff": 0.1454, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14368.9276, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1118, "query_norm": 1.262, "queue_k_norm": 1.3979, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.7093, "sent_len_1": 66.6196, "sent_max_len_0": 128.0, "sent_max_len_1": 187.3512, "stdk": 0.0493, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95000 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.2733, "doc_norm": 1.394, "encoder_q-embeddings": 11440.8828, "encoder_q-layer.0": 7517.7441, "encoder_q-layer.1": 8200.7832, "encoder_q-layer.10": 10506.5361, "encoder_q-layer.11": 21741.2812, "encoder_q-layer.2": 9934.4521, "encoder_q-layer.3": 10014.6797, "encoder_q-layer.4": 11582.8721, "encoder_q-layer.5": 10156.1982, "encoder_q-layer.6": 9461.9775, "encoder_q-layer.7": 9750.5898, "encoder_q-layer.8": 11125.377, "encoder_q-layer.9": 9585.334, "epoch": 0.93, "inbatch_neg_score": 0.1107, "inbatch_pos_score": 0.7734, "learning_rate": 2.7222222222222224e-06, "loss": 3.2733, "norm_diff": 0.1284, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17054.1822, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.1109, "query_norm": 1.2656, "queue_k_norm": 1.4009, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6271, "sent_len_1": 66.8027, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2075, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 95100 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.279, "doc_norm": 1.4001, "encoder_q-embeddings": 3646.3123, "encoder_q-layer.0": 2364.2605, "encoder_q-layer.1": 2463.3103, "encoder_q-layer.10": 4772.1694, "encoder_q-layer.11": 10877.7617, "encoder_q-layer.2": 2821.668, "encoder_q-layer.3": 2978.7146, "encoder_q-layer.4": 3047.543, "encoder_q-layer.5": 3163.2253, "encoder_q-layer.6": 3673.7737, "encoder_q-layer.7": 4043.5591, "encoder_q-layer.8": 5174.8477, "encoder_q-layer.9": 4722.7344, "epoch": 0.93, "inbatch_neg_score": 0.1134, "inbatch_pos_score": 0.8174, "learning_rate": 2.666666666666667e-06, "loss": 3.279, "norm_diff": 0.122, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6905.8588, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1133, "query_norm": 1.2781, "queue_k_norm": 1.4019, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.568, "sent_len_1": 66.7196, "sent_max_len_0": 128.0, "sent_max_len_1": 192.165, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 95200 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.2572, "doc_norm": 1.407, "encoder_q-embeddings": 3745.4692, "encoder_q-layer.0": 2525.2258, "encoder_q-layer.1": 2695.7588, "encoder_q-layer.10": 4942.7578, "encoder_q-layer.11": 10527.5088, "encoder_q-layer.2": 3041.874, "encoder_q-layer.3": 3090.1003, "encoder_q-layer.4": 3339.0935, "encoder_q-layer.5": 3635.8286, "encoder_q-layer.6": 4152.4976, "encoder_q-layer.7": 4353.7642, "encoder_q-layer.8": 5204.7612, "encoder_q-layer.9": 4543.7549, "epoch": 0.93, "inbatch_neg_score": 0.1114, "inbatch_pos_score": 0.8135, "learning_rate": 2.6111111111111113e-06, "loss": 3.2572, "norm_diff": 0.1245, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7030.3524, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1121, "query_norm": 1.2825, "queue_k_norm": 1.4026, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.8185, "sent_len_1": 66.7932, "sent_max_len_0": 127.9938, "sent_max_len_1": 187.8225, "stdk": 0.0492, "stdq": 0.0454, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 95300 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.2878, "doc_norm": 1.4014, "encoder_q-embeddings": 5015.0752, "encoder_q-layer.0": 3448.5081, "encoder_q-layer.1": 3615.6443, "encoder_q-layer.10": 4889.9634, "encoder_q-layer.11": 10709.9814, "encoder_q-layer.2": 4602.0791, "encoder_q-layer.3": 5117.1123, "encoder_q-layer.4": 5236.7148, "encoder_q-layer.5": 4963.269, "encoder_q-layer.6": 5049.8989, "encoder_q-layer.7": 5075.1255, "encoder_q-layer.8": 5768.688, "encoder_q-layer.9": 4625.1821, "epoch": 0.93, "inbatch_neg_score": 0.1103, "inbatch_pos_score": 0.7993, "learning_rate": 2.5555555555555557e-06, "loss": 3.2878, "norm_diff": 0.1283, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8289.5646, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.111, "query_norm": 1.2731, "queue_k_norm": 1.4014, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4717, "sent_len_1": 66.852, "sent_max_len_0": 128.0, "sent_max_len_1": 191.635, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 95400 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.2704, "doc_norm": 1.4058, "encoder_q-embeddings": 3728.9653, "encoder_q-layer.0": 2456.1355, "encoder_q-layer.1": 2595.7129, "encoder_q-layer.10": 5074.6113, "encoder_q-layer.11": 10934.0605, "encoder_q-layer.2": 2935.7405, "encoder_q-layer.3": 3004.1794, "encoder_q-layer.4": 3302.1216, "encoder_q-layer.5": 3502.3904, "encoder_q-layer.6": 3803.3857, "encoder_q-layer.7": 4620.582, "encoder_q-layer.8": 5285.3672, "encoder_q-layer.9": 4815.8174, "epoch": 0.93, "inbatch_neg_score": 0.1101, "inbatch_pos_score": 0.7974, "learning_rate": 2.5e-06, "loss": 3.2704, "norm_diff": 0.1327, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7139.5162, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1104, "query_norm": 1.273, "queue_k_norm": 1.4, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7245, "sent_len_1": 66.9128, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8388, "stdk": 0.0492, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 95500 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.2794, "doc_norm": 1.4035, "encoder_q-embeddings": 3830.437, "encoder_q-layer.0": 2475.7625, "encoder_q-layer.1": 2622.021, "encoder_q-layer.10": 4868.2271, "encoder_q-layer.11": 10874.7627, "encoder_q-layer.2": 2929.3613, "encoder_q-layer.3": 3014.3542, "encoder_q-layer.4": 3153.2747, "encoder_q-layer.5": 3362.7119, "encoder_q-layer.6": 3772.0, "encoder_q-layer.7": 4397.6455, "encoder_q-layer.8": 4935.4033, "encoder_q-layer.9": 4534.8228, "epoch": 0.93, "inbatch_neg_score": 0.1098, "inbatch_pos_score": 0.7891, "learning_rate": 2.4444444444444447e-06, "loss": 3.2794, "norm_diff": 0.1336, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6997.5319, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1082, "query_norm": 1.27, "queue_k_norm": 1.4016, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6666, "sent_len_1": 67.1318, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.3275, "stdk": 0.0491, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 95600 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.2645, "doc_norm": 1.3959, "encoder_q-embeddings": 3752.5259, "encoder_q-layer.0": 2453.2393, "encoder_q-layer.1": 2549.116, "encoder_q-layer.10": 5427.9844, "encoder_q-layer.11": 11005.7539, "encoder_q-layer.2": 2898.8403, "encoder_q-layer.3": 2984.1704, "encoder_q-layer.4": 3302.896, "encoder_q-layer.5": 3474.0029, "encoder_q-layer.6": 4199.7485, "encoder_q-layer.7": 4407.5293, "encoder_q-layer.8": 5158.8149, "encoder_q-layer.9": 4668.2344, "epoch": 0.93, "inbatch_neg_score": 0.1102, "inbatch_pos_score": 0.7876, "learning_rate": 2.388888888888889e-06, "loss": 3.2645, "norm_diff": 0.1266, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7202.1505, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1104, "query_norm": 1.2693, "queue_k_norm": 1.4004, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6213, "sent_len_1": 66.6903, "sent_max_len_0": 128.0, "sent_max_len_1": 191.6325, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 95700 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.2845, "doc_norm": 1.4019, "encoder_q-embeddings": 6921.144, "encoder_q-layer.0": 4828.5142, "encoder_q-layer.1": 5633.7075, "encoder_q-layer.10": 4931.5195, "encoder_q-layer.11": 10940.0762, "encoder_q-layer.2": 6581.0508, "encoder_q-layer.3": 6648.2998, "encoder_q-layer.4": 6924.2778, "encoder_q-layer.5": 7218.3306, "encoder_q-layer.6": 7205.1343, "encoder_q-layer.7": 7120.5967, "encoder_q-layer.8": 6287.7754, "encoder_q-layer.9": 4856.0547, "epoch": 0.94, "inbatch_neg_score": 0.1088, "inbatch_pos_score": 0.7681, "learning_rate": 2.3333333333333336e-06, "loss": 3.2845, "norm_diff": 0.1417, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10171.9275, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1085, "query_norm": 1.2602, "queue_k_norm": 1.4018, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4819, "sent_len_1": 66.9693, "sent_max_len_0": 127.9912, "sent_max_len_1": 188.2725, "stdk": 0.049, "stdq": 0.0446, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 95800 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2686, "doc_norm": 1.3998, "encoder_q-embeddings": 4021.2949, "encoder_q-layer.0": 2849.0554, "encoder_q-layer.1": 2940.1055, "encoder_q-layer.10": 5322.2104, "encoder_q-layer.11": 11199.8086, "encoder_q-layer.2": 3425.4507, "encoder_q-layer.3": 3483.7788, "encoder_q-layer.4": 3843.3379, "encoder_q-layer.5": 3934.073, "encoder_q-layer.6": 4313.2012, "encoder_q-layer.7": 4495.2231, "encoder_q-layer.8": 5310.1821, "encoder_q-layer.9": 4845.7056, "epoch": 0.94, "inbatch_neg_score": 0.1082, "inbatch_pos_score": 0.7744, "learning_rate": 2.277777777777778e-06, "loss": 3.2686, "norm_diff": 0.1327, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7482.5282, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1086, "query_norm": 1.2671, "queue_k_norm": 1.3992, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5643, "sent_len_1": 66.7888, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.0375, "stdk": 0.049, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 95900 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2876, "doc_norm": 1.3992, "encoder_q-embeddings": 3437.9197, "encoder_q-layer.0": 2348.2739, "encoder_q-layer.1": 2547.2654, "encoder_q-layer.10": 4488.0913, "encoder_q-layer.11": 10231.9121, "encoder_q-layer.2": 2849.9473, "encoder_q-layer.3": 2877.6675, "encoder_q-layer.4": 3271.0669, "encoder_q-layer.5": 3253.1921, "encoder_q-layer.6": 3657.9944, "encoder_q-layer.7": 4341.876, "encoder_q-layer.8": 4787.2397, "encoder_q-layer.9": 4356.6963, "epoch": 0.94, "inbatch_neg_score": 0.1087, "inbatch_pos_score": 0.7896, "learning_rate": 2.2222222222222225e-06, "loss": 3.2876, "norm_diff": 0.1307, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6639.406, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1077, "query_norm": 1.2685, "queue_k_norm": 1.3994, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4616, "sent_len_1": 66.633, "sent_max_len_0": 127.9925, "sent_max_len_1": 188.91, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96000 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.2603, "doc_norm": 1.4032, "encoder_q-embeddings": 3562.481, "encoder_q-layer.0": 2513.0994, "encoder_q-layer.1": 2721.5508, "encoder_q-layer.10": 4895.5171, "encoder_q-layer.11": 10542.7373, "encoder_q-layer.2": 3085.3274, "encoder_q-layer.3": 3189.2107, "encoder_q-layer.4": 3448.6978, "encoder_q-layer.5": 3643.3953, "encoder_q-layer.6": 3915.594, "encoder_q-layer.7": 4502.9375, "encoder_q-layer.8": 5119.8691, "encoder_q-layer.9": 4569.8735, "epoch": 0.94, "inbatch_neg_score": 0.1092, "inbatch_pos_score": 0.8027, "learning_rate": 2.166666666666667e-06, "loss": 3.2603, "norm_diff": 0.1344, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6955.0004, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1083, "query_norm": 1.2688, "queue_k_norm": 1.4007, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5747, "sent_len_1": 66.6194, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2287, "stdk": 0.0491, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96100 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.2578, "doc_norm": 1.4004, "encoder_q-embeddings": 3647.2175, "encoder_q-layer.0": 2439.9104, "encoder_q-layer.1": 2571.4629, "encoder_q-layer.10": 4542.4712, "encoder_q-layer.11": 10161.1592, "encoder_q-layer.2": 2974.4548, "encoder_q-layer.3": 3058.5017, "encoder_q-layer.4": 3405.668, "encoder_q-layer.5": 3528.5234, "encoder_q-layer.6": 3750.6719, "encoder_q-layer.7": 4219.1367, "encoder_q-layer.8": 4968.3027, "encoder_q-layer.9": 4346.5884, "epoch": 0.94, "inbatch_neg_score": 0.1091, "inbatch_pos_score": 0.8145, "learning_rate": 2.1111111111111114e-06, "loss": 3.2578, "norm_diff": 0.1226, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6769.9314, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1091, "query_norm": 1.2778, "queue_k_norm": 1.401, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6044, "sent_len_1": 66.5486, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0788, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96200 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.2888, "doc_norm": 1.4016, "encoder_q-embeddings": 4438.1128, "encoder_q-layer.0": 3135.0129, "encoder_q-layer.1": 3363.9399, "encoder_q-layer.10": 4846.4082, "encoder_q-layer.11": 10902.3662, "encoder_q-layer.2": 3903.9819, "encoder_q-layer.3": 4081.0754, "encoder_q-layer.4": 4188.2842, "encoder_q-layer.5": 4322.3643, "encoder_q-layer.6": 4425.5293, "encoder_q-layer.7": 5038.0103, "encoder_q-layer.8": 5507.6782, "encoder_q-layer.9": 4865.085, "epoch": 0.94, "inbatch_neg_score": 0.11, "inbatch_pos_score": 0.7842, "learning_rate": 2.055555555555556e-06, "loss": 3.2888, "norm_diff": 0.1263, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7752.306, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1099, "query_norm": 1.2752, "queue_k_norm": 1.4005, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5859, "sent_len_1": 66.796, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4175, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96300 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.2518, "doc_norm": 1.406, "encoder_q-embeddings": 3721.9041, "encoder_q-layer.0": 2437.1165, "encoder_q-layer.1": 2644.3757, "encoder_q-layer.10": 5459.6553, "encoder_q-layer.11": 11220.1572, "encoder_q-layer.2": 2926.2878, "encoder_q-layer.3": 2944.7126, "encoder_q-layer.4": 3120.0833, "encoder_q-layer.5": 3287.5334, "encoder_q-layer.6": 3835.0293, "encoder_q-layer.7": 4197.9648, "encoder_q-layer.8": 5476.1118, "encoder_q-layer.9": 5007.2451, "epoch": 0.94, "inbatch_neg_score": 0.1063, "inbatch_pos_score": 0.8027, "learning_rate": 2.0000000000000003e-06, "loss": 3.2518, "norm_diff": 0.1396, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7188.7084, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1071, "query_norm": 1.2664, "queue_k_norm": 1.4015, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.6503, "sent_len_1": 66.795, "sent_max_len_0": 127.9963, "sent_max_len_1": 191.335, "stdk": 0.0492, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96400 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.2754, "doc_norm": 1.4014, "encoder_q-embeddings": 3750.2039, "encoder_q-layer.0": 2489.9231, "encoder_q-layer.1": 2673.3274, "encoder_q-layer.10": 5209.2598, "encoder_q-layer.11": 11231.3896, "encoder_q-layer.2": 2974.6243, "encoder_q-layer.3": 2981.7358, "encoder_q-layer.4": 3158.4705, "encoder_q-layer.5": 3324.2803, "encoder_q-layer.6": 3718.2952, "encoder_q-layer.7": 4420.0327, "encoder_q-layer.8": 5180.1602, "encoder_q-layer.9": 4991.0815, "epoch": 0.94, "inbatch_neg_score": 0.1076, "inbatch_pos_score": 0.7876, "learning_rate": 1.9444444444444444e-06, "loss": 3.2754, "norm_diff": 0.1313, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7177.032, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1083, "query_norm": 1.2701, "queue_k_norm": 1.3992, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.3612, "sent_len_1": 66.5125, "sent_max_len_0": 127.9862, "sent_max_len_1": 189.3388, "stdk": 0.0491, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96500 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.2594, "doc_norm": 1.4058, "encoder_q-embeddings": 3992.7539, "encoder_q-layer.0": 2688.365, "encoder_q-layer.1": 2888.4026, "encoder_q-layer.10": 4957.9526, "encoder_q-layer.11": 10867.0918, "encoder_q-layer.2": 3217.6584, "encoder_q-layer.3": 3413.7275, "encoder_q-layer.4": 3838.0579, "encoder_q-layer.5": 3932.5964, "encoder_q-layer.6": 4213.2603, "encoder_q-layer.7": 4914.1289, "encoder_q-layer.8": 5332.5991, "encoder_q-layer.9": 4729.855, "epoch": 0.94, "inbatch_neg_score": 0.1069, "inbatch_pos_score": 0.8154, "learning_rate": 1.888888888888889e-06, "loss": 3.2594, "norm_diff": 0.1244, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7335.4587, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1061, "query_norm": 1.2814, "queue_k_norm": 1.3998, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5425, "sent_len_1": 66.6723, "sent_max_len_0": 127.9988, "sent_max_len_1": 187.945, "stdk": 0.0492, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96600 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.2637, "doc_norm": 1.3924, "encoder_q-embeddings": 4354.3633, "encoder_q-layer.0": 2942.1531, "encoder_q-layer.1": 3200.198, "encoder_q-layer.10": 4629.4526, "encoder_q-layer.11": 10633.2227, "encoder_q-layer.2": 3840.4771, "encoder_q-layer.3": 3869.5837, "encoder_q-layer.4": 4271.6514, "encoder_q-layer.5": 4264.3149, "encoder_q-layer.6": 4457.3608, "encoder_q-layer.7": 4881.4648, "encoder_q-layer.8": 5223.0986, "encoder_q-layer.9": 4555.8857, "epoch": 0.94, "inbatch_neg_score": 0.1069, "inbatch_pos_score": 0.7881, "learning_rate": 1.8333333333333335e-06, "loss": 3.2637, "norm_diff": 0.1224, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7431.2851, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1069, "query_norm": 1.27, "queue_k_norm": 1.4023, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.8842, "sent_len_1": 66.7807, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1138, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 96700 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.2749, "doc_norm": 1.4035, "encoder_q-embeddings": 4145.1206, "encoder_q-layer.0": 2756.9495, "encoder_q-layer.1": 2896.9851, "encoder_q-layer.10": 5097.5674, "encoder_q-layer.11": 11353.498, "encoder_q-layer.2": 3374.2395, "encoder_q-layer.3": 3526.3406, "encoder_q-layer.4": 3849.7217, "encoder_q-layer.5": 3963.3875, "encoder_q-layer.6": 4122.1255, "encoder_q-layer.7": 4654.6348, "encoder_q-layer.8": 5332.7275, "encoder_q-layer.9": 4826.8032, "epoch": 0.95, "inbatch_neg_score": 0.1059, "inbatch_pos_score": 0.7842, "learning_rate": 1.777777777777778e-06, "loss": 3.2749, "norm_diff": 0.1374, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7554.49, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1061, "query_norm": 1.2661, "queue_k_norm": 1.3991, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.678, "sent_len_1": 66.8585, "sent_max_len_0": 127.9938, "sent_max_len_1": 190.2587, "stdk": 0.0491, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96800 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.2773, "doc_norm": 1.399, "encoder_q-embeddings": 3895.7095, "encoder_q-layer.0": 2595.073, "encoder_q-layer.1": 2762.957, "encoder_q-layer.10": 5038.7153, "encoder_q-layer.11": 11020.748, "encoder_q-layer.2": 3181.1321, "encoder_q-layer.3": 3291.4521, "encoder_q-layer.4": 3575.7197, "encoder_q-layer.5": 3715.655, "encoder_q-layer.6": 4629.6108, "encoder_q-layer.7": 4830.563, "encoder_q-layer.8": 5539.7202, "encoder_q-layer.9": 4713.2915, "epoch": 0.95, "inbatch_neg_score": 0.1062, "inbatch_pos_score": 0.7871, "learning_rate": 1.7222222222222222e-06, "loss": 3.2773, "norm_diff": 0.1218, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7423.8396, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1064, "query_norm": 1.2772, "queue_k_norm": 1.3993, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.3779, "sent_len_1": 66.8219, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2925, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96900 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.2787, "doc_norm": 1.4001, "encoder_q-embeddings": 4119.5762, "encoder_q-layer.0": 2740.0911, "encoder_q-layer.1": 2933.366, "encoder_q-layer.10": 4683.0708, "encoder_q-layer.11": 10898.2686, "encoder_q-layer.2": 3396.502, "encoder_q-layer.3": 3531.99, "encoder_q-layer.4": 3814.7, "encoder_q-layer.5": 3992.2207, "encoder_q-layer.6": 3968.2024, "encoder_q-layer.7": 4381.1797, "encoder_q-layer.8": 5048.2769, "encoder_q-layer.9": 4568.9194, "epoch": 0.95, "inbatch_neg_score": 0.1058, "inbatch_pos_score": 0.7847, "learning_rate": 1.6666666666666667e-06, "loss": 3.2787, "norm_diff": 0.1324, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7343.6517, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1058, "query_norm": 1.2677, "queue_k_norm": 1.4026, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5244, "sent_len_1": 66.9858, "sent_max_len_0": 128.0, "sent_max_len_1": 191.5337, "stdk": 0.049, "stdq": 0.045, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 97000 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.2485, "doc_norm": 1.4008, "encoder_q-embeddings": 1780.0453, "encoder_q-layer.0": 1213.7422, "encoder_q-layer.1": 1250.3502, "encoder_q-layer.10": 2407.1421, "encoder_q-layer.11": 5336.5571, "encoder_q-layer.2": 1409.3049, "encoder_q-layer.3": 1444.1678, "encoder_q-layer.4": 1532.0178, "encoder_q-layer.5": 1615.6427, "encoder_q-layer.6": 1816.1227, "encoder_q-layer.7": 2117.2542, "encoder_q-layer.8": 2516.708, "encoder_q-layer.9": 2278.5002, "epoch": 0.95, "inbatch_neg_score": 0.1044, "inbatch_pos_score": 0.7822, "learning_rate": 1.6111111111111111e-06, "loss": 3.2485, "norm_diff": 0.1405, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3426.9383, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1051, "query_norm": 1.2603, "queue_k_norm": 1.4005, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.7527, "sent_len_1": 66.8787, "sent_max_len_0": 128.0, "sent_max_len_1": 189.145, "stdk": 0.0491, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97100 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.266, "doc_norm": 1.4072, "encoder_q-embeddings": 1764.1448, "encoder_q-layer.0": 1158.3362, "encoder_q-layer.1": 1243.8048, "encoder_q-layer.10": 2433.6616, "encoder_q-layer.11": 5738.3101, "encoder_q-layer.2": 1417.7888, "encoder_q-layer.3": 1433.5778, "encoder_q-layer.4": 1599.2789, "encoder_q-layer.5": 1632.4659, "encoder_q-layer.6": 2049.0002, "encoder_q-layer.7": 2195.3313, "encoder_q-layer.8": 2652.1875, "encoder_q-layer.9": 2404.5103, "epoch": 0.95, "inbatch_neg_score": 0.105, "inbatch_pos_score": 0.7861, "learning_rate": 1.5555555555555556e-06, "loss": 3.266, "norm_diff": 0.142, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3540.2593, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1052, "query_norm": 1.2652, "queue_k_norm": 1.3988, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5342, "sent_len_1": 66.9349, "sent_max_len_0": 128.0, "sent_max_len_1": 189.93, "stdk": 0.0493, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97200 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2857, "doc_norm": 1.3916, "encoder_q-embeddings": 1848.7166, "encoder_q-layer.0": 1208.3219, "encoder_q-layer.1": 1287.3043, "encoder_q-layer.10": 2552.9199, "encoder_q-layer.11": 5840.1641, "encoder_q-layer.2": 1429.1941, "encoder_q-layer.3": 1541.3624, "encoder_q-layer.4": 1649.0696, "encoder_q-layer.5": 1696.3773, "encoder_q-layer.6": 1984.5441, "encoder_q-layer.7": 2340.0139, "encoder_q-layer.8": 2669.6965, "encoder_q-layer.9": 2520.8176, "epoch": 0.95, "inbatch_neg_score": 0.1061, "inbatch_pos_score": 0.7764, "learning_rate": 1.5e-06, "loss": 3.2857, "norm_diff": 0.1287, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3658.971, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1064, "query_norm": 1.263, "queue_k_norm": 1.4, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6751, "sent_len_1": 66.7326, "sent_max_len_0": 127.995, "sent_max_len_1": 187.6813, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97300 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.268, "doc_norm": 1.3966, "encoder_q-embeddings": 1920.7767, "encoder_q-layer.0": 1320.692, "encoder_q-layer.1": 1353.3223, "encoder_q-layer.10": 2449.9329, "encoder_q-layer.11": 5365.6123, "encoder_q-layer.2": 1498.6532, "encoder_q-layer.3": 1544.0017, "encoder_q-layer.4": 1627.8199, "encoder_q-layer.5": 1600.4014, "encoder_q-layer.6": 1825.869, "encoder_q-layer.7": 1991.9106, "encoder_q-layer.8": 2389.9995, "encoder_q-layer.9": 2270.1226, "epoch": 0.95, "inbatch_neg_score": 0.1058, "inbatch_pos_score": 0.7671, "learning_rate": 1.4444444444444445e-06, "loss": 3.268, "norm_diff": 0.1384, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3482.2524, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1058, "query_norm": 1.2581, "queue_k_norm": 1.4, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6576, "sent_len_1": 66.922, "sent_max_len_0": 127.995, "sent_max_len_1": 189.8825, "stdk": 0.0489, "stdq": 0.0447, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97400 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.2845, "doc_norm": 1.4034, "encoder_q-embeddings": 2268.999, "encoder_q-layer.0": 1452.6626, "encoder_q-layer.1": 1607.6431, "encoder_q-layer.10": 2509.4209, "encoder_q-layer.11": 5435.9141, "encoder_q-layer.2": 1933.1985, "encoder_q-layer.3": 1930.8298, "encoder_q-layer.4": 2147.0771, "encoder_q-layer.5": 2351.468, "encoder_q-layer.6": 2402.8699, "encoder_q-layer.7": 2706.0591, "encoder_q-layer.8": 2792.8699, "encoder_q-layer.9": 2375.4219, "epoch": 0.95, "inbatch_neg_score": 0.1053, "inbatch_pos_score": 0.7896, "learning_rate": 1.388888888888889e-06, "loss": 3.2845, "norm_diff": 0.1368, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3917.3602, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1057, "query_norm": 1.2666, "queue_k_norm": 1.399, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5068, "sent_len_1": 66.9694, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4238, "stdk": 0.0491, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97500 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.2931, "doc_norm": 1.4017, "encoder_q-embeddings": 1777.1162, "encoder_q-layer.0": 1211.4698, "encoder_q-layer.1": 1286.2665, "encoder_q-layer.10": 2241.3491, "encoder_q-layer.11": 5098.8779, "encoder_q-layer.2": 1447.3019, "encoder_q-layer.3": 1504.9928, "encoder_q-layer.4": 1584.709, "encoder_q-layer.5": 1645.098, "encoder_q-layer.6": 1826.9019, "encoder_q-layer.7": 2064.0676, "encoder_q-layer.8": 2395.4045, "encoder_q-layer.9": 2223.3708, "epoch": 0.95, "inbatch_neg_score": 0.1071, "inbatch_pos_score": 0.793, "learning_rate": 1.3333333333333334e-06, "loss": 3.2931, "norm_diff": 0.1276, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3349.51, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1066, "query_norm": 1.2741, "queue_k_norm": 1.3995, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5827, "sent_len_1": 66.6861, "sent_max_len_0": 128.0, "sent_max_len_1": 191.19, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97600 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.2594, "doc_norm": 1.403, "encoder_q-embeddings": 1900.1151, "encoder_q-layer.0": 1283.3746, "encoder_q-layer.1": 1368.5703, "encoder_q-layer.10": 2576.3357, "encoder_q-layer.11": 5494.1104, "encoder_q-layer.2": 1588.7943, "encoder_q-layer.3": 1658.537, "encoder_q-layer.4": 1786.8395, "encoder_q-layer.5": 1913.5045, "encoder_q-layer.6": 2109.7241, "encoder_q-layer.7": 2346.2261, "encoder_q-layer.8": 2488.3508, "encoder_q-layer.9": 2394.1514, "epoch": 0.95, "inbatch_neg_score": 0.1059, "inbatch_pos_score": 0.7949, "learning_rate": 1.2777777777777779e-06, "loss": 3.2594, "norm_diff": 0.1387, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3588.591, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1053, "query_norm": 1.2643, "queue_k_norm": 1.4001, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6329, "sent_len_1": 66.8255, "sent_max_len_0": 127.9988, "sent_max_len_1": 189.7887, "stdk": 0.0491, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97700 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.2471, "doc_norm": 1.3993, "encoder_q-embeddings": 2079.2241, "encoder_q-layer.0": 1336.1371, "encoder_q-layer.1": 1449.1648, "encoder_q-layer.10": 2417.728, "encoder_q-layer.11": 5585.5918, "encoder_q-layer.2": 1630.4991, "encoder_q-layer.3": 1729.0724, "encoder_q-layer.4": 1791.3446, "encoder_q-layer.5": 1909.735, "encoder_q-layer.6": 2132.103, "encoder_q-layer.7": 2366.3567, "encoder_q-layer.8": 2745.5515, "encoder_q-layer.9": 2379.0015, "epoch": 0.95, "inbatch_neg_score": 0.1062, "inbatch_pos_score": 0.7725, "learning_rate": 1.2222222222222223e-06, "loss": 3.2471, "norm_diff": 0.1348, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3749.125, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1059, "query_norm": 1.2645, "queue_k_norm": 1.4001, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.9912, "sent_len_1": 67.086, "sent_max_len_0": 127.9963, "sent_max_len_1": 190.0062, "stdk": 0.049, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97800 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.2527, "doc_norm": 1.3991, "encoder_q-embeddings": 2472.9504, "encoder_q-layer.0": 1662.5692, "encoder_q-layer.1": 1892.9974, "encoder_q-layer.10": 2876.0959, "encoder_q-layer.11": 5507.4453, "encoder_q-layer.2": 2181.739, "encoder_q-layer.3": 2291.1948, "encoder_q-layer.4": 2493.8711, "encoder_q-layer.5": 2741.6943, "encoder_q-layer.6": 2765.415, "encoder_q-layer.7": 2744.9546, "encoder_q-layer.8": 2801.04, "encoder_q-layer.9": 2466.6975, "epoch": 0.96, "inbatch_neg_score": 0.105, "inbatch_pos_score": 0.7769, "learning_rate": 1.1666666666666668e-06, "loss": 3.2527, "norm_diff": 0.1437, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4176.7618, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1056, "query_norm": 1.2553, "queue_k_norm": 1.3997, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6709, "sent_len_1": 66.6471, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2025, "stdk": 0.049, "stdq": 0.0446, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97900 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.2937, "doc_norm": 1.4024, "encoder_q-embeddings": 2013.3065, "encoder_q-layer.0": 1315.4355, "encoder_q-layer.1": 1396.1302, "encoder_q-layer.10": 2667.5183, "encoder_q-layer.11": 5594.2002, "encoder_q-layer.2": 1536.9209, "encoder_q-layer.3": 1620.8547, "encoder_q-layer.4": 1709.6833, "encoder_q-layer.5": 1794.5087, "encoder_q-layer.6": 1991.645, "encoder_q-layer.7": 2158.4739, "encoder_q-layer.8": 2595.4189, "encoder_q-layer.9": 2512.5872, "epoch": 0.96, "inbatch_neg_score": 0.1067, "inbatch_pos_score": 0.7896, "learning_rate": 1.1111111111111112e-06, "loss": 3.2937, "norm_diff": 0.14, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3651.0036, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1066, "query_norm": 1.2624, "queue_k_norm": 1.3984, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4975, "sent_len_1": 66.5787, "sent_max_len_0": 127.9925, "sent_max_len_1": 186.645, "stdk": 0.0491, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98000 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.2806, "doc_norm": 1.3979, "encoder_q-embeddings": 2255.4309, "encoder_q-layer.0": 1589.995, "encoder_q-layer.1": 1727.9478, "encoder_q-layer.10": 2503.8337, "encoder_q-layer.11": 5538.4038, "encoder_q-layer.2": 2097.4424, "encoder_q-layer.3": 2340.8855, "encoder_q-layer.4": 2608.1272, "encoder_q-layer.5": 2872.7444, "encoder_q-layer.6": 2824.1433, "encoder_q-layer.7": 2807.7427, "encoder_q-layer.8": 2629.6228, "encoder_q-layer.9": 2317.1113, "epoch": 0.96, "inbatch_neg_score": 0.1052, "inbatch_pos_score": 0.7935, "learning_rate": 1.0555555555555557e-06, "loss": 3.2806, "norm_diff": 0.1258, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4101.8644, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1058, "query_norm": 1.2721, "queue_k_norm": 1.3987, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.551, "sent_len_1": 66.7497, "sent_max_len_0": 128.0, "sent_max_len_1": 187.405, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98100 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.2699, "doc_norm": 1.3975, "encoder_q-embeddings": 2234.2698, "encoder_q-layer.0": 1572.8937, "encoder_q-layer.1": 1810.6398, "encoder_q-layer.10": 2365.6733, "encoder_q-layer.11": 5286.6001, "encoder_q-layer.2": 2041.4829, "encoder_q-layer.3": 2110.9978, "encoder_q-layer.4": 2363.6755, "encoder_q-layer.5": 2189.1035, "encoder_q-layer.6": 2272.6794, "encoder_q-layer.7": 2478.6938, "encoder_q-layer.8": 2575.9685, "encoder_q-layer.9": 2326.3105, "epoch": 0.96, "inbatch_neg_score": 0.1037, "inbatch_pos_score": 0.7891, "learning_rate": 1.0000000000000002e-06, "loss": 3.2699, "norm_diff": 0.1226, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3870.5605, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1046, "query_norm": 1.2749, "queue_k_norm": 1.3982, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.5135, "sent_len_1": 66.728, "sent_max_len_0": 128.0, "sent_max_len_1": 187.2312, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98200 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.2836, "doc_norm": 1.403, "encoder_q-embeddings": 1894.8911, "encoder_q-layer.0": 1248.5618, "encoder_q-layer.1": 1257.2511, "encoder_q-layer.10": 2492.5149, "encoder_q-layer.11": 5460.7695, "encoder_q-layer.2": 1406.8855, "encoder_q-layer.3": 1439.5496, "encoder_q-layer.4": 1553.7947, "encoder_q-layer.5": 1666.3668, "encoder_q-layer.6": 1867.2864, "encoder_q-layer.7": 2235.3198, "encoder_q-layer.8": 2535.4985, "encoder_q-layer.9": 2389.2419, "epoch": 0.96, "inbatch_neg_score": 0.1049, "inbatch_pos_score": 0.7988, "learning_rate": 9.444444444444445e-07, "loss": 3.2836, "norm_diff": 0.1371, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3565.8735, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1049, "query_norm": 1.2659, "queue_k_norm": 1.3986, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6492, "sent_len_1": 66.7876, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6075, "stdk": 0.0492, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98300 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.266, "doc_norm": 1.3992, "encoder_q-embeddings": 1782.4414, "encoder_q-layer.0": 1183.9645, "encoder_q-layer.1": 1235.172, "encoder_q-layer.10": 2381.7507, "encoder_q-layer.11": 5343.9233, "encoder_q-layer.2": 1374.7942, "encoder_q-layer.3": 1416.1882, "encoder_q-layer.4": 1495.1316, "encoder_q-layer.5": 1602.8687, "encoder_q-layer.6": 1784.006, "encoder_q-layer.7": 1997.0817, "encoder_q-layer.8": 2503.9648, "encoder_q-layer.9": 2332.0137, "epoch": 0.96, "inbatch_neg_score": 0.1061, "inbatch_pos_score": 0.7949, "learning_rate": 8.88888888888889e-07, "loss": 3.266, "norm_diff": 0.128, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3454.6268, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1064, "query_norm": 1.2711, "queue_k_norm": 1.3994, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.9836, "sent_len_1": 66.7962, "sent_max_len_0": 127.9925, "sent_max_len_1": 189.8025, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98400 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.2644, "doc_norm": 1.3956, "encoder_q-embeddings": 1813.3339, "encoder_q-layer.0": 1167.9039, "encoder_q-layer.1": 1207.3544, "encoder_q-layer.10": 2393.2458, "encoder_q-layer.11": 5224.6963, "encoder_q-layer.2": 1358.8246, "encoder_q-layer.3": 1418.4222, "encoder_q-layer.4": 1515.1876, "encoder_q-layer.5": 1534.4255, "encoder_q-layer.6": 1770.4972, "encoder_q-layer.7": 2001.4165, "encoder_q-layer.8": 2373.3367, "encoder_q-layer.9": 2231.9226, "epoch": 0.96, "inbatch_neg_score": 0.1054, "inbatch_pos_score": 0.7827, "learning_rate": 8.333333333333333e-07, "loss": 3.2644, "norm_diff": 0.1314, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3332.6784, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1054, "query_norm": 1.2642, "queue_k_norm": 1.4002, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.6355, "sent_len_1": 66.7721, "sent_max_len_0": 127.9975, "sent_max_len_1": 190.8713, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98500 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.2817, "doc_norm": 1.3987, "encoder_q-embeddings": 2126.4263, "encoder_q-layer.0": 1409.2025, "encoder_q-layer.1": 1500.0287, "encoder_q-layer.10": 2533.3037, "encoder_q-layer.11": 5420.833, "encoder_q-layer.2": 1766.8057, "encoder_q-layer.3": 1903.4827, "encoder_q-layer.4": 2098.0461, "encoder_q-layer.5": 2234.9062, "encoder_q-layer.6": 2569.1396, "encoder_q-layer.7": 2715.0571, "encoder_q-layer.8": 2757.0417, "encoder_q-layer.9": 2400.6665, "epoch": 0.96, "inbatch_neg_score": 0.1056, "inbatch_pos_score": 0.7881, "learning_rate": 7.777777777777778e-07, "loss": 3.2817, "norm_diff": 0.1232, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3864.3229, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1053, "query_norm": 1.2754, "queue_k_norm": 1.3972, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.4408, "sent_len_1": 66.9126, "sent_max_len_0": 128.0, "sent_max_len_1": 192.6213, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98600 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.2837, "doc_norm": 1.4022, "encoder_q-embeddings": 1854.9257, "encoder_q-layer.0": 1231.1116, "encoder_q-layer.1": 1287.8514, "encoder_q-layer.10": 2644.4312, "encoder_q-layer.11": 5423.7246, "encoder_q-layer.2": 1450.3877, "encoder_q-layer.3": 1491.6866, "encoder_q-layer.4": 1553.686, "encoder_q-layer.5": 1589.7645, "encoder_q-layer.6": 1869.4323, "encoder_q-layer.7": 2093.7236, "encoder_q-layer.8": 2635.0205, "encoder_q-layer.9": 2407.3088, "epoch": 0.96, "inbatch_neg_score": 0.106, "inbatch_pos_score": 0.791, "learning_rate": 7.222222222222222e-07, "loss": 3.2837, "norm_diff": 0.136, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3537.6461, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1051, "query_norm": 1.2662, "queue_k_norm": 1.3991, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5433, "sent_len_1": 66.7017, "sent_max_len_0": 127.99, "sent_max_len_1": 189.7363, "stdk": 0.0491, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98700 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.27, "doc_norm": 1.3958, "encoder_q-embeddings": 1952.7214, "encoder_q-layer.0": 1261.1794, "encoder_q-layer.1": 1357.0208, "encoder_q-layer.10": 2426.8269, "encoder_q-layer.11": 5522.3765, "encoder_q-layer.2": 1529.4674, "encoder_q-layer.3": 1567.2733, "encoder_q-layer.4": 1629.9137, "encoder_q-layer.5": 1703.7328, "encoder_q-layer.6": 1962.4313, "encoder_q-layer.7": 2191.4844, "encoder_q-layer.8": 2484.0166, "encoder_q-layer.9": 2269.54, "epoch": 0.96, "inbatch_neg_score": 0.105, "inbatch_pos_score": 0.7729, "learning_rate": 6.666666666666667e-07, "loss": 3.27, "norm_diff": 0.1331, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3534.1958, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1043, "query_norm": 1.2626, "queue_k_norm": 1.4003, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7862, "sent_len_1": 66.7881, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4387, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 98800 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.2621, "doc_norm": 1.3966, "encoder_q-embeddings": 1905.4164, "encoder_q-layer.0": 1195.3094, "encoder_q-layer.1": 1259.2531, "encoder_q-layer.10": 2715.3762, "encoder_q-layer.11": 5813.0752, "encoder_q-layer.2": 1435.9275, "encoder_q-layer.3": 1497.6841, "encoder_q-layer.4": 1583.5042, "encoder_q-layer.5": 1601.2906, "encoder_q-layer.6": 1841.8417, "encoder_q-layer.7": 2047.1523, "encoder_q-layer.8": 2672.9802, "encoder_q-layer.9": 2481.0649, "epoch": 0.97, "inbatch_neg_score": 0.1061, "inbatch_pos_score": 0.7769, "learning_rate": 6.111111111111112e-07, "loss": 3.2621, "norm_diff": 0.1314, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3633.885, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1054, "query_norm": 1.2653, "queue_k_norm": 1.3999, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5434, "sent_len_1": 66.776, "sent_max_len_0": 127.9975, "sent_max_len_1": 191.545, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98900 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.269, "doc_norm": 1.3968, "encoder_q-embeddings": 2423.5466, "encoder_q-layer.0": 1708.8193, "encoder_q-layer.1": 1802.0437, "encoder_q-layer.10": 2605.1204, "encoder_q-layer.11": 5482.3779, "encoder_q-layer.2": 2148.2197, "encoder_q-layer.3": 2194.6248, "encoder_q-layer.4": 2287.0222, "encoder_q-layer.5": 2272.3069, "encoder_q-layer.6": 2435.8435, "encoder_q-layer.7": 2449.0029, "encoder_q-layer.8": 2580.0452, "encoder_q-layer.9": 2275.5879, "epoch": 0.97, "inbatch_neg_score": 0.103, "inbatch_pos_score": 0.7812, "learning_rate": 5.555555555555556e-07, "loss": 3.269, "norm_diff": 0.1337, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3970.9692, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1036, "query_norm": 1.263, "queue_k_norm": 1.3996, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6917, "sent_len_1": 66.8237, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6538, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 99000 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.2806, "doc_norm": 1.4004, "encoder_q-embeddings": 4382.9595, "encoder_q-layer.0": 3036.6406, "encoder_q-layer.1": 3397.8398, "encoder_q-layer.10": 4933.8857, "encoder_q-layer.11": 10953.4229, "encoder_q-layer.2": 3908.8853, "encoder_q-layer.3": 3819.1536, "encoder_q-layer.4": 4086.2561, "encoder_q-layer.5": 4158.6104, "encoder_q-layer.6": 4469.1509, "encoder_q-layer.7": 4996.2725, "encoder_q-layer.8": 5436.2827, "encoder_q-layer.9": 4600.7959, "epoch": 0.97, "inbatch_neg_score": 0.1046, "inbatch_pos_score": 0.7808, "learning_rate": 5.000000000000001e-07, "loss": 3.2806, "norm_diff": 0.1416, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7727.1871, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1052, "query_norm": 1.2588, "queue_k_norm": 1.3985, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.341, "sent_len_1": 66.8575, "sent_max_len_0": 127.9963, "sent_max_len_1": 188.4238, "stdk": 0.0491, "stdq": 0.0447, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 99100 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.2752, "doc_norm": 1.3997, "encoder_q-embeddings": 3609.0688, "encoder_q-layer.0": 2381.6155, "encoder_q-layer.1": 2510.6331, "encoder_q-layer.10": 5104.0132, "encoder_q-layer.11": 11061.0537, "encoder_q-layer.2": 2771.7456, "encoder_q-layer.3": 2858.7502, "encoder_q-layer.4": 3019.71, "encoder_q-layer.5": 3145.124, "encoder_q-layer.6": 3512.4062, "encoder_q-layer.7": 4055.8918, "encoder_q-layer.8": 5095.4307, "encoder_q-layer.9": 4758.0977, "epoch": 0.97, "inbatch_neg_score": 0.1062, "inbatch_pos_score": 0.7676, "learning_rate": 4.444444444444445e-07, "loss": 3.2752, "norm_diff": 0.1414, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7048.7726, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.105, "query_norm": 1.2583, "queue_k_norm": 1.3992, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.4939, "sent_len_1": 66.8122, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0375, "stdk": 0.049, "stdq": 0.0447, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 99200 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.2778, "doc_norm": 1.4044, "encoder_q-embeddings": 6680.0864, "encoder_q-layer.0": 5024.5586, "encoder_q-layer.1": 5257.8164, "encoder_q-layer.10": 4779.7881, "encoder_q-layer.11": 11138.9258, "encoder_q-layer.2": 6897.7334, "encoder_q-layer.3": 7599.7227, "encoder_q-layer.4": 9612.79, "encoder_q-layer.5": 10676.5293, "encoder_q-layer.6": 11619.1074, "encoder_q-layer.7": 13020.1055, "encoder_q-layer.8": 9280.5908, "encoder_q-layer.9": 5133.8188, "epoch": 0.97, "inbatch_neg_score": 0.1061, "inbatch_pos_score": 0.8057, "learning_rate": 3.888888888888889e-07, "loss": 3.2778, "norm_diff": 0.1333, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12760.2667, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1058, "query_norm": 1.2711, "queue_k_norm": 1.3977, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.5499, "sent_len_1": 66.6016, "sent_max_len_0": 127.9988, "sent_max_len_1": 187.8438, "stdk": 0.0492, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 99300 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.2656, "doc_norm": 1.3971, "encoder_q-embeddings": 3417.8962, "encoder_q-layer.0": 2292.1616, "encoder_q-layer.1": 2448.2732, "encoder_q-layer.10": 4871.1519, "encoder_q-layer.11": 10461.0811, "encoder_q-layer.2": 2739.7156, "encoder_q-layer.3": 2813.594, "encoder_q-layer.4": 3119.4138, "encoder_q-layer.5": 3132.5046, "encoder_q-layer.6": 3623.0249, "encoder_q-layer.7": 4241.8511, "encoder_q-layer.8": 4869.5073, "encoder_q-layer.9": 4630.8975, "epoch": 0.97, "inbatch_neg_score": 0.1065, "inbatch_pos_score": 0.791, "learning_rate": 3.3333333333333335e-07, "loss": 3.2656, "norm_diff": 0.1256, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6773.3418, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1055, "query_norm": 1.2715, "queue_k_norm": 1.3976, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.6026, "sent_len_1": 66.7187, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3162, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 99400 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.2624, "doc_norm": 1.4014, "encoder_q-embeddings": 3875.2458, "encoder_q-layer.0": 2589.3462, "encoder_q-layer.1": 2809.4971, "encoder_q-layer.10": 4941.0288, "encoder_q-layer.11": 11403.2119, "encoder_q-layer.2": 3272.0269, "encoder_q-layer.3": 3470.1211, "encoder_q-layer.4": 3732.9839, "encoder_q-layer.5": 3881.2205, "encoder_q-layer.6": 4222.3184, "encoder_q-layer.7": 4514.0186, "encoder_q-layer.8": 5300.0176, "encoder_q-layer.9": 4776.2012, "epoch": 0.97, "inbatch_neg_score": 0.1037, "inbatch_pos_score": 0.7646, "learning_rate": 2.777777777777778e-07, "loss": 3.2624, "norm_diff": 0.1482, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7531.678, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.104, "query_norm": 1.2532, "queue_k_norm": 1.3967, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.6829, "sent_len_1": 66.7926, "sent_max_len_0": 128.0, "sent_max_len_1": 189.19, "stdk": 0.0491, "stdq": 0.0446, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99500 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.2712, "doc_norm": 1.4004, "encoder_q-embeddings": 3983.7495, "encoder_q-layer.0": 2741.6741, "encoder_q-layer.1": 2892.2185, "encoder_q-layer.10": 4688.7456, "encoder_q-layer.11": 10612.2979, "encoder_q-layer.2": 3180.9375, "encoder_q-layer.3": 3323.8469, "encoder_q-layer.4": 3602.0195, "encoder_q-layer.5": 3788.821, "encoder_q-layer.6": 4096.1245, "encoder_q-layer.7": 4641.5132, "encoder_q-layer.8": 5045.1924, "encoder_q-layer.9": 4595.6152, "epoch": 0.97, "inbatch_neg_score": 0.1058, "inbatch_pos_score": 0.7852, "learning_rate": 2.2222222222222224e-07, "loss": 3.2712, "norm_diff": 0.1313, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7141.3463, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1058, "query_norm": 1.2692, "queue_k_norm": 1.3977, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.5973, "sent_len_1": 66.8303, "sent_max_len_0": 127.9988, "sent_max_len_1": 190.5362, "stdk": 0.0491, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 99600 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.2584, "doc_norm": 1.3986, "encoder_q-embeddings": 3662.4373, "encoder_q-layer.0": 2488.657, "encoder_q-layer.1": 2721.6345, "encoder_q-layer.10": 5619.1855, "encoder_q-layer.11": 11133.8398, "encoder_q-layer.2": 3004.6956, "encoder_q-layer.3": 3025.5488, "encoder_q-layer.4": 3270.2295, "encoder_q-layer.5": 3387.7478, "encoder_q-layer.6": 3882.252, "encoder_q-layer.7": 4830.5898, "encoder_q-layer.8": 5707.9595, "encoder_q-layer.9": 4953.104, "epoch": 0.97, "inbatch_neg_score": 0.1048, "inbatch_pos_score": 0.7827, "learning_rate": 1.6666666666666668e-07, "loss": 3.2584, "norm_diff": 0.1297, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7249.2643, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1041, "query_norm": 1.269, "queue_k_norm": 1.3996, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 45.8596, "sent_len_1": 66.7165, "sent_max_len_0": 127.9938, "sent_max_len_1": 189.26, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 99700 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.2677, "doc_norm": 1.3919, "encoder_q-embeddings": 3730.5596, "encoder_q-layer.0": 2449.4915, "encoder_q-layer.1": 2540.5957, "encoder_q-layer.10": 5124.4976, "encoder_q-layer.11": 11136.1084, "encoder_q-layer.2": 2911.697, "encoder_q-layer.3": 3004.3298, "encoder_q-layer.4": 3177.1663, "encoder_q-layer.5": 3403.8247, "encoder_q-layer.6": 3741.9961, "encoder_q-layer.7": 4415.064, "encoder_q-layer.8": 4992.0322, "encoder_q-layer.9": 4849.7832, "epoch": 0.97, "inbatch_neg_score": 0.1048, "inbatch_pos_score": 0.7822, "learning_rate": 1.1111111111111112e-07, "loss": 3.2677, "norm_diff": 0.1294, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7181.9511, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1049, "query_norm": 1.2624, "queue_k_norm": 1.399, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 45.687, "sent_len_1": 66.8263, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2188, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 99800 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.2628, "doc_norm": 1.4028, "encoder_q-embeddings": 3722.0066, "encoder_q-layer.0": 2512.7141, "encoder_q-layer.1": 2693.5071, "encoder_q-layer.10": 4757.2422, "encoder_q-layer.11": 10508.1172, "encoder_q-layer.2": 2957.8098, "encoder_q-layer.3": 3133.302, "encoder_q-layer.4": 3339.2896, "encoder_q-layer.5": 3811.4399, "encoder_q-layer.6": 4396.4971, "encoder_q-layer.7": 4764.1206, "encoder_q-layer.8": 5420.6021, "encoder_q-layer.9": 4758.4229, "epoch": 0.98, "inbatch_neg_score": 0.105, "inbatch_pos_score": 0.8008, "learning_rate": 5.555555555555556e-08, "loss": 3.2628, "norm_diff": 0.1344, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7160.2962, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.105, "query_norm": 1.2684, "queue_k_norm": 1.3982, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 45.5063, "sent_len_1": 66.8196, "sent_max_len_0": 128.0, "sent_max_len_1": 189.425, "stdk": 0.0491, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 99900 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.2664, "doc_norm": 1.4013, "encoder_q-embeddings": 3700.3794, "encoder_q-layer.0": 2376.4387, "encoder_q-layer.1": 2576.1658, "encoder_q-layer.10": 4840.7241, "encoder_q-layer.11": 10562.8711, "encoder_q-layer.2": 2896.0662, "encoder_q-layer.3": 2899.2732, "encoder_q-layer.4": 3065.4753, "encoder_q-layer.5": 3144.5378, "encoder_q-layer.6": 3491.2368, "encoder_q-layer.7": 3921.6797, "encoder_q-layer.8": 4654.332, "encoder_q-layer.9": 4469.7256, "epoch": 0.98, "inbatch_neg_score": 0.106, "inbatch_pos_score": 0.7788, "learning_rate": 0.0, "loss": 3.2664, "norm_diff": 0.1389, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6836.2659, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1051, "query_norm": 1.2624, "queue_k_norm": 1.3995, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 45.7387, "sent_len_1": 66.8044, "sent_max_len_0": 127.995, "sent_max_len_1": 188.425, "stdk": 0.0491, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 100000 }, { "dev_runtime": 26.8042, "dev_samples_per_second": 2.388, "dev_steps_per_second": 0.037, "epoch": 0.98, "step": 100000, "test_accuracy": 94.00634765625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3432139754295349, "test_doc_norm": 1.3699827194213867, "test_inbatch_neg_score": 0.4710500240325928, "test_inbatch_pos_score": 1.4118424654006958, "test_loss": 0.3432139754295349, "test_loss_align": 0.9544433951377869, "test_loss_unif": 3.956712484359741, "test_loss_unif_q@queue": 3.956712484359741, "test_norm_diff": 0.009124305099248886, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.10810625553131104, "test_query_norm": 1.3677408695220947, "test_queue_k_norm": 1.3988842964172363, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04262939840555191, "test_stdq": 0.042521826922893524, "test_stdqueue_k": 0.04910757392644882, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.8042, "dev_samples_per_second": 2.388, "dev_steps_per_second": 0.037, "epoch": 0.98, "eval_beir-arguana_ndcg@10": 0.37881, "eval_beir-arguana_recall@10": 0.63514, "eval_beir-arguana_recall@100": 0.9175, "eval_beir-arguana_recall@20": 0.76174, "eval_beir-avg_ndcg@10": 0.3840631666666666, "eval_beir-avg_recall@10": 0.45305324999999996, "eval_beir-avg_recall@100": 0.6314795, "eval_beir-avg_recall@20": 0.5145639166666667, "eval_beir-cqadupstack_ndcg@10": 0.2803316666666667, "eval_beir-cqadupstack_recall@10": 0.3765525, "eval_beir-cqadupstack_recall@100": 0.604765, "eval_beir-cqadupstack_recall@20": 0.4442291666666666, "eval_beir-fiqa_ndcg@10": 0.25271, "eval_beir-fiqa_recall@10": 0.31547, "eval_beir-fiqa_recall@100": 0.57736, "eval_beir-fiqa_recall@20": 0.39221, "eval_beir-nfcorpus_ndcg@10": 0.29721, "eval_beir-nfcorpus_recall@10": 0.1453, "eval_beir-nfcorpus_recall@100": 0.27314, "eval_beir-nfcorpus_recall@20": 0.17684, "eval_beir-nq_ndcg@10": 0.28932, "eval_beir-nq_recall@10": 0.47579, "eval_beir-nq_recall@100": 0.80159, "eval_beir-nq_recall@20": 0.59195, "eval_beir-quora_ndcg@10": 0.81279, "eval_beir-quora_recall@10": 0.9098, "eval_beir-quora_recall@100": 0.98333, "eval_beir-quora_recall@20": 0.94365, "eval_beir-scidocs_ndcg@10": 0.1582, "eval_beir-scidocs_recall@10": 0.16598, "eval_beir-scidocs_recall@100": 0.37162, "eval_beir-scidocs_recall@20": 0.22317, "eval_beir-scifact_ndcg@10": 0.63462, "eval_beir-scifact_recall@10": 0.77344, "eval_beir-scifact_recall@100": 0.90322, "eval_beir-scifact_recall@20": 0.84122, "eval_beir-trec-covid_ndcg@10": 0.5527, "eval_beir-trec-covid_recall@10": 0.6, "eval_beir-trec-covid_recall@100": 0.4402, "eval_beir-trec-covid_recall@20": 0.581, "eval_beir-webis-touche2020_ndcg@10": 0.18394, "eval_beir-webis-touche2020_recall@10": 0.13306, "eval_beir-webis-touche2020_recall@100": 0.44207, "eval_beir-webis-touche2020_recall@20": 0.18963, "eval_senteval-avg_sts": 0.7659987677923039, "eval_senteval-sickr_spearman": 0.7309801454276812, "eval_senteval-stsb_spearman": 0.8010173901569266, "step": 100000, "test_accuracy": 94.00634765625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3432139754295349, "test_doc_norm": 1.3699827194213867, "test_inbatch_neg_score": 0.4710500240325928, "test_inbatch_pos_score": 1.4118424654006958, "test_loss": 0.3432139754295349, "test_loss_align": 0.9544433951377869, "test_loss_unif": 3.956712484359741, "test_loss_unif_q@queue": 3.956712484359741, "test_norm_diff": 0.009124305099248886, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.10810625553131104, "test_query_norm": 1.3677408695220947, "test_queue_k_norm": 1.3988842964172363, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04262939840555191, "test_stdq": 0.042521826922893524, "test_stdqueue_k": 0.04910757392644882, "test_stdqueue_q": 0.0 }, { "epoch": 0.98, "step": 100000, "total_flos": 0, "train_runtime": 75126.7844, "train_samples_per_second": 1.331 } ], "max_steps": 100000, "num_train_epochs": 1, "total_flos": 0, "trial_name": null, "trial_params": null }