Training in progress, epoch 11
Browse files- logs/events.out.tfevents.1715675319.sphinx2 +2 -2
- model.safetensors +1 -1
- train_job_output.txt +31 -1
logs/events.out.tfevents.1715675319.sphinx2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2acc52a51e079256a9579908f4b199f5e1bc1ffe67916df7dc8ef207ecfd195d
|
3 |
+
size 75324
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 281715176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52150a6ebc81dad193b3cae38fff2e4eb54f171b9235b36e52968c357caf53dd
|
3 |
size 281715176
|
train_job_output.txt
CHANGED
@@ -368,4 +368,34 @@ command outputs:
|
|
368 |
|
369 |
71%|βββββββ | 7575/10682 [1:09:53<27:49, 1.86it/s]
|
370 |
71%|βββββββ | 7576/10682 [1:09:53<27:09, 1.91it/s]
|
371 |
71%|βββββββ | 7577/10682 [1:09:54<26:41, 1.94it/s]
|
372 |
71%|βββββββ | 7578/10682 [1:09:54<26:20, 1.96it/s]
|
373 |
71%|βββββββ | 7579/10682 [1:09:55<26:07, 1.98it/s]
|
374 |
71%|βββββββ | 7580/10682 [1:09:55<25:55, 1.99it/s]
|
375 |
71%|βββββββ | 7581/10682 [1:09:56<25:48, 2.00it/s]
|
376 |
71%|βββββββ | 7582/10682 [1:09:56<25:43, 2.01it/s]
|
377 |
71%|βββββββ | 7583/10682 [1:09:57<25:40, 2.01it/s]
|
378 |
71%|βββββββ | 7584/10682 [1:09:57<25:37, 2.02it/s]
|
379 |
71%|βββββββ | 7585/10682 [1:09:58<25:34, 2.02it/s]
|
380 |
71%|βββββββ | 7586/10682 [1:09:58<25:33, 2.02it/s]
|
381 |
71%|βββββββ | 7587/10682 [1:09:59<25:30, 2.02it/s]
|
382 |
71%|βββββββ | 7588/10682 [1:09:59<25:30, 2.02it/s]
|
383 |
71%|βββββββ | 7589/10682 [1:10:00<25:28, 2.02it/s]
|
384 |
71%|βββββββ | 7590/10682 [1:10:00<25:29, 2.02it/s]
|
385 |
71%|βββββββ | 7591/10682 [1:10:01<25:28, 2.02it/s]
|
386 |
71%|βββββββ | 7592/10682 [1:10:01<25:28, 2.02it/s]
|
387 |
71%|βββββββ | 7593/10682 [1:10:02<25:27, 2.02it/s]
|
388 |
71%|βββββββ | 7594/10682 [1:10:02<25:26, 2.02it/s]
|
389 |
71%|βββββββ | 7595/10682 [1:10:03<25:24, 2.03it/s]
|
390 |
71%|βββββββ | 7596/10682 [1:10:03<25:24, 2.02it/s]
|
391 |
71%|βββββββ | 7597/10682 [1:10:04<25:24, 2.02it/s]
|
392 |
71%|βββββββ | 7598/10682 [1:10:04<25:21, 2.03it/s]
|
393 |
71%|βββββββ | 7599/10682 [1:10:05<25:23, 2.02it/s]
|
394 |
71%|βββββββ | 7600/10682 [1:10:05<25:20, 2.03it/s]{'loss': 2.9745, 'grad_norm': 0.24363452196121216, 'learning_rate': 0.00023289281143540065, 'epoch': 9.96}
|
395 |
|
396 |
|
397 |
71%|βββββββ | 7600/10682 [1:10:05<25:20, 2.03it/s]
|
398 |
71%|βββββββ | 7601/10682 [1:10:06<25:24, 2.02it/s]
|
399 |
71%|βββββββ | 7602/10682 [1:10:06<25:23, 2.02it/s]
|
400 |
71%|βββββββ | 7603/10682 [1:10:07<25:23, 2.02it/s]
|
401 |
71%|βββββββ | 7604/10682 [1:10:07<25:23, 2.02it/s]
|
402 |
71%|βββββββ | 7605/10682 [1:10:08<25:22, 2.02it/s]
|
403 |
71%|βββββββ | 7606/10682 [1:10:08<25:22, 2.02it/s]
|
404 |
71%|βββββββ | 7607/10682 [1:10:09<25:20, 2.02it/s]
|
405 |
71%|βββββββ | 7608/10682 [1:10:09<25:20, 2.02it/s]
|
406 |
71%|βββββββ | 7609/10682 [1:10:10<25:19, 2.02it/s]
|
407 |
71%|βββββββ | 7610/10682 [1:10:10<25:20, 2.02it/s]
|
408 |
71%|ββββββββ | 7611/10682 [1:10:11<25:19, 2.02it/s]
|
409 |
71%|ββββββββ | 7612/10682 [1:10:11<25:17, 2.02it/s]
|
410 |
71%|ββββββββ | 7613/10682 [1:10:12<25:17, 2.02it/s]
|
411 |
71%|ββββββββ | 7614/10682 [1:10:12<25:15, 2.02it/s]
|
412 |
71%|ββββββββ | 7615/10682 [1:10:13<25:15, 2.02it/s]
|
413 |
71%|ββββββββ | 7616/10682 [1:10:13<25:14, 2.02it/s]
|
414 |
71%|ββββββββ | 7617/10682 [1:10:14<25:15, 2.02it/s]
|
415 |
71%|ββββββββ | 7618/10682 [1:10:14<25:13, 2.02it/s]
|
416 |
71%|ββββββββ | 7619/10682 [1:10:15<25:14, 2.02it/s]
|
417 |
71%|ββββββββ | 7620/10682 [1:10:15<25:12, 2.02it/s]
|
418 |
71%|ββββββββ | 7621/10682 [1:10:16<25:13, 2.02it/s]
|
419 |
71%|ββββββββ | 7622/10682 [1:10:16<25:12, 2.02it/s]
|
420 |
71%|ββββββββ | 7623/10682 [1:10:17<25:13, 2.02it/s]
|
421 |
71%|ββββββββ | 7624/10682 [1:10:17<25:11, 2.02it/s]
|
422 |
71%|ββββββββ | 7625/10682 [1:10:18<25:12, 2.02it/s]
|
423 |
{'loss': 2.9725, 'grad_norm': 0.23881086707115173, 'learning_rate': 0.00022944844326774121, 'epoch': 9.99}
|
424 |
|
425 |
71%|ββββββββ | 7625/10682 [1:10:18<25:12, 2.02it/s]
|
426 |
71%|ββββββββ | 7626/10682 [1:10:18<25:11, 2.02it/s]
|
427 |
71%|ββββββββ | 7627/10682 [1:10:19<25:11, 2.02it/s]
|
428 |
71%|ββββββββ | 7628/10682 [1:10:19<25:10, 2.02it/s]
|
429 |
71%|ββββββββ | 7629/10682 [1:10:20<25:10, 2.02it/s]
|
430 |
71%|ββββββββ | 7630/10682 [1:10:20<24:52, 2.05it/s]
|
431 |
71%|ββββββββ | 7631/10682 [1:10:32<3:25:31, 4.04s/it]
|
432 |
71%|ββββββββ | 7632/10682 [1:10:33<2:31:22, 2.98s/it]
|
433 |
71%|ββββββββ | 7633/10682 [1:10:33<1:53:26, 2.23s/it]
|
434 |
71%|ββββββββ | 7634/10682 [1:10:34<1:27:02, 1.71s/it]
|
435 |
71%|ββββββββ | 7635/10682 [1:10:34<1:08:32, 1.35s/it]
|
436 |
71%|ββββββββ | 7636/10682 [1:10:35<55:29, 1.09s/it]
|
437 |
71%|ββββββββ | 7637/10682 [1:10:35<46:26, 1.09it/s]
|
438 |
72%|ββββββββ | 7638/10682 [1:10:36<40:07, 1.26it/s]
|
439 |
72%|ββββββββ | 7639/10682 [1:10:36<35:36, 1.42it/s]
|
440 |
72%|ββββββββ | 7640/10682 [1:10:37<32:30, 1.56it/s]
|
441 |
72%|ββββββββ | 7641/10682 [1:10:37<30:15, 1.67it/s]
|
442 |
72%|ββββββββ | 7642/10682 [1:10:38<28:43, 1.76it/s]
|
443 |
72%|ββββββββ | 7643/10682 [1:10:38<27:37, 1.83it/s]
|
444 |
72%|ββββββββ | 7644/10682 [1:10:39<26:52, 1.88it/s]
|
445 |
72%|ββββββββ | 7645/10682 [1:10:39<26:22, 1.92it/s]
|
446 |
72%|ββββββββ | 7646/10682 [1:10:40<25:58, 1.95it/s]
|
447 |
72%|ββββββββ | 7647/10682 [1:10:40<25:44, 1.96it/s]
|
448 |
72%|ββββββββ | 7648/10682 [1:10:41<25:34, 1.98it/s]
|
449 |
72%|ββββββββ | 7649/10682 [1:10:41<25:23, 1.99it/s]
|
450 |
72%|ββββββββ | 7650/10682 [1:10:42<25:18, 2.00it/s]{'loss': 2.8655, 'grad_norm': 0.2524799108505249, 'learning_rate': 0.00022602213475715589, 'epoch': 10.03}
|
451 |
-
|
452 |
|
453 |
72%|ββββββββ | 7650/10682 [1:10:42<25:18, 2.00it/s]
|
454 |
72%|ββββββββ | 7651/10682 [1:10:42<25:20, 1.99it/s]
|
455 |
72%|ββββββββ | 7652/10682 [1:10:43<25:13, 2.00it/s]
|
456 |
72%|ββββββββ | 7653/10682 [1:10:43<27:12, 1.86it/s]
|
457 |
72%|ββββββββ | 7654/10682 [1:10:44<26:30, 1.90it/s]
|
458 |
72%|ββββββββ | 7655/10682 [1:10:44<26:02, 1.94it/s]
|
|
|
459 |
|
460 |
72%|ββββββββ | 7650/10682 [1:10:42<25:18, 2.00it/s]
|
461 |
72%|ββββββββ | 7651/10682 [1:10:42<25:20, 1.99it/s]
|
462 |
72%|ββββββββ | 7652/10682 [1:10:43<25:13, 2.00it/s]
|
463 |
72%|ββββββββ | 7653/10682 [1:10:43<27:12, 1.86it/s]
|
464 |
72%|ββββββββ | 7654/10682 [1:10:44<26:30, 1.90it/s]
|
465 |
72%|ββββββββ | 7655/10682 [1:10:44<26:02, 1.94it/s]
|
466 |
72%|ββββββββ | 7656/10682 [1:10:45<25:42, 1.96it/s]
|
467 |
72%|ββββββββ | 7657/10682 [1:10:45<25:27, 1.98it/s]
|
468 |
72%|ββββββββ | 7658/10682 [1:10:46<25:19, 1.99it/s]
|
469 |
72%|ββββββββ | 7659/10682 [1:10:46<25:11, 2.00it/s]
|
470 |
72%|ββββββββ | 7660/10682 [1:10:47<25:06, 2.01it/s]
|
471 |
72%|ββββββββ | 7661/10682 [1:10:47<25:03, 2.01it/s]
|
472 |
72%|ββββββββ | 7662/10682 [1:10:48<24:59, 2.01it/s]
|
473 |
72%|ββββββββ | 7663/10682 [1:10:48<24:57, 2.02it/s]
|
474 |
72%|ββββββββ | 7664/10682 [1:10:49<24:55, 2.02it/s]
|
475 |
72%|ββββββββ | 7665/10682 [1:10:49<24:52, 2.02it/s]
|
476 |
72%|ββββββββ | 7666/10682 [1:10:50<24:51, 2.02it/s]
|
477 |
72%|ββββββββ | 7667/10682 [1:10:50<24:50, 2.02it/s]
|
478 |
72%|ββββββββ | 7668/10682 [1:10:51<24:51, 2.02it/s]
|
479 |
72%|ββββββββ | 7669/10682 [1:10:51<24:50, 2.02it/s]
|
480 |
72%|ββββββββ | 7670/10682 [1:10:52<24:52, 2.02it/s]
|
481 |
72%|ββββββββ | 7671/10682 [1:10:52<24:51, 2.02it/s]
|
482 |
72%|ββββββββ | 7672/10682 [1:10:53<24:50, 2.02it/s]
|
483 |
72%|ββββββββ | 7673/10682 [1:10:53<24:49, 2.02it/s]
|
484 |
72%|ββββββββ | 7674/10682 [1:10:54<24:49, 2.02it/s]
|
485 |
72%|ββββββββ | 7675/10682 [1:10:54<24:47, 2.02it/s]
|
486 |
|
|
|
487 |
72%|ββββββββ | 7675/10682 [1:10:54<24:47, 2.02it/s]
|
488 |
72%|ββββββββ | 7676/10682 [1:10:55<24:50, 2.02it/s]
|
489 |
72%|ββββββββ | 7677/10682 [1:10:55<24:48, 2.02it/s]
|
490 |
72%|ββββββββ | 7678/10682 [1:10:56<24:49, 2.02it/s]
|
491 |
72%|ββββββββ | 7679/10682 [1:10:56<24:47, 2.02it/s]
|
492 |
72%|ββββββββ | 7680/10682 [1:10:57<24:47, 2.02it/s]
|
493 |
72%|ββββββββ | 7681/10682 [1:10:57<24:44, 2.02it/s]
|
494 |
72%|ββββββββ | 7682/10682 [1:10:58<24:46, 2.02it/s]
|
495 |
72%|ββββββββ | 7683/10682 [1:10:58<24:46, 2.02it/s]
|
496 |
72%|ββββββββ | 7684/10682 [1:10:59<24:46, 2.02it/s]
|
497 |
72%|ββββββββ | 7685/10682 [1:10:59<24:47, 2.01it/s]
|
498 |
72%|ββββββββ | 7686/10682 [1:11:00<24:45, 2.02it/s]
|
499 |
72%|ββββββββ | 7687/10682 [1:11:00<24:43, 2.02it/s]
|
500 |
72%|ββββββββ | 7688/10682 [1:11:01<24:42, 2.02it/s]
|
501 |
72%|ββββββββ | 7689/10682 [1:11:01<24:42, 2.02it/s]
|
502 |
72%|ββββββββ | 7690/10682 [1:11:02<24:41, 2.02it/s]
|
503 |
72%|ββββββββ | 7691/10682 [1:11:02<24:40, 2.02it/s]
|
504 |
72%|ββββββββ | 7692/10682 [1:11:03<24:40, 2.02it/s]
|
505 |
72%|ββββββββ | 7693/10682 [1:11:03<24:39, 2.02it/s]
|
506 |
72%|ββββββββ | 7694/10682 [1:11:04<24:38, 2.02it/s]
|
507 |
72%|ββββββββ | 7695/10682 [1:11:04<24:36, 2.02it/s]
|
508 |
72%|ββββββββ | 7696/10682 [1:11:05<24:36, 2.02it/s]
|
509 |
72%|ββββββββ | 7697/10682 [1:11:05<24:36, 2.02it/s]
|
510 |
72%|ββββββββ | 7698/10682 [1:11:06<24:35, 2.02it/s]
|
511 |
72%|ββββββββ | 7699/10682 [1:11:06<24:33, 2.02it/s]
|
512 |
72%|ββββββββ | 7700/10682 [1:11:07<24:33, 2.02it/s]
|
513 |
|
|
|
514 |
72%|ββββββββ | 7700/10682 [1:11:07<24:33, 2.02it/s]
|
515 |
72%|ββββββββ | 7701/10682 [1:11:07<24:34, 2.02it/s]
|
516 |
72%|ββββββββ | 7702/10682 [1:11:08<24:34, 2.02it/s]
|
517 |
72%|ββββββββ | 7703/10682 [1:11:08<24:32, 2.02it/s]
|
518 |
72%|ββββββββ | 7704/10682 [1:11:09<24:34, 2.02it/s]
|
519 |
72%|ββββββββ | 7705/10682 [1:11:09<24:33, 2.02it/s]
|
520 |
72%|ββββββββ | 7706/10682 [1:11:10<24:33, 2.02it/s]
|
521 |
72%|ββββββββ | 7707/10682 [1:11:10<24:30, 2.02it/s]
|
522 |
72%|ββββββββ | 7708/10682 [1:11:11<24:30, 2.02it/s]
|
523 |
72%|ββββββββ | 7709/10682 [1:11:11<24:28, 2.02it/s]
|
524 |
72%|ββββββββ | 7710/10682 [1:11:12<24:28, 2.02it/s]
|
525 |
72%|ββββββββ | 7711/10682 [1:11:12<24:28, 2.02it/s]
|
526 |
72%|ββββββββ | 7712/10682 [1:11:13<24:27, 2.02it/s]
|
527 |
72%|ββββββββ | 7713/10682 [1:11:13<24:28, 2.02it/s]
|
528 |
72%|ββββββββ | 7714/10682 [1:11:14<24:25, 2.02it/s]
|
529 |
72%|ββββββββ | 7715/10682 [1:11:14<24:27, 2.02it/s]
|
530 |
72%|ββββββββ | 7716/10682 [1:11:15<24:27, 2.02it/s]
|
531 |
72%|ββββββββ | 7717/10682 [1:11:15<24:28, 2.02it/s]
|
532 |
72%|ββββββββ | 7718/10682 [1:11:16<24:27, 2.02it/s]
|
533 |
72%|ββββββββ | 7719/10682 [1:11:16<24:28, 2.02it/s]
|
534 |
72%|ββββββββ | 7720/10682 [1:11:17<24:27, 2.02it/s]
|
535 |
72%|ββββββββ | 7721/10682 [1:11:17<24:26, 2.02it/s]
|
536 |
72%|ββββββββ | 7722/10682 [1:11:18<24:24, 2.02it/s]
|
537 |
72%|ββββββββ | 7723/10682 [1:11:18<24:22, 2.02it/s]
|
538 |
72%|ββββββββ | 7724/10682 [1:11:19<24:22, 2.02it/s]
|
539 |
72%|ββββββββ | 7725/10682 [1:11:19<24:20, 2.02it/s]
|
540 |
|
|
|
541 |
72%|ββββββββ | 7725/10682 [1:11:19<24:20, 2.02it/s]
|
542 |
72%|ββββββββ | 7726/10682 [1:11:20<24:23, 2.02it/s]
|
543 |
72%|ββββββββ | 7727/10682 [1:11:20<24:21, 2.02it/s]
|
544 |
72%|ββββββββ | 7728/10682 [1:11:21<24:21, 2.02it/s]
|
545 |
72%|ββββββββ | 7729/10682 [1:11:21<24:19, 2.02it/s]
|
546 |
72%|ββββββββ | 7730/10682 [1:11:22<24:21, 2.02it/s]
|
547 |
72%|ββββββββ | 7731/10682 [1:11:22<24:20, 2.02it/s]
|
548 |
72%|ββββββββ | 7732/10682 [1:11:23<24:20, 2.02it/s]
|
549 |
72%|ββββββββ | 7733/10682 [1:11:23<24:17, 2.02it/s]
|
550 |
72%|ββββββββ | 7734/10682 [1:11:24<24:17, 2.02it/s]
|
551 |
72%|ββββββββ | 7735/10682 [1:11:24<24:16, 2.02it/s]
|
552 |
72%|ββββββββ | 7736/10682 [1:11:24<24:17, 2.02it/s]
|
553 |
72%|ββββββββ | 7737/10682 [1:11:25<24:15, 2.02it/s]
|
554 |
72%|ββββββββ | 7738/10682 [1:11:25<24:16, 2.02it/s]
|
555 |
72%|ββββββββ | 7739/10682 [1:11:26<24:15, 2.02it/s]
|
556 |
72%|ββββββββ | 7740/10682 [1:11:26<24:16, 2.02it/s]
|
557 |
72%|ββββββββ | 7741/10682 [1:11:27<24:15, 2.02it/s]
|
558 |
72%|ββββββββ | 7742/10682 [1:11:27<24:15, 2.02it/s]
|
559 |
72%|ββββββββ | 7743/10682 [1:11:28<24:13, 2.02it/s]
|
560 |
72%|ββββββββ | 7744/10682 [1:11:28<24:14, 2.02it/s]
|
561 |
73%|ββββββββ | 7745/10682 [1:11:29<24:13, 2.02it/s]
|
562 |
73%|ββββββββ | 7746/10682 [1:11:29<24:13, 2.02it/s]
|
563 |
73%|ββββββββ | 7747/10682 [1:11:30<24:12, 2.02it/s]
|
564 |
73%|ββββββββ | 7748/10682 [1:11:30<24:12, 2.02it/s]
|
565 |
73%|ββββββββ | 7749/10682 [1:11:31<24:10, 2.02it/s]
|
566 |
73%|ββββββββ | 7750/10682 [1:11:31<24:11, 2.02it/s]
|
567 |
{'loss': 2.8612, 'grad_norm': 0.24724432826042175, 'learning_rate': 0.00021250205309110155, 'epoch': 10.16}
|
|
|
568 |
73%|ββββββββ | 7750/10682 [1:11:31<24:11, 2.02it/s]
|
569 |
73%|ββββββββ | 7751/10682 [1:11:32<24:12, 2.02it/s]
|
570 |
73%|ββββββββ | 7752/10682 [1:11:32<24:10, 2.02it/s]
|
571 |
73%|ββββββββ | 7753/10682 [1:11:33<24:10, 2.02it/s]
|
572 |
73%|ββββββββ | 7754/10682 [1:11:33<24:08, 2.02it/s]
|
573 |
73%|ββββββββ | 7755/10682 [1:11:34<24:07, 2.02it/s]
|
574 |
73%|ββββββββ | 7756/10682 [1:11:34<24:07, 2.02it/s]
|
575 |
73%|ββββββββ | 7757/10682 [1:11:35<24:06, 2.02it/s]
|
576 |
73%|ββββββββ | 7758/10682 [1:11:35<24:06, 2.02it/s]
|
577 |
73%|ββββββββ | 7759/10682 [1:11:36<24:05, 2.02it/s]
|
578 |
73%|ββββββββ | 7760/10682 [1:11:36<24:04, 2.02it/s]
|
579 |
73%|ββββββββ | 7761/10682 [1:11:37<24:04, 2.02it/s]
|
580 |
73%|ββββββββ | 7762/10682 [1:11:37<24:02, 2.02it/s]
|
581 |
73%|ββββββββ | 7763/10682 [1:11:38<24:03, 2.02it/s]
|
582 |
73%|ββββββββ | 7764/10682 [1:11:38<24:03, 2.02it/s]
|
583 |
73%|ββββββββ | 7765/10682 [1:11:39<24:03, 2.02it/s]
|
584 |
73%|ββββββββ | 7766/10682 [1:11:39<24:02, 2.02it/s]
|
585 |
73%|ββββββββ | 7767/10682 [1:11:40<24:02, 2.02it/s]
|
586 |
73%|ββββββββ | 7768/10682 [1:11:40<24:01, 2.02it/s]
|
587 |
73%|ββββββββ | 7769/10682 [1:11:41<24:00, 2.02it/s]
|
588 |
73%|ββββββββ | 7770/10682 [1:11:41<23:59, 2.02it/s]
|
589 |
73%|ββββββββ | 7771/10682 [1:11:42<23:59, 2.02it/s]
|
590 |
73%|ββββββββ | 7772/10682 [1:11:42<23:58, 2.02it/s]
|
591 |
73%|ββββββββ | 7773/10682 [1:11:43<23:57, 2.02it/s]
|
592 |
73%|ββββββββ | 7774/10682 [1:11:43<23:59, 2.02it/s]
|
593 |
73%|ββββββββ | 7775/10682 [1:11:44<23:58, 2.02it/s]{'loss': 2.8622, 'grad_norm': 0.2527807056903839, 'learning_rate': 0.00020916944887928359, 'epoch': 10.19}
|
|
|
594 |
|
595 |
73%|ββββββββ | 7775/10682 [1:11:44<23:58, 2.02it/s]
|
596 |
73%|ββββββββ | 7776/10682 [1:11:44<23:59, 2.02it/s]
|
597 |
73%|ββββββββ | 7777/10682 [1:11:45<24:00, 2.02it/s]
|
598 |
73%|ββββββββ | 7778/10682 [1:11:45<23:58, 2.02it/s]
|
599 |
73%|ββββββββ | 7779/10682 [1:11:46<23:59, 2.02it/s]
|
600 |
73%|ββββββββ | 7780/10682 [1:11:46<23:58, 2.02it/s]
|
601 |
73%|ββββββββ | 7781/10682 [1:11:47<23:58, 2.02it/s]
|
602 |
73%|ββββββββ | 7782/10682 [1:11:47<23:56, 2.02it/s]
|
603 |
73%|ββββββββ | 7783/10682 [1:11:48<23:55, 2.02it/s]
|
604 |
73%|ββββββββ | 7784/10682 [1:11:48<23:54, 2.02it/s]
|
605 |
73%|ββββββββ | 7785/10682 [1:11:49<23:54, 2.02it/s]
|
606 |
73%|ββββββββ | 7786/10682 [1:11:49<23:52, 2.02it/s]
|
607 |
73%|ββββββββ | 7787/10682 [1:11:50<23:52, 2.02it/s]
|
608 |
73%|ββββββββ | 7788/10682 [1:11:50<23:51, 2.02it/s]
|
609 |
73%|ββββββββ | 7789/10682 [1:11:51<23:50, 2.02it/s]
|
610 |
73%|ββββββββ | 7790/10682 [1:11:51<23:54, 2.02it/s]
|
611 |
73%|ββββββββ | 7791/10682 [1:11:52<23:51, 2.02it/s]
|
612 |
73%|ββββββββ | 7792/10682 [1:11:52<23:50, 2.02it/s]
|
613 |
73%|ββββββββ | 7793/10682 [1:11:53<23:49, 2.02it/s]
|
614 |
73%|ββββββββ | 7794/10682 [1:11:53<23:48, 2.02it/s]
|
615 |
73%|ββββββββ | 7795/10682 [1:11:54<23:48, 2.02it/s]
|
616 |
73%|ββββββββ | 7796/10682 [1:11:54<23:46, 2.02it/s]
|
617 |
73%|ββββββββ | 7797/10682 [1:11:55<23:46, 2.02it/s]
|
618 |
73%|ββββββββ | 7798/10682 [1:11:55<23:47, 2.02it/s]
|
619 |
73%|ββββββββ | 7799/10682 [1:11:56<23:47, 2.02it/s]
|
620 |
73%|ββββββββ | 7800/10682 [1:11:56<23:45, 2.02it/s]
|
621 |
{'loss': 2.8737, 'grad_norm': 0.24947026371955872, 'learning_rate': 0.00020585625797294927, 'epoch': 10.22}
|
|
|
622 |
73%|ββββββββ | 7800/10682 [1:11:56<23:45, 2.02it/s]
|
623 |
73%|ββββββββ | 7801/10682 [1:11:57<23:48, 2.02it/s]
|
624 |
73%|ββββββββ | 7802/10682 [1:11:57<23:47, 2.02it/s]
|
625 |
73%|ββββββββ | 7803/10682 [1:11:58<23:46, 2.02it/s]
|
626 |
73%|ββββββββ | 7804/10682 [1:11:58<23:43, 2.02it/s]
|
627 |
73%|ββββββββ | 7805/10682 [1:11:59<23:45, 2.02it/s]
|
628 |
73%|ββββββββ | 7806/10682 [1:11:59<23:41, 2.02it/s]
|
629 |
73%|ββββββββ | 7807/10682 [1:12:00<23:42, 2.02it/s]
|
630 |
73%|ββββββββ | 7808/10682 [1:12:00<23:40, 2.02it/s]
|
631 |
73%|ββββββββ | 7809/10682 [1:12:01<23:40, 2.02it/s]
|
632 |
73%|ββββββββ | 7810/10682 [1:12:01<23:40, 2.02it/s]
|
633 |
73%|ββββββββ | 7811/10682 [1:12:02<23:40, 2.02it/s]
|
634 |
73%|ββββββββ | 7812/10682 [1:12:02<23:38, 2.02it/s]
|
635 |
73%|ββββββββ | 7813/10682 [1:12:03<23:39, 2.02it/s]
|
636 |
73%|ββββββββ | 7814/10682 [1:12:03<23:37, 2.02it/s]
|
637 |
73%|ββββββββ | 7815/10682 [1:12:04<23:37, 2.02it/s]
|
638 |
73%|ββββββββ | 7816/10682 [1:12:04<23:37, 2.02it/s]
|
639 |
73%|ββββββββ | 7817/10682 [1:12:05<23:35, 2.02it/s]
|
640 |
73%|ββββββββ | 7818/10682 [1:12:05<23:35, 2.02it/s]
|
641 |
73%|ββββββββ | 7819/10682 [1:12:06<23:35, 2.02it/s]
|
642 |
73%|ββββββββ | 7820/10682 [1:12:06<23:35, 2.02it/s]
|
643 |
73%|ββββββββ | 7821/10682 [1:12:07<23:36, 2.02it/s]
|
644 |
73%|ββββββββ | 7822/10682 [1:12:07<23:35, 2.02it/s]
|
645 |
73%|ββββββββ | 7823/10682 [1:12:08<23:34, 2.02it/s]
|
646 |
73%|ββββββββ | 7824/10682 [1:12:08<23:34, 2.02it/s]
|
647 |
73%|ββββββββ | 7825/10682 [1:12:09<23:32, 2.02it/s]
|
648 |
{'loss': 2.8657, 'grad_norm': 0.2530911862850189, 'learning_rate': 0.00020256270153176371, 'epoch': 10.26}
|
|
|
649 |
73%|ββββββββ | 7825/10682 [1:12:09<23:32, 2.02it/s]
|
650 |
73%|ββββββββ | 7826/10682 [1:12:09<23:33, 2.02it/s]
|
651 |
73%|ββββββββ | 7827/10682 [1:12:10<23:31, 2.02it/s]
|
652 |
73%|ββββββββ | 7828/10682 [1:12:10<23:32, 2.02it/s]
|
653 |
73%|ββββββββ | 7829/10682 [1:12:11<23:30, 2.02it/s]
|
654 |
73%|ββββββββ | 7830/10682 [1:12:11<23:30, 2.02it/s]
|
655 |
73%|ββββββββ | 7831/10682 [1:12:11<23:29, 2.02it/s]
|
656 |
73%|ββββββββ | 7832/10682 [1:12:12<23:31, 2.02it/s]
|
657 |
73%|ββββββββ | 7833/10682 [1:12:12<23:30, 2.02it/s]
|
658 |
73%|ββββββββ | 7834/10682 [1:12:13<23:29, 2.02it/s]
|
659 |
73%|ββββββββ | 7835/10682 [1:12:13<23:29, 2.02it/s]
|
660 |
73%|ββββββββ | 7836/10682 [1:12:14<23:27, 2.02it/s]
|
661 |
73%|ββββββββ | 7837/10682 [1:12:14<23:27, 2.02it/s]
|
662 |
73%|ββββββββ | 7838/10682 [1:12:15<23:26, 2.02it/s]
|
663 |
73%|ββββββββ | 7839/10682 [1:12:15<23:25, 2.02it/s]
|
664 |
73%|ββββββββ | 7840/10682 [1:12:16<23:25, 2.02it/s]
|
665 |
73%|ββββββββ | 7841/10682 [1:12:16<23:23, 2.02it/s]
|
666 |
73%|ββββββββ | 7842/10682 [1:12:17<23:24, 2.02it/s]
|
667 |
73%|ββββββββ | 7843/10682 [1:12:17<23:22, 2.02it/s]
|
668 |
73%|ββββββββ | 7844/10682 [1:12:18<23:24, 2.02it/s]
|
669 |
73%|ββββββββ | 7845/10682 [1:12:18<23:23, 2.02it/s]
|
670 |
73%|ββββββββ | 7846/10682 [1:12:19<23:23, 2.02it/s]
|
671 |
73%|ββββββββ | 7847/10682 [1:12:19<23:22, 2.02it/s]
|
672 |
73%|ββββββββ | 7848/10682 [1:12:20<23:23, 2.02it/s]
|
673 |
73%|ββββββββ | 7849/10682 [1:12:20<23:21, 2.02it/s]
|
674 |
73%|ββββββββ | 7850/10682 [1:12:21<23:21, 2.02it/s]
|
675 |
|
|
|
676 |
73%|ββββββββ | 7850/10682 [1:12:21<23:21, 2.02it/s]
|
677 |
73%|ββββββββ | 7851/10682 [1:12:21<23:22, 2.02it/s]
|
678 |
74%|ββββββββ | 7852/10682 [1:12:22<23:19, 2.02it/s]
|
679 |
74%|ββββββββ | 7853/10682 [1:12:22<23:20, 2.02it/s]
|
680 |
74%|ββββββββ | 7854/10682 [1:12:23<23:18, 2.02it/s]
|
681 |
74%|ββββββββ | 7855/10682 [1:12:23<23:18, 2.02it/s]
|
682 |
74%|ββββββββ | 7856/10682 [1:12:24<23:17, 2.02it/s]
|
683 |
74%|ββββββββ | 7857/10682 [1:12:24<23:18, 2.02it/s]
|
684 |
74%|ββββββββ | 7858/10682 [1:12:25<23:17, 2.02it/s]
|
685 |
74%|ββββββββ | 7859/10682 [1:12:25<23:16, 2.02it/s]
|
686 |
74%|ββββββββ | 7860/10682 [1:12:26<23:16, 2.02it/s]
|
687 |
74%|ββββββββ | 7861/10682 [1:12:26<23:14, 2.02it/s]
|
688 |
74%|ββββββββ | 7862/10682 [1:12:27<23:15, 2.02it/s]
|
689 |
74%|ββββββββ | 7863/10682 [1:12:27<23:14, 2.02it/s]
|
690 |
74%|ββββββββ | 7864/10682 [1:12:28<23:16, 2.02it/s]
|
691 |
74%|ββββββββ | 7865/10682 [1:12:28<23:13, 2.02it/s]
|
692 |
74%|ββββββββ | 7866/10682 [1:12:29<23:13, 2.02it/s]
|
693 |
74%|ββββββββ | 7867/10682 [1:12:29<23:12, 2.02it/s]
|
694 |
74%|ββββββββ | 7868/10682 [1:12:30<23:13, 2.02it/s]
|
695 |
74%|ββββββββ | 7869/10682 [1:12:30<23:11, 2.02it/s]
|
696 |
74%|ββββββββ | 7870/10682 [1:12:31<23:11, 2.02it/s]
|
697 |
74%|ββββββββ | 7871/10682 [1:12:31<23:10, 2.02it/s]
|
698 |
74%|ββββββββ | 7872/10682 [1:12:32<23:10, 2.02it/s]
|
699 |
74%|ββββββββ | 7873/10682 [1:12:32<23:09, 2.02it/s]
|
700 |
74%|ββββββββ | 7874/10682 [1:12:33<23:10, 2.02it/s]
|
701 |
74%|ββββββββ | 7875/10682 [1:12:33<23:09, 2.02it/s]
|
702 |
{'loss': 2.8799, 'grad_norm': 0.26851731538772583, 'learning_rate': 0.00019603537011569566, 'epoch': 10.32}
|
|
|
703 |
74%|ββββββββ | 7875/10682 [1:12:33<23:09, 2.02it/s]
|
704 |
74%|ββββββββ | 7876/10682 [1:12:34<23:09, 2.02it/s]
|
705 |
74%|ββββββββ | 7877/10682 [1:12:34<23:09, 2.02it/s]
|
706 |
74%|ββββββββ | 7878/10682 [1:12:35<23:07, 2.02it/s]
|
707 |
74%|ββββββββ | 7879/10682 [1:12:35<23:08, 2.02it/s]
|
708 |
74%|ββββββββ | 7880/10682 [1:12:36<23:06, 2.02it/s]
|
709 |
74%|ββββββββ | 7881/10682 [1:12:36<23:04, 2.02it/s]
|
710 |
74%|ββββββββ | 7882/10682 [1:12:37<23:03, 2.02it/s]
|
711 |
74%|ββββββββ | 7883/10682 [1:12:37<23:03, 2.02it/s]
|
712 |
74%|ββββββββ | 7884/10682 [1:12:38<23:03, 2.02it/s]
|
713 |
74%|ββββββββ | 7885/10682 [1:12:38<23:04, 2.02it/s]
|
714 |
74%|ββββββββ | 7886/10682 [1:12:39<23:02, 2.02it/s]
|
715 |
74%|ββββββββ | 7887/10682 [1:12:39<23:02, 2.02it/s]
|
716 |
74%|ββββββββ | 7888/10682 [1:12:40<23:01, 2.02it/s]
|
717 |
74%|ββββββββ | 7889/10682 [1:12:40<23:02, 2.02it/s]
|
718 |
74%|ββββββββ | 7890/10682 [1:12:41<23:01, 2.02it/s]
|
719 |
74%|ββββββββ | 7891/10682 [1:12:41<23:01, 2.02it/s]
|
720 |
74%|ββββββββ | 7892/10682 [1:12:42<22:59, 2.02it/s]
|
721 |
74%|ββββββββ | 7893/10682 [1:12:42<23:00, 2.02it/s]
|
722 |
74%|ββββββββ | 7894/10682 [1:12:43<22:58, 2.02it/s]
|
723 |
74%|ββββββββ | 7895/10682 [1:12:43<22:59, 2.02it/s]
|
724 |
74%|ββββββββ | 7896/10682 [1:12:44<22:56, 2.02it/s]
|
725 |
74%|ββββββββ | 7897/10682 [1:12:44<22:57, 2.02it/s]
|
726 |
74%|ββββββββ | 7898/10682 [1:12:45<22:55, 2.02it/s]
|
727 |
74%|ββββββββ | 7899/10682 [1:12:45<22:56, 2.02it/s]
|
728 |
74%|ββββββββ | 7900/10682 [1:12:46<22:55, 2.02it/s]
|
729 |
{'loss': 2.8842, 'grad_norm': 0.24814572930335999, 'learning_rate': 0.0001928020308484042, 'epoch': 10.35}
|
|
|
730 |
74%|ββββββββ | 7900/10682 [1:12:46<22:55, 2.02it/s]
|
731 |
74%|ββββββββ | 7901/10682 [1:12:46<22:57, 2.02it/s]
|
732 |
74%|ββββββββ | 7902/10682 [1:12:47<22:54, 2.02it/s]
|
733 |
74%|ββββββββ | 7903/10682 [1:12:47<22:56, 2.02it/s]
|
734 |
74%|ββββββββ | 7904/10682 [1:12:48<22:53, 2.02it/s]
|
735 |
74%|ββββββββ | 7905/10682 [1:12:48<22:54, 2.02it/s]
|
736 |
74%|ββββββββ | 7906/10682 [1:12:49<22:53, 2.02it/s]
|
737 |
74%|ββββββββ | 7907/10682 [1:12:49<22:53, 2.02it/s]
|
738 |
74%|ββββββββ | 7908/10682 [1:12:50<22:51, 2.02it/s]
|
739 |
74%|ββββββββ | 7909/10682 [1:12:50<22:53, 2.02it/s]
|
740 |
74%|ββββββββ | 7910/10682 [1:12:51<22:51, 2.02it/s]
|
741 |
74%|ββββββββ | 7911/10682 [1:12:51<22:51, 2.02it/s]
|
742 |
74%|ββββββββ | 7912/10682 [1:12:52<22:49, 2.02it/s]
|
743 |
74%|ββββββββ | 7913/10682 [1:12:52<22:50, 2.02it/s]
|
744 |
74%|ββββββββ | 7914/10682 [1:12:53<22:48, 2.02it/s]
|
745 |
74%|ββββββββ | 7915/10682 [1:12:53<22:48, 2.02it/s]
|
746 |
74%|ββββββββ | 7916/10682 [1:12:54<22:47, 2.02it/s]
|
747 |
74%|ββββββββ | 7917/10682 [1:12:54<22:48, 2.02it/s]
|
748 |
74%|ββββββββ | 7918/10682 [1:12:55<22:46, 2.02it/s]
|
749 |
74%|ββββββββ | 7919/10682 [1:12:55<22:48, 2.02it/s]
|
750 |
74%|ββββββββ | 7920/10682 [1:12:56<22:45, 2.02it/s]
|
751 |
74%|ββββββββ | 7921/10682 [1:12:56<22:45, 2.02it/s]
|
752 |
74%|ββββββββ | 7922/10682 [1:12:57<22:44, 2.02it/s]
|
753 |
74%|ββββββββ | 7923/10682 [1:12:57<22:45, 2.02it/s]
|
754 |
74%|ββββββββ | 7924/10682 [1:12:58<22:45, 2.02it/s]
|
755 |
74%|ββββββββ | 7925/10682 [1:12:58<22:45, 2.02it/s]
|
756 |
{'loss': 2.8778, 'grad_norm': 0.24965329468250275, 'learning_rate': 0.00018958919743235897, 'epoch': 10.39}
|
|
|
757 |
74%|ββββββββ | 7925/10682 [1:12:58<22:45, 2.02it/s]
|
758 |
74%|ββββββββ | 7926/10682 [1:12:59<22:47, 2.02it/s]
|
759 |
74%|ββββββββ | 7927/10682 [1:12:59<22:44, 2.02it/s]
|
760 |
74%|ββββββββ | 7928/10682 [1:12:59<22:44, 2.02it/s]
|
761 |
74%|ββββββββ | 7929/10682 [1:13:00<22:43, 2.02it/s]
|
762 |
74%|ββββββββ | 7930/10682 [1:13:00<22:42, 2.02it/s]
|
763 |
74%|ββββββββ | 7931/10682 [1:13:01<22:41, 2.02it/s]
|
764 |
74%|ββββββββ | 7932/10682 [1:13:01<22:40, 2.02it/s]
|
765 |
74%|ββββββββ | 7933/10682 [1:13:02<22:39, 2.02it/s]
|
766 |
74%|ββββββββ | 7934/10682 [1:13:02<22:38, 2.02it/s]
|
767 |
74%|ββββββββ | 7935/10682 [1:13:03<22:39, 2.02it/s]
|
768 |
74%|ββββββββ | 7936/10682 [1:13:03<22:38, 2.02it/s]
|
769 |
74%|ββββββββ | 7937/10682 [1:13:04<22:39, 2.02it/s]
|
770 |
74%|ββββββββ | 7938/10682 [1:13:04<22:37, 2.02it/s]
|
771 |
74%|ββββββββ | 7939/10682 [1:13:05<22:37, 2.02it/s]
|
772 |
74%|ββββββββ | 7940/10682 [1:13:05<22:36, 2.02it/s]
|
773 |
74%|ββββββββ | 7941/10682 [1:13:06<22:37, 2.02it/s]
|
774 |
74%|ββββββββ | 7942/10682 [1:13:06<22:35, 2.02it/s]
|
775 |
74%|ββββββββ | 7943/10682 [1:13:07<22:36, 2.02it/s]
|
776 |
74%|ββββββββ | 7944/10682 [1:13:07<22:35, 2.02it/s]
|
777 |
74%|ββββββββ | 7945/10682 [1:13:08<22:35, 2.02it/s]
|
778 |
74%|ββββββββ | 7946/10682 [1:13:08<22:34, 2.02it/s]
|
779 |
74%|ββββββββ | 7947/10682 [1:13:09<22:33, 2.02it/s]
|
780 |
74%|ββββββββ | 7948/10682 [1:13:09<22:32, 2.02it/s]
|
781 |
74%|ββββββββ | 7949/10682 [1:13:10<22:32, 2.02it/s]
|
782 |
74%|ββββββββ | 7950/10682 [1:13:10<22:30, 2.02it/s]
|
783 |
{'loss': 2.8803, 'grad_norm': 0.24981389939785004, 'learning_rate': 0.0001863970843282357, 'epoch': 10.42}
|
|
|
784 |
74%|ββββββββ | 7950/10682 [1:13:10<22:30, 2.02it/s]
|
785 |
74%|ββββββββ | 7951/10682 [1:13:11<22:33, 2.02it/s]
|
786 |
74%|ββββββββ | 7952/10682 [1:13:11<22:31, 2.02it/s]
|
787 |
74%|ββββββββ | 7953/10682 [1:13:12<22:32, 2.02it/s]
|
788 |
74%|ββββββββ | 7954/10682 [1:13:12<22:31, 2.02it/s]
|
789 |
74%|ββββββββ | 7955/10682 [1:13:13<22:31, 2.02it/s]
|
790 |
74%|ββββββββ | 7956/10682 [1:13:13<22:29, 2.02it/s]
|
791 |
74%|ββββββββ | 7957/10682 [1:13:14<22:29, 2.02it/s]
|
792 |
74%|ββββββββ | 7958/10682 [1:13:14<22:27, 2.02it/s]
|
793 |
75%|ββββββββ | 7959/10682 [1:13:15<22:27, 2.02it/s]
|
794 |
75%|ββββββββ | 7960/10682 [1:13:15<22:25, 2.02it/s]
|
795 |
75%|ββββββββ | 7961/10682 [1:13:16<22:27, 2.02it/s]
|
796 |
75%|ββββββββ | 7962/10682 [1:13:16<22:27, 2.02it/s]
|
797 |
75%|ββββββββ | 7963/10682 [1:13:17<22:26, 2.02it/s]
|
798 |
75%|ββββββββ | 7964/10682 [1:13:17<22:26, 2.02it/s]
|
799 |
75%|ββββββββ | 7965/10682 [1:13:18<22:24, 2.02it/s]
|
800 |
75%|ββββββββ | 7966/10682 [1:13:18<22:23, 2.02it/s]
|
801 |
75%|ββββββββ | 7967/10682 [1:13:19<22:23, 2.02it/s]
|
802 |
75%|ββββββββ | 7968/10682 [1:13:19<22:24, 2.02it/s]
|
803 |
75%|ββββββββ | 7969/10682 [1:13:20<22:22, 2.02it/s]
|
804 |
75%|ββββββββ | 7970/10682 [1:13:20<22:21, 2.02it/s]
|
805 |
75%|ββββββββ | 7971/10682 [1:13:21<22:20, 2.02it/s]
|
806 |
75%|ββββββββ | 7972/10682 [1:13:21<22:20, 2.02it/s]
|
807 |
75%|ββββββββ | 7973/10682 [1:13:22<22:20, 2.02it/s]
|
808 |
75%|ββββββββ | 7974/10682 [1:13:22<22:21, 2.02it/s]
|
809 |
75%|ββββββββ | 7975/10682 [1:13:23<22:19, 2.02it/s]
|
810 |
|
|
|
811 |
75%|ββββββββ | 7975/10682 [1:13:23<22:19, 2.02it/s]
|
812 |
75%|ββββββββ | 7976/10682 [1:13:23<22:23, 2.01it/s]
|
813 |
75%|ββββββββ | 7977/10682 [1:13:24<22:20, 2.02it/s]
|
814 |
75%|ββββββββ | 7978/10682 [1:13:24<22:20, 2.02it/s]
|
815 |
75%|ββββββββ | 7979/10682 [1:13:25<22:18, 2.02it/s]
|
816 |
75%|ββββββββ | 7980/10682 [1:13:25<22:19, 2.02it/s]
|
817 |
75%|ββββββββ | 7981/10682 [1:13:26<22:17, 2.02it/s]
|
818 |
75%|ββββββββ | 7982/10682 [1:13:26<22:17, 2.02it/s]
|
819 |
75%|ββββββββ | 7983/10682 [1:13:27<22:15, 2.02it/s]
|
820 |
75%|ββββββββ | 7984/10682 [1:13:27<22:15, 2.02it/s]
|
821 |
75%|ββββββββ | 7985/10682 [1:13:28<22:14, 2.02it/s]
|
822 |
75%|ββββββββ | 7986/10682 [1:13:28<22:14, 2.02it/s]
|
823 |
75%|ββββββββ | 7987/10682 [1:13:29<22:14, 2.02it/s]
|
824 |
75%|ββββββββ | 7988/10682 [1:13:29<22:13, 2.02it/s]
|
825 |
75%|ββββββββ | 7989/10682 [1:13:30<22:13, 2.02it/s]
|
826 |
75%|ββββββββ | 7990/10682 [1:13:30<22:12, 2.02it/s]
|
827 |
75%|ββββββββ | 7991/10682 [1:13:31<22:13, 2.02it/s]
|
828 |
75%|ββββββββ | 7992/10682 [1:13:31<22:11, 2.02it/s]
|
829 |
75%|ββββββββ | 7993/10682 [1:13:32<22:11, 2.02it/s]
|
830 |
75%|ββββββββ | 7994/10682 [1:13:32<22:09, 2.02it/s]
|
831 |
75%|ββββββββ | 7995/10682 [1:13:33<22:10, 2.02it/s]
|
832 |
75%|ββββββββ | 7996/10682 [1:13:33<22:10, 2.02it/s]
|
833 |
75%|ββββββββ | 7997/10682 [1:13:34<22:10, 2.02it/s]
|
834 |
75%|ββββββββ | 7998/10682 [1:13:34<22:08, 2.02it/s]
|
835 |
75%|ββββββββ | 7999/10682 [1:13:35<22:08, 2.02it/s]
|
836 |
75%|ββββββββ | 8000/10682 [1:13:35<22:06, 2.02it/s]
|
837 |
{'loss': 2.8918, 'grad_norm': 0.24982097744941711, 'learning_rate': 0.00018007586996870206, 'epoch': 10.48}
|
|
|
838 |
75%|ββββββββ | 8000/10682 [1:13:35<22:06, 2.02it/s]
|
839 |
75%|ββββββββ | 8001/10682 [1:13:36<22:09, 2.02it/s]
|
840 |
75%|ββββββββ | 8002/10682 [1:13:36<22:07, 2.02it/s]
|
841 |
75%|ββββββββ | 8003/10682 [1:13:37<22:06, 2.02it/s]
|
842 |
75%|ββββββββ | 8004/10682 [1:13:37<22:05, 2.02it/s]
|
843 |
75%|ββββββββ | 8005/10682 [1:13:38<22:05, 2.02it/s]
|
844 |
75%|ββββββββ | 8006/10682 [1:13:38<22:04, 2.02it/s]
|
845 |
75%|ββββββββ | 8007/10682 [1:13:39<22:04, 2.02it/s]
|
846 |
75%|ββββββββ | 8008/10682 [1:13:39<22:03, 2.02it/s]
|
847 |
75%|ββββββββ | 8009/10682 [1:13:40<22:03, 2.02it/s]
|
848 |
75%|ββββββββ | 8010/10682 [1:13:40<22:01, 2.02it/s]
|
849 |
75%|ββββββββ | 8011/10682 [1:13:41<22:01, 2.02it/s]
|
850 |
75%|ββββββββ | 8012/10682 [1:13:41<22:00, 2.02it/s]
|
851 |
75%|ββββββββ | 8013/10682 [1:13:42<22:01, 2.02it/s]
|
852 |
75%|ββββββββ | 8014/10682 [1:13:42<21:59, 2.02it/s]
|
853 |
75%|ββββββββ | 8015/10682 [1:13:43<22:00, 2.02it/s]
|
854 |
75%|ββββββββ | 8016/10682 [1:13:43<21:58, 2.02it/s]
|
855 |
75%|ββββββββ | 8017/10682 [1:13:44<21:58, 2.02it/s]
|
856 |
75%|ββββββββ | 8018/10682 [1:13:44<21:56, 2.02it/s]
|
857 |
75%|ββββββββ | 8019/10682 [1:13:45<21:58, 2.02it/s]
|
858 |
75%|ββββββββ | 8020/10682 [1:13:45<21:55, 2.02it/s]
|
859 |
75%|ββββββββ | 8021/10682 [1:13:46<21:56, 2.02it/s]
|
860 |
75%|ββββββββ | 8022/10682 [1:13:46<21:55, 2.02it/s]
|
861 |
75%|ββββββββ | 8023/10682 [1:13:47<21:55, 2.02it/s]
|
862 |
75%|ββββββββ | 8024/10682 [1:13:47<21:54, 2.02it/s]
|
863 |
75%|ββββββββ | 8025/10682 [1:13:48<21:54, 2.02it/s]{'loss': 2.9015, 'grad_norm': 0.2518491744995117, 'learning_rate': 0.00017694719066230924, 'epoch': 10.52}
|
864 |
|
|
|
865 |
75%|ββββββββ | 8025/10682 [1:13:48<21:54, 2.02it/s]
|
866 |
75%|ββββββββ | 8026/10682 [1:13:48<21:54, 2.02it/s]
|
867 |
75%|ββββββββ | 8027/10682 [1:13:48<21:54, 2.02it/s]
|
868 |
75%|ββββββββ | 8028/10682 [1:13:49<21:51, 2.02it/s]
|
869 |
75%|ββββββββ | 8029/10682 [1:13:49<21:52, 2.02it/s]
|
870 |
75%|ββββββββ | 8030/10682 [1:13:50<21:50, 2.02it/s]
|
871 |
75%|ββββββββ | 8031/10682 [1:13:50<21:51, 2.02it/s]
|
872 |
75%|ββββββββ | 8032/10682 [1:13:51<21:51, 2.02it/s]
|
873 |
75%|ββββββββ | 8033/10682 [1:13:51<21:51, 2.02it/s]
|
874 |
75%|ββββββββ | 8034/10682 [1:13:52<21:50, 2.02it/s]
|
875 |
75%|ββββββββ | 8035/10682 [1:13:52<21:50, 2.02it/s]
|
876 |
75%|ββββββββ | 8036/10682 [1:13:53<21:48, 2.02it/s]
|
877 |
75%|ββββββββ | 8037/10682 [1:13:53<21:48, 2.02it/s]
|
878 |
75%|ββββββββ | 8038/10682 [1:13:54<21:46, 2.02it/s]
|
879 |
75%|ββββββββ | 8039/10682 [1:13:54<21:47, 2.02it/s]
|
880 |
75%|ββββββββ | 8040/10682 [1:13:55<21:46, 2.02it/s]
|
881 |
75%|ββββββββ | 8041/10682 [1:13:55<21:46, 2.02it/s]
|
882 |
75%|ββββββββ | 8042/10682 [1:13:56<21:45, 2.02it/s]
|
883 |
75%|ββββββββ | 8043/10682 [1:13:56<21:46, 2.02it/s]
|
884 |
75%|ββββββββ | 8044/10682 [1:13:57<21:43, 2.02it/s]
|
885 |
75%|ββββββββ | 8045/10682 [1:13:57<21:44, 2.02it/s]
|
886 |
75%|ββββββββ | 8046/10682 [1:13:58<21:43, 2.02it/s]
|
887 |
75%|ββββββββ | 8047/10682 [1:13:58<21:43, 2.02it/s]
|
888 |
75%|ββββββββ | 8048/10682 [1:13:59<21:42, 2.02it/s]
|
889 |
75%|ββββββββ | 8049/10682 [1:13:59<21:42, 2.02it/s]
|
890 |
75%|ββββββββ | 8050/10682 [1:14:00<21:40, 2.02it/s]
|
891 |
{'loss': 2.8929, 'grad_norm': 0.24705103039741516, 'learning_rate': 0.00017384007553770858, 'epoch': 10.55}
|
|
|
892 |
75%|ββββββββ | 8050/10682 [1:14:00<21:40, 2.02it/s]
|
893 |
75%|ββββββββ | 8051/10682 [1:14:00<21:42, 2.02it/s]
|
894 |
75%|ββββββββ | 8052/10682 [1:14:01<21:40, 2.02it/s]
|
895 |
75%|ββββββββ | 8053/10682 [1:14:01<21:41, 2.02it/s]
|
896 |
75%|ββββββββ | 8054/10682 [1:14:02<21:39, 2.02it/s]
|
897 |
75%|ββββββββ | 8055/10682 [1:14:02<21:40, 2.02it/s]
|
898 |
75%|ββββββββ | 8056/10682 [1:14:03<21:39, 2.02it/s]
|
899 |
75%|ββββββββ | 8057/10682 [1:14:03<21:39, 2.02it/s]
|
900 |
75%|ββββββββ | 8058/10682 [1:14:04<21:38, 2.02it/s]
|
901 |
75%|ββββββββ | 8059/10682 [1:14:04<21:38, 2.02it/s]
|
902 |
75%|ββββββββ | 8060/10682 [1:14:05<21:36, 2.02it/s]
|
903 |
75%|ββββββββ | 8061/10682 [1:14:05<21:36, 2.02it/s]
|
904 |
75%|ββββββββ | 8062/10682 [1:14:06<21:36, 2.02it/s]
|
905 |
75%|ββββββββ | 8063/10682 [1:14:06<21:35, 2.02it/s]
|
906 |
75%|ββββββββ | 8064/10682 [1:14:07<21:34, 2.02it/s]
|
907 |
76%|ββββββββ | 8065/10682 [1:14:07<21:35, 2.02it/s]
|
908 |
76%|ββββββββ | 8066/10682 [1:14:08<21:34, 2.02it/s]
|
909 |
76%|ββββββββ | 8067/10682 [1:14:08<21:34, 2.02it/s]
|
910 |
76%|ββββββββ | 8068/10682 [1:14:09<21:32, 2.02it/s]
|
911 |
76%|ββββββββ | 8069/10682 [1:14:09<21:32, 2.02it/s]
|
912 |
76%|ββββββββ | 8070/10682 [1:14:10<21:31, 2.02it/s]
|
913 |
76%|ββββββββ | 8071/10682 [1:14:10<21:32, 2.02it/s]
|
914 |
76%|ββββββββ | 8072/10682 [1:14:11<21:32, 2.02it/s]
|
915 |
76%|ββββββββ | 8073/10682 [1:14:11<21:33, 2.02it/s]
|
916 |
76%|ββββββββ | 8074/10682 [1:14:12<21:31, 2.02it/s]
|
917 |
76%|ββββββββ | 8075/10682 [1:14:12<21:31, 2.02it/s]
|
918 |
{'loss': 2.8997, 'grad_norm': 0.2514490485191345, 'learning_rate': 0.00017075473199874692, 'epoch': 10.58}
|
|
|
919 |
76%|ββββββββ | 8075/10682 [1:14:12<21:31, 2.02it/s]
|
920 |
76%|ββββββββ | 8076/10682 [1:14:13<21:33, 2.01it/s]
|
921 |
76%|ββββββββ | 8077/10682 [1:14:13<21:32, 2.02it/s]
|
922 |
76%|ββββββββ | 8078/10682 [1:14:14<21:31, 2.02it/s]
|
923 |
76%|ββββββββ | 8079/10682 [1:14:14<21:28, 2.02it/s]
|
924 |
76%|ββββββββ | 8080/10682 [1:14:15<21:29, 2.02it/s]
|
925 |
76%|ββββββββ | 8081/10682 [1:14:15<21:27, 2.02it/s]
|
926 |
76%|ββββββββ | 8082/10682 [1:14:16<21:27, 2.02it/s]
|
927 |
76%|ββββββββ | 8083/10682 [1:14:16<21:26, 2.02it/s]
|
928 |
76%|ββββββββ | 8084/10682 [1:14:17<21:25, 2.02it/s]
|
929 |
76%|ββββββββ | 8085/10682 [1:14:17<21:25, 2.02it/s]
|
930 |
76%|ββββββββ | 8086/10682 [1:14:18<21:23, 2.02it/s]
|
931 |
76%|ββββββββ | 8087/10682 [1:14:18<21:24, 2.02it/s]
|
932 |
76%|ββββββββ | 8088/10682 [1:14:19<21:22, 2.02it/s]
|
933 |
76%|ββββββββ | 8089/10682 [1:14:19<21:22, 2.02it/s]
|
934 |
76%|ββββββββ | 8090/10682 [1:14:20<21:21, 2.02it/s]
|
935 |
76%|ββββββββ | 8091/10682 [1:14:20<21:22, 2.02it/s]
|
936 |
76%|ββββββββ | 8092/10682 [1:14:21<21:21, 2.02it/s]
|
937 |
76%|ββββββββ | 8093/10682 [1:14:21<21:33, 2.00it/s]
|
938 |
76%|ββββββββ | 8094/10682 [1:14:22<21:28, 2.01it/s]
|
939 |
76%|ββββββββ | 8095/10682 [1:14:22<21:25, 2.01it/s]
|
940 |
76%|ββββββββ | 8096/10682 [1:14:23<21:23, 2.02it/s]
|
941 |
76%|ββββββββ | 8097/10682 [1:14:23<21:21, 2.02it/s]
|
942 |
76%|ββββββββ | 8098/10682 [1:14:24<21:19, 2.02it/s]
|
943 |
76%|ββββββββ | 8099/10682 [1:14:24<21:18, 2.02it/s]
|
944 |
76%|ββββββββ | 8100/10682 [1:14:25<21:18, 2.02it/s]
|
945 |
|
|
|
946 |
76%|ββββββββ | 8100/10682 [1:14:25<21:18, 2.02it/s]
|
947 |
76%|ββββββββ | 8101/10682 [1:14:25<21:19, 2.02it/s]
|
948 |
76%|ββββββββ | 8102/10682 [1:14:26<21:17, 2.02it/s]
|
949 |
76%|ββββββββ | 8103/10682 [1:14:26<21:17, 2.02it/s]
|
950 |
76%|ββββββββ | 8104/10682 [1:14:27<21:15, 2.02it/s]
|
951 |
76%|ββββββββ | 8105/10682 [1:14:27<21:14, 2.02it/s]
|
952 |
76%|ββββββββ | 8106/10682 [1:14:28<21:14, 2.02it/s]
|
953 |
76%|ββββββββ | 8107/10682 [1:14:28<21:14, 2.02it/s]
|
954 |
76%|ββββββββ | 8108/10682 [1:14:29<21:13, 2.02it/s]
|
955 |
76%|ββββββββ | 8109/10682 [1:14:29<21:12, 2.02it/s]
|
956 |
76%|ββββββββ | 8110/10682 [1:14:30<21:11, 2.02it/s]
|
957 |
76%|ββββββββ | 8111/10682 [1:14:30<21:11, 2.02it/s]
|
958 |
76%|ββββββββ | 8112/10682 [1:14:31<21:11, 2.02it/s]
|
959 |
76%|ββββββββ | 8113/10682 [1:14:31<21:12, 2.02it/s]
|
960 |
76%|ββββββββ | 8114/10682 [1:14:32<21:12, 2.02it/s]
|
961 |
76%|ββββββββ | 8115/10682 [1:14:32<21:10, 2.02it/s]
|
962 |
76%|ββββββββ | 8116/10682 [1:14:33<21:10, 2.02it/s]
|
963 |
76%|ββββββββ | 8117/10682 [1:14:33<21:08, 2.02it/s]
|
964 |
76%|ββββββββ | 8118/10682 [1:14:34<21:09, 2.02it/s]
|
965 |
76%|ββββββββ | 8119/10682 [1:14:34<21:07, 2.02it/s]
|
966 |
76%|ββββββββ | 8120/10682 [1:14:35<21:07, 2.02it/s]
|
967 |
76%|ββββββββ | 8121/10682 [1:14:35<21:06, 2.02it/s]
|
968 |
76%|ββββββββ | 8122/10682 [1:14:36<21:05, 2.02it/s]
|
969 |
76%|ββββββββ | 8123/10682 [1:14:36<21:05, 2.02it/s]
|
970 |
76%|ββββββββ | 8124/10682 [1:14:37<21:05, 2.02it/s]
|
971 |
76%|ββββββββ | 8125/10682 [1:14:37<21:05, 2.02it/s]{'loss': 2.8945, 'grad_norm': 0.24637724459171295, 'learning_rate': 0.0001646501820129766, 'epoch': 10.65}
|
|
|
972 |
|
973 |
76%|ββββββββ | 8125/10682 [1:14:37<21:05, 2.02it/s]
|
974 |
76%|ββββββββ | 8126/10682 [1:14:37<21:05, 2.02it/s]
|
975 |
76%|ββββββββ | 8127/10682 [1:14:38<21:05, 2.02it/s]
|
976 |
76%|ββββββββ | 8128/10682 [1:14:38<21:04, 2.02it/s]
|
977 |
76%|ββββββββ | 8129/10682 [1:14:39<21:04, 2.02it/s]
|
978 |
76%|ββββββββ | 8130/10682 [1:14:39<21:02, 2.02it/s]
|
979 |
76%|ββββββββ | 8131/10682 [1:14:40<21:02, 2.02it/s]
|
980 |
76%|ββββββββ | 8132/10682 [1:14:40<21:00, 2.02it/s]
|
981 |
76%|ββββββββ | 8133/10682 [1:14:41<20:59, 2.02it/s]
|
982 |
76%|ββββββββ | 8134/10682 [1:14:41<20:59, 2.02it/s]
|
983 |
76%|ββββββββ | 8135/10682 [1:14:42<20:58, 2.02it/s]
|
984 |
76%|ββββββββ | 8136/10682 [1:14:42<20:59, 2.02it/s]
|
985 |
76%|ββββββββ | 8137/10682 [1:14:43<20:58, 2.02it/s]
|
986 |
76%|ββββββββ | 8138/10682 [1:14:43<20:59, 2.02it/s]
|
987 |
76%|ββββββββ | 8139/10682 [1:14:44<20:58, 2.02it/s]
|
988 |
76%|ββββββββ | 8140/10682 [1:14:44<20:58, 2.02it/s]
|
989 |
76%|ββββββββ | 8141/10682 [1:14:45<20:58, 2.02it/s]
|
990 |
76%|ββββββββ | 8142/10682 [1:14:45<20:57, 2.02it/s]
|
991 |
76%|ββββββββ | 8143/10682 [1:14:46<20:55, 2.02it/s]
|
992 |
76%|ββοΏ½οΏ½βββββ | 8144/10682 [1:14:46<20:55, 2.02it/s]
|
993 |
76%|ββββββββ | 8145/10682 [1:14:47<20:55, 2.02it/s]
|
994 |
76%|ββββββββ | 8146/10682 [1:14:47<20:54, 2.02it/s]
|
995 |
76%|ββββββββ | 8147/10682 [1:14:48<20:53, 2.02it/s]
|
996 |
76%|ββββββββ | 8148/10682 [1:14:48<20:54, 2.02it/s]
|
997 |
76%|ββββββββ | 8149/10682 [1:14:49<20:54, 2.02it/s]
|
998 |
76%|ββββββββ | 8150/10682 [1:14:49<20:52, 2.02it/s]{'loss': 2.8927, 'grad_norm': 0.25224441289901733, 'learning_rate': 0.00016163138305256598, 'epoch': 10.68}
|
999 |
|
|
|
1000 |
76%|ββββββββ | 8150/10682 [1:14:49<20:52, 2.02it/s]
|
1001 |
76%|ββββββββ | 8151/10682 [1:14:50<20:54, 2.02it/s]
|
1002 |
76%|ββββββββ | 8152/10682 [1:14:50<20:52, 2.02it/s]
|
1003 |
76%|ββββββββ | 8153/10682 [1:14:51<20:52, 2.02it/s]
|
1004 |
76%|ββββββββ | 8154/10682 [1:14:51<20:51, 2.02it/s]
|
1005 |
76%|ββββββββ | 8155/10682 [1:14:52<20:51, 2.02it/s]
|
1006 |
76%|ββββββββ | 8156/10682 [1:14:52<20:50, 2.02it/s]
|
1007 |
76%|ββββββββ | 8157/10682 [1:14:53<20:49, 2.02it/s]
|
1008 |
76%|ββββββββ | 8158/10682 [1:14:53<20:48, 2.02it/s]
|
1009 |
76%|ββββββββ | 8159/10682 [1:14:54<20:49, 2.02it/s]
|
1010 |
76%|ββββββββ | 8160/10682 [1:14:54<20:47, 2.02it/s]
|
1011 |
76%|ββββββββ | 8161/10682 [1:14:55<20:48, 2.02it/s]
|
1012 |
76%|ββββββββ | 8162/10682 [1:14:55<20:46, 2.02it/s]
|
1013 |
76%|ββββββββ | 8163/10682 [1:14:56<20:46, 2.02it/s]
|
1014 |
76%|ββββββββ | 8164/10682 [1:14:56<20:45, 2.02it/s]
|
1015 |
76%|ββββββββ | 8165/10682 [1:14:57<20:45, 2.02it/s]
|
1016 |
76%|ββββββββ | 8166/10682 [1:14:57<20:44, 2.02it/s]
|
1017 |
76%|ββββββββ | 8167/10682 [1:14:58<20:44, 2.02it/s]
|
1018 |
76%|ββββββββ | 8168/10682 [1:14:58<20:43, 2.02it/s]
|
1019 |
76%|ββββββββ | 8169/10682 [1:14:59<20:43, 2.02it/s]
|
1020 |
76%|ββββββββ | 8170/10682 [1:14:59<20:42, 2.02it/s]
|
1021 |
76%|ββββββββ | 8171/10682 [1:15:00<20:42, 2.02it/s]
|
1022 |
77%|ββββββββ | 8172/10682 [1:15:00<20:40, 2.02it/s]
|
1023 |
77%|ββββββββ | 8173/10682 [1:15:01<20:41, 2.02it/s]
|
1024 |
77%|ββββββββ | 8174/10682 [1:15:01<20:39, 2.02it/s]
|
1025 |
77%|ββββββββ | 8175/10682 [1:15:02<20:39, 2.02it/s]
|
1026 |
|
|
|
1027 |
77%|ββββββββ | 8175/10682 [1:15:02<20:39, 2.02it/s]
|
1028 |
77%|ββββββββ | 8176/10682 [1:15:02<20:41, 2.02it/s]
|
1029 |
77%|ββββββββ | 8177/10682 [1:15:03<20:40, 2.02it/s]
|
1030 |
77%|ββββββββ | 8178/10682 [1:15:03<20:39, 2.02it/s]
|
1031 |
77%|ββββββββ | 8179/10682 [1:15:04<20:38, 2.02it/s]
|
1032 |
77%|ββββββββ | 8180/10682 [1:15:04<20:38, 2.02it/s]
|
1033 |
77%|ββββββββ | 8181/10682 [1:15:05<20:37, 2.02it/s]
|
1034 |
77%|ββββββββ | 8182/10682 [1:15:05<20:36, 2.02it/s]
|
1035 |
77%|ββββββββ | 8183/10682 [1:15:06<20:37, 2.02it/s]
|
1036 |
77%|ββββββββ | 8184/10682 [1:15:06<20:35, 2.02it/s]
|
1037 |
77%|ββββββββ | 8185/10682 [1:15:07<20:35, 2.02it/s]
|
1038 |
77%|ββββββββ | 8186/10682 [1:15:07<20:36, 2.02it/s]
|
1039 |
77%|ββββββββ | 8187/10682 [1:15:08<20:36, 2.02it/s]
|
1040 |
77%|ββββββββ | 8188/10682 [1:15:08<20:36, 2.02it/s]
|
1041 |
77%|ββββββββ | 8189/10682 [1:15:09<20:33, 2.02it/s]
|
1042 |
77%|ββββββββ | 8190/10682 [1:15:09<20:33, 2.02it/s]
|
1043 |
77%|ββββββββ | 8191/10682 [1:15:10<20:31, 2.02it/s]
|
1044 |
77%|ββββββββ | 8192/10682 [1:15:10<20:31, 2.02it/s]
|
1045 |
77%|ββββββββ | 8193/10682 [1:15:11<20:30, 2.02it/s]
|
1046 |
77%|ββββββββ | 8194/10682 [1:15:11<20:31, 2.02it/s]
|
1047 |
77%|ββββββββ | 8195/10682 [1:15:12<20:29, 2.02it/s]
|
1048 |
77%|ββββββββ | 8196/10682 [1:15:12<20:29, 2.02it/s]
|
1049 |
77%|ββββββββ | 8197/10682 [1:15:13<20:28, 2.02it/s]
|
1050 |
77%|ββββββββ | 8198/10682 [1:15:13<20:28, 2.02it/s]
|
1051 |
77%|ββββββββ | 8199/10682 [1:15:14<20:27, 2.02it/s]
|
1052 |
77%|ββββββββ | 8200/10682 [1:15:14<20:27, 2.02it/s]
|
1053 |
|
|
|
1054 |
77%|ββββββββ | 8200/10682 [1:15:14<20:27, 2.02it/s]
|
1055 |
77%|ββββββββ | 8201/10682 [1:15:15<20:27, 2.02it/s]
|
1056 |
77%|ββββββββ | 8202/10682 [1:15:15<20:25, 2.02it/s]
|
1057 |
77%|ββββββββ | 8203/10682 [1:15:16<20:26, 2.02it/s]
|
1058 |
77%|ββββββββ | 8204/10682 [1:15:16<20:25, 2.02it/s]
|
1059 |
77%|ββββββββ | 8205/10682 [1:15:17<22:12, 1.86it/s]
|
1060 |
77%|ββββββοΏ½οΏ½β | 8206/10682 [1:15:17<21:43, 1.90it/s]
|
1061 |
77%|ββββββββ | 8207/10682 [1:15:18<21:18, 1.94it/s]
|
1062 |
77%|ββββββββ | 8208/10682 [1:15:18<21:01, 1.96it/s]
|
1063 |
77%|ββββββββ | 8209/10682 [1:15:19<20:49, 1.98it/s]
|
1064 |
77%|ββββββββ | 8210/10682 [1:15:19<20:41, 1.99it/s]
|
1065 |
77%|ββββββββ | 8211/10682 [1:15:20<20:35, 2.00it/s]
|
1066 |
77%|ββββββββ | 8212/10682 [1:15:20<20:30, 2.01it/s]
|
1067 |
77%|ββββββββ | 8213/10682 [1:15:21<20:27, 2.01it/s]
|
1068 |
77%|ββββββββ | 8214/10682 [1:15:21<20:37, 1.99it/s]
|
1069 |
77%|ββββββββ | 8215/10682 [1:15:22<20:32, 2.00it/s]
|
1070 |
77%|ββββββββ | 8216/10682 [1:15:22<20:28, 2.01it/s]
|
1071 |
77%|ββββββββ | 8217/10682 [1:15:23<20:26, 2.01it/s]
|
1072 |
77%|ββββββββ | 8218/10682 [1:15:23<20:24, 2.01it/s]
|
1073 |
77%|ββββββββ | 8219/10682 [1:15:24<20:23, 2.01it/s]
|
1074 |
77%|ββββββββ | 8220/10682 [1:15:24<20:20, 2.02it/s]
|
1075 |
77%|ββββββββ | 8221/10682 [1:15:25<20:20, 2.02it/s]
|
1076 |
77%|ββββββββ | 8222/10682 [1:15:25<20:18, 2.02it/s]
|
1077 |
77%|ββββββββ | 8223/10682 [1:15:26<20:17, 2.02it/s]
|
1078 |
77%|ββββββββ | 8224/10682 [1:15:26<20:16, 2.02it/s]
|
1079 |
77%|ββββββββ | 8225/10682 [1:15:27<20:16, 2.02it/s]{'loss': 2.9031, 'grad_norm': 0.24478811025619507, 'learning_rate': 0.0001527113038415231, 'epoch': 10.78}
|
|
|
1080 |
|
1081 |
77%|ββββββββ | 8225/10682 [1:15:27<20:16, 2.02it/s]
|
1082 |
77%|ββββββββ | 8226/10682 [1:15:27<20:17, 2.02it/s]
|
1083 |
77%|ββββββββ | 8227/10682 [1:15:28<20:16, 2.02it/s]
|
1084 |
77%|ββββββββ | 8228/10682 [1:15:28<20:15, 2.02it/s]
|
1085 |
77%|ββββββββ | 8229/10682 [1:15:29<20:16, 2.02it/s]
|
1086 |
77%|ββββββββ | 8230/10682 [1:15:29<20:15, 2.02it/s]
|
1087 |
77%|ββββββββ | 8231/10682 [1:15:30<20:14, 2.02it/s]
|
1088 |
77%|ββββββββ | 8232/10682 [1:15:30<20:12, 2.02it/s]
|
1089 |
77%|ββββββββ | 8233/10682 [1:15:31<20:12, 2.02it/s]
|
1090 |
77%|ββββββββ | 8234/10682 [1:15:31<20:10, 2.02it/s]
|
1091 |
77%|ββββββββ | 8235/10682 [1:15:32<20:10, 2.02it/s]
|
1092 |
77%|ββββββββ | 8236/10682 [1:15:32<20:09, 2.02it/s]
|
1093 |
77%|ββββββββ | 8237/10682 [1:15:33<20:09, 2.02it/s]
|
1094 |
77%|ββββββββ | 8238/10682 [1:15:33<20:08, 2.02it/s]
|
1095 |
77%|ββββββββ | 8239/10682 [1:15:34<20:08, 2.02it/s]
|
1096 |
77%|ββββββββ | 8240/10682 [1:15:34<20:07, 2.02it/s]
|
1097 |
77%|ββββββββ | 8241/10682 [1:15:35<20:07, 2.02it/s]
|
1098 |
77%|ββββββββ | 8242/10682 [1:15:35<20:05, 2.02it/s]
|
1099 |
77%|ββββββββ | 8243/10682 [1:15:36<20:05, 2.02it/s]
|
1100 |
77%|ββββββββ | 8244/10682 [1:15:36<20:04, 2.02it/s]
|
1101 |
77%|ββββββββ | 8245/10682 [1:15:37<20:05, 2.02it/s]
|
1102 |
77%|ββββββββ | 8246/10682 [1:15:37<20:04, 2.02it/s]
|
1103 |
77%|ββββββββ | 8247/10682 [1:15:38<20:05, 2.02it/s]
|
1104 |
77%|ββββββββ | 8248/10682 [1:15:38<20:03, 2.02it/s]
|
1105 |
77%|ββββββββ | 8249/10682 [1:15:39<20:04, 2.02it/s]
|
1106 |
77%|ββββββββ | 8250/10682 [1:15:39<20:03, 2.02it/s]{'loss': 2.8918, 'grad_norm': 0.24946050345897675, 'learning_rate': 0.00014978404491439802, 'epoch': 10.81}
|
1107 |
|
|
|
1108 |
77%|ββββββββ | 8250/10682 [1:15:39<20:03, 2.02it/s]
|
1109 |
77%|ββββββββ | 8251/10682 [1:15:40<20:04, 2.02it/s]
|
1110 |
77%|ββββββββ | 8252/10682 [1:15:40<20:03, 2.02it/s]
|
1111 |
77%|ββββββββ | 8253/10682 [1:15:41<20:03, 2.02it/s]
|
1112 |
77%|ββββββββ | 8254/10682 [1:15:41<20:01, 2.02it/s]
|
1113 |
77%|ββββββββ | 8255/10682 [1:15:41<20:02, 2.02it/s]
|
1114 |
77%|ββββββββ | 8256/10682 [1:15:42<20:01, 2.02it/s]
|
1115 |
77%|ββββββββ | 8257/10682 [1:15:42<20:01, 2.02it/s]
|
1116 |
77%|ββββββββ | 8258/10682 [1:15:43<20:00, 2.02it/s]
|
1117 |
77%|ββββββββ | 8259/10682 [1:15:43<20:00, 2.02it/s]
|
1118 |
77%|ββββββββ | 8260/10682 [1:15:44<19:59, 2.02it/s]
|
1119 |
77%|ββββββββ | 8261/10682 [1:15:44<19:59, 2.02it/s]
|
1120 |
77%|ββββββββ | 8262/10682 [1:15:45<19:57, 2.02it/s]
|
1121 |
77%|ββββββββ | 8263/10682 [1:15:45<19:57, 2.02it/s]
|
1122 |
77%|ββββββββ | 8264/10682 [1:15:46<19:57, 2.02it/s]
|
1123 |
77%|ββββββββ | 8265/10682 [1:15:46<19:56, 2.02it/s]
|
1124 |
77%|ββββββββ | 8266/10682 [1:15:47<19:56, 2.02it/s]
|
1125 |
77%|ββββββββ | 8267/10682 [1:15:47<19:54, 2.02it/s]
|
1126 |
77%|ββββββββ | 8268/10682 [1:15:48<19:55, 2.02it/s]
|
1127 |
77%|ββββββββ | 8269/10682 [1:15:48<19:52, 2.02it/s]
|
1128 |
77%|ββββββββ | 8270/10682 [1:15:49<19:52, 2.02it/s]
|
1129 |
77%|ββββββββ | 8271/10682 [1:15:49<19:51, 2.02it/s]
|
1130 |
77%|ββββββββ | 8272/10682 [1:15:50<19:52, 2.02it/s]
|
1131 |
77%|ββββββββ | 8273/10682 [1:15:50<19:51, 2.02it/s]
|
1132 |
77%|ββββββββ | 8274/10682 [1:15:51<19:51, 2.02it/s]
|
1133 |
77%|ββββββββ | 8275/10682 [1:15:51<19:50, 2.02it/s]
|
1134 |
|
|
|
1135 |
77%|ββββββββ | 8275/10682 [1:15:51<19:50, 2.02it/s]
|
1136 |
77%|ββββββββ | 8276/10682 [1:15:52<19:52, 2.02it/s]
|
1137 |
77%|ββββββββ | 8277/10682 [1:15:52<19:51, 2.02it/s]
|
1138 |
77%|ββββββββ | 8278/10682 [1:15:53<19:50, 2.02it/s]
|
1139 |
78%|ββββββββ | 8279/10682 [1:15:53<19:50, 2.02it/s]
|
1140 |
78%|ββββββββ | 8280/10682 [1:15:54<19:49, 2.02it/s]
|
1141 |
78%|ββββββββ | 8281/10682 [1:15:54<19:48, 2.02it/s]
|
1142 |
78%|ββββββββ | 8282/10682 [1:15:55<19:48, 2.02it/s]
|
1143 |
78%|ββββββββ | 8283/10682 [1:15:55<19:47, 2.02it/s]
|
1144 |
78%|ββββββββ | 8284/10682 [1:15:56<19:46, 2.02it/s]
|
1145 |
78%|ββββββββ | 8285/10682 [1:15:56<19:46, 2.02it/s]
|
1146 |
78%|ββββββββ | 8286/10682 [1:15:57<19:46, 2.02it/s]
|
1147 |
78%|ββββββββ | 8287/10682 [1:15:57<19:47, 2.02it/s]
|
1148 |
78%|ββββββββ | 8288/10682 [1:15:58<19:46, 2.02it/s]
|
1149 |
78%|ββββββββ | 8289/10682 [1:15:58<19:47, 2.02it/s]
|
1150 |
78%|ββββββββ | 8290/10682 [1:15:59<21:33, 1.85it/s]
|
1151 |
78%|ββββββββ | 8291/10682 [1:15:59<20:58, 1.90it/s]
|
1152 |
78%|ββββββββ | 8292/10682 [1:16:00<20:36, 1.93it/s]
|
1153 |
78%|ββββββββ | 8293/10682 [1:16:00<20:18, 1.96it/s]
|
1154 |
78%|ββββββββ | 8294/10682 [1:16:01<20:08, 1.98it/s]
|
1155 |
78%|ββββββββ | 8295/10682 [1:16:01<19:59, 1.99it/s]
|
1156 |
78%|ββββββββ | 8296/10682 [1:16:02<19:54, 2.00it/s]
|
1157 |
78%|ββββββββ | 8297/10682 [1:16:02<19:49, 2.01it/s]
|
1158 |
78%|ββββββββ | 8298/10682 [1:16:03<19:46, 2.01it/s]
|
1159 |
78%|ββββββββ | 8299/10682 [1:16:03<19:42, 2.02it/s]
|
1160 |
78%|ββββββββ | 8300/10682 [1:16:04<19:42, 2.01it/s]
|
1161 |
{'loss': 2.8984, 'grad_norm': 0.24762478470802307, 'learning_rate': 0.00014399985296581835, 'epoch': 10.88}
|
|
|
1162 |
78%|ββββββββ | 8300/10682 [1:16:04<19:42, 2.01it/s]
|
1163 |
78%|ββββββββ | 8301/10682 [1:16:04<19:41, 2.01it/s]
|
1164 |
78%|ββββββββ | 8302/10682 [1:16:05<19:40, 2.02it/s]
|
1165 |
78%|ββββββββ | 8303/10682 [1:16:05<19:38, 2.02it/s]
|
1166 |
78%|ββββββββ | 8304/10682 [1:16:06<19:38, 2.02it/s]
|
1167 |
78%|ββββββββ | 8305/10682 [1:16:06<19:37, 2.02it/s]
|
1168 |
78%|ββββββββ | 8306/10682 [1:16:07<19:36, 2.02it/s]
|
1169 |
78%|ββββββββ | 8307/10682 [1:16:07<19:35, 2.02it/s]
|
1170 |
78%|ββββββββ | 8308/10682 [1:16:08<19:35, 2.02it/s]
|
1171 |
78%|ββββββββ | 8309/10682 [1:16:08<19:34, 2.02it/s]
|
1172 |
78%|ββββββββ | 8310/10682 [1:16:09<19:34, 2.02it/s]
|
1173 |
78%|ββββββββ | 8311/10682 [1:16:09<19:34, 2.02it/s]
|
1174 |
78%|ββββββββ | 8312/10682 [1:16:10<19:33, 2.02it/s]
|
1175 |
78%|ββββββββ | 8313/10682 [1:16:10<19:33, 2.02it/s]
|
1176 |
78%|ββββββββ | 8314/10682 [1:16:11<19:31, 2.02it/s]
|
1177 |
78%|ββββββββ | 8315/10682 [1:16:11<19:32, 2.02it/s]
|
1178 |
78%|ββββββββ | 8316/10682 [1:16:12<19:29, 2.02it/s]
|
1179 |
78%|ββββββββ | 8317/10682 [1:16:12<19:29, 2.02it/s]
|
1180 |
78%|ββββββββ | 8318/10682 [1:16:13<19:28, 2.02it/s]
|
1181 |
78%|ββββββββ | 8319/10682 [1:16:13<19:28, 2.02it/s]
|
1182 |
78%|ββββββββ | 8320/10682 [1:16:14<19:28, 2.02it/s]
|
1183 |
78%|ββββββββ | 8321/10682 [1:16:14<19:28, 2.02it/s]
|
1184 |
78%|ββββββββ | 8322/10682 [1:16:15<19:28, 2.02it/s]
|
1185 |
78%|ββββββββ | 8323/10682 [1:16:15<19:28, 2.02it/s]
|
1186 |
78%|ββββββββ | 8324/10682 [1:16:16<19:26, 2.02it/s]
|
1187 |
78%|ββββββββ | 8325/10682 [1:16:16<19:26, 2.02it/s]
|
1188 |
|
|
|
1189 |
78%|ββββββββ | 8325/10682 [1:16:16<19:26, 2.02it/s]
|
1190 |
78%|ββββββββ | 8326/10682 [1:16:17<19:26, 2.02it/s]
|
1191 |
78%|ββββββββ | 8327/10682 [1:16:17<19:26, 2.02it/s]
|
1192 |
78%|ββββββββ | 8328/10682 [1:16:18<19:25, 2.02it/s]
|
1193 |
78%|ββββββββ | 8329/10682 [1:16:18<19:24, 2.02it/s]
|
1194 |
78%|ββββββββ | 8330/10682 [1:16:19<19:22, 2.02it/s]
|
1195 |
78%|ββββββββ | 8331/10682 [1:16:19<19:23, 2.02it/s]
|
1196 |
78%|ββββββββ | 8332/10682 [1:16:20<19:22, 2.02it/s]
|
1197 |
78%|ββββββββ | 8333/10682 [1:16:20<19:23, 2.02it/s]
|
1198 |
78%|ββββββββ | 8334/10682 [1:16:21<19:24, 2.02it/s]
|
1199 |
78%|ββββββββ | 8335/10682 [1:16:21<19:23, 2.02it/s]
|
1200 |
78%|ββββββββ | 8336/10682 [1:16:22<19:22, 2.02it/s]
|
1201 |
78%|ββββββββ | 8337/10682 [1:16:22<19:20, 2.02it/s]
|
1202 |
78%|ββββββββ | 8338/10682 [1:16:23<19:21, 2.02it/s]
|
1203 |
78%|ββββββββ | 8339/10682 [1:16:23<19:19, 2.02it/s]
|
1204 |
78%|ββββββββ | 8340/10682 [1:16:24<19:20, 2.02it/s]
|
1205 |
78%|ββββββββ | 8341/10682 [1:16:24<19:18, 2.02it/s]
|
1206 |
78%|ββββββββ | 8342/10682 [1:16:25<19:19, 2.02it/s]
|
1207 |
78%|ββββββββ | 8343/10682 [1:16:25<19:18, 2.02it/s]
|
1208 |
78%|ββββββββ | 8344/10682 [1:16:26<19:17, 2.02it/s]
|
1209 |
78%|ββββββββ | 8345/10682 [1:16:26<19:17, 2.02it/s]
|
1210 |
78%|ββββββββ | 8346/10682 [1:16:27<19:17, 2.02it/s]
|
1211 |
78%|ββββββββ | 8347/10682 [1:16:27<19:16, 2.02it/s]
|
1212 |
78%|ββββββββ | 8348/10682 [1:16:28<19:17, 2.02it/s]
|
1213 |
78%|ββββββββ | 8349/10682 [1:16:28<19:16, 2.02it/s]
|
1214 |
78%|ββββββββ | 8350/10682 [1:16:29<19:15, 2.02it/s]
|
1215 |
{'loss': 2.9034, 'grad_norm': 0.2513554096221924, 'learning_rate': 0.00013831071326327282, 'epoch': 10.94}
|
|
|
1216 |
78%|ββββββββ | 8350/10682 [1:16:29<19:15, 2.02it/s]
|
1217 |
78%|ββββββββ | 8351/10682 [1:16:29<19:16, 2.02it/s]
|
1218 |
78%|ββββββββ | 8352/10682 [1:16:30<19:14, 2.02it/s]
|
1219 |
78%|ββββββββ | 8353/10682 [1:16:30<19:14, 2.02it/s]
|
1220 |
78%|ββββββββ | 8354/10682 [1:16:31<19:12, 2.02it/s]
|
1221 |
78%|ββββββββ | 8355/10682 [1:16:31<19:13, 2.02it/s]
|
1222 |
78%|ββββββββ | 8356/10682 [1:16:32<19:12, 2.02it/s]
|
1223 |
78%|ββββββββ | 8357/10682 [1:16:32<19:11, 2.02it/s]
|
1224 |
78%|ββββββββ | 8358/10682 [1:16:33<19:10, 2.02it/s]
|
1225 |
78%|ββββββββ | 8359/10682 [1:16:33<19:09, 2.02it/s]
|
1226 |
78%|ββββββββ | 8360/10682 [1:16:34<19:08, 2.02it/s]
|
1227 |
78%|ββββββββ | 8361/10682 [1:16:34<19:08, 2.02it/s]
|
1228 |
78%|ββββββββ | 8362/10682 [1:16:35<19:07, 2.02it/s]
|
1229 |
78%|ββββββββ | 8363/10682 [1:16:35<19:06, 2.02it/s]
|
1230 |
78%|ββββββββ | 8364/10682 [1:16:36<19:06, 2.02it/s]
|
1231 |
78%|ββββββββ | 8365/10682 [1:16:36<19:05, 2.02it/s]
|
1232 |
78%|ββββββββ | 8366/10682 [1:16:37<19:05, 2.02it/s]
|
1233 |
78%|ββββββββ | 8367/10682 [1:16:37<19:04, 2.02it/s]
|
1234 |
78%|ββββββββ | 8368/10682 [1:16:38<19:05, 2.02it/s]
|
1235 |
78%|ββββββββ | 8369/10682 [1:16:38<19:03, 2.02it/s]
|
1236 |
78%|ββββββββ | 8370/10682 [1:16:39<19:04, 2.02it/s]
|
1237 |
78%|ββββββββ | 8371/10682 [1:16:39<19:02, 2.02it/s]
|
1238 |
78%|ββββββββ | 8372/10682 [1:16:40<19:03, 2.02it/s]
|
1239 |
78%|ββββββββ | 8373/10682 [1:16:40<19:02, 2.02it/s]
|
1240 |
78%|ββββββββ | 8374/10682 [1:16:41<19:02, 2.02it/s]
|
1241 |
78%|ββββββββ | 8375/10682 [1:16:41<19:01, 2.02it/s]
|
1242 |
|
|
|
1243 |
78%|ββββββββ | 8375/10682 [1:16:41<19:01, 2.02it/s]
|
1244 |
78%|ββββββββ | 8376/10682 [1:16:42<19:02, 2.02it/s]
|
1245 |
78%|ββββββββ | 8377/10682 [1:16:42<19:01, 2.02it/s]
|
1246 |
78%|ββββββββ | 8378/10682 [1:16:43<19:00, 2.02it/s]
|
1247 |
78%|ββββββββ | 8379/10682 [1:16:43<18:58, 2.02it/s]
|
1248 |
78%|ββββββββ | 8380/10682 [1:16:44<18:59, 2.02it/s]
|
1249 |
78%|ββββββββ | 8381/10682 [1:16:44<18:58, 2.02it/s]
|
1250 |
78%|ββββββββ | 8382/10682 [1:16:45<18:59, 2.02it/s]
|
1251 |
78%|ββββββββ | 8383/10682 [1:16:45<18:56, 2.02it/s]
|
1252 |
78%|ββββββββ | 8384/10682 [1:16:46<18:57, 2.02it/s]
|
1253 |
78%|ββββββββ | 8385/10682 [1:16:46<18:56, 2.02it/s]
|
1254 |
79%|ββββββββ | 8386/10682 [1:16:47<18:56, 2.02it/s]
|
1255 |
79%|ββββββββ | 8387/10682 [1:16:47<18:55, 2.02it/s]
|
1256 |
79%|ββββββββ | 8388/10682 [1:16:47<18:56, 2.02it/s]
|
1257 |
79%|ββββββββ | 8389/10682 [1:16:48<18:55, 2.02it/s]
|
1258 |
79%|ββββββββ | 8390/10682 [1:16:48<18:55, 2.02it/s]
|
1259 |
79%|ββββββββ | 8391/10682 [1:16:49<18:53, 2.02it/s]
|
1260 |
79%|ββββββββ | 8392/10682 [1:16:49<18:54, 2.02it/s]
|
1261 |
79%|ββββββββ | 8393/10682 [1:16:50<18:40, 2.04it/s]
|
1262 |
79%|ββββββββ | 8394/10682 [1:17:04<2:55:03, 4.59s/it]
|
1263 |
79%|ββββββββ | 8395/10682 [1:17:05<2:08:08, 3.36s/it]
|
1264 |
79%|ββββββββ | 8396/10682 [1:17:05<1:35:23, 2.50s/it]
|
1265 |
79%|ββββββββ | 8397/10682 [1:17:06<1:12:24, 1.90s/it]
|
1266 |
79%|ββββββββ | 8398/10682 [1:17:06<56:24, 1.48s/it]
|
1267 |
79%|ββββββββ | 8399/10682 [1:17:07<45:10, 1.19s/it]
|
1268 |
79%|ββββββββ | 8400/10682 [1:17:07<37:22, 1.02it/s]
|
1269 |
|
|
|
1270 |
79%|ββββββββ | 8400/10682 [1:17:07<37:22, 1.02it/s]
|
1271 |
79%|ββββββββ | 8401/10682 [1:17:08<31:48, 1.20it/s]
|
1272 |
79%|ββββββββ | 8402/10682 [1:17:08<27:51, 1.36it/s]
|
1273 |
79%|ββββββββ | 8403/10682 [1:17:09<25:08, 1.51it/s]
|
1274 |
79%|ββββββββ | 8404/10682 [1:17:09<23:11, 1.64it/s]
|
1275 |
79%|ββββββββ | 8405/10682 [1:17:10<21:51, 1.74it/s]
|
1276 |
79%|ββββββββ | 8406/10682 [1:17:10<20:54, 1.81it/s]
|
1277 |
79%|ββββββββ | 8407/10682 [1:17:11<20:17, 1.87it/s]
|
1278 |
79%|ββββββββ | 8408/10682 [1:17:11<19:51, 1.91it/s]
|
1279 |
79%|ββββββββ | 8409/10682 [1:17:12<19:30, 1.94it/s]
|
1280 |
79%|ββββββββ | 8410/10682 [1:17:12<19:19, 1.96it/s]
|
1281 |
79%|ββββββββ | 8411/10682 [1:17:13<19:06, 1.98it/s]
|
1282 |
79%|ββββββββ | 8412/10682 [1:17:13<18:59, 1.99it/s]
|
1283 |
79%|ββββββββ | 8413/10682 [1:17:14<18:53, 2.00it/s]
|
1284 |
79%|ββββββββ | 8414/10682 [1:17:14<18:50, 2.01it/s]
|
1285 |
79%|ββββββββ | 8415/10682 [1:17:15<18:49, 2.01it/s]
|
1286 |
79%|ββββββββ | 8416/10682 [1:17:15<18:46, 2.01it/s]
|
1287 |
79%|ββββββββ | 8417/10682 [1:17:16<18:43, 2.02it/s]
|
1288 |
79%|ββββββββ | 8418/10682 [1:17:16<18:42, 2.02it/s]
|
1289 |
79%|ββββββββ | 8419/10682 [1:17:17<18:39, 2.02it/s]
|
1290 |
79%|ββββββββ | 8420/10682 [1:17:17<18:39, 2.02it/s]
|
|
|
368 |
|
369 |
71%|βββββββ | 7575/10682 [1:09:53<27:49, 1.86it/s]
|
370 |
71%|βββββββ | 7576/10682 [1:09:53<27:09, 1.91it/s]
|
371 |
71%|βββββββ | 7577/10682 [1:09:54<26:41, 1.94it/s]
|
372 |
71%|βββββββ | 7578/10682 [1:09:54<26:20, 1.96it/s]
|
373 |
71%|βββββββ | 7579/10682 [1:09:55<26:07, 1.98it/s]
|
374 |
71%|βββββββ | 7580/10682 [1:09:55<25:55, 1.99it/s]
|
375 |
71%|βββββββ | 7581/10682 [1:09:56<25:48, 2.00it/s]
|
376 |
71%|βββββββ | 7582/10682 [1:09:56<25:43, 2.01it/s]
|
377 |
71%|βββββββ | 7583/10682 [1:09:57<25:40, 2.01it/s]
|
378 |
71%|βββββββ | 7584/10682 [1:09:57<25:37, 2.02it/s]
|
379 |
71%|βββββββ | 7585/10682 [1:09:58<25:34, 2.02it/s]
|
380 |
71%|βββββββ | 7586/10682 [1:09:58<25:33, 2.02it/s]
|
381 |
71%|βββββββ | 7587/10682 [1:09:59<25:30, 2.02it/s]
|
382 |
71%|βββββββ | 7588/10682 [1:09:59<25:30, 2.02it/s]
|
383 |
71%|βββββββ | 7589/10682 [1:10:00<25:28, 2.02it/s]
|
384 |
71%|βββββββ | 7590/10682 [1:10:00<25:29, 2.02it/s]
|
385 |
71%|βββββββ | 7591/10682 [1:10:01<25:28, 2.02it/s]
|
386 |
71%|βββββββ | 7592/10682 [1:10:01<25:28, 2.02it/s]
|
387 |
71%|βββββββ | 7593/10682 [1:10:02<25:27, 2.02it/s]
|
388 |
71%|βββββββ | 7594/10682 [1:10:02<25:26, 2.02it/s]
|
389 |
71%|βββββββ | 7595/10682 [1:10:03<25:24, 2.03it/s]
|
390 |
71%|βββββββ | 7596/10682 [1:10:03<25:24, 2.02it/s]
|
391 |
71%|βββββββ | 7597/10682 [1:10:04<25:24, 2.02it/s]
|
392 |
71%|βββββββ | 7598/10682 [1:10:04<25:21, 2.03it/s]
|
393 |
71%|βββββββ | 7599/10682 [1:10:05<25:23, 2.02it/s]
|
394 |
71%|βββββββ | 7600/10682 [1:10:05<25:20, 2.03it/s]{'loss': 2.9745, 'grad_norm': 0.24363452196121216, 'learning_rate': 0.00023289281143540065, 'epoch': 9.96}
|
395 |
|
396 |
|
397 |
71%|βββββββ | 7600/10682 [1:10:05<25:20, 2.03it/s]
|
398 |
71%|βββββββ | 7601/10682 [1:10:06<25:24, 2.02it/s]
|
399 |
71%|βββββββ | 7602/10682 [1:10:06<25:23, 2.02it/s]
|
400 |
71%|βββββββ | 7603/10682 [1:10:07<25:23, 2.02it/s]
|
401 |
71%|βββββββ | 7604/10682 [1:10:07<25:23, 2.02it/s]
|
402 |
71%|βββββββ | 7605/10682 [1:10:08<25:22, 2.02it/s]
|
403 |
71%|βββββββ | 7606/10682 [1:10:08<25:22, 2.02it/s]
|
404 |
71%|βββββββ | 7607/10682 [1:10:09<25:20, 2.02it/s]
|
405 |
71%|βββββββ | 7608/10682 [1:10:09<25:20, 2.02it/s]
|
406 |
71%|βββββββ | 7609/10682 [1:10:10<25:19, 2.02it/s]
|
407 |
71%|βββββββ | 7610/10682 [1:10:10<25:20, 2.02it/s]
|
408 |
71%|ββββββββ | 7611/10682 [1:10:11<25:19, 2.02it/s]
|
409 |
71%|ββββββββ | 7612/10682 [1:10:11<25:17, 2.02it/s]
|
410 |
71%|ββββββββ | 7613/10682 [1:10:12<25:17, 2.02it/s]
|
411 |
71%|ββββββββ | 7614/10682 [1:10:12<25:15, 2.02it/s]
|
412 |
71%|ββββββββ | 7615/10682 [1:10:13<25:15, 2.02it/s]
|
413 |
71%|ββββββββ | 7616/10682 [1:10:13<25:14, 2.02it/s]
|
414 |
71%|ββββββββ | 7617/10682 [1:10:14<25:15, 2.02it/s]
|
415 |
71%|ββββββββ | 7618/10682 [1:10:14<25:13, 2.02it/s]
|
416 |
71%|ββββββββ | 7619/10682 [1:10:15<25:14, 2.02it/s]
|
417 |
71%|ββββββββ | 7620/10682 [1:10:15<25:12, 2.02it/s]
|
418 |
71%|ββββββββ | 7621/10682 [1:10:16<25:13, 2.02it/s]
|
419 |
71%|ββββββββ | 7622/10682 [1:10:16<25:12, 2.02it/s]
|
420 |
71%|ββββββββ | 7623/10682 [1:10:17<25:13, 2.02it/s]
|
421 |
71%|ββββββββ | 7624/10682 [1:10:17<25:11, 2.02it/s]
|
422 |
71%|ββββββββ | 7625/10682 [1:10:18<25:12, 2.02it/s]
|
423 |
{'loss': 2.9725, 'grad_norm': 0.23881086707115173, 'learning_rate': 0.00022944844326774121, 'epoch': 9.99}
|
424 |
|
425 |
71%|ββββββββ | 7625/10682 [1:10:18<25:12, 2.02it/s]
|
426 |
71%|ββββββββ | 7626/10682 [1:10:18<25:11, 2.02it/s]
|
427 |
71%|ββββββββ | 7627/10682 [1:10:19<25:11, 2.02it/s]
|
428 |
71%|ββββββββ | 7628/10682 [1:10:19<25:10, 2.02it/s]
|
429 |
71%|ββββββββ | 7629/10682 [1:10:20<25:10, 2.02it/s]
|
430 |
71%|ββββββββ | 7630/10682 [1:10:20<24:52, 2.05it/s]
|
431 |
71%|ββββββββ | 7631/10682 [1:10:32<3:25:31, 4.04s/it]
|
432 |
71%|ββββββββ | 7632/10682 [1:10:33<2:31:22, 2.98s/it]
|
433 |
71%|ββββββββ | 7633/10682 [1:10:33<1:53:26, 2.23s/it]
|
434 |
71%|ββββββββ | 7634/10682 [1:10:34<1:27:02, 1.71s/it]
|
435 |
71%|ββββββββ | 7635/10682 [1:10:34<1:08:32, 1.35s/it]
|
436 |
71%|ββββββββ | 7636/10682 [1:10:35<55:29, 1.09s/it]
|
437 |
71%|ββββββββ | 7637/10682 [1:10:35<46:26, 1.09it/s]
|
438 |
72%|ββββββββ | 7638/10682 [1:10:36<40:07, 1.26it/s]
|
439 |
72%|ββββββββ | 7639/10682 [1:10:36<35:36, 1.42it/s]
|
440 |
72%|ββββββββ | 7640/10682 [1:10:37<32:30, 1.56it/s]
|
441 |
72%|ββββββββ | 7641/10682 [1:10:37<30:15, 1.67it/s]
|
442 |
72%|ββββββββ | 7642/10682 [1:10:38<28:43, 1.76it/s]
|
443 |
72%|ββββββββ | 7643/10682 [1:10:38<27:37, 1.83it/s]
|
444 |
72%|ββββββββ | 7644/10682 [1:10:39<26:52, 1.88it/s]
|
445 |
72%|ββββββββ | 7645/10682 [1:10:39<26:22, 1.92it/s]
|
446 |
72%|ββββββββ | 7646/10682 [1:10:40<25:58, 1.95it/s]
|
447 |
72%|ββββββββ | 7647/10682 [1:10:40<25:44, 1.96it/s]
|
448 |
72%|ββββββββ | 7648/10682 [1:10:41<25:34, 1.98it/s]
|
449 |
72%|ββββββββ | 7649/10682 [1:10:41<25:23, 1.99it/s]
|
450 |
72%|ββββββββ | 7650/10682 [1:10:42<25:18, 2.00it/s]{'loss': 2.8655, 'grad_norm': 0.2524799108505249, 'learning_rate': 0.00022602213475715589, 'epoch': 10.03}
|
|
|
451 |
|
452 |
72%|ββββββββ | 7650/10682 [1:10:42<25:18, 2.00it/s]
|
453 |
72%|ββββββββ | 7651/10682 [1:10:42<25:20, 1.99it/s]
|
454 |
72%|ββββββββ | 7652/10682 [1:10:43<25:13, 2.00it/s]
|
455 |
72%|ββββββββ | 7653/10682 [1:10:43<27:12, 1.86it/s]
|
456 |
72%|ββββββββ | 7654/10682 [1:10:44<26:30, 1.90it/s]
|
457 |
72%|ββββββββ | 7655/10682 [1:10:44<26:02, 1.94it/s]
|
458 |
+
|
459 |
|
460 |
72%|ββββββββ | 7650/10682 [1:10:42<25:18, 2.00it/s]
|
461 |
72%|ββββββββ | 7651/10682 [1:10:42<25:20, 1.99it/s]
|
462 |
72%|ββββββββ | 7652/10682 [1:10:43<25:13, 2.00it/s]
|
463 |
72%|ββββββββ | 7653/10682 [1:10:43<27:12, 1.86it/s]
|
464 |
72%|ββββββββ | 7654/10682 [1:10:44<26:30, 1.90it/s]
|
465 |
72%|ββββββββ | 7655/10682 [1:10:44<26:02, 1.94it/s]
|
466 |
72%|ββββββββ | 7656/10682 [1:10:45<25:42, 1.96it/s]
|
467 |
72%|ββββββββ | 7657/10682 [1:10:45<25:27, 1.98it/s]
|
468 |
72%|ββββββββ | 7658/10682 [1:10:46<25:19, 1.99it/s]
|
469 |
72%|ββββββββ | 7659/10682 [1:10:46<25:11, 2.00it/s]
|
470 |
72%|ββββββββ | 7660/10682 [1:10:47<25:06, 2.01it/s]
|
471 |
72%|ββββββββ | 7661/10682 [1:10:47<25:03, 2.01it/s]
|
472 |
72%|ββββββββ | 7662/10682 [1:10:48<24:59, 2.01it/s]
|
473 |
72%|ββββββββ | 7663/10682 [1:10:48<24:57, 2.02it/s]
|
474 |
72%|ββββββββ | 7664/10682 [1:10:49<24:55, 2.02it/s]
|
475 |
72%|ββββββββ | 7665/10682 [1:10:49<24:52, 2.02it/s]
|
476 |
72%|ββββββββ | 7666/10682 [1:10:50<24:51, 2.02it/s]
|
477 |
72%|ββββββββ | 7667/10682 [1:10:50<24:50, 2.02it/s]
|
478 |
72%|ββββββββ | 7668/10682 [1:10:51<24:51, 2.02it/s]
|
479 |
72%|ββββββββ | 7669/10682 [1:10:51<24:50, 2.02it/s]
|
480 |
72%|ββββββββ | 7670/10682 [1:10:52<24:52, 2.02it/s]
|
481 |
72%|ββββββββ | 7671/10682 [1:10:52<24:51, 2.02it/s]
|
482 |
72%|ββββββββ | 7672/10682 [1:10:53<24:50, 2.02it/s]
|
483 |
72%|ββββββββ | 7673/10682 [1:10:53<24:49, 2.02it/s]
|
484 |
72%|ββββββββ | 7674/10682 [1:10:54<24:49, 2.02it/s]
|
485 |
72%|ββββββββ | 7675/10682 [1:10:54<24:47, 2.02it/s]
|
486 |
|
487 |
+
|
488 |
72%|ββββββββ | 7675/10682 [1:10:54<24:47, 2.02it/s]
|
489 |
72%|ββββββββ | 7676/10682 [1:10:55<24:50, 2.02it/s]
|
490 |
72%|ββββββββ | 7677/10682 [1:10:55<24:48, 2.02it/s]
|
491 |
72%|ββββββββ | 7678/10682 [1:10:56<24:49, 2.02it/s]
|
492 |
72%|ββββββββ | 7679/10682 [1:10:56<24:47, 2.02it/s]
|
493 |
72%|ββββββββ | 7680/10682 [1:10:57<24:47, 2.02it/s]
|
494 |
72%|ββββββββ | 7681/10682 [1:10:57<24:44, 2.02it/s]
|
495 |
72%|ββββββββ | 7682/10682 [1:10:58<24:46, 2.02it/s]
|
496 |
72%|ββββββββ | 7683/10682 [1:10:58<24:46, 2.02it/s]
|
497 |
72%|ββββββββ | 7684/10682 [1:10:59<24:46, 2.02it/s]
|
498 |
72%|ββββββββ | 7685/10682 [1:10:59<24:47, 2.01it/s]
|
499 |
72%|ββββββββ | 7686/10682 [1:11:00<24:45, 2.02it/s]
|
500 |
72%|ββββββββ | 7687/10682 [1:11:00<24:43, 2.02it/s]
|
501 |
72%|ββββββββ | 7688/10682 [1:11:01<24:42, 2.02it/s]
|
502 |
72%|ββββββββ | 7689/10682 [1:11:01<24:42, 2.02it/s]
|
503 |
72%|ββββββββ | 7690/10682 [1:11:02<24:41, 2.02it/s]
|
504 |
72%|ββββββββ | 7691/10682 [1:11:02<24:40, 2.02it/s]
|
505 |
72%|ββββββββ | 7692/10682 [1:11:03<24:40, 2.02it/s]
|
506 |
72%|ββββββββ | 7693/10682 [1:11:03<24:39, 2.02it/s]
|
507 |
72%|ββββββββ | 7694/10682 [1:11:04<24:38, 2.02it/s]
|
508 |
72%|ββββββββ | 7695/10682 [1:11:04<24:36, 2.02it/s]
|
509 |
72%|ββββββββ | 7696/10682 [1:11:05<24:36, 2.02it/s]
|
510 |
72%|ββββββββ | 7697/10682 [1:11:05<24:36, 2.02it/s]
|
511 |
72%|ββββββββ | 7698/10682 [1:11:06<24:35, 2.02it/s]
|
512 |
72%|ββββββββ | 7699/10682 [1:11:06<24:33, 2.02it/s]
|
513 |
72%|ββββββββ | 7700/10682 [1:11:07<24:33, 2.02it/s]
|
514 |
|
515 |
+
|
516 |
72%|ββββββββ | 7700/10682 [1:11:07<24:33, 2.02it/s]
|
517 |
72%|ββββββββ | 7701/10682 [1:11:07<24:34, 2.02it/s]
|
518 |
72%|ββββββββ | 7702/10682 [1:11:08<24:34, 2.02it/s]
|
519 |
72%|ββββββββ | 7703/10682 [1:11:08<24:32, 2.02it/s]
|
520 |
72%|ββββββββ | 7704/10682 [1:11:09<24:34, 2.02it/s]
|
521 |
72%|ββββββββ | 7705/10682 [1:11:09<24:33, 2.02it/s]
|
522 |
72%|ββββββββ | 7706/10682 [1:11:10<24:33, 2.02it/s]
|
523 |
72%|ββββββββ | 7707/10682 [1:11:10<24:30, 2.02it/s]
|
524 |
72%|ββββββββ | 7708/10682 [1:11:11<24:30, 2.02it/s]
|
525 |
72%|ββββββββ | 7709/10682 [1:11:11<24:28, 2.02it/s]
|
526 |
72%|ββββββββ | 7710/10682 [1:11:12<24:28, 2.02it/s]
|
527 |
72%|ββββββββ | 7711/10682 [1:11:12<24:28, 2.02it/s]
|
528 |
72%|ββββββββ | 7712/10682 [1:11:13<24:27, 2.02it/s]
|
529 |
72%|ββββββββ | 7713/10682 [1:11:13<24:28, 2.02it/s]
|
530 |
72%|ββββββββ | 7714/10682 [1:11:14<24:25, 2.02it/s]
|
531 |
72%|ββββββββ | 7715/10682 [1:11:14<24:27, 2.02it/s]
|
532 |
72%|ββββββββ | 7716/10682 [1:11:15<24:27, 2.02it/s]
|
533 |
72%|ββββββββ | 7717/10682 [1:11:15<24:28, 2.02it/s]
|
534 |
72%|ββββββββ | 7718/10682 [1:11:16<24:27, 2.02it/s]
|
535 |
72%|ββββββββ | 7719/10682 [1:11:16<24:28, 2.02it/s]
|
536 |
72%|ββββββββ | 7720/10682 [1:11:17<24:27, 2.02it/s]
|
537 |
72%|ββββββββ | 7721/10682 [1:11:17<24:26, 2.02it/s]
|
538 |
72%|ββββββββ | 7722/10682 [1:11:18<24:24, 2.02it/s]
|
539 |
72%|ββββββββ | 7723/10682 [1:11:18<24:22, 2.02it/s]
|
540 |
72%|ββββββββ | 7724/10682 [1:11:19<24:22, 2.02it/s]
|
541 |
72%|ββββββββ | 7725/10682 [1:11:19<24:20, 2.02it/s]
|
542 |
|
543 |
+
|
544 |
72%|ββββββββ | 7725/10682 [1:11:19<24:20, 2.02it/s]
|
545 |
72%|ββββββββ | 7726/10682 [1:11:20<24:23, 2.02it/s]
|
546 |
72%|ββββββββ | 7727/10682 [1:11:20<24:21, 2.02it/s]
|
547 |
72%|ββββββββ | 7728/10682 [1:11:21<24:21, 2.02it/s]
|
548 |
72%|ββββββββ | 7729/10682 [1:11:21<24:19, 2.02it/s]
|
549 |
72%|ββββββββ | 7730/10682 [1:11:22<24:21, 2.02it/s]
|
550 |
72%|ββββββββ | 7731/10682 [1:11:22<24:20, 2.02it/s]
|
551 |
72%|ββββββββ | 7732/10682 [1:11:23<24:20, 2.02it/s]
|
552 |
72%|ββββββββ | 7733/10682 [1:11:23<24:17, 2.02it/s]
|
553 |
72%|ββββββββ | 7734/10682 [1:11:24<24:17, 2.02it/s]
|
554 |
72%|ββββββββ | 7735/10682 [1:11:24<24:16, 2.02it/s]
|
555 |
72%|ββββββββ | 7736/10682 [1:11:24<24:17, 2.02it/s]
|
556 |
72%|ββββββββ | 7737/10682 [1:11:25<24:15, 2.02it/s]
|
557 |
72%|ββββββββ | 7738/10682 [1:11:25<24:16, 2.02it/s]
|
558 |
72%|ββββββββ | 7739/10682 [1:11:26<24:15, 2.02it/s]
|
559 |
72%|ββββββββ | 7740/10682 [1:11:26<24:16, 2.02it/s]
|
560 |
72%|ββββββββ | 7741/10682 [1:11:27<24:15, 2.02it/s]
|
561 |
72%|ββββββββ | 7742/10682 [1:11:27<24:15, 2.02it/s]
|
562 |
72%|ββββββββ | 7743/10682 [1:11:28<24:13, 2.02it/s]
|
563 |
72%|ββββββββ | 7744/10682 [1:11:28<24:14, 2.02it/s]
|
564 |
73%|ββββββββ | 7745/10682 [1:11:29<24:13, 2.02it/s]
|
565 |
73%|ββββββββ | 7746/10682 [1:11:29<24:13, 2.02it/s]
|
566 |
73%|ββββββββ | 7747/10682 [1:11:30<24:12, 2.02it/s]
|
567 |
73%|ββββββββ | 7748/10682 [1:11:30<24:12, 2.02it/s]
|
568 |
73%|ββββββββ | 7749/10682 [1:11:31<24:10, 2.02it/s]
|
569 |
73%|ββββββββ | 7750/10682 [1:11:31<24:11, 2.02it/s]
|
570 |
{'loss': 2.8612, 'grad_norm': 0.24724432826042175, 'learning_rate': 0.00021250205309110155, 'epoch': 10.16}
|
571 |
+
|
572 |
73%|ββββββββ | 7750/10682 [1:11:31<24:11, 2.02it/s]
|
573 |
73%|ββββββββ | 7751/10682 [1:11:32<24:12, 2.02it/s]
|
574 |
73%|ββββββββ | 7752/10682 [1:11:32<24:10, 2.02it/s]
|
575 |
73%|ββββββββ | 7753/10682 [1:11:33<24:10, 2.02it/s]
|
576 |
73%|ββββββββ | 7754/10682 [1:11:33<24:08, 2.02it/s]
|
577 |
73%|ββββββββ | 7755/10682 [1:11:34<24:07, 2.02it/s]
|
578 |
73%|ββββββββ | 7756/10682 [1:11:34<24:07, 2.02it/s]
|
579 |
73%|ββββββββ | 7757/10682 [1:11:35<24:06, 2.02it/s]
|
580 |
73%|ββββββββ | 7758/10682 [1:11:35<24:06, 2.02it/s]
|
581 |
73%|ββββββββ | 7759/10682 [1:11:36<24:05, 2.02it/s]
|
582 |
73%|ββββββββ | 7760/10682 [1:11:36<24:04, 2.02it/s]
|
583 |
73%|ββββββββ | 7761/10682 [1:11:37<24:04, 2.02it/s]
|
584 |
73%|ββββββββ | 7762/10682 [1:11:37<24:02, 2.02it/s]
|
585 |
73%|ββββββββ | 7763/10682 [1:11:38<24:03, 2.02it/s]
|
586 |
73%|ββββββββ | 7764/10682 [1:11:38<24:03, 2.02it/s]
|
587 |
73%|ββββββββ | 7765/10682 [1:11:39<24:03, 2.02it/s]
|
588 |
73%|ββββββββ | 7766/10682 [1:11:39<24:02, 2.02it/s]
|
589 |
73%|ββββββββ | 7767/10682 [1:11:40<24:02, 2.02it/s]
|
590 |
73%|ββββββββ | 7768/10682 [1:11:40<24:01, 2.02it/s]
|
591 |
73%|ββββββββ | 7769/10682 [1:11:41<24:00, 2.02it/s]
|
592 |
73%|ββββββββ | 7770/10682 [1:11:41<23:59, 2.02it/s]
|
593 |
73%|ββββββββ | 7771/10682 [1:11:42<23:59, 2.02it/s]
|
594 |
73%|ββββββββ | 7772/10682 [1:11:42<23:58, 2.02it/s]
|
595 |
73%|ββββββββ | 7773/10682 [1:11:43<23:57, 2.02it/s]
|
596 |
73%|ββββββββ | 7774/10682 [1:11:43<23:59, 2.02it/s]
|
597 |
73%|ββββββββ | 7775/10682 [1:11:44<23:58, 2.02it/s]{'loss': 2.8622, 'grad_norm': 0.2527807056903839, 'learning_rate': 0.00020916944887928359, 'epoch': 10.19}
|
598 |
+
|
599 |
|
600 |
73%|ββββββββ | 7775/10682 [1:11:44<23:58, 2.02it/s]
|
601 |
73%|ββββββββ | 7776/10682 [1:11:44<23:59, 2.02it/s]
|
602 |
73%|ββββββββ | 7777/10682 [1:11:45<24:00, 2.02it/s]
|
603 |
73%|ββββββββ | 7778/10682 [1:11:45<23:58, 2.02it/s]
|
604 |
73%|ββββββββ | 7779/10682 [1:11:46<23:59, 2.02it/s]
|
605 |
73%|ββββββββ | 7780/10682 [1:11:46<23:58, 2.02it/s]
|
606 |
73%|ββββββββ | 7781/10682 [1:11:47<23:58, 2.02it/s]
|
607 |
73%|ββββββββ | 7782/10682 [1:11:47<23:56, 2.02it/s]
|
608 |
73%|ββββββββ | 7783/10682 [1:11:48<23:55, 2.02it/s]
|
609 |
73%|ββββββββ | 7784/10682 [1:11:48<23:54, 2.02it/s]
|
610 |
73%|ββββββββ | 7785/10682 [1:11:49<23:54, 2.02it/s]
|
611 |
73%|ββββββββ | 7786/10682 [1:11:49<23:52, 2.02it/s]
|
612 |
73%|ββββββββ | 7787/10682 [1:11:50<23:52, 2.02it/s]
|
613 |
73%|ββββββββ | 7788/10682 [1:11:50<23:51, 2.02it/s]
|
614 |
73%|ββββββββ | 7789/10682 [1:11:51<23:50, 2.02it/s]
|
615 |
73%|ββββββββ | 7790/10682 [1:11:51<23:54, 2.02it/s]
|
616 |
73%|ββββββββ | 7791/10682 [1:11:52<23:51, 2.02it/s]
|
617 |
73%|ββββββββ | 7792/10682 [1:11:52<23:50, 2.02it/s]
|
618 |
73%|ββββββββ | 7793/10682 [1:11:53<23:49, 2.02it/s]
|
619 |
73%|ββββββββ | 7794/10682 [1:11:53<23:48, 2.02it/s]
|
620 |
73%|ββββββββ | 7795/10682 [1:11:54<23:48, 2.02it/s]
|
621 |
73%|ββββββββ | 7796/10682 [1:11:54<23:46, 2.02it/s]
|
622 |
73%|ββββββββ | 7797/10682 [1:11:55<23:46, 2.02it/s]
|
623 |
73%|ββββββββ | 7798/10682 [1:11:55<23:47, 2.02it/s]
|
624 |
73%|ββββββββ | 7799/10682 [1:11:56<23:47, 2.02it/s]
|
625 |
73%|ββββββββ | 7800/10682 [1:11:56<23:45, 2.02it/s]
|
626 |
{'loss': 2.8737, 'grad_norm': 0.24947026371955872, 'learning_rate': 0.00020585625797294927, 'epoch': 10.22}
|
627 |
+
|
628 |
73%|ββββββββ | 7800/10682 [1:11:56<23:45, 2.02it/s]
|
629 |
73%|ββββββββ | 7801/10682 [1:11:57<23:48, 2.02it/s]
|
630 |
73%|ββββββββ | 7802/10682 [1:11:57<23:47, 2.02it/s]
|
631 |
73%|ββββββββ | 7803/10682 [1:11:58<23:46, 2.02it/s]
|
632 |
73%|ββββββββ | 7804/10682 [1:11:58<23:43, 2.02it/s]
|
633 |
73%|ββββββββ | 7805/10682 [1:11:59<23:45, 2.02it/s]
|
634 |
73%|ββββββββ | 7806/10682 [1:11:59<23:41, 2.02it/s]
|
635 |
73%|ββββββββ | 7807/10682 [1:12:00<23:42, 2.02it/s]
|
636 |
73%|ββββββββ | 7808/10682 [1:12:00<23:40, 2.02it/s]
|
637 |
73%|ββββββββ | 7809/10682 [1:12:01<23:40, 2.02it/s]
|
638 |
73%|ββββββββ | 7810/10682 [1:12:01<23:40, 2.02it/s]
|
639 |
73%|ββββββββ | 7811/10682 [1:12:02<23:40, 2.02it/s]
|
640 |
73%|ββββββββ | 7812/10682 [1:12:02<23:38, 2.02it/s]
|
641 |
73%|ββββββββ | 7813/10682 [1:12:03<23:39, 2.02it/s]
|
642 |
73%|ββββββββ | 7814/10682 [1:12:03<23:37, 2.02it/s]
|
643 |
73%|ββββββββ | 7815/10682 [1:12:04<23:37, 2.02it/s]
|
644 |
73%|ββββββββ | 7816/10682 [1:12:04<23:37, 2.02it/s]
|
645 |
73%|ββββββββ | 7817/10682 [1:12:05<23:35, 2.02it/s]
|
646 |
73%|ββββββββ | 7818/10682 [1:12:05<23:35, 2.02it/s]
|
647 |
73%|ββββββββ | 7819/10682 [1:12:06<23:35, 2.02it/s]
|
648 |
73%|ββββββββ | 7820/10682 [1:12:06<23:35, 2.02it/s]
|
649 |
73%|ββββββββ | 7821/10682 [1:12:07<23:36, 2.02it/s]
|
650 |
73%|ββββββββ | 7822/10682 [1:12:07<23:35, 2.02it/s]
|
651 |
73%|ββββββββ | 7823/10682 [1:12:08<23:34, 2.02it/s]
|
652 |
73%|ββββββββ | 7824/10682 [1:12:08<23:34, 2.02it/s]
|
653 |
73%|ββββββββ | 7825/10682 [1:12:09<23:32, 2.02it/s]
|
654 |
{'loss': 2.8657, 'grad_norm': 0.2530911862850189, 'learning_rate': 0.00020256270153176371, 'epoch': 10.26}
|
655 |
+
|
656 |
73%|ββββββββ | 7825/10682 [1:12:09<23:32, 2.02it/s]
|
657 |
73%|ββββββββ | 7826/10682 [1:12:09<23:33, 2.02it/s]
|
658 |
73%|ββββββββ | 7827/10682 [1:12:10<23:31, 2.02it/s]
|
659 |
73%|ββββββββ | 7828/10682 [1:12:10<23:32, 2.02it/s]
|
660 |
73%|ββββββββ | 7829/10682 [1:12:11<23:30, 2.02it/s]
|
661 |
73%|ββββββββ | 7830/10682 [1:12:11<23:30, 2.02it/s]
|
662 |
73%|ββββββββ | 7831/10682 [1:12:11<23:29, 2.02it/s]
|
663 |
73%|ββββββββ | 7832/10682 [1:12:12<23:31, 2.02it/s]
|
664 |
73%|ββββββββ | 7833/10682 [1:12:12<23:30, 2.02it/s]
|
665 |
73%|ββββββββ | 7834/10682 [1:12:13<23:29, 2.02it/s]
|
666 |
73%|ββββββββ | 7835/10682 [1:12:13<23:29, 2.02it/s]
|
667 |
73%|ββββββββ | 7836/10682 [1:12:14<23:27, 2.02it/s]
|
668 |
73%|ββββββββ | 7837/10682 [1:12:14<23:27, 2.02it/s]
|
669 |
73%|ββββββββ | 7838/10682 [1:12:15<23:26, 2.02it/s]
|
670 |
73%|ββββββββ | 7839/10682 [1:12:15<23:25, 2.02it/s]
|
671 |
73%|ββββββββ | 7840/10682 [1:12:16<23:25, 2.02it/s]
|
672 |
73%|ββββββββ | 7841/10682 [1:12:16<23:23, 2.02it/s]
|
673 |
73%|ββββββββ | 7842/10682 [1:12:17<23:24, 2.02it/s]
|
674 |
73%|ββββββββ | 7843/10682 [1:12:17<23:22, 2.02it/s]
|
675 |
73%|ββββββββ | 7844/10682 [1:12:18<23:24, 2.02it/s]
|
676 |
73%|ββββββββ | 7845/10682 [1:12:18<23:23, 2.02it/s]
|
677 |
73%|ββββββββ | 7846/10682 [1:12:19<23:23, 2.02it/s]
|
678 |
73%|ββββββββ | 7847/10682 [1:12:19<23:22, 2.02it/s]
|
679 |
73%|ββββββββ | 7848/10682 [1:12:20<23:23, 2.02it/s]
|
680 |
73%|ββββββββ | 7849/10682 [1:12:20<23:21, 2.02it/s]
|
681 |
73%|ββββββββ | 7850/10682 [1:12:21<23:21, 2.02it/s]
|
682 |
|
683 |
+
|
684 |
73%|ββββββββ | 7850/10682 [1:12:21<23:21, 2.02it/s]
|
685 |
73%|ββββββββ | 7851/10682 [1:12:21<23:22, 2.02it/s]
|
686 |
74%|ββββββββ | 7852/10682 [1:12:22<23:19, 2.02it/s]
|
687 |
74%|ββββββββ | 7853/10682 [1:12:22<23:20, 2.02it/s]
|
688 |
74%|ββββββββ | 7854/10682 [1:12:23<23:18, 2.02it/s]
|
689 |
74%|ββββββββ | 7855/10682 [1:12:23<23:18, 2.02it/s]
|
690 |
74%|ββββββββ | 7856/10682 [1:12:24<23:17, 2.02it/s]
|
691 |
74%|ββββββββ | 7857/10682 [1:12:24<23:18, 2.02it/s]
|
692 |
74%|ββββββββ | 7858/10682 [1:12:25<23:17, 2.02it/s]
|
693 |
74%|ββββββββ | 7859/10682 [1:12:25<23:16, 2.02it/s]
|
694 |
74%|ββββββββ | 7860/10682 [1:12:26<23:16, 2.02it/s]
|
695 |
74%|ββββββββ | 7861/10682 [1:12:26<23:14, 2.02it/s]
|
696 |
74%|ββββββββ | 7862/10682 [1:12:27<23:15, 2.02it/s]
|
697 |
74%|ββββββββ | 7863/10682 [1:12:27<23:14, 2.02it/s]
|
698 |
74%|ββββββββ | 7864/10682 [1:12:28<23:16, 2.02it/s]
|
699 |
74%|ββββββββ | 7865/10682 [1:12:28<23:13, 2.02it/s]
|
700 |
74%|ββββββββ | 7866/10682 [1:12:29<23:13, 2.02it/s]
|
701 |
74%|ββββββββ | 7867/10682 [1:12:29<23:12, 2.02it/s]
|
702 |
74%|ββββββββ | 7868/10682 [1:12:30<23:13, 2.02it/s]
|
703 |
74%|ββββββββ | 7869/10682 [1:12:30<23:11, 2.02it/s]
|
704 |
74%|ββββββββ | 7870/10682 [1:12:31<23:11, 2.02it/s]
|
705 |
74%|ββββββββ | 7871/10682 [1:12:31<23:10, 2.02it/s]
|
706 |
74%|ββββββββ | 7872/10682 [1:12:32<23:10, 2.02it/s]
|
707 |
74%|ββββββββ | 7873/10682 [1:12:32<23:09, 2.02it/s]
|
708 |
74%|ββββββββ | 7874/10682 [1:12:33<23:10, 2.02it/s]
|
709 |
74%|ββββββββ | 7875/10682 [1:12:33<23:09, 2.02it/s]
|
710 |
{'loss': 2.8799, 'grad_norm': 0.26851731538772583, 'learning_rate': 0.00019603537011569566, 'epoch': 10.32}
|
711 |
+
|
712 |
74%|ββββββββ | 7875/10682 [1:12:33<23:09, 2.02it/s]
|
713 |
74%|ββββββββ | 7876/10682 [1:12:34<23:09, 2.02it/s]
|
714 |
74%|ββββββββ | 7877/10682 [1:12:34<23:09, 2.02it/s]
|
715 |
74%|ββββββββ | 7878/10682 [1:12:35<23:07, 2.02it/s]
|
716 |
74%|ββββββββ | 7879/10682 [1:12:35<23:08, 2.02it/s]
|
717 |
74%|ββββββββ | 7880/10682 [1:12:36<23:06, 2.02it/s]
|
718 |
74%|ββββββββ | 7881/10682 [1:12:36<23:04, 2.02it/s]
|
719 |
74%|ββββββββ | 7882/10682 [1:12:37<23:03, 2.02it/s]
|
720 |
74%|ββββββββ | 7883/10682 [1:12:37<23:03, 2.02it/s]
|
721 |
74%|ββββββββ | 7884/10682 [1:12:38<23:03, 2.02it/s]
|
722 |
74%|ββββββββ | 7885/10682 [1:12:38<23:04, 2.02it/s]
|
723 |
74%|ββββββββ | 7886/10682 [1:12:39<23:02, 2.02it/s]
|
724 |
74%|ββββββββ | 7887/10682 [1:12:39<23:02, 2.02it/s]
|
725 |
74%|ββββββββ | 7888/10682 [1:12:40<23:01, 2.02it/s]
|
726 |
74%|ββββββββ | 7889/10682 [1:12:40<23:02, 2.02it/s]
|
727 |
74%|ββββββββ | 7890/10682 [1:12:41<23:01, 2.02it/s]
|
728 |
74%|ββββββββ | 7891/10682 [1:12:41<23:01, 2.02it/s]
|
729 |
74%|ββββββββ | 7892/10682 [1:12:42<22:59, 2.02it/s]
|
730 |
74%|ββββββββ | 7893/10682 [1:12:42<23:00, 2.02it/s]
|
731 |
74%|ββββββββ | 7894/10682 [1:12:43<22:58, 2.02it/s]
|
732 |
74%|ββββββββ | 7895/10682 [1:12:43<22:59, 2.02it/s]
|
733 |
74%|ββββββββ | 7896/10682 [1:12:44<22:56, 2.02it/s]
|
734 |
74%|ββββββββ | 7897/10682 [1:12:44<22:57, 2.02it/s]
|
735 |
74%|ββββββββ | 7898/10682 [1:12:45<22:55, 2.02it/s]
|
736 |
74%|ββββββββ | 7899/10682 [1:12:45<22:56, 2.02it/s]
|
737 |
74%|ββββββββ | 7900/10682 [1:12:46<22:55, 2.02it/s]
|
738 |
{'loss': 2.8842, 'grad_norm': 0.24814572930335999, 'learning_rate': 0.0001928020308484042, 'epoch': 10.35}
|
739 |
+
|
740 |
74%|ββββββββ | 7900/10682 [1:12:46<22:55, 2.02it/s]
|
741 |
74%|ββββββββ | 7901/10682 [1:12:46<22:57, 2.02it/s]
|
742 |
74%|ββββββββ | 7902/10682 [1:12:47<22:54, 2.02it/s]
|
743 |
74%|ββββββββ | 7903/10682 [1:12:47<22:56, 2.02it/s]
|
744 |
74%|ββββββββ | 7904/10682 [1:12:48<22:53, 2.02it/s]
|
745 |
74%|ββββββββ | 7905/10682 [1:12:48<22:54, 2.02it/s]
|
746 |
74%|ββββββββ | 7906/10682 [1:12:49<22:53, 2.02it/s]
|
747 |
74%|ββββββββ | 7907/10682 [1:12:49<22:53, 2.02it/s]
|
748 |
74%|ββββββββ | 7908/10682 [1:12:50<22:51, 2.02it/s]
|
749 |
74%|ββββββββ | 7909/10682 [1:12:50<22:53, 2.02it/s]
|
750 |
74%|ββββββββ | 7910/10682 [1:12:51<22:51, 2.02it/s]
|
751 |
74%|ββββββββ | 7911/10682 [1:12:51<22:51, 2.02it/s]
|
752 |
74%|ββββββββ | 7912/10682 [1:12:52<22:49, 2.02it/s]
|
753 |
74%|ββββββββ | 7913/10682 [1:12:52<22:50, 2.02it/s]
|
754 |
74%|ββββββββ | 7914/10682 [1:12:53<22:48, 2.02it/s]
|
755 |
74%|ββββββββ | 7915/10682 [1:12:53<22:48, 2.02it/s]
|
756 |
74%|ββββββββ | 7916/10682 [1:12:54<22:47, 2.02it/s]
|
757 |
74%|ββββββββ | 7917/10682 [1:12:54<22:48, 2.02it/s]
|
758 |
74%|ββββββββ | 7918/10682 [1:12:55<22:46, 2.02it/s]
|
759 |
74%|ββββββββ | 7919/10682 [1:12:55<22:48, 2.02it/s]
|
760 |
74%|ββββββββ | 7920/10682 [1:12:56<22:45, 2.02it/s]
|
761 |
74%|ββββββββ | 7921/10682 [1:12:56<22:45, 2.02it/s]
|
762 |
74%|ββββββββ | 7922/10682 [1:12:57<22:44, 2.02it/s]
|
763 |
74%|ββββββββ | 7923/10682 [1:12:57<22:45, 2.02it/s]
|
764 |
74%|ββββββββ | 7924/10682 [1:12:58<22:45, 2.02it/s]
|
765 |
74%|ββββββββ | 7925/10682 [1:12:58<22:45, 2.02it/s]
|
766 |
{'loss': 2.8778, 'grad_norm': 0.24965329468250275, 'learning_rate': 0.00018958919743235897, 'epoch': 10.39}
|
767 |
+
|
768 |
74%|ββββββββ | 7925/10682 [1:12:58<22:45, 2.02it/s]
|
769 |
74%|ββββββββ | 7926/10682 [1:12:59<22:47, 2.02it/s]
|
770 |
74%|ββββββββ | 7927/10682 [1:12:59<22:44, 2.02it/s]
|
771 |
74%|ββββββββ | 7928/10682 [1:12:59<22:44, 2.02it/s]
|
772 |
74%|ββββββββ | 7929/10682 [1:13:00<22:43, 2.02it/s]
|
773 |
74%|ββββββββ | 7930/10682 [1:13:00<22:42, 2.02it/s]
|
774 |
74%|ββββββββ | 7931/10682 [1:13:01<22:41, 2.02it/s]
|
775 |
74%|ββββββββ | 7932/10682 [1:13:01<22:40, 2.02it/s]
|
776 |
74%|ββββββββ | 7933/10682 [1:13:02<22:39, 2.02it/s]
|
777 |
74%|ββββββββ | 7934/10682 [1:13:02<22:38, 2.02it/s]
|
778 |
74%|ββββββββ | 7935/10682 [1:13:03<22:39, 2.02it/s]
|
779 |
74%|ββββββββ | 7936/10682 [1:13:03<22:38, 2.02it/s]
|
780 |
74%|ββββββββ | 7937/10682 [1:13:04<22:39, 2.02it/s]
|
781 |
74%|ββββββββ | 7938/10682 [1:13:04<22:37, 2.02it/s]
|
782 |
74%|ββββββββ | 7939/10682 [1:13:05<22:37, 2.02it/s]
|
783 |
74%|ββββββββ | 7940/10682 [1:13:05<22:36, 2.02it/s]
|
784 |
74%|ββββββββ | 7941/10682 [1:13:06<22:37, 2.02it/s]
|
785 |
74%|ββββββββ | 7942/10682 [1:13:06<22:35, 2.02it/s]
|
786 |
74%|ββββββββ | 7943/10682 [1:13:07<22:36, 2.02it/s]
|
787 |
74%|ββββββββ | 7944/10682 [1:13:07<22:35, 2.02it/s]
|
788 |
74%|ββββββββ | 7945/10682 [1:13:08<22:35, 2.02it/s]
|
789 |
74%|ββββββββ | 7946/10682 [1:13:08<22:34, 2.02it/s]
|
790 |
74%|ββββββββ | 7947/10682 [1:13:09<22:33, 2.02it/s]
|
791 |
74%|ββββββββ | 7948/10682 [1:13:09<22:32, 2.02it/s]
|
792 |
74%|ββββββββ | 7949/10682 [1:13:10<22:32, 2.02it/s]
|
793 |
74%|ββββββββ | 7950/10682 [1:13:10<22:30, 2.02it/s]
|
794 |
{'loss': 2.8803, 'grad_norm': 0.24981389939785004, 'learning_rate': 0.0001863970843282357, 'epoch': 10.42}
|
795 |
+
|
796 |
74%|ββββββββ | 7950/10682 [1:13:10<22:30, 2.02it/s]
|
797 |
74%|ββββββββ | 7951/10682 [1:13:11<22:33, 2.02it/s]
|
798 |
74%|ββββββββ | 7952/10682 [1:13:11<22:31, 2.02it/s]
|
799 |
74%|ββββββββ | 7953/10682 [1:13:12<22:32, 2.02it/s]
|
800 |
74%|ββββββββ | 7954/10682 [1:13:12<22:31, 2.02it/s]
|
801 |
74%|ββββββββ | 7955/10682 [1:13:13<22:31, 2.02it/s]
|
802 |
74%|ββββββββ | 7956/10682 [1:13:13<22:29, 2.02it/s]
|
803 |
74%|ββββββββ | 7957/10682 [1:13:14<22:29, 2.02it/s]
|
804 |
74%|ββββββββ | 7958/10682 [1:13:14<22:27, 2.02it/s]
|
805 |
75%|ββββββββ | 7959/10682 [1:13:15<22:27, 2.02it/s]
|
806 |
75%|ββββββββ | 7960/10682 [1:13:15<22:25, 2.02it/s]
|
807 |
75%|ββββββββ | 7961/10682 [1:13:16<22:27, 2.02it/s]
|
808 |
75%|ββββββββ | 7962/10682 [1:13:16<22:27, 2.02it/s]
|
809 |
75%|ββββββββ | 7963/10682 [1:13:17<22:26, 2.02it/s]
|
810 |
75%|ββββββββ | 7964/10682 [1:13:17<22:26, 2.02it/s]
|
811 |
75%|ββββββββ | 7965/10682 [1:13:18<22:24, 2.02it/s]
|
812 |
75%|ββββββββ | 7966/10682 [1:13:18<22:23, 2.02it/s]
|
813 |
75%|ββββββββ | 7967/10682 [1:13:19<22:23, 2.02it/s]
|
814 |
75%|ββββββββ | 7968/10682 [1:13:19<22:24, 2.02it/s]
|
815 |
75%|ββββββββ | 7969/10682 [1:13:20<22:22, 2.02it/s]
|
816 |
75%|ββββββββ | 7970/10682 [1:13:20<22:21, 2.02it/s]
|
817 |
75%|ββββββββ | 7971/10682 [1:13:21<22:20, 2.02it/s]
|
818 |
75%|ββββββββ | 7972/10682 [1:13:21<22:20, 2.02it/s]
|
819 |
75%|ββββββββ | 7973/10682 [1:13:22<22:20, 2.02it/s]
|
820 |
75%|ββββββββ | 7974/10682 [1:13:22<22:21, 2.02it/s]
|
821 |
75%|ββββββββ | 7975/10682 [1:13:23<22:19, 2.02it/s]
|
822 |
|
823 |
+
|
824 |
75%|ββββββββ | 7975/10682 [1:13:23<22:19, 2.02it/s]
|
825 |
75%|ββββββββ | 7976/10682 [1:13:23<22:23, 2.01it/s]
|
826 |
75%|ββββββββ | 7977/10682 [1:13:24<22:20, 2.02it/s]
|
827 |
75%|ββββββββ | 7978/10682 [1:13:24<22:20, 2.02it/s]
|
828 |
75%|ββββββββ | 7979/10682 [1:13:25<22:18, 2.02it/s]
|
829 |
75%|ββββββββ | 7980/10682 [1:13:25<22:19, 2.02it/s]
|
830 |
75%|ββββββββ | 7981/10682 [1:13:26<22:17, 2.02it/s]
|
831 |
75%|ββββββββ | 7982/10682 [1:13:26<22:17, 2.02it/s]
|
832 |
75%|ββββββββ | 7983/10682 [1:13:27<22:15, 2.02it/s]
|
833 |
75%|ββββββββ | 7984/10682 [1:13:27<22:15, 2.02it/s]
|
834 |
75%|ββββββββ | 7985/10682 [1:13:28<22:14, 2.02it/s]
|
835 |
75%|ββββββββ | 7986/10682 [1:13:28<22:14, 2.02it/s]
|
836 |
75%|ββββββββ | 7987/10682 [1:13:29<22:14, 2.02it/s]
|
837 |
75%|ββββββββ | 7988/10682 [1:13:29<22:13, 2.02it/s]
|
838 |
75%|ββββββββ | 7989/10682 [1:13:30<22:13, 2.02it/s]
|
839 |
75%|ββββββββ | 7990/10682 [1:13:30<22:12, 2.02it/s]
|
840 |
75%|ββββββββ | 7991/10682 [1:13:31<22:13, 2.02it/s]
|
841 |
75%|ββββββββ | 7992/10682 [1:13:31<22:11, 2.02it/s]
|
842 |
75%|ββββββββ | 7993/10682 [1:13:32<22:11, 2.02it/s]
|
843 |
75%|ββββββββ | 7994/10682 [1:13:32<22:09, 2.02it/s]
|
844 |
75%|ββββββββ | 7995/10682 [1:13:33<22:10, 2.02it/s]
|
845 |
75%|ββββββββ | 7996/10682 [1:13:33<22:10, 2.02it/s]
|
846 |
75%|ββββββββ | 7997/10682 [1:13:34<22:10, 2.02it/s]
|
847 |
75%|ββββββββ | 7998/10682 [1:13:34<22:08, 2.02it/s]
|
848 |
75%|ββββββββ | 7999/10682 [1:13:35<22:08, 2.02it/s]
|
849 |
75%|ββββββββ | 8000/10682 [1:13:35<22:06, 2.02it/s]
|
850 |
{'loss': 2.8918, 'grad_norm': 0.24982097744941711, 'learning_rate': 0.00018007586996870206, 'epoch': 10.48}
|
851 |
+
|
852 |
75%|ββββββββ | 8000/10682 [1:13:35<22:06, 2.02it/s]
|
853 |
75%|ββββββββ | 8001/10682 [1:13:36<22:09, 2.02it/s]
|
854 |
75%|ββββββββ | 8002/10682 [1:13:36<22:07, 2.02it/s]
|
855 |
75%|ββββββββ | 8003/10682 [1:13:37<22:06, 2.02it/s]
|
856 |
75%|ββββββββ | 8004/10682 [1:13:37<22:05, 2.02it/s]
|
857 |
75%|ββββββββ | 8005/10682 [1:13:38<22:05, 2.02it/s]
|
858 |
75%|ββββββββ | 8006/10682 [1:13:38<22:04, 2.02it/s]
|
859 |
75%|ββββββββ | 8007/10682 [1:13:39<22:04, 2.02it/s]
|
860 |
75%|ββββββββ | 8008/10682 [1:13:39<22:03, 2.02it/s]
|
861 |
75%|ββββββββ | 8009/10682 [1:13:40<22:03, 2.02it/s]
|
862 |
75%|ββββββββ | 8010/10682 [1:13:40<22:01, 2.02it/s]
|
863 |
75%|ββββββββ | 8011/10682 [1:13:41<22:01, 2.02it/s]
|
864 |
75%|ββββββββ | 8012/10682 [1:13:41<22:00, 2.02it/s]
|
865 |
75%|ββββββββ | 8013/10682 [1:13:42<22:01, 2.02it/s]
|
866 |
75%|ββββββββ | 8014/10682 [1:13:42<21:59, 2.02it/s]
|
867 |
75%|ββββββββ | 8015/10682 [1:13:43<22:00, 2.02it/s]
|
868 |
75%|ββββββββ | 8016/10682 [1:13:43<21:58, 2.02it/s]
|
869 |
75%|ββββββββ | 8017/10682 [1:13:44<21:58, 2.02it/s]
|
870 |
75%|ββββββββ | 8018/10682 [1:13:44<21:56, 2.02it/s]
|
871 |
75%|ββββββββ | 8019/10682 [1:13:45<21:58, 2.02it/s]
|
872 |
75%|ββββββββ | 8020/10682 [1:13:45<21:55, 2.02it/s]
|
873 |
75%|ββββββββ | 8021/10682 [1:13:46<21:56, 2.02it/s]
|
874 |
75%|ββββββββ | 8022/10682 [1:13:46<21:55, 2.02it/s]
|
875 |
75%|ββββββββ | 8023/10682 [1:13:47<21:55, 2.02it/s]
|
876 |
75%|ββββββββ | 8024/10682 [1:13:47<21:54, 2.02it/s]
|
877 |
75%|ββββββββ | 8025/10682 [1:13:48<21:54, 2.02it/s]{'loss': 2.9015, 'grad_norm': 0.2518491744995117, 'learning_rate': 0.00017694719066230924, 'epoch': 10.52}
|
878 |
|
879 |
+
|
880 |
75%|ββββββββ | 8025/10682 [1:13:48<21:54, 2.02it/s]
|
881 |
75%|ββββββββ | 8026/10682 [1:13:48<21:54, 2.02it/s]
|
882 |
75%|ββββββββ | 8027/10682 [1:13:48<21:54, 2.02it/s]
|
883 |
75%|ββββββββ | 8028/10682 [1:13:49<21:51, 2.02it/s]
|
884 |
75%|ββββββββ | 8029/10682 [1:13:49<21:52, 2.02it/s]
|
885 |
75%|ββββββββ | 8030/10682 [1:13:50<21:50, 2.02it/s]
|
886 |
75%|ββββββββ | 8031/10682 [1:13:50<21:51, 2.02it/s]
|
887 |
75%|ββββββββ | 8032/10682 [1:13:51<21:51, 2.02it/s]
|
888 |
75%|ββββββββ | 8033/10682 [1:13:51<21:51, 2.02it/s]
|
889 |
75%|ββββββββ | 8034/10682 [1:13:52<21:50, 2.02it/s]
|
890 |
75%|ββββββββ | 8035/10682 [1:13:52<21:50, 2.02it/s]
|
891 |
75%|ββββββββ | 8036/10682 [1:13:53<21:48, 2.02it/s]
|
892 |
75%|ββββββββ | 8037/10682 [1:13:53<21:48, 2.02it/s]
|
893 |
75%|ββββββββ | 8038/10682 [1:13:54<21:46, 2.02it/s]
|
894 |
75%|ββββββββ | 8039/10682 [1:13:54<21:47, 2.02it/s]
|
895 |
75%|ββββββββ | 8040/10682 [1:13:55<21:46, 2.02it/s]
|
896 |
75%|ββββββββ | 8041/10682 [1:13:55<21:46, 2.02it/s]
|
897 |
75%|ββββββββ | 8042/10682 [1:13:56<21:45, 2.02it/s]
|
898 |
75%|ββββββββ | 8043/10682 [1:13:56<21:46, 2.02it/s]
|
899 |
75%|ββββββββ | 8044/10682 [1:13:57<21:43, 2.02it/s]
|
900 |
75%|ββββββββ | 8045/10682 [1:13:57<21:44, 2.02it/s]
|
901 |
75%|ββββββββ | 8046/10682 [1:13:58<21:43, 2.02it/s]
|
902 |
75%|ββββββββ | 8047/10682 [1:13:58<21:43, 2.02it/s]
|
903 |
75%|ββββββββ | 8048/10682 [1:13:59<21:42, 2.02it/s]
|
904 |
75%|ββββββββ | 8049/10682 [1:13:59<21:42, 2.02it/s]
|
905 |
75%|ββββββββ | 8050/10682 [1:14:00<21:40, 2.02it/s]
|
906 |
{'loss': 2.8929, 'grad_norm': 0.24705103039741516, 'learning_rate': 0.00017384007553770858, 'epoch': 10.55}
|
907 |
+
|
908 |
75%|ββββββββ | 8050/10682 [1:14:00<21:40, 2.02it/s]
|
909 |
75%|ββββββββ | 8051/10682 [1:14:00<21:42, 2.02it/s]
|
910 |
75%|ββββββββ | 8052/10682 [1:14:01<21:40, 2.02it/s]
|
911 |
75%|ββββββββ | 8053/10682 [1:14:01<21:41, 2.02it/s]
|
912 |
75%|ββββββββ | 8054/10682 [1:14:02<21:39, 2.02it/s]
|
913 |
75%|ββββββββ | 8055/10682 [1:14:02<21:40, 2.02it/s]
|
914 |
75%|ββββββββ | 8056/10682 [1:14:03<21:39, 2.02it/s]
|
915 |
75%|ββββββββ | 8057/10682 [1:14:03<21:39, 2.02it/s]
|
916 |
75%|ββββββββ | 8058/10682 [1:14:04<21:38, 2.02it/s]
|
917 |
75%|ββββββββ | 8059/10682 [1:14:04<21:38, 2.02it/s]
|
918 |
75%|ββββββββ | 8060/10682 [1:14:05<21:36, 2.02it/s]
|
919 |
75%|ββββββββ | 8061/10682 [1:14:05<21:36, 2.02it/s]
|
920 |
75%|ββββββββ | 8062/10682 [1:14:06<21:36, 2.02it/s]
|
921 |
75%|ββββββββ | 8063/10682 [1:14:06<21:35, 2.02it/s]
|
922 |
75%|ββββββββ | 8064/10682 [1:14:07<21:34, 2.02it/s]
|
923 |
76%|ββββββββ | 8065/10682 [1:14:07<21:35, 2.02it/s]
|
924 |
76%|ββββββββ | 8066/10682 [1:14:08<21:34, 2.02it/s]
|
925 |
76%|ββββββββ | 8067/10682 [1:14:08<21:34, 2.02it/s]
|
926 |
76%|ββββββββ | 8068/10682 [1:14:09<21:32, 2.02it/s]
|
927 |
76%|ββββββββ | 8069/10682 [1:14:09<21:32, 2.02it/s]
|
928 |
76%|ββββββββ | 8070/10682 [1:14:10<21:31, 2.02it/s]
|
929 |
76%|ββββββββ | 8071/10682 [1:14:10<21:32, 2.02it/s]
|
930 |
76%|ββββββββ | 8072/10682 [1:14:11<21:32, 2.02it/s]
|
931 |
76%|ββββββββ | 8073/10682 [1:14:11<21:33, 2.02it/s]
|
932 |
76%|ββββββββ | 8074/10682 [1:14:12<21:31, 2.02it/s]
|
933 |
76%|ββββββββ | 8075/10682 [1:14:12<21:31, 2.02it/s]
|
934 |
{'loss': 2.8997, 'grad_norm': 0.2514490485191345, 'learning_rate': 0.00017075473199874692, 'epoch': 10.58}
|
935 |
+
|
936 |
76%|ββββββββ | 8075/10682 [1:14:12<21:31, 2.02it/s]
|
937 |
76%|ββββββββ | 8076/10682 [1:14:13<21:33, 2.01it/s]
|
938 |
76%|ββββββββ | 8077/10682 [1:14:13<21:32, 2.02it/s]
|
939 |
76%|ββββββββ | 8078/10682 [1:14:14<21:31, 2.02it/s]
|
940 |
76%|ββββββββ | 8079/10682 [1:14:14<21:28, 2.02it/s]
|
941 |
76%|ββββββββ | 8080/10682 [1:14:15<21:29, 2.02it/s]
|
942 |
76%|ββββββββ | 8081/10682 [1:14:15<21:27, 2.02it/s]
|
943 |
76%|ββββββββ | 8082/10682 [1:14:16<21:27, 2.02it/s]
|
944 |
76%|ββββββββ | 8083/10682 [1:14:16<21:26, 2.02it/s]
|
945 |
76%|ββββββββ | 8084/10682 [1:14:17<21:25, 2.02it/s]
|
946 |
76%|ββββββββ | 8085/10682 [1:14:17<21:25, 2.02it/s]
|
947 |
76%|ββββββββ | 8086/10682 [1:14:18<21:23, 2.02it/s]
|
948 |
76%|ββββββββ | 8087/10682 [1:14:18<21:24, 2.02it/s]
|
949 |
76%|ββββββββ | 8088/10682 [1:14:19<21:22, 2.02it/s]
|
950 |
76%|ββββββββ | 8089/10682 [1:14:19<21:22, 2.02it/s]
|
951 |
76%|ββββββββ | 8090/10682 [1:14:20<21:21, 2.02it/s]
|
952 |
76%|ββββββββ | 8091/10682 [1:14:20<21:22, 2.02it/s]
|
953 |
76%|ββββββββ | 8092/10682 [1:14:21<21:21, 2.02it/s]
|
954 |
76%|ββββββββ | 8093/10682 [1:14:21<21:33, 2.00it/s]
|
955 |
76%|ββββββββ | 8094/10682 [1:14:22<21:28, 2.01it/s]
|
956 |
76%|ββββββββ | 8095/10682 [1:14:22<21:25, 2.01it/s]
|
957 |
76%|ββββββββ | 8096/10682 [1:14:23<21:23, 2.02it/s]
|
958 |
76%|ββββββββ | 8097/10682 [1:14:23<21:21, 2.02it/s]
|
959 |
76%|ββββββββ | 8098/10682 [1:14:24<21:19, 2.02it/s]
|
960 |
76%|ββββββββ | 8099/10682 [1:14:24<21:18, 2.02it/s]
|
961 |
76%|ββββββββ | 8100/10682 [1:14:25<21:18, 2.02it/s]
|
962 |
|
963 |
+
|
964 |
76%|ββββββββ | 8100/10682 [1:14:25<21:18, 2.02it/s]
|
965 |
76%|ββββββββ | 8101/10682 [1:14:25<21:19, 2.02it/s]
|
966 |
76%|ββββββββ | 8102/10682 [1:14:26<21:17, 2.02it/s]
|
967 |
76%|ββββββββ | 8103/10682 [1:14:26<21:17, 2.02it/s]
|
968 |
76%|ββββββββ | 8104/10682 [1:14:27<21:15, 2.02it/s]
|
969 |
76%|ββββββββ | 8105/10682 [1:14:27<21:14, 2.02it/s]
|
970 |
76%|ββββββββ | 8106/10682 [1:14:28<21:14, 2.02it/s]
|
971 |
76%|ββββββββ | 8107/10682 [1:14:28<21:14, 2.02it/s]
|
972 |
76%|ββββββββ | 8108/10682 [1:14:29<21:13, 2.02it/s]
|
973 |
76%|ββββββββ | 8109/10682 [1:14:29<21:12, 2.02it/s]
|
974 |
76%|ββββββββ | 8110/10682 [1:14:30<21:11, 2.02it/s]
|
975 |
76%|ββββββββ | 8111/10682 [1:14:30<21:11, 2.02it/s]
|
976 |
76%|ββββββββ | 8112/10682 [1:14:31<21:11, 2.02it/s]
|
977 |
76%|ββββββββ | 8113/10682 [1:14:31<21:12, 2.02it/s]
|
978 |
76%|ββββββββ | 8114/10682 [1:14:32<21:12, 2.02it/s]
|
979 |
76%|ββββββββ | 8115/10682 [1:14:32<21:10, 2.02it/s]
|
980 |
76%|ββββββββ | 8116/10682 [1:14:33<21:10, 2.02it/s]
|
981 |
76%|ββββββββ | 8117/10682 [1:14:33<21:08, 2.02it/s]
|
982 |
76%|ββββββββ | 8118/10682 [1:14:34<21:09, 2.02it/s]
|
983 |
76%|ββββββββ | 8119/10682 [1:14:34<21:07, 2.02it/s]
|
984 |
76%|ββββββββ | 8120/10682 [1:14:35<21:07, 2.02it/s]
|
985 |
76%|ββββββββ | 8121/10682 [1:14:35<21:06, 2.02it/s]
|
986 |
76%|ββββββββ | 8122/10682 [1:14:36<21:05, 2.02it/s]
|
987 |
76%|ββββββββ | 8123/10682 [1:14:36<21:05, 2.02it/s]
|
988 |
76%|ββββββββ | 8124/10682 [1:14:37<21:05, 2.02it/s]
|
989 |
76%|ββββββββ | 8125/10682 [1:14:37<21:05, 2.02it/s]{'loss': 2.8945, 'grad_norm': 0.24637724459171295, 'learning_rate': 0.0001646501820129766, 'epoch': 10.65}
|
990 |
+
|
991 |
|
992 |
76%|ββββββββ | 8125/10682 [1:14:37<21:05, 2.02it/s]
|
993 |
76%|ββββββββ | 8126/10682 [1:14:37<21:05, 2.02it/s]
|
994 |
76%|ββββββββ | 8127/10682 [1:14:38<21:05, 2.02it/s]
|
995 |
76%|ββββββββ | 8128/10682 [1:14:38<21:04, 2.02it/s]
|
996 |
76%|ββββββββ | 8129/10682 [1:14:39<21:04, 2.02it/s]
|
997 |
76%|ββββββββ | 8130/10682 [1:14:39<21:02, 2.02it/s]
|
998 |
76%|ββββββββ | 8131/10682 [1:14:40<21:02, 2.02it/s]
|
999 |
76%|ββββββββ | 8132/10682 [1:14:40<21:00, 2.02it/s]
|
1000 |
76%|ββββββββ | 8133/10682 [1:14:41<20:59, 2.02it/s]
|
1001 |
76%|ββββββββ | 8134/10682 [1:14:41<20:59, 2.02it/s]
|
1002 |
76%|ββββββββ | 8135/10682 [1:14:42<20:58, 2.02it/s]
|
1003 |
76%|ββββββββ | 8136/10682 [1:14:42<20:59, 2.02it/s]
|
1004 |
76%|ββββββββ | 8137/10682 [1:14:43<20:58, 2.02it/s]
|
1005 |
76%|ββββββββ | 8138/10682 [1:14:43<20:59, 2.02it/s]
|
1006 |
76%|ββββββββ | 8139/10682 [1:14:44<20:58, 2.02it/s]
|
1007 |
76%|ββββββββ | 8140/10682 [1:14:44<20:58, 2.02it/s]
|
1008 |
76%|ββββββββ | 8141/10682 [1:14:45<20:58, 2.02it/s]
|
1009 |
76%|ββββββββ | 8142/10682 [1:14:45<20:57, 2.02it/s]
|
1010 |
76%|ββββββββ | 8143/10682 [1:14:46<20:55, 2.02it/s]
|
1011 |
76%|ββοΏ½οΏ½βββββ | 8144/10682 [1:14:46<20:55, 2.02it/s]
|
1012 |
76%|ββββββββ | 8145/10682 [1:14:47<20:55, 2.02it/s]
|
1013 |
76%|ββββββββ | 8146/10682 [1:14:47<20:54, 2.02it/s]
|
1014 |
76%|ββββββββ | 8147/10682 [1:14:48<20:53, 2.02it/s]
|
1015 |
76%|ββββββββ | 8148/10682 [1:14:48<20:54, 2.02it/s]
|
1016 |
76%|ββββββββ | 8149/10682 [1:14:49<20:54, 2.02it/s]
|
1017 |
76%|ββββββββ | 8150/10682 [1:14:49<20:52, 2.02it/s]{'loss': 2.8927, 'grad_norm': 0.25224441289901733, 'learning_rate': 0.00016163138305256598, 'epoch': 10.68}
|
1018 |
|
1019 |
+
|
1020 |
76%|ββββββββ | 8150/10682 [1:14:49<20:52, 2.02it/s]
|
1021 |
76%|ββββββββ | 8151/10682 [1:14:50<20:54, 2.02it/s]
|
1022 |
76%|ββββββββ | 8152/10682 [1:14:50<20:52, 2.02it/s]
|
1023 |
76%|ββββββββ | 8153/10682 [1:14:51<20:52, 2.02it/s]
|
1024 |
76%|ββββββββ | 8154/10682 [1:14:51<20:51, 2.02it/s]
|
1025 |
76%|ββββββββ | 8155/10682 [1:14:52<20:51, 2.02it/s]
|
1026 |
76%|ββββββββ | 8156/10682 [1:14:52<20:50, 2.02it/s]
|
1027 |
76%|ββββββββ | 8157/10682 [1:14:53<20:49, 2.02it/s]
|
1028 |
76%|ββββββββ | 8158/10682 [1:14:53<20:48, 2.02it/s]
|
1029 |
76%|ββββββββ | 8159/10682 [1:14:54<20:49, 2.02it/s]
|
1030 |
76%|ββββββββ | 8160/10682 [1:14:54<20:47, 2.02it/s]
|
1031 |
76%|ββββββββ | 8161/10682 [1:14:55<20:48, 2.02it/s]
|
1032 |
76%|ββββββββ | 8162/10682 [1:14:55<20:46, 2.02it/s]
|
1033 |
76%|ββββββββ | 8163/10682 [1:14:56<20:46, 2.02it/s]
|
1034 |
76%|ββββββββ | 8164/10682 [1:14:56<20:45, 2.02it/s]
|
1035 |
76%|ββββββββ | 8165/10682 [1:14:57<20:45, 2.02it/s]
|
1036 |
76%|ββββββββ | 8166/10682 [1:14:57<20:44, 2.02it/s]
|
1037 |
76%|ββββββββ | 8167/10682 [1:14:58<20:44, 2.02it/s]
|
1038 |
76%|ββββββββ | 8168/10682 [1:14:58<20:43, 2.02it/s]
|
1039 |
76%|ββββββββ | 8169/10682 [1:14:59<20:43, 2.02it/s]
|
1040 |
76%|ββββββββ | 8170/10682 [1:14:59<20:42, 2.02it/s]
|
1041 |
76%|ββββββββ | 8171/10682 [1:15:00<20:42, 2.02it/s]
|
1042 |
77%|ββββββββ | 8172/10682 [1:15:00<20:40, 2.02it/s]
|
1043 |
77%|ββββββββ | 8173/10682 [1:15:01<20:41, 2.02it/s]
|
1044 |
77%|ββββββββ | 8174/10682 [1:15:01<20:39, 2.02it/s]
|
1045 |
77%|ββββββββ | 8175/10682 [1:15:02<20:39, 2.02it/s]
|
1046 |
|
1047 |
+
|
1048 |
77%|ββββββββ | 8175/10682 [1:15:02<20:39, 2.02it/s]
|
1049 |
77%|ββββββββ | 8176/10682 [1:15:02<20:41, 2.02it/s]
|
1050 |
77%|ββββββββ | 8177/10682 [1:15:03<20:40, 2.02it/s]
|
1051 |
77%|ββββββββ | 8178/10682 [1:15:03<20:39, 2.02it/s]
|
1052 |
77%|ββββββββ | 8179/10682 [1:15:04<20:38, 2.02it/s]
|
1053 |
77%|ββββββββ | 8180/10682 [1:15:04<20:38, 2.02it/s]
|
1054 |
77%|ββββββββ | 8181/10682 [1:15:05<20:37, 2.02it/s]
|
1055 |
77%|ββββββββ | 8182/10682 [1:15:05<20:36, 2.02it/s]
|
1056 |
77%|ββββββββ | 8183/10682 [1:15:06<20:37, 2.02it/s]
|
1057 |
77%|ββββββββ | 8184/10682 [1:15:06<20:35, 2.02it/s]
|
1058 |
77%|ββββββββ | 8185/10682 [1:15:07<20:35, 2.02it/s]
|
1059 |
77%|ββββββββ | 8186/10682 [1:15:07<20:36, 2.02it/s]
|
1060 |
77%|ββββββββ | 8187/10682 [1:15:08<20:36, 2.02it/s]
|
1061 |
77%|ββββββββ | 8188/10682 [1:15:08<20:36, 2.02it/s]
|
1062 |
77%|ββββββββ | 8189/10682 [1:15:09<20:33, 2.02it/s]
|
1063 |
77%|ββββββββ | 8190/10682 [1:15:09<20:33, 2.02it/s]
|
1064 |
77%|ββββββββ | 8191/10682 [1:15:10<20:31, 2.02it/s]
|
1065 |
77%|ββββββββ | 8192/10682 [1:15:10<20:31, 2.02it/s]
|
1066 |
77%|ββββββββ | 8193/10682 [1:15:11<20:30, 2.02it/s]
|
1067 |
77%|ββββββββ | 8194/10682 [1:15:11<20:31, 2.02it/s]
|
1068 |
77%|ββββββββ | 8195/10682 [1:15:12<20:29, 2.02it/s]
|
1069 |
77%|ββββββββ | 8196/10682 [1:15:12<20:29, 2.02it/s]
|
1070 |
77%|ββββββββ | 8197/10682 [1:15:13<20:28, 2.02it/s]
|
1071 |
77%|ββββββββ | 8198/10682 [1:15:13<20:28, 2.02it/s]
|
1072 |
77%|ββββββββ | 8199/10682 [1:15:14<20:27, 2.02it/s]
|
1073 |
77%|ββββββββ | 8200/10682 [1:15:14<20:27, 2.02it/s]
|
1074 |
|
1075 |
+
|
1076 |
77%|ββββββββ | 8200/10682 [1:15:14<20:27, 2.02it/s]
|
1077 |
77%|ββββββββ | 8201/10682 [1:15:15<20:27, 2.02it/s]
|
1078 |
77%|ββββββββ | 8202/10682 [1:15:15<20:25, 2.02it/s]
|
1079 |
77%|ββββββββ | 8203/10682 [1:15:16<20:26, 2.02it/s]
|
1080 |
77%|ββββββββ | 8204/10682 [1:15:16<20:25, 2.02it/s]
|
1081 |
77%|ββββββββ | 8205/10682 [1:15:17<22:12, 1.86it/s]
|
1082 |
77%|ββββββοΏ½οΏ½β | 8206/10682 [1:15:17<21:43, 1.90it/s]
|
1083 |
77%|ββββββββ | 8207/10682 [1:15:18<21:18, 1.94it/s]
|
1084 |
77%|ββββββββ | 8208/10682 [1:15:18<21:01, 1.96it/s]
|
1085 |
77%|ββββββββ | 8209/10682 [1:15:19<20:49, 1.98it/s]
|
1086 |
77%|ββββββββ | 8210/10682 [1:15:19<20:41, 1.99it/s]
|
1087 |
77%|ββββββββ | 8211/10682 [1:15:20<20:35, 2.00it/s]
|
1088 |
77%|ββββββββ | 8212/10682 [1:15:20<20:30, 2.01it/s]
|
1089 |
77%|ββββββββ | 8213/10682 [1:15:21<20:27, 2.01it/s]
|
1090 |
77%|ββββββββ | 8214/10682 [1:15:21<20:37, 1.99it/s]
|
1091 |
77%|ββββββββ | 8215/10682 [1:15:22<20:32, 2.00it/s]
|
1092 |
77%|ββββββββ | 8216/10682 [1:15:22<20:28, 2.01it/s]
|
1093 |
77%|ββββββββ | 8217/10682 [1:15:23<20:26, 2.01it/s]
|
1094 |
77%|ββββββββ | 8218/10682 [1:15:23<20:24, 2.01it/s]
|
1095 |
77%|ββββββββ | 8219/10682 [1:15:24<20:23, 2.01it/s]
|
1096 |
77%|ββββββββ | 8220/10682 [1:15:24<20:20, 2.02it/s]
|
1097 |
77%|ββββββββ | 8221/10682 [1:15:25<20:20, 2.02it/s]
|
1098 |
77%|ββββββββ | 8222/10682 [1:15:25<20:18, 2.02it/s]
|
1099 |
77%|ββββββββ | 8223/10682 [1:15:26<20:17, 2.02it/s]
|
1100 |
77%|ββββββββ | 8224/10682 [1:15:26<20:16, 2.02it/s]
|
1101 |
77%|ββββββββ | 8225/10682 [1:15:27<20:16, 2.02it/s]{'loss': 2.9031, 'grad_norm': 0.24478811025619507, 'learning_rate': 0.0001527113038415231, 'epoch': 10.78}
|
1102 |
+
|
1103 |
|
1104 |
77%|ββββββββ | 8225/10682 [1:15:27<20:16, 2.02it/s]
|
1105 |
77%|ββββββββ | 8226/10682 [1:15:27<20:17, 2.02it/s]
|
1106 |
77%|ββββββββ | 8227/10682 [1:15:28<20:16, 2.02it/s]
|
1107 |
77%|ββββββββ | 8228/10682 [1:15:28<20:15, 2.02it/s]
|
1108 |
77%|ββββββββ | 8229/10682 [1:15:29<20:16, 2.02it/s]
|
1109 |
77%|ββββββββ | 8230/10682 [1:15:29<20:15, 2.02it/s]
|
1110 |
77%|ββββββββ | 8231/10682 [1:15:30<20:14, 2.02it/s]
|
1111 |
77%|ββββββββ | 8232/10682 [1:15:30<20:12, 2.02it/s]
|
1112 |
77%|ββββββββ | 8233/10682 [1:15:31<20:12, 2.02it/s]
|
1113 |
77%|ββββββββ | 8234/10682 [1:15:31<20:10, 2.02it/s]
|
1114 |
77%|ββββββββ | 8235/10682 [1:15:32<20:10, 2.02it/s]
|
1115 |
77%|ββββββββ | 8236/10682 [1:15:32<20:09, 2.02it/s]
|
1116 |
77%|ββββββββ | 8237/10682 [1:15:33<20:09, 2.02it/s]
|
1117 |
77%|ββββββββ | 8238/10682 [1:15:33<20:08, 2.02it/s]
|
1118 |
77%|ββββββββ | 8239/10682 [1:15:34<20:08, 2.02it/s]
|
1119 |
77%|ββββββββ | 8240/10682 [1:15:34<20:07, 2.02it/s]
|
1120 |
77%|ββββββββ | 8241/10682 [1:15:35<20:07, 2.02it/s]
|
1121 |
77%|ββββββββ | 8242/10682 [1:15:35<20:05, 2.02it/s]
|
1122 |
77%|ββββββββ | 8243/10682 [1:15:36<20:05, 2.02it/s]
|
1123 |
77%|ββββββββ | 8244/10682 [1:15:36<20:04, 2.02it/s]
|
1124 |
77%|ββββββββ | 8245/10682 [1:15:37<20:05, 2.02it/s]
|
1125 |
77%|ββββββββ | 8246/10682 [1:15:37<20:04, 2.02it/s]
|
1126 |
77%|ββββββββ | 8247/10682 [1:15:38<20:05, 2.02it/s]
|
1127 |
77%|ββββββββ | 8248/10682 [1:15:38<20:03, 2.02it/s]
|
1128 |
77%|ββββββββ | 8249/10682 [1:15:39<20:04, 2.02it/s]
|
1129 |
77%|ββββββββ | 8250/10682 [1:15:39<20:03, 2.02it/s]{'loss': 2.8918, 'grad_norm': 0.24946050345897675, 'learning_rate': 0.00014978404491439802, 'epoch': 10.81}
|
1130 |
|
1131 |
+
|
1132 |
77%|ββββββββ | 8250/10682 [1:15:39<20:03, 2.02it/s]
|
1133 |
77%|ββββββββ | 8251/10682 [1:15:40<20:04, 2.02it/s]
|
1134 |
77%|ββββββββ | 8252/10682 [1:15:40<20:03, 2.02it/s]
|
1135 |
77%|ββββββββ | 8253/10682 [1:15:41<20:03, 2.02it/s]
|
1136 |
77%|ββββββββ | 8254/10682 [1:15:41<20:01, 2.02it/s]
|
1137 |
77%|ββββββββ | 8255/10682 [1:15:41<20:02, 2.02it/s]
|
1138 |
77%|ββββββββ | 8256/10682 [1:15:42<20:01, 2.02it/s]
|
1139 |
77%|ββββββββ | 8257/10682 [1:15:42<20:01, 2.02it/s]
|
1140 |
77%|ββββββββ | 8258/10682 [1:15:43<20:00, 2.02it/s]
|
1141 |
77%|ββββββββ | 8259/10682 [1:15:43<20:00, 2.02it/s]
|
1142 |
77%|ββββββββ | 8260/10682 [1:15:44<19:59, 2.02it/s]
|
1143 |
77%|ββββββββ | 8261/10682 [1:15:44<19:59, 2.02it/s]
|
1144 |
77%|ββββββββ | 8262/10682 [1:15:45<19:57, 2.02it/s]
|
1145 |
77%|ββββββββ | 8263/10682 [1:15:45<19:57, 2.02it/s]
|
1146 |
77%|ββββββββ | 8264/10682 [1:15:46<19:57, 2.02it/s]
|
1147 |
77%|ββββββββ | 8265/10682 [1:15:46<19:56, 2.02it/s]
|
1148 |
77%|ββββββββ | 8266/10682 [1:15:47<19:56, 2.02it/s]
|
1149 |
77%|ββββββββ | 8267/10682 [1:15:47<19:54, 2.02it/s]
|
1150 |
77%|ββββββββ | 8268/10682 [1:15:48<19:55, 2.02it/s]
|
1151 |
77%|ββββββββ | 8269/10682 [1:15:48<19:52, 2.02it/s]
|
1152 |
77%|ββββββββ | 8270/10682 [1:15:49<19:52, 2.02it/s]
|
1153 |
77%|ββββββββ | 8271/10682 [1:15:49<19:51, 2.02it/s]
|
1154 |
77%|ββββββββ | 8272/10682 [1:15:50<19:52, 2.02it/s]
|
1155 |
77%|ββββββββ | 8273/10682 [1:15:50<19:51, 2.02it/s]
|
1156 |
77%|ββββββββ | 8274/10682 [1:15:51<19:51, 2.02it/s]
|
1157 |
77%|ββββββββ | 8275/10682 [1:15:51<19:50, 2.02it/s]
|
1158 |
|
1159 |
+
|
1160 |
77%|ββββββββ | 8275/10682 [1:15:51<19:50, 2.02it/s]
|
1161 |
77%|ββββββββ | 8276/10682 [1:15:52<19:52, 2.02it/s]
|
1162 |
77%|ββββββββ | 8277/10682 [1:15:52<19:51, 2.02it/s]
|
1163 |
77%|ββββββββ | 8278/10682 [1:15:53<19:50, 2.02it/s]
|
1164 |
78%|ββββββββ | 8279/10682 [1:15:53<19:50, 2.02it/s]
|
1165 |
78%|ββββββββ | 8280/10682 [1:15:54<19:49, 2.02it/s]
|
1166 |
78%|ββββββββ | 8281/10682 [1:15:54<19:48, 2.02it/s]
|
1167 |
78%|ββββββββ | 8282/10682 [1:15:55<19:48, 2.02it/s]
|
1168 |
78%|ββββββββ | 8283/10682 [1:15:55<19:47, 2.02it/s]
|
1169 |
78%|ββββββββ | 8284/10682 [1:15:56<19:46, 2.02it/s]
|
1170 |
78%|ββββββββ | 8285/10682 [1:15:56<19:46, 2.02it/s]
|
1171 |
78%|ββββββββ | 8286/10682 [1:15:57<19:46, 2.02it/s]
|
1172 |
78%|ββββββββ | 8287/10682 [1:15:57<19:47, 2.02it/s]
|
1173 |
78%|ββββββββ | 8288/10682 [1:15:58<19:46, 2.02it/s]
|
1174 |
78%|ββββββββ | 8289/10682 [1:15:58<19:47, 2.02it/s]
|
1175 |
78%|ββββββββ | 8290/10682 [1:15:59<21:33, 1.85it/s]
|
1176 |
78%|ββββββββ | 8291/10682 [1:15:59<20:58, 1.90it/s]
|
1177 |
78%|ββββββββ | 8292/10682 [1:16:00<20:36, 1.93it/s]
|
1178 |
78%|ββββββββ | 8293/10682 [1:16:00<20:18, 1.96it/s]
|
1179 |
78%|ββββββββ | 8294/10682 [1:16:01<20:08, 1.98it/s]
|
1180 |
78%|ββββββββ | 8295/10682 [1:16:01<19:59, 1.99it/s]
|
1181 |
78%|ββββββββ | 8296/10682 [1:16:02<19:54, 2.00it/s]
|
1182 |
78%|ββββββββ | 8297/10682 [1:16:02<19:49, 2.01it/s]
|
1183 |
78%|ββββββββ | 8298/10682 [1:16:03<19:46, 2.01it/s]
|
1184 |
78%|ββββββββ | 8299/10682 [1:16:03<19:42, 2.02it/s]
|
1185 |
78%|ββββββββ | 8300/10682 [1:16:04<19:42, 2.01it/s]
|
1186 |
{'loss': 2.8984, 'grad_norm': 0.24762478470802307, 'learning_rate': 0.00014399985296581835, 'epoch': 10.88}
|
1187 |
+
|
1188 |
78%|ββββββββ | 8300/10682 [1:16:04<19:42, 2.01it/s]
|
1189 |
78%|ββββββββ | 8301/10682 [1:16:04<19:41, 2.01it/s]
|
1190 |
78%|ββββββββ | 8302/10682 [1:16:05<19:40, 2.02it/s]
|
1191 |
78%|ββββββββ | 8303/10682 [1:16:05<19:38, 2.02it/s]
|
1192 |
78%|ββββββββ | 8304/10682 [1:16:06<19:38, 2.02it/s]
|
1193 |
78%|ββββββββ | 8305/10682 [1:16:06<19:37, 2.02it/s]
|
1194 |
78%|ββββββββ | 8306/10682 [1:16:07<19:36, 2.02it/s]
|
1195 |
78%|ββββββββ | 8307/10682 [1:16:07<19:35, 2.02it/s]
|
1196 |
78%|ββββββββ | 8308/10682 [1:16:08<19:35, 2.02it/s]
|
1197 |
78%|ββββββββ | 8309/10682 [1:16:08<19:34, 2.02it/s]
|
1198 |
78%|ββββββββ | 8310/10682 [1:16:09<19:34, 2.02it/s]
|
1199 |
78%|ββββββββ | 8311/10682 [1:16:09<19:34, 2.02it/s]
|
1200 |
78%|ββββββββ | 8312/10682 [1:16:10<19:33, 2.02it/s]
|
1201 |
78%|ββββββββ | 8313/10682 [1:16:10<19:33, 2.02it/s]
|
1202 |
78%|ββββββββ | 8314/10682 [1:16:11<19:31, 2.02it/s]
|
1203 |
78%|ββββββββ | 8315/10682 [1:16:11<19:32, 2.02it/s]
|
1204 |
78%|ββββββββ | 8316/10682 [1:16:12<19:29, 2.02it/s]
|
1205 |
78%|ββββββββ | 8317/10682 [1:16:12<19:29, 2.02it/s]
|
1206 |
78%|ββββββββ | 8318/10682 [1:16:13<19:28, 2.02it/s]
|
1207 |
78%|ββββββββ | 8319/10682 [1:16:13<19:28, 2.02it/s]
|
1208 |
78%|ββββββββ | 8320/10682 [1:16:14<19:28, 2.02it/s]
|
1209 |
78%|ββββββββ | 8321/10682 [1:16:14<19:28, 2.02it/s]
|
1210 |
78%|ββββββββ | 8322/10682 [1:16:15<19:28, 2.02it/s]
|
1211 |
78%|ββββββββ | 8323/10682 [1:16:15<19:28, 2.02it/s]
|
1212 |
78%|ββββββββ | 8324/10682 [1:16:16<19:26, 2.02it/s]
|
1213 |
78%|ββββββββ | 8325/10682 [1:16:16<19:26, 2.02it/s]
|
1214 |
|
1215 |
+
|
1216 |
78%|ββββββββ | 8325/10682 [1:16:16<19:26, 2.02it/s]
|
1217 |
78%|ββββββββ | 8326/10682 [1:16:17<19:26, 2.02it/s]
|
1218 |
78%|ββββββββ | 8327/10682 [1:16:17<19:26, 2.02it/s]
|
1219 |
78%|ββββββββ | 8328/10682 [1:16:18<19:25, 2.02it/s]
|
1220 |
78%|ββββββββ | 8329/10682 [1:16:18<19:24, 2.02it/s]
|
1221 |
78%|ββββββββ | 8330/10682 [1:16:19<19:22, 2.02it/s]
|
1222 |
78%|ββββββββ | 8331/10682 [1:16:19<19:23, 2.02it/s]
|
1223 |
78%|ββββββββ | 8332/10682 [1:16:20<19:22, 2.02it/s]
|
1224 |
78%|ββββββββ | 8333/10682 [1:16:20<19:23, 2.02it/s]
|
1225 |
78%|ββββββββ | 8334/10682 [1:16:21<19:24, 2.02it/s]
|
1226 |
78%|ββββββββ | 8335/10682 [1:16:21<19:23, 2.02it/s]
|
1227 |
78%|ββββββββ | 8336/10682 [1:16:22<19:22, 2.02it/s]
|
1228 |
78%|ββββββββ | 8337/10682 [1:16:22<19:20, 2.02it/s]
|
1229 |
78%|ββββββββ | 8338/10682 [1:16:23<19:21, 2.02it/s]
|
1230 |
78%|ββββββββ | 8339/10682 [1:16:23<19:19, 2.02it/s]
|
1231 |
78%|ββββββββ | 8340/10682 [1:16:24<19:20, 2.02it/s]
|
1232 |
78%|ββββββββ | 8341/10682 [1:16:24<19:18, 2.02it/s]
|
1233 |
78%|ββββββββ | 8342/10682 [1:16:25<19:19, 2.02it/s]
|
1234 |
78%|ββββββββ | 8343/10682 [1:16:25<19:18, 2.02it/s]
|
1235 |
78%|ββββββββ | 8344/10682 [1:16:26<19:17, 2.02it/s]
|
1236 |
78%|ββββββββ | 8345/10682 [1:16:26<19:17, 2.02it/s]
|
1237 |
78%|ββββββββ | 8346/10682 [1:16:27<19:17, 2.02it/s]
|
1238 |
78%|ββββββββ | 8347/10682 [1:16:27<19:16, 2.02it/s]
|
1239 |
78%|ββββββββ | 8348/10682 [1:16:28<19:17, 2.02it/s]
|
1240 |
78%|ββββββββ | 8349/10682 [1:16:28<19:16, 2.02it/s]
|
1241 |
78%|ββββββββ | 8350/10682 [1:16:29<19:15, 2.02it/s]
|
1242 |
{'loss': 2.9034, 'grad_norm': 0.2513554096221924, 'learning_rate': 0.00013831071326327282, 'epoch': 10.94}
|
1243 |
+
|
1244 |
78%|ββββββββ | 8350/10682 [1:16:29<19:15, 2.02it/s]
|
1245 |
78%|ββββββββ | 8351/10682 [1:16:29<19:16, 2.02it/s]
|
1246 |
78%|ββββββββ | 8352/10682 [1:16:30<19:14, 2.02it/s]
|
1247 |
78%|ββββββββ | 8353/10682 [1:16:30<19:14, 2.02it/s]
|
1248 |
78%|ββββββββ | 8354/10682 [1:16:31<19:12, 2.02it/s]
|
1249 |
78%|ββββββββ | 8355/10682 [1:16:31<19:13, 2.02it/s]
|
1250 |
78%|ββββββββ | 8356/10682 [1:16:32<19:12, 2.02it/s]
|
1251 |
78%|ββββββββ | 8357/10682 [1:16:32<19:11, 2.02it/s]
|
1252 |
78%|ββββββββ | 8358/10682 [1:16:33<19:10, 2.02it/s]
|
1253 |
78%|ββββββββ | 8359/10682 [1:16:33<19:09, 2.02it/s]
|
1254 |
78%|ββββββββ | 8360/10682 [1:16:34<19:08, 2.02it/s]
|
1255 |
78%|ββββββββ | 8361/10682 [1:16:34<19:08, 2.02it/s]
|
1256 |
78%|ββββββββ | 8362/10682 [1:16:35<19:07, 2.02it/s]
|
1257 |
78%|ββββββββ | 8363/10682 [1:16:35<19:06, 2.02it/s]
|
1258 |
78%|ββββββββ | 8364/10682 [1:16:36<19:06, 2.02it/s]
|
1259 |
78%|ββββββββ | 8365/10682 [1:16:36<19:05, 2.02it/s]
|
1260 |
78%|ββββββββ | 8366/10682 [1:16:37<19:05, 2.02it/s]
|
1261 |
78%|ββββββββ | 8367/10682 [1:16:37<19:04, 2.02it/s]
|
1262 |
78%|ββββββββ | 8368/10682 [1:16:38<19:05, 2.02it/s]
|
1263 |
78%|ββββββββ | 8369/10682 [1:16:38<19:03, 2.02it/s]
|
1264 |
78%|ββββββββ | 8370/10682 [1:16:39<19:04, 2.02it/s]
|
1265 |
78%|ββββββββ | 8371/10682 [1:16:39<19:02, 2.02it/s]
|
1266 |
78%|ββββββββ | 8372/10682 [1:16:40<19:03, 2.02it/s]
|
1267 |
78%|ββββββββ | 8373/10682 [1:16:40<19:02, 2.02it/s]
|
1268 |
78%|ββββββββ | 8374/10682 [1:16:41<19:02, 2.02it/s]
|
1269 |
78%|ββββββββ | 8375/10682 [1:16:41<19:01, 2.02it/s]
|
1270 |
|
1271 |
+
|
1272 |
78%|ββββββββ | 8375/10682 [1:16:41<19:01, 2.02it/s]
|
1273 |
78%|ββββββββ | 8376/10682 [1:16:42<19:02, 2.02it/s]
|
1274 |
78%|ββββββββ | 8377/10682 [1:16:42<19:01, 2.02it/s]
|
1275 |
78%|ββββββββ | 8378/10682 [1:16:43<19:00, 2.02it/s]
|
1276 |
78%|ββββββββ | 8379/10682 [1:16:43<18:58, 2.02it/s]
|
1277 |
78%|ββββββββ | 8380/10682 [1:16:44<18:59, 2.02it/s]
|
1278 |
78%|ββββββββ | 8381/10682 [1:16:44<18:58, 2.02it/s]
|
1279 |
78%|ββββββββ | 8382/10682 [1:16:45<18:59, 2.02it/s]
|
1280 |
78%|ββββββββ | 8383/10682 [1:16:45<18:56, 2.02it/s]
|
1281 |
78%|ββββββββ | 8384/10682 [1:16:46<18:57, 2.02it/s]
|
1282 |
78%|ββββββββ | 8385/10682 [1:16:46<18:56, 2.02it/s]
|
1283 |
79%|ββββββββ | 8386/10682 [1:16:47<18:56, 2.02it/s]
|
1284 |
79%|ββββββββ | 8387/10682 [1:16:47<18:55, 2.02it/s]
|
1285 |
79%|ββββββββ | 8388/10682 [1:16:47<18:56, 2.02it/s]
|
1286 |
79%|ββββββββ | 8389/10682 [1:16:48<18:55, 2.02it/s]
|
1287 |
79%|ββββββββ | 8390/10682 [1:16:48<18:55, 2.02it/s]
|
1288 |
79%|ββββββββ | 8391/10682 [1:16:49<18:53, 2.02it/s]
|
1289 |
79%|ββββββββ | 8392/10682 [1:16:49<18:54, 2.02it/s]
|
1290 |
79%|ββββββββ | 8393/10682 [1:16:50<18:40, 2.04it/s]
|
1291 |
79%|ββββββββ | 8394/10682 [1:17:04<2:55:03, 4.59s/it]
|
1292 |
79%|ββββββββ | 8395/10682 [1:17:05<2:08:08, 3.36s/it]
|
1293 |
79%|ββββββββ | 8396/10682 [1:17:05<1:35:23, 2.50s/it]
|
1294 |
79%|ββββββββ | 8397/10682 [1:17:06<1:12:24, 1.90s/it]
|
1295 |
79%|ββββββββ | 8398/10682 [1:17:06<56:24, 1.48s/it]
|
1296 |
79%|ββββββββ | 8399/10682 [1:17:07<45:10, 1.19s/it]
|
1297 |
79%|ββββββββ | 8400/10682 [1:17:07<37:22, 1.02it/s]
|
1298 |
|
1299 |
+
|
1300 |
79%|ββββββββ | 8400/10682 [1:17:07<37:22, 1.02it/s]
|
1301 |
79%|ββββββββ | 8401/10682 [1:17:08<31:48, 1.20it/s]
|
1302 |
79%|ββββββββ | 8402/10682 [1:17:08<27:51, 1.36it/s]
|
1303 |
79%|ββββββββ | 8403/10682 [1:17:09<25:08, 1.51it/s]
|
1304 |
79%|ββββββββ | 8404/10682 [1:17:09<23:11, 1.64it/s]
|
1305 |
79%|ββββββββ | 8405/10682 [1:17:10<21:51, 1.74it/s]
|
1306 |
79%|ββββββββ | 8406/10682 [1:17:10<20:54, 1.81it/s]
|
1307 |
79%|ββββββββ | 8407/10682 [1:17:11<20:17, 1.87it/s]
|
1308 |
79%|ββββββββ | 8408/10682 [1:17:11<19:51, 1.91it/s]
|
1309 |
79%|ββββββββ | 8409/10682 [1:17:12<19:30, 1.94it/s]
|
1310 |
79%|ββββββββ | 8410/10682 [1:17:12<19:19, 1.96it/s]
|
1311 |
79%|ββββββββ | 8411/10682 [1:17:13<19:06, 1.98it/s]
|
1312 |
79%|ββββββββ | 8412/10682 [1:17:13<18:59, 1.99it/s]
|
1313 |
79%|ββββββββ | 8413/10682 [1:17:14<18:53, 2.00it/s]
|
1314 |
79%|ββββββββ | 8414/10682 [1:17:14<18:50, 2.01it/s]
|
1315 |
79%|ββββββββ | 8415/10682 [1:17:15<18:49, 2.01it/s]
|
1316 |
79%|ββββββββ | 8416/10682 [1:17:15<18:46, 2.01it/s]
|
1317 |
79%|ββββββββ | 8417/10682 [1:17:16<18:43, 2.02it/s]
|
1318 |
79%|ββββββββ | 8418/10682 [1:17:16<18:42, 2.02it/s]
|
1319 |
79%|ββββββββ | 8419/10682 [1:17:17<18:39, 2.02it/s]
|
1320 |
79%|ββββββββ | 8420/10682 [1:17:17<18:39, 2.02it/s]
|