==================================================================================================== import os import sys with open(sys.argv[0]) as f: code = f.read() # read the code of this file ASAP, for logging import uuid import glob import time from dataclasses import dataclass import numpy as np import torch from torch import nn import torch.nn.functional as F import torch.distributed as dist import torch._inductor.config as config from torch.nn.parallel import DistributedDataParallel as DDP # ----------------------------------------------------------------------------- # Muon optimizer def zeropower_via_svd(G, steps=None): U, S, V = G.svd() return U @ V.T @torch.compile def zeropower_via_newtonschulz5(G, steps=10, eps=1e-7): """ Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. We opt to use a quintic iteration whose coefficients are selected to maximize the slope at zero. For the purpose of minimizing steps, it turns out to be empirically effective to keep increasing the slope at zero even beyond the point where the iteration no longer converges all the way to one everywhere on the interval. This iteration therefore does not produce UV^T but rather something like US'V^T where S' is diagonal with S_{ii}' \sim Uniform(0.5, 1.5), which turns out not to hurt model performance at all relative to UV^T, where USV^T = G is the SVD. 
""" assert len(G.shape) == 2 a, b, c = (3.4445, -4.7750, 2.0315) X = G.bfloat16() X /= (X.norm() + eps) # ensure top singular value <= 1 if G.size(0) > G.size(1): X = X.T for _ in range(steps): A = X @ X.T B = A @ X X = a * X + b * B + c * A @ B if G.size(0) > G.size(1): X = X.T return X zeropower_backends = dict(svd=zeropower_via_svd, newtonschulz5=zeropower_via_newtonschulz5) class Muon(torch.optim.Optimizer): """ Muon - MomentUm Orthogonalized by Newton-schulz Muon internally runs standard SGD-momentum, and then performs an orthogonalization post- processing step, in which each 2D parameter's update is replaced with the nearest orthogonal matrix. To efficiently orthogonalize each update, we use a Newton-Schulz iteration, which has the advantage that it can be stably run in bfloat16 on the GPU. Some warnings: - This optimizer assumes that all parameters passed in are 2D. - It should not be used for the embedding layer, the final fully connected layer, or any {0,1}-D parameters; those should all be optimized by a standard method (e.g., AdamW). - To use it with 4D convolutional filters, it works well to just flatten their last 3 dimensions. - We believe it is unlikely to work well for training with small batch size. - We believe it may not work well for finetuning pretrained models, but we haven't tested this. - We have not yet tried this optimizer for training scenarios larger than NanoGPT (124M). Arguments: lr: The learning rate used by the internal SGD. momentum: The momentum used by the internal SGD. nesterov: Whether to use Nesterov-style momentum in the internal SGD. (recommended) backend: The chosen backend for the orthogonalization step. (recommended: 'newtonschulz5') backend_steps: The number of iteration steps to use in the backend, if it is iterative. 
""" def __init__(self, params, lr=0.02, momentum=0.95, nesterov=True, backend='newtonschulz5', backend_steps=5): defaults = dict(lr=lr, momentum=momentum, nesterov=nesterov, backend=backend, backend_steps=backend_steps) super().__init__(params, defaults) def step(self): for group in self.param_groups: lr = group['lr'] momentum = group['momentum'] zeropower_backend = zeropower_backends[group['backend']] # generate weight updates in distributed fashion total_params = sum(p.numel() for p in group['params']) updates_flat = torch.zeros(total_params, device='cuda', dtype=torch.bfloat16) curr_idx = 0 for i, p in enumerate(group['params']): # luckily this will perfectly distribute a transformer with multiple of 4 layers to 8 GPUs if i % int(os.environ['WORLD_SIZE']) == int(os.environ['RANK']): g = p.grad assert g is not None state = self.state[p] if 'momentum_buffer' not in state: state['momentum_buffer'] = torch.zeros_like(g) buf = state['momentum_buffer'] buf.mul_(momentum).add_(g) if group['nesterov']: g = g.add(buf, alpha=momentum) g = zeropower_backend(g, steps=group['backend_steps']) g *= max(1, g.size(0)/g.size(1))**0.5 updates_flat[curr_idx:curr_idx+p.numel()] = g.flatten() curr_idx += p.numel() # sync updates across devices. 
we are not memory-constrained so can do this simple deserialization dist.all_reduce(updates_flat, op=dist.ReduceOp.SUM) # deserialize and apply updates curr_idx = 0 for p in group['params']: g = updates_flat[curr_idx:curr_idx+p.numel()].view_as(p.data).type_as(p.data) p.data.add_(g, alpha=-lr) curr_idx += p.numel() # ----------------------------------------------------------------------------- # PyTorch nn.Module definitions for the GPT-2 model class Rotary(torch.nn.Module): def __init__(self, dim, base=10000): super().__init__() self.inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim)) self.seq_len_cached = None self.cos_cached = None self.sin_cached = None def forward(self, x): seq_len = x.shape[1] if seq_len != self.seq_len_cached: self.seq_len_cached = seq_len t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq) freqs = torch.outer(t, self.inv_freq).to(x.device) self.cos_cached = freqs.cos().bfloat16() self.sin_cached = freqs.sin().bfloat16() return self.cos_cached[None, :, None, :], self.sin_cached[None, :, None, :] def apply_rotary_emb(x, cos, sin): assert x.ndim == 4 # multihead attention d = x.shape[3]//2 x1 = x[..., :d] x2 = x[..., d:] y1 = x1 * cos + x2 * sin y2 = x1 * (-sin) + x2 * cos return torch.cat([y1, y2], 3).type_as(x) class CausalSelfAttention(nn.Module): def __init__(self, config): super().__init__() self.n_head = config.n_head self.n_embd = config.n_embd self.head_dim = self.n_embd // self.n_head assert self.n_embd % self.n_head == 0 self.c_q = nn.Linear(self.n_embd, self.n_embd, bias=False) self.c_k = nn.Linear(self.n_embd, self.n_embd, bias=False) self.c_v = nn.Linear(self.n_embd, self.n_embd, bias=False) # output projection self.c_proj = nn.Linear(self.n_embd, self.n_embd, bias=False) self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 self.rotary = Rotary(self.head_dim) self.lamb = nn.Parameter(torch.tensor(0.5)) # @Grad62304977 def forward(self, x, v1=None): B, T, C = x.size() # batch size, 
sequence length, embedding dimensionality (n_embd) q = self.c_q(x).view(B, T, self.n_head, self.head_dim) k = self.c_k(x).view(B, T, self.n_head, self.head_dim) v = self.c_v(x).view(B, T, self.n_head, self.head_dim) if v1 is None: v1 = v # This happens if we are in the first block. v needs to be accessed by subsequent blocks v = (1 - self.lamb) * v + self.lamb * v1.view_as(v) # @Grad62304977 cos, sin = self.rotary(q) q, k = F.rms_norm(q, (q.size(-1),)), F.rms_norm(k, (k.size(-1),)) # QK norm suggested by @Grad62304977 q, k = apply_rotary_emb(q, cos, sin), apply_rotary_emb(k, cos, sin) y = F.scaled_dot_product_attention(q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), is_causal=True) y = y.transpose(1, 2).contiguous().view_as(x) # re-assemble all head outputs side by side y = self.c_proj(y) return y, v1 class MLP(nn.Module): def __init__(self, config): super().__init__() self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd, bias=False) self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd, bias=False) self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 def forward(self, x): x = self.c_fc(x) x = F.relu(x).square() # https://arxiv.org/abs/2109.08668v2; ~1-2% better than GELU; suggested by @SKYLINEZ007 and @Grad62304977 x = self.c_proj(x) return x class Block(nn.Module): def __init__(self, config): super().__init__() self.attn = CausalSelfAttention(config) self.mlp = MLP(config) self.lambdas = nn.Parameter(torch.tensor([1., 0.])) def forward(self, x, v1, x0): x = self.lambdas[0] * x + self.lambdas[1] * x0 x1, v1 = self.attn(F.rms_norm(x, (x.size(-1),)), v1) x = x + x1 x = x + self.mlp(F.rms_norm(x, (x.size(-1),))) return x, v1 # ----------------------------------------------------------------------------- # The main GPT-2 model @dataclass class GPTConfig: vocab_size : int = 50304 n_layer : int = 12 n_head : int = 6 # head dim 128 suggested by @Grad62304977 n_embd : int = 768 class GPT(nn.Module): def __init__(self, config): 
super().__init__() self.config = config self.transformer = nn.ModuleDict(dict( wte = nn.Embedding(config.vocab_size, config.n_embd), h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]), )) self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False) self.lm_head.weight.data.zero_() # @Grad62304977 def forward(self, idx, targets=None, return_logits=True): # forward the GPT model itself x = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) x = F.rms_norm(x, (x.size(-1),)) # @Grad62304977 x0 = x v1 = None for block in self.transformer.h: x, v1 = block(x, v1, x0) x = F.rms_norm(x, (x.size(-1),)) if targets is not None: # if we are given some desired targets also calculate the loss logits = self.lm_head(x) logits = 30 * torch.tanh(logits / 30) # @Grad62304977 logits = logits.float() # use tf32/fp32 for logits loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1) else: # inference-time mini-optimization: only forward the lm_head on the very last position logits = self.lm_head(x[:, [-1], :]) # note: using list [-1] to preserve the time dim logits = 30 * torch.tanh(logits / 30) # @Grad62304977 logits = logits.float() # use tf32/fp32 for logits loss = None # there are performance reasons why not returning logits is prudent, if not needed if not return_logits: logits = None return logits, loss # ----------------------------------------------------------------------------- # Our own simple Distributed Data Loader def _peek_data_shard(filename): # only reads the header, returns header data with open(filename, "rb") as f: # first read the header, which is 256 int32 integers (4 bytes each) header = np.frombuffer(f.read(256*4), dtype=np.int32) if header[0] != 20240520: print("ERROR: magic number mismatch in the data .bin file!") print("---> HINT: Are you passing in a correct file with --input_bin?") print("---> HINT: Dataset encoding changed recently, re-run data prepro or refer again to README") 
print("---> HINT: For example re-run: `python dev/data/tinyshakespeare.py`, then re-try") exit(1) assert header[1] == 1, "unsupported version" ntok = header[2] # number of tokens (claimed) return ntok # for now just return the number of tokens def _load_data_shard(filename): with open(filename, "rb") as f: # first read the header, which is 256 int32 integers (4 bytes each) header = np.frombuffer(f.read(256*4), dtype=np.int32) assert header[0] == 20240520, "magic number mismatch in the data .bin file" assert header[1] == 1, "unsupported version" ntok = header[2] # number of tokens (claimed) # the rest of it are tokens, stored as uint16 tokens = np.frombuffer(f.read(), dtype=np.uint16) assert len(tokens) == ntok, "number of tokens read does not match header?" return tokens class DistributedDataLoader: def __init__(self, filename_pattern, B, T, process_rank, num_processes): self.process_rank = process_rank self.num_processes = num_processes self.B = B self.T = T # glob files that match the pattern self.files = sorted(glob.glob(filename_pattern)) assert len(self.files) > 0, f"did not find any files that match the pattern {filename_pattern}" # load and validate all data shards, count number of tokens in total ntok_total = 0 for fname in self.files: shard_ntok = _peek_data_shard(fname) assert shard_ntok >= num_processes * B * T + 1 ntok_total += int(shard_ntok) self.ntok_total = ntok_total # kick things off self.reset() def reset(self): self.current_shard = 0 self.current_position = self.process_rank * self.B * self.T self.tokens = _load_data_shard(self.files[self.current_shard]) def advance(self): # advance to next data shard self.current_shard = (self.current_shard + 1) % len(self.files) self.current_position = self.process_rank * self.B * self.T self.tokens = _load_data_shard(self.files[self.current_shard]) def next_batch(self): B = self.B T = self.T buf = self.tokens[self.current_position : self.current_position+B*T+1] buf = torch.tensor(buf.astype(np.int32), 
dtype=torch.long) x = (buf[:-1]).view(B, T) # inputs y = (buf[1:]).view(B, T) # targets # advance current position and load next shard if necessary self.current_position += B * T * self.num_processes if self.current_position + (B * T * self.num_processes + 1) > len(self.tokens): self.advance() return x.cuda(), y.cuda() # ----------------------------------------------------------------------------- # int main @dataclass class Hyperparameters: # data hyperparams input_bin : str = 'data/fineweb10B/fineweb_train_*.bin' # input .bin to train on input_val_bin : str = 'data/fineweb10B/fineweb_val_*.bin' # input .bin to eval validation loss on # optimization hyperparams batch_size : int = 8*64 # batch size, in sequences, across all devices device_batch_size : int = 32 # batch size, in sequences, per device sequence_length : int = 1024 # sequence length, in tokens num_iterations : int = 3200 # number of iterations to run warmup_iters : int = 0 warmdown_iters : int = 914 # number of iterations of linear warmup/warmdown for triangular or trapezoidal schedule weight_decay : float = 0 # evaluation and logging hyperparams val_loss_every : int = 125 # every how many steps to evaluate val loss? 0 for only at the end val_tokens : int = 10485760 # how many tokens of validation data? it's important to keep this fixed for consistent comparisons save_every : int = 320 # every how many steps to save the checkpoint? 0 for only at the end args = Hyperparameters() # set up DDP (distributed data parallel). torchrun sets this env variable assert torch.cuda.is_available() dist.init_process_group(backend='nccl') ddp_rank = int(os.environ['RANK']) ddp_local_rank = int(os.environ['LOCAL_RANK']) ddp_world_size = int(os.environ['WORLD_SIZE']) device = f'cuda:{ddp_local_rank}' torch.cuda.set_device(device) print(f"using device: {device}") master_process = (ddp_rank == 0) # this process will do logging, checkpointing etc. 
# convenience variables
B, T = args.device_batch_size, args.sequence_length
# calculate the number of steps to take in the val loop.
# each val step consumes B*T tokens on every rank, so val_tokens must divide evenly
assert args.val_tokens % (B * T * ddp_world_size) == 0, (
    f"val_tokens ({args.val_tokens}) must be a multiple of "
    f"device_batch_size * sequence_length * world_size ({B * T * ddp_world_size})"
)
val_steps = args.val_tokens // (B * T * ddp_world_size)
# calculate the steps of gradient accumulation required to attain the desired global batch size.
assert args.batch_size % (B * ddp_world_size) == 0, (
    f"batch_size ({args.batch_size}) must be a multiple of "
    f"device_batch_size * world_size ({B * ddp_world_size})"
)
train_accumulation_steps = args.batch_size // (B * ddp_world_size)

# load tokens
train_loader = DistributedDataLoader(args.input_bin, B, T, ddp_rank, ddp_world_size)
val_loader = DistributedDataLoader(args.input_val_bin, B, T, ddp_rank, ddp_world_size)
if master_process:
    print(f"Training DataLoader: total number of tokens: {train_loader.ntok_total} across {len(train_loader.files)} files")
    print(f"Validation DataLoader: total number of tokens: {val_loader.ntok_total} across {len(val_loader.files)} files")
# prefetch the first training batch; the training loop fetches the next one after each forward pass
x, y = train_loader.next_batch()

# there are only 50257 unique GPT-2 tokens; we extend to nearest multiple of 128 for efficiency. suggested to me by @Grad62304977.
# this originates from Karpathy's experiments.
num_vocab = 50304 model = GPT(GPTConfig(vocab_size=num_vocab, n_layer=12, n_head=6, n_embd=768)) model = model.cuda() if hasattr(config, "coordinate_descent_tuning"): config.coordinate_descent_tuning = True # suggested by @Chillee model = torch.compile(model) # here we wrap model into DDP container model = DDP(model, device_ids=[ddp_local_rank]) raw_model = model.module # always contains the "raw" unwrapped model ctx = torch.amp.autocast(device_type='cuda', dtype=torch.bfloat16) # CUDNN attention is ~4ms faster than Flash, but doesn't get selected by default in PyTorch 2.5.1 from torch.backends.cuda import enable_cudnn_sdp, enable_flash_sdp, enable_math_sdp, enable_mem_efficient_sdp enable_cudnn_sdp(True) enable_flash_sdp(False) enable_mem_efficient_sdp(False) enable_math_sdp(False) # init the optimizer(s) optimizer1 = torch.optim.Adam([raw_model.transformer.wte.weight], lr=0.3, betas=(0.9, 0.95), fused=True) optimizer2 = torch.optim.Adam([raw_model.lm_head.weight], lr=0.002, betas=(0.9, 0.95), fused=True) params = list(raw_model.transformer.h.parameters()) matrix_params = [p for p in params if p.ndim == 2] scalar_params = [p for p in params if p.ndim < 2] optimizer3 = Muon(matrix_params, lr=0.02, momentum=0.95) optimizer4 = torch.optim.Adam(scalar_params, lr=0.02, betas=(0.9, 0.95), fused=True) # note that this learning rate is neither sensitive nor tuned optimizers = [optimizer1, optimizer2, optimizer3, optimizer4] # learning rate decay scheduler (linear warmup and warmdown) def get_lr(it): assert it <= args.num_iterations # 1) linear warmup for warmup_iters steps if it < args.warmup_iters: return (it+1) / args.warmup_iters # 2) constant lr for a while elif it < args.num_iterations - args.warmdown_iters: return 1.0 # 3) linear warmdown else: decay_ratio = (args.num_iterations - it) / args.warmdown_iters return decay_ratio schedulers = [torch.optim.lr_scheduler.LambdaLR(opt, get_lr) for opt in optimizers] # begin logging if master_process: run_id = 
str(uuid.uuid4()) logdir = 'logs/%s/' % run_id os.makedirs(logdir, exist_ok=True) logfile = 'logs/%s.txt' % run_id # create the log file with open(logfile, "w") as f: # begin the log by printing this file (the Python code) f.write('='*100 + '\n') f.write(code) f.write('='*100 + '\n') # log information about the hardware/software environment this is running on # and print the full `nvidia-smi` to file f.write(f"Running pytorch {torch.version.__version__} compiled for CUDA {torch.version.cuda}\nnvidia-smi:\n") import subprocess result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) f.write(f'{result.stdout}\n') f.write('='*100 + '\n') training_time_ms = 0 # start the clock torch.cuda.synchronize() t0 = time.time() # begin training train_loader.reset() for step in range(args.num_iterations + 1): last_step = (step == args.num_iterations) # This effectively ignores timing first 10 steps, which are slower for weird reasons. # Alternately, and slightly more correctly in terms of benchmarking, we could do 10 # steps with dummy data first, and then re-initialize the model and reset the loader. 
if step == 10: training_time_ms = 0 t0 = time.time() timed_steps = float('nan') if step <= 11 else (step - 10) + 1 # <= 11 to avoid bug in val # once in a while evaluate the validation dataset if (last_step or (args.val_loss_every > 0 and step % args.val_loss_every == 0)): # stop the clock torch.cuda.synchronize() training_time_ms += 1000 * (time.time() - t0) # run validation batches model.eval() val_loader.reset() val_loss = 0.0 for _ in range(val_steps): x_val, y_val = val_loader.next_batch() with ctx: # of course, we'd like to use no_grad() here too, but that creates a torch.compile error for some reason _, loss = model(x_val, y_val, return_logits=False) val_loss += loss.detach() del loss dist.all_reduce(val_loss, op=dist.ReduceOp.AVG) val_loss /= val_steps # log val loss to console and to logfile if master_process: print(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms') with open(logfile, "a") as f: f.write(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms\n') # start the clock again torch.cuda.synchronize() t0 = time.time() if master_process and (last_step or (args.save_every > 0 and step % args.save_every == 0)): # stop the clock torch.cuda.synchronize() training_time_ms += 1000 * (time.time() - t0) # save the state of the training process log = dict(step=step, code=code, model=raw_model.state_dict(), optimizers=[opt.state_dict() for opt in optimizers]) torch.save(log, 'logs/%s/state_step%06d.pt' % (run_id, step)) # start the clock again torch.cuda.synchronize() t0 = time.time() # bit confusing: we want to make sure to eval on 0th iteration # but also after the very last iteration. 
so we loop for step <= num_iterations # instead of just < num_iterations (one extra due to <=), only to do # the validation/sampling one last time, and then we break right here as we're done. if last_step: break # --------------- TRAINING SECTION BEGIN ----------------- model.train() for i in range(1, train_accumulation_steps+1): # forward pass with ctx: _, loss = model(x, y, return_logits=False) train_loss = loss.detach() # advance the dataset for the next batch x, y = train_loader.next_batch() # backward pass if i < train_accumulation_steps: with model.no_sync(): # there's no need to sync gradients every accumulation step loss.backward() else: loss.backward() # just sync on the last step for p in model.parameters(): p.grad /= train_accumulation_steps # momentum warmup for Muon frac = min(step/500, 1) optimizer3.param_groups[0]['momentum'] = (1 - frac) * 0.85 + frac * 0.95 # step the optimizers and schedulers for opt, sched in zip(optimizers, schedulers): opt.step() sched.step() # null the gradients model.zero_grad(set_to_none=True) # --------------- TRAINING SECTION END ------------------- # everything that follows now is just diagnostics, prints, logging, etc. 
#dist.all_reduce(train_loss, op=dist.ReduceOp.AVG) # all-reducing the training loss would be more correct in terms of logging, but slower if master_process: approx_time = training_time_ms + 1000 * (time.time() - t0) print(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms") with open(logfile, "a") as f: f.write(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms\n") if master_process: print(f"peak memory consumption: {torch.cuda.max_memory_allocated() // 1024 // 1024} MiB") # ------------------------------------------------------------------------- # clean up nice dist.destroy_process_group() ==================================================================================================== Running pytorch 2.5.1+cu124 compiled for CUDA 12.4 nvidia-smi: Fri Nov 8 18:04:18 2024 +---------------------------------------------------------------------------------------+ | NVIDIA-SMI 535.183.01 Driver Version: 535.183.01 CUDA Version: 12.2 | |-----------------------------------------+----------------------+----------------------+ | GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | | | | MIG M. 
| |=========================================+======================+======================| | 0 NVIDIA L40S On | 00000000:34:00.0 Off | 0 | | N/A 31C P0 72W / 350W | 2724MiB / 46068MiB | 8% Default | | | | N/A | +-----------------------------------------+----------------------+----------------------+ +---------------------------------------------------------------------------------------+ | Processes: | | GPU GI CI PID Type Process name GPU Memory | | ID ID Usage | |=======================================================================================| +---------------------------------------------------------------------------------------+ ==================================================================================================== step:0/3200 val_loss:10.8259 train_time:219ms step_avg:nanms step:1/3200 train_loss:10.8258 train_time:48353ms step_avg:nanms step:2/3200 train_loss:10.6001 train_time:52153ms step_avg:nanms step:3/3200 train_loss:10.0403 train_time:56071ms step_avg:nanms step:4/3200 train_loss:9.1487 train_time:60007ms step_avg:nanms step:5/3200 train_loss:8.1693 train_time:63952ms step_avg:nanms step:6/3200 train_loss:7.6040 train_time:67904ms step_avg:nanms step:7/3200 train_loss:7.2514 train_time:71863ms step_avg:nanms step:8/3200 train_loss:7.0003 train_time:75825ms step_avg:nanms step:9/3200 train_loss:7.2817 train_time:79781ms step_avg:nanms step:10/3200 train_loss:6.7501 train_time:83747ms step_avg:nanms step:11/3200 train_loss:6.9062 train_time:3981ms step_avg:nanms step:12/3200 train_loss:6.5774 train_time:7972ms step_avg:nanms step:13/3200 train_loss:6.5117 train_time:11951ms step_avg:3983.73ms step:14/3200 train_loss:6.5683 train_time:15938ms step_avg:3984.55ms step:15/3200 train_loss:6.3263 train_time:19940ms step_avg:3988.03ms step:16/3200 train_loss:6.2147 train_time:23952ms step_avg:3992.08ms step:17/3200 train_loss:6.2890 train_time:27980ms step_avg:3997.12ms step:18/3200 train_loss:6.3679 train_time:32014ms step_avg:4001.75ms 
step:19/3200 train_loss:6.3626 train_time:36070ms step_avg:4007.73ms step:20/3200 train_loss:6.4105 train_time:40148ms step_avg:4014.79ms step:21/3200 train_loss:6.3490 train_time:44248ms step_avg:4022.52ms step:22/3200 train_loss:6.0031 train_time:48373ms step_avg:4031.12ms step:23/3200 train_loss:5.7725 train_time:52526ms step_avg:4040.46ms step:24/3200 train_loss:6.3310 train_time:56701ms step_avg:4050.06ms step:25/3200 train_loss:6.3124 train_time:60879ms step_avg:4058.60ms step:26/3200 train_loss:6.0229 train_time:65058ms step_avg:4066.09ms step:27/3200 train_loss:6.1184 train_time:69238ms step_avg:4072.80ms step:28/3200 train_loss:6.1316 train_time:73415ms step_avg:4078.64ms step:29/3200 train_loss:6.0346 train_time:77594ms step_avg:4083.92ms step:30/3200 train_loss:6.1556 train_time:81774ms step_avg:4088.68ms step:31/3200 train_loss:5.9850 train_time:85953ms step_avg:4092.99ms step:32/3200 train_loss:6.0302 train_time:90138ms step_avg:4097.20ms step:33/3200 train_loss:6.0273 train_time:94334ms step_avg:4101.46ms step:34/3200 train_loss:5.9602 train_time:98553ms step_avg:4106.37ms step:35/3200 train_loss:5.9009 train_time:102753ms step_avg:4110.12ms step:36/3200 train_loss:5.9463 train_time:106943ms step_avg:4113.19ms step:37/3200 train_loss:5.9439 train_time:111093ms step_avg:4114.55ms step:38/3200 train_loss:6.2273 train_time:115219ms step_avg:4114.95ms step:39/3200 train_loss:6.3516 train_time:119329ms step_avg:4114.81ms step:40/3200 train_loss:5.9282 train_time:123423ms step_avg:4114.11ms step:41/3200 train_loss:5.9061 train_time:127505ms step_avg:4113.06ms step:42/3200 train_loss:5.8045 train_time:131582ms step_avg:4111.93ms step:43/3200 train_loss:5.6552 train_time:135658ms step_avg:4110.83ms step:44/3200 train_loss:5.6186 train_time:139726ms step_avg:4109.59ms step:45/3200 train_loss:5.5809 train_time:143793ms step_avg:4108.37ms step:46/3200 train_loss:5.7056 train_time:147856ms step_avg:4107.12ms step:47/3200 train_loss:5.7148 train_time:151917ms 
step_avg:4105.87ms step:48/3200 train_loss:5.4491 train_time:155977ms step_avg:4104.65ms step:49/3200 train_loss:5.6130 train_time:160036ms step_avg:4103.48ms step:50/3200 train_loss:5.5373 train_time:164102ms step_avg:4102.55ms step:51/3200 train_loss:5.4612 train_time:168166ms step_avg:4101.62ms step:52/3200 train_loss:5.5302 train_time:172239ms step_avg:4100.93ms step:53/3200 train_loss:5.4676 train_time:176316ms step_avg:4100.37ms step:54/3200 train_loss:5.5463 train_time:180399ms step_avg:4099.97ms step:55/3200 train_loss:5.4484 train_time:184485ms step_avg:4099.66ms step:56/3200 train_loss:5.5138 train_time:188589ms step_avg:4099.76ms step:57/3200 train_loss:5.5702 train_time:192699ms step_avg:4099.99ms step:58/3200 train_loss:5.2871 train_time:196824ms step_avg:4100.50ms step:59/3200 train_loss:5.3233 train_time:200955ms step_avg:4101.13ms step:60/3200 train_loss:5.1519 train_time:205095ms step_avg:4101.89ms step:61/3200 train_loss:5.4645 train_time:209272ms step_avg:4103.38ms step:62/3200 train_loss:5.4996 train_time:213451ms step_avg:4104.83ms step:63/3200 train_loss:5.4123 train_time:217658ms step_avg:4106.76ms step:64/3200 train_loss:5.5326 train_time:221895ms step_avg:4109.17ms step:65/3200 train_loss:5.5284 train_time:226132ms step_avg:4111.50ms step:66/3200 train_loss:5.4288 train_time:230324ms step_avg:4112.94ms step:67/3200 train_loss:5.4468 train_time:234461ms step_avg:4113.34ms step:68/3200 train_loss:5.4959 train_time:238572ms step_avg:4113.30ms step:69/3200 train_loss:5.3346 train_time:242655ms step_avg:4112.79ms step:70/3200 train_loss:5.1385 train_time:246717ms step_avg:4111.95ms step:71/3200 train_loss:5.1542 train_time:250757ms step_avg:4110.78ms step:72/3200 train_loss:5.3235 train_time:254819ms step_avg:4109.99ms step:73/3200 train_loss:5.3034 train_time:258869ms step_avg:4109.03ms step:74/3200 train_loss:5.2726 train_time:262913ms step_avg:4108.02ms step:75/3200 train_loss:5.3384 train_time:266943ms step_avg:4106.82ms step:76/3200 
train_loss:5.2889 train_time:270970ms step_avg:4105.61ms step:77/3200 train_loss:5.2573 train_time:274991ms step_avg:4104.34ms step:78/3200 train_loss:5.4338 train_time:279012ms step_avg:4103.12ms step:79/3200 train_loss:5.0812 train_time:283031ms step_avg:4101.90ms step:80/3200 train_loss:5.2128 train_time:287045ms step_avg:4100.64ms step:81/3200 train_loss:4.9174 train_time:291056ms step_avg:4099.38ms step:82/3200 train_loss:5.0614 train_time:295072ms step_avg:4098.22ms step:83/3200 train_loss:5.1598 train_time:299085ms step_avg:4097.05ms step:84/3200 train_loss:5.1010 train_time:303098ms step_avg:4095.92ms step:85/3200 train_loss:4.9514 train_time:307111ms step_avg:4094.81ms step:86/3200 train_loss:4.9858 train_time:311126ms step_avg:4093.76ms step:87/3200 train_loss:5.1761 train_time:315146ms step_avg:4092.80ms step:88/3200 train_loss:5.0091 train_time:319166ms step_avg:4091.88ms step:89/3200 train_loss:5.1585 train_time:323184ms step_avg:4090.93ms step:90/3200 train_loss:5.1347 train_time:327205ms step_avg:4090.06ms step:91/3200 train_loss:5.1060 train_time:331233ms step_avg:4089.30ms step:92/3200 train_loss:5.0316 train_time:335264ms step_avg:4088.58ms step:93/3200 train_loss:5.0712 train_time:339304ms step_avg:4088.00ms step:94/3200 train_loss:5.1352 train_time:343345ms step_avg:4087.44ms step:95/3200 train_loss:4.9161 train_time:347391ms step_avg:4086.95ms step:96/3200 train_loss:5.0715 train_time:351440ms step_avg:4086.51ms step:97/3200 train_loss:4.9629 train_time:355479ms step_avg:4085.97ms step:98/3200 train_loss:5.0630 train_time:359536ms step_avg:4085.63ms step:99/3200 train_loss:5.0076 train_time:363597ms step_avg:4085.36ms step:100/3200 train_loss:4.9957 train_time:367671ms step_avg:4085.24ms step:101/3200 train_loss:5.0037 train_time:371758ms step_avg:4085.26ms step:102/3200 train_loss:4.9098 train_time:375853ms step_avg:4085.36ms step:103/3200 train_loss:5.0799 train_time:379970ms step_avg:4085.70ms step:104/3200 train_loss:4.8957 
train_time:384096ms step_avg:4086.13ms step:105/3200 train_loss:4.9924 train_time:388253ms step_avg:4086.88ms step:106/3200 train_loss:4.9174 train_time:392437ms step_avg:4087.89ms step:107/3200 train_loss:4.6095 train_time:396644ms step_avg:4089.11ms step:108/3200 train_loss:4.8999 train_time:400878ms step_avg:4090.59ms step:109/3200 train_loss:4.8402 train_time:405072ms step_avg:4091.64ms step:110/3200 train_loss:4.7280 train_time:409202ms step_avg:4092.02ms step:111/3200 train_loss:4.8738 train_time:413280ms step_avg:4091.88ms step:112/3200 train_loss:4.8569 train_time:417319ms step_avg:4091.36ms step:113/3200 train_loss:4.8117 train_time:421358ms step_avg:4090.86ms step:114/3200 train_loss:4.8431 train_time:425379ms step_avg:4090.18ms step:115/3200 train_loss:4.7878 train_time:429381ms step_avg:4089.34ms step:116/3200 train_loss:4.9981 train_time:433373ms step_avg:4088.42ms step:117/3200 train_loss:4.7663 train_time:437356ms step_avg:4087.44ms step:118/3200 train_loss:4.8516 train_time:441333ms step_avg:4086.42ms step:119/3200 train_loss:4.7922 train_time:445303ms step_avg:4085.35ms step:120/3200 train_loss:4.6265 train_time:449273ms step_avg:4084.30ms step:121/3200 train_loss:4.7673 train_time:453235ms step_avg:4083.20ms step:122/3200 train_loss:4.4846 train_time:457188ms step_avg:4082.04ms step:123/3200 train_loss:4.7519 train_time:461163ms step_avg:4081.09ms step:124/3200 train_loss:4.7385 train_time:465139ms step_avg:4080.17ms step:125/3200 train_loss:4.8081 train_time:469112ms step_avg:4079.23ms step:125/3200 val_loss:4.7168 train_time:469112ms step_avg:4079.23ms step:126/3200 train_loss:4.6532 train_time:473086ms step_avg:4078.33ms step:127/3200 train_loss:4.6349 train_time:477061ms step_avg:4077.44ms step:128/3200 train_loss:4.7974 train_time:481031ms step_avg:4076.53ms step:129/3200 train_loss:4.6673 train_time:485007ms step_avg:4075.69ms step:130/3200 train_loss:4.6959 train_time:488982ms step_avg:4074.85ms step:131/3200 train_loss:4.8231 
train_time:492954ms step_avg:4074.00ms step:132/3200 train_loss:4.6620 train_time:496932ms step_avg:4073.21ms step:133/3200 train_loss:4.5921 train_time:500909ms step_avg:4072.43ms step:134/3200 train_loss:4.6457 train_time:504890ms step_avg:4071.69ms step:135/3200 train_loss:4.7013 train_time:508864ms step_avg:4070.91ms step:136/3200 train_loss:4.5077 train_time:512847ms step_avg:4070.22ms step:137/3200 train_loss:4.6344 train_time:516834ms step_avg:4069.56ms step:138/3200 train_loss:4.9498 train_time:520819ms step_avg:4068.90ms step:139/3200 train_loss:4.5726 train_time:524805ms step_avg:4068.26ms step:140/3200 train_loss:4.5680 train_time:528796ms step_avg:4067.66ms step:141/3200 train_loss:4.5759 train_time:532790ms step_avg:4067.10ms step:142/3200 train_loss:4.5972 train_time:536787ms step_avg:4066.57ms step:143/3200 train_loss:4.4597 train_time:540787ms step_avg:4066.07ms step:144/3200 train_loss:4.5055 train_time:544796ms step_avg:4065.64ms step:145/3200 train_loss:4.7178 train_time:548806ms step_avg:4065.23ms step:146/3200 train_loss:4.5062 train_time:552822ms step_avg:4064.87ms step:147/3200 train_loss:4.4491 train_time:556838ms step_avg:4064.51ms step:148/3200 train_loss:4.5011 train_time:560836ms step_avg:4064.03ms step:149/3200 train_loss:4.6435 train_time:564836ms step_avg:4063.57ms step:150/3200 train_loss:4.5146 train_time:568840ms step_avg:4063.14ms step:151/3200 train_loss:4.5807 train_time:572855ms step_avg:4062.80ms step:152/3200 train_loss:4.4452 train_time:576875ms step_avg:4062.50ms step:153/3200 train_loss:4.6090 train_time:580907ms step_avg:4062.28ms step:154/3200 train_loss:4.5008 train_time:584943ms step_avg:4062.10ms step:155/3200 train_loss:4.4837 train_time:588971ms step_avg:4061.87ms step:156/3200 train_loss:4.6117 train_time:593004ms step_avg:4061.67ms step:157/3200 train_loss:4.4686 train_time:597058ms step_avg:4061.62ms step:158/3200 train_loss:4.4039 train_time:601128ms step_avg:4061.68ms step:159/3200 train_loss:4.2918 
train_time:605220ms step_avg:4061.88ms step:160/3200 train_loss:4.5800 train_time:609322ms step_avg:4062.14ms step:161/3200 train_loss:4.2625 train_time:613442ms step_avg:4062.53ms step:162/3200 train_loss:4.4641 train_time:617595ms step_avg:4063.13ms step:163/3200 train_loss:4.3824 train_time:621783ms step_avg:4063.94ms step:164/3200 train_loss:4.4151 train_time:625987ms step_avg:4064.85ms step:165/3200 train_loss:4.4252 train_time:630206ms step_avg:4065.84ms step:166/3200 train_loss:4.2249 train_time:634399ms step_avg:4066.66ms step:167/3200 train_loss:4.4236 train_time:638534ms step_avg:4067.10ms step:168/3200 train_loss:4.3081 train_time:642635ms step_avg:4067.31ms step:169/3200 train_loss:4.3591 train_time:646718ms step_avg:4067.41ms step:170/3200 train_loss:4.0883 train_time:650777ms step_avg:4067.35ms step:171/3200 train_loss:4.5466 train_time:654815ms step_avg:4067.17ms step:172/3200 train_loss:4.4462 train_time:658849ms step_avg:4066.97ms step:173/3200 train_loss:4.2299 train_time:662875ms step_avg:4066.72ms step:174/3200 train_loss:4.3446 train_time:666907ms step_avg:4066.51ms step:175/3200 train_loss:4.2011 train_time:670942ms step_avg:4066.31ms step:176/3200 train_loss:4.2611 train_time:674969ms step_avg:4066.08ms step:177/3200 train_loss:4.2148 train_time:678991ms step_avg:4065.82ms step:178/3200 train_loss:4.4734 train_time:683007ms step_avg:4065.52ms step:179/3200 train_loss:4.3088 train_time:687020ms step_avg:4065.21ms step:180/3200 train_loss:4.3297 train_time:691033ms step_avg:4064.90ms step:181/3200 train_loss:4.3155 train_time:695044ms step_avg:4064.58ms step:182/3200 train_loss:4.4256 train_time:699055ms step_avg:4064.27ms step:183/3200 train_loss:4.2002 train_time:703069ms step_avg:4063.98ms step:184/3200 train_loss:4.2397 train_time:707083ms step_avg:4063.69ms step:185/3200 train_loss:4.3318 train_time:711096ms step_avg:4063.40ms step:186/3200 train_loss:4.0248 train_time:715117ms step_avg:4063.16ms step:187/3200 train_loss:4.3952 
train_time:719140ms step_avg:4062.94ms step:188/3200 train_loss:4.2798 train_time:723164ms step_avg:4062.72ms step:189/3200 train_loss:4.1206 train_time:727192ms step_avg:4062.52ms step:190/3200 train_loss:4.1656 train_time:731226ms step_avg:4062.36ms step:191/3200 train_loss:4.2515 train_time:735256ms step_avg:4062.19ms step:192/3200 train_loss:4.2286 train_time:739293ms step_avg:4062.05ms step:193/3200 train_loss:4.0183 train_time:743320ms step_avg:4061.86ms step:194/3200 train_loss:4.3272 train_time:747351ms step_avg:4061.69ms step:195/3200 train_loss:4.2061 train_time:751392ms step_avg:4061.58ms step:196/3200 train_loss:4.2792 train_time:755440ms step_avg:4061.51ms step:197/3200 train_loss:4.1198 train_time:759502ms step_avg:4061.51ms step:198/3200 train_loss:4.3033 train_time:763577ms step_avg:4061.58ms step:199/3200 train_loss:4.0874 train_time:767658ms step_avg:4061.68ms step:200/3200 train_loss:4.5550 train_time:771757ms step_avg:4061.88ms step:201/3200 train_loss:4.3229 train_time:775881ms step_avg:4062.20ms step:202/3200 train_loss:4.2404 train_time:780017ms step_avg:4062.59ms step:203/3200 train_loss:4.6433 train_time:784193ms step_avg:4063.18ms step:204/3200 train_loss:4.2239 train_time:788395ms step_avg:4063.89ms step:205/3200 train_loss:4.2252 train_time:792593ms step_avg:4064.58ms step:206/3200 train_loss:4.1525 train_time:796780ms step_avg:4065.20ms step:207/3200 train_loss:4.2365 train_time:800885ms step_avg:4065.40ms step:208/3200 train_loss:4.3107 train_time:804962ms step_avg:4065.46ms step:209/3200 train_loss:4.2079 train_time:809012ms step_avg:4065.39ms step:210/3200 train_loss:4.1494 train_time:813037ms step_avg:4065.19ms step:211/3200 train_loss:4.1654 train_time:817060ms step_avg:4064.97ms step:212/3200 train_loss:4.1329 train_time:821079ms step_avg:4064.75ms step:213/3200 train_loss:4.4645 train_time:825091ms step_avg:4064.49ms step:214/3200 train_loss:4.2647 train_time:829091ms step_avg:4064.17ms step:215/3200 train_loss:4.2045 
train_time:833090ms step_avg:4063.86ms step:216/3200 train_loss:4.1214 train_time:837081ms step_avg:4063.50ms step:217/3200 train_loss:4.2397 train_time:841070ms step_avg:4063.14ms step:218/3200 train_loss:4.2035 train_time:845056ms step_avg:4062.77ms step:219/3200 train_loss:4.1313 train_time:849042ms step_avg:4062.40ms step:220/3200 train_loss:4.1508 train_time:853024ms step_avg:4062.02ms step:221/3200 train_loss:3.9857 train_time:857025ms step_avg:4061.73ms step:222/3200 train_loss:4.0367 train_time:861027ms step_avg:4061.45ms step:223/3200 train_loss:4.2728 train_time:865026ms step_avg:4061.15ms step:224/3200 train_loss:4.0811 train_time:869025ms step_avg:4060.86ms step:225/3200 train_loss:4.4851 train_time:873026ms step_avg:4060.59ms step:226/3200 train_loss:4.0461 train_time:877034ms step_avg:4060.34ms step:227/3200 train_loss:4.0561 train_time:881039ms step_avg:4060.09ms step:228/3200 train_loss:4.1409 train_time:885042ms step_avg:4059.82ms step:229/3200 train_loss:4.2043 train_time:889045ms step_avg:4059.57ms step:230/3200 train_loss:4.1884 train_time:893053ms step_avg:4059.33ms step:231/3200 train_loss:4.1180 train_time:897065ms step_avg:4059.12ms step:232/3200 train_loss:4.2275 train_time:901066ms step_avg:4058.85ms step:233/3200 train_loss:4.1199 train_time:905057ms step_avg:4058.55ms step:234/3200 train_loss:4.0663 train_time:909051ms step_avg:4058.26ms step:235/3200 train_loss:4.0769 train_time:913048ms step_avg:4057.99ms step:236/3200 train_loss:4.2146 train_time:917047ms step_avg:4057.73ms step:237/3200 train_loss:4.0606 train_time:921055ms step_avg:4057.51ms step:238/3200 train_loss:4.1468 train_time:925066ms step_avg:4057.31ms step:239/3200 train_loss:4.1442 train_time:929084ms step_avg:4057.14ms step:240/3200 train_loss:4.1090 train_time:933107ms step_avg:4056.99ms step:241/3200 train_loss:4.3047 train_time:937138ms step_avg:4056.87ms step:242/3200 train_loss:3.9476 train_time:941158ms step_avg:4056.71ms step:243/3200 train_loss:4.1284 
train_time:945193ms step_avg:4056.62ms step:244/3200 train_loss:4.0756 train_time:949233ms step_avg:4056.55ms step:245/3200 train_loss:4.0647 train_time:953275ms step_avg:4056.49ms step:246/3200 train_loss:3.9913 train_time:957333ms step_avg:4056.50ms step:247/3200 train_loss:4.0787 train_time:961412ms step_avg:4056.59ms step:248/3200 train_loss:4.2475 train_time:965512ms step_avg:4056.77ms step:249/3200 train_loss:3.8390 train_time:969618ms step_avg:4056.98ms step:250/3200 train_loss:4.0433 train_time:973752ms step_avg:4057.30ms step:250/3200 val_loss:4.0971 train_time:973752ms step_avg:4057.30ms step:251/3200 train_loss:3.9808 train_time:977957ms step_avg:4057.91ms step:252/3200 train_loss:4.2669 train_time:982158ms step_avg:4058.50ms step:253/3200 train_loss:4.1900 train_time:986355ms step_avg:4059.08ms step:254/3200 train_loss:4.0999 train_time:990553ms step_avg:4059.65ms step:255/3200 train_loss:4.0796 train_time:994751ms step_avg:4060.21ms step:256/3200 train_loss:4.1820 train_time:998950ms step_avg:4060.77ms step:257/3200 train_loss:3.9326 train_time:1003148ms step_avg:4061.33ms step:258/3200 train_loss:4.2898 train_time:1007346ms step_avg:4061.88ms step:259/3200 train_loss:4.1009 train_time:1011544ms step_avg:4062.43ms step:260/3200 train_loss:4.1023 train_time:1015744ms step_avg:4062.98ms step:261/3200 train_loss:4.3948 train_time:1019944ms step_avg:4063.52ms step:262/3200 train_loss:4.0706 train_time:1024139ms step_avg:4064.04ms step:263/3200 train_loss:4.0574 train_time:1028316ms step_avg:4064.49ms step:264/3200 train_loss:4.0599 train_time:1032486ms step_avg:4064.91ms step:265/3200 train_loss:3.9921 train_time:1036619ms step_avg:4065.17ms step:266/3200 train_loss:4.1441 train_time:1040752ms step_avg:4065.44ms step:267/3200 train_loss:4.1208 train_time:1044885ms step_avg:4065.70ms step:268/3200 train_loss:4.0718 train_time:1049008ms step_avg:4065.92ms step:269/3200 train_loss:4.2623 train_time:1053138ms step_avg:4066.17ms step:270/3200 train_loss:4.1337 
train_time:1057269ms step_avg:4066.42ms step:271/3200 train_loss:4.0549 train_time:1061406ms step_avg:4066.69ms step:272/3200 train_loss:4.3276 train_time:1065542ms step_avg:4066.95ms step:273/3200 train_loss:4.0379 train_time:1069703ms step_avg:4067.31ms step:274/3200 train_loss:4.1289 train_time:1073877ms step_avg:4067.72ms step:275/3200 train_loss:4.1451 train_time:1078065ms step_avg:4068.17ms step:276/3200 train_loss:4.1030 train_time:1082261ms step_avg:4068.65ms step:277/3200 train_loss:4.0498 train_time:1086437ms step_avg:4069.05ms step:278/3200 train_loss:3.9972 train_time:1090587ms step_avg:4069.35ms step:279/3200 train_loss:3.9631 train_time:1094707ms step_avg:4069.54ms step:280/3200 train_loss:4.0161 train_time:1098827ms step_avg:4069.73ms step:281/3200 train_loss:4.2244 train_time:1102930ms step_avg:4069.85ms step:282/3200 train_loss:3.9931 train_time:1107023ms step_avg:4069.94ms step:283/3200 train_loss:3.9750 train_time:1111115ms step_avg:4070.02ms step:284/3200 train_loss:4.1135 train_time:1115209ms step_avg:4070.10ms step:285/3200 train_loss:4.0300 train_time:1119302ms step_avg:4070.19ms step:286/3200 train_loss:4.1779 train_time:1123387ms step_avg:4070.24ms step:287/3200 train_loss:4.0074 train_time:1127483ms step_avg:4070.33ms step:288/3200 train_loss:4.0244 train_time:1131580ms step_avg:4070.43ms step:289/3200 train_loss:4.0346 train_time:1135685ms step_avg:4070.55ms step:290/3200 train_loss:3.7945 train_time:1139798ms step_avg:4070.71ms step:291/3200 train_loss:4.0534 train_time:1143922ms step_avg:4070.90ms step:292/3200 train_loss:4.1674 train_time:1148053ms step_avg:4071.11ms step:293/3200 train_loss:3.9250 train_time:1152197ms step_avg:4071.37ms step:294/3200 train_loss:4.0503 train_time:1156358ms step_avg:4071.68ms step:295/3200 train_loss:3.8258 train_time:1160560ms step_avg:4072.14ms step:296/3200 train_loss:4.2619 train_time:1164760ms step_avg:4072.59ms step:297/3200 train_loss:4.0592 train_time:1168962ms step_avg:4073.04ms step:298/3200 
train_loss:4.0955 train_time:1173187ms step_avg:4073.57ms step:299/3200 train_loss:3.8285 train_time:1177399ms step_avg:4074.04ms step:300/3200 train_loss:4.0239 train_time:1181592ms step_avg:4074.46ms step:301/3200 train_loss:4.0917 train_time:1185787ms step_avg:4074.87ms step:302/3200 train_loss:3.9671 train_time:1189961ms step_avg:4075.21ms step:303/3200 train_loss:3.9037 train_time:1194101ms step_avg:4075.43ms step:304/3200 train_loss:4.0355 train_time:1198223ms step_avg:4075.59ms step:305/3200 train_loss:3.9087 train_time:1202338ms step_avg:4075.72ms step:306/3200 train_loss:4.0303 train_time:1206450ms step_avg:4075.84ms step:307/3200 train_loss:4.3890 train_time:1210557ms step_avg:4075.95ms step:308/3200 train_loss:4.1961 train_time:1214664ms step_avg:4076.05ms step:309/3200 train_loss:3.9551 train_time:1218775ms step_avg:4076.17ms step:310/3200 train_loss:3.8832 train_time:1222886ms step_avg:4076.29ms step:311/3200 train_loss:3.9136 train_time:1227009ms step_avg:4076.44ms step:312/3200 train_loss:3.9831 train_time:1231132ms step_avg:4076.60ms step:313/3200 train_loss:3.9683 train_time:1235256ms step_avg:4076.75ms step:314/3200 train_loss:3.8860 train_time:1239394ms step_avg:4076.95ms step:315/3200 train_loss:3.9934 train_time:1243539ms step_avg:4077.18ms step:316/3200 train_loss:3.8236 train_time:1247705ms step_avg:4077.47ms step:317/3200 train_loss:3.9215 train_time:1251886ms step_avg:4077.81ms step:318/3200 train_loss:3.8290 train_time:1256072ms step_avg:4078.16ms step:319/3200 train_loss:3.9858 train_time:1260273ms step_avg:4078.55ms step:320/3200 train_loss:4.0467 train_time:1264471ms step_avg:4078.94ms step:321/3200 train_loss:4.0092 train_time:1268646ms step_avg:4079.25ms step:322/3200 train_loss:4.0022 train_time:1272823ms step_avg:4079.56ms step:323/3200 train_loss:3.9293 train_time:1276984ms step_avg:4079.82ms step:324/3200 train_loss:4.1477 train_time:1281128ms step_avg:4080.03ms step:325/3200 train_loss:4.1286 train_time:1285273ms 
step_avg:4080.23ms step:326/3200 train_loss:3.8389 train_time:1289416ms step_avg:4080.43ms step:327/3200 train_loss:3.9081 train_time:1293542ms step_avg:4080.57ms step:328/3200 train_loss:3.9950 train_time:1297674ms step_avg:4080.74ms step:329/3200 train_loss:3.7412 train_time:1301815ms step_avg:4080.92ms step:330/3200 train_loss:4.1865 train_time:1305952ms step_avg:4081.10ms step:331/3200 train_loss:3.9384 train_time:1310093ms step_avg:4081.29ms step:332/3200 train_loss:3.8618 train_time:1314247ms step_avg:4081.51ms step:333/3200 train_loss:4.0207 train_time:1318409ms step_avg:4081.76ms step:334/3200 train_loss:3.9468 train_time:1322585ms step_avg:4082.05ms step:335/3200 train_loss:3.9760 train_time:1326767ms step_avg:4082.36ms step:336/3200 train_loss:4.1294 train_time:1330953ms step_avg:4082.68ms step:337/3200 train_loss:5.0012 train_time:1335175ms step_avg:4083.10ms step:338/3200 train_loss:4.1019 train_time:1339415ms step_avg:4083.58ms step:339/3200 train_loss:3.9257 train_time:1343629ms step_avg:4083.98ms step:340/3200 train_loss:3.9948 train_time:1347818ms step_avg:4084.30ms step:341/3200 train_loss:4.0647 train_time:1351942ms step_avg:4084.42ms step:342/3200 train_loss:3.8372 train_time:1356034ms step_avg:4084.44ms step:343/3200 train_loss:4.0600 train_time:1360109ms step_avg:4084.41ms step:344/3200 train_loss:4.0796 train_time:1364160ms step_avg:4084.31ms step:345/3200 train_loss:3.9585 train_time:1368198ms step_avg:4084.17ms step:346/3200 train_loss:3.8649 train_time:1372222ms step_avg:4083.99ms step:347/3200 train_loss:3.8687 train_time:1376250ms step_avg:4083.83ms step:348/3200 train_loss:4.0147 train_time:1380281ms step_avg:4083.67ms step:349/3200 train_loss:3.9308 train_time:1384307ms step_avg:4083.50ms step:350/3200 train_loss:3.9712 train_time:1388328ms step_avg:4083.32ms step:351/3200 train_loss:3.8117 train_time:1392347ms step_avg:4083.13ms step:352/3200 train_loss:3.9933 train_time:1396362ms step_avg:4082.93ms step:353/3200 train_loss:3.7327 
train_time:1400376ms step_avg:4082.73ms step:354/3200 train_loss:4.0591 train_time:1404390ms step_avg:4082.53ms step:355/3200 train_loss:4.0855 train_time:1408405ms step_avg:4082.33ms step:356/3200 train_loss:3.9374 train_time:1412419ms step_avg:4082.14ms step:357/3200 train_loss:3.9934 train_time:1416428ms step_avg:4081.93ms step:358/3200 train_loss:4.0119 train_time:1420436ms step_avg:4081.71ms step:359/3200 train_loss:3.7770 train_time:1424450ms step_avg:4081.52ms step:360/3200 train_loss:4.0251 train_time:1428465ms step_avg:4081.33ms step:361/3200 train_loss:3.8950 train_time:1432482ms step_avg:4081.15ms step:362/3200 train_loss:3.8057 train_time:1436501ms step_avg:4080.97ms step:363/3200 train_loss:3.9170 train_time:1440519ms step_avg:4080.79ms step:364/3200 train_loss:3.9634 train_time:1444547ms step_avg:4080.64ms step:365/3200 train_loss:3.8849 train_time:1448576ms step_avg:4080.50ms step:366/3200 train_loss:3.8126 train_time:1452607ms step_avg:4080.36ms step:367/3200 train_loss:3.7970 train_time:1456647ms step_avg:4080.24ms step:368/3200 train_loss:3.9807 train_time:1460683ms step_avg:4080.12ms step:369/3200 train_loss:3.8492 train_time:1464701ms step_avg:4079.95ms step:370/3200 train_loss:3.8545 train_time:1468730ms step_avg:4079.81ms step:371/3200 train_loss:4.0574 train_time:1472767ms step_avg:4079.69ms step:372/3200 train_loss:3.9308 train_time:1476814ms step_avg:4079.60ms step:373/3200 train_loss:4.2503 train_time:1480872ms step_avg:4079.54ms step:374/3200 train_loss:3.9660 train_time:1484953ms step_avg:4079.54ms step:375/3200 train_loss:4.0932 train_time:1489038ms step_avg:4079.56ms step:375/3200 val_loss:3.9125 train_time:1489039ms step_avg:4079.56ms step:376/3200 train_loss:4.1962 train_time:1493165ms step_avg:4079.69ms step:377/3200 train_loss:3.8236 train_time:1497248ms step_avg:4079.69ms step:378/3200 train_loss:3.9721 train_time:1501306ms step_avg:4079.64ms step:379/3200 train_loss:4.0371 train_time:1505342ms step_avg:4079.52ms step:380/3200 
train_loss:3.9652 train_time:1509368ms step_avg:4079.37ms step:381/3200 train_loss:3.9102 train_time:1513389ms step_avg:4079.22ms step:382/3200 train_loss:3.8293 train_time:1517427ms step_avg:4079.10ms step:383/3200 train_loss:3.7984 train_time:1521455ms step_avg:4078.97ms step:384/3200 train_loss:3.8774 train_time:1525481ms step_avg:4078.83ms step:385/3200 train_loss:3.7149 train_time:1529505ms step_avg:4078.68ms step:386/3200 train_loss:4.0216 train_time:1533521ms step_avg:4078.51ms step:387/3200 train_loss:3.8151 train_time:1537537ms step_avg:4078.35ms step:388/3200 train_loss:3.9272 train_time:1541549ms step_avg:4078.17ms step:389/3200 train_loss:3.9820 train_time:1545559ms step_avg:4077.99ms step:390/3200 train_loss:3.9651 train_time:1549572ms step_avg:4077.82ms step:391/3200 train_loss:3.7413 train_time:1553579ms step_avg:4077.63ms step:392/3200 train_loss:3.5766 train_time:1557592ms step_avg:4077.46ms step:393/3200 train_loss:3.7633 train_time:1561607ms step_avg:4077.30ms step:394/3200 train_loss:3.9939 train_time:1565624ms step_avg:4077.14ms step:395/3200 train_loss:3.8569 train_time:1569646ms step_avg:4077.00ms step:396/3200 train_loss:3.9063 train_time:1573667ms step_avg:4076.86ms step:397/3200 train_loss:3.7287 train_time:1577687ms step_avg:4076.71ms step:398/3200 train_loss:3.9131 train_time:1581716ms step_avg:4076.59ms step:399/3200 train_loss:3.8602 train_time:1585745ms step_avg:4076.46ms step:400/3200 train_loss:3.8285 train_time:1589775ms step_avg:4076.35ms step:401/3200 train_loss:3.8196 train_time:1593815ms step_avg:4076.25ms step:402/3200 train_loss:3.7863 train_time:1597841ms step_avg:4076.12ms step:403/3200 train_loss:4.0336 train_time:1601871ms step_avg:4076.01ms step:404/3200 train_loss:3.9603 train_time:1605908ms step_avg:4075.91ms step:405/3200 train_loss:3.9584 train_time:1609953ms step_avg:4075.83ms step:406/3200 train_loss:3.9216 train_time:1614009ms step_avg:4075.78ms step:407/3200 train_loss:3.8668 train_time:1618085ms 
step_avg:4075.78ms step:408/3200 train_loss:3.7894 train_time:1622169ms step_avg:4075.80ms step:409/3200 train_loss:3.8059 train_time:1626272ms step_avg:4075.87ms step:410/3200 train_loss:3.7293 train_time:1630404ms step_avg:4076.01ms step:411/3200 train_loss:3.8788 train_time:1634548ms step_avg:4076.18ms step:412/3200 train_loss:3.8859 train_time:1638734ms step_avg:4076.45ms step:413/3200 train_loss:3.8207 train_time:1642955ms step_avg:4076.81ms step:414/3200 train_loss:3.8204 train_time:1647150ms step_avg:4077.10ms step:415/3200 train_loss:3.7135 train_time:1651304ms step_avg:4077.29ms step:416/3200 train_loss:3.9187 train_time:1655393ms step_avg:4077.32ms step:417/3200 train_loss:4.0730 train_time:1659454ms step_avg:4077.28ms step:418/3200 train_loss:3.7833 train_time:1663491ms step_avg:4077.18ms step:419/3200 train_loss:3.8995 train_time:1667520ms step_avg:4077.07ms step:420/3200 train_loss:4.1327 train_time:1671546ms step_avg:4076.94ms step:421/3200 train_loss:3.8429 train_time:1675564ms step_avg:4076.80ms step:422/3200 train_loss:3.9999 train_time:1679575ms step_avg:4076.64ms step:423/3200 train_loss:3.7467 train_time:1683580ms step_avg:4076.47ms step:424/3200 train_loss:3.8069 train_time:1687574ms step_avg:4076.26ms step:425/3200 train_loss:3.6669 train_time:1691586ms step_avg:4076.11ms step:426/3200 train_loss:3.9785 train_time:1695591ms step_avg:4075.94ms step:427/3200 train_loss:3.9154 train_time:1699597ms step_avg:4075.77ms step:428/3200 train_loss:3.9724 train_time:1703598ms step_avg:4075.59ms step:429/3200 train_loss:3.9769 train_time:1707595ms step_avg:4075.41ms step:430/3200 train_loss:3.7270 train_time:1711592ms step_avg:4075.22ms step:431/3200 train_loss:3.6413 train_time:1715587ms step_avg:4075.03ms step:432/3200 train_loss:4.0417 train_time:1719580ms step_avg:4074.83ms step:433/3200 train_loss:3.9529 train_time:1723571ms step_avg:4074.64ms step:434/3200 train_loss:3.8786 train_time:1727561ms step_avg:4074.44ms step:435/3200 train_loss:3.6607 
train_time:1731554ms step_avg:4074.25ms step:436/3200 train_loss:3.9665 train_time:1735547ms step_avg:4074.05ms step:437/3200 train_loss:3.9347 train_time:1739545ms step_avg:4073.88ms step:438/3200 train_loss:3.8714 train_time:1743541ms step_avg:4073.69ms step:439/3200 train_loss:3.8788 train_time:1747541ms step_avg:4073.52ms step:440/3200 train_loss:3.8081 train_time:1751547ms step_avg:4073.36ms step:441/3200 train_loss:3.7865 train_time:1755548ms step_avg:4073.20ms step:442/3200 train_loss:3.9446 train_time:1759540ms step_avg:4073.01ms step:443/3200 train_loss:3.6723 train_time:1763528ms step_avg:4072.81ms step:444/3200 train_loss:3.7727 train_time:1767519ms step_avg:4072.62ms step:445/3200 train_loss:4.1408 train_time:1771516ms step_avg:4072.45ms step:446/3200 train_loss:3.7704 train_time:1775517ms step_avg:4072.29ms step:447/3200 train_loss:3.9947 train_time:1779523ms step_avg:4072.13ms step:448/3200 train_loss:4.0433 train_time:1783534ms step_avg:4072.00ms step:449/3200 train_loss:3.8696 train_time:1787553ms step_avg:4071.88ms step:450/3200 train_loss:3.9218 train_time:1791580ms step_avg:4071.77ms step:451/3200 train_loss:3.8574 train_time:1795605ms step_avg:4071.67ms step:452/3200 train_loss:3.8964 train_time:1799627ms step_avg:4071.55ms step:453/3200 train_loss:4.2493 train_time:1803658ms step_avg:4071.46ms step:454/3200 train_loss:3.6807 train_time:1807689ms step_avg:4071.37ms step:455/3200 train_loss:3.8284 train_time:1811744ms step_avg:4071.33ms step:456/3200 train_loss:3.9488 train_time:1815809ms step_avg:4071.32ms step:457/3200 train_loss:3.8242 train_time:1819890ms step_avg:4071.34ms step:458/3200 train_loss:3.9107 train_time:1823986ms step_avg:4071.40ms step:459/3200 train_loss:4.0022 train_time:1828104ms step_avg:4071.50ms step:460/3200 train_loss:3.6353 train_time:1832244ms step_avg:4071.65ms step:461/3200 train_loss:3.8253 train_time:1836432ms step_avg:4071.91ms step:462/3200 train_loss:3.9134 train_time:1840636ms step_avg:4072.20ms step:463/3200 
train_loss:3.7126 train_time:1844861ms step_avg:4072.54ms step:464/3200 train_loss:3.7908 train_time:1849089ms step_avg:4072.88ms step:465/3200 train_loss:4.0368 train_time:1853288ms step_avg:4073.16ms step:466/3200 train_loss:3.7665 train_time:1857484ms step_avg:4073.43ms step:467/3200 train_loss:3.7184 train_time:1861681ms step_avg:4073.70ms step:468/3200 train_loss:3.7055 train_time:1865820ms step_avg:4073.84ms step:469/3200 train_loss:4.1121 train_time:1869941ms step_avg:4073.95ms step:470/3200 train_loss:3.8932 train_time:1874066ms step_avg:4074.06ms step:471/3200 train_loss:3.6497 train_time:1878190ms step_avg:4074.16ms step:472/3200 train_loss:3.9589 train_time:1882306ms step_avg:4074.26ms step:473/3200 train_loss:3.7350 train_time:1886421ms step_avg:4074.34ms step:474/3200 train_loss:3.9479 train_time:1890533ms step_avg:4074.42ms step:475/3200 train_loss:3.9628 train_time:1894647ms step_avg:4074.51ms step:476/3200 train_loss:3.8444 train_time:1898767ms step_avg:4074.61ms step:477/3200 train_loss:4.0580 train_time:1902904ms step_avg:4074.74ms step:478/3200 train_loss:3.8946 train_time:1907044ms step_avg:4074.88ms step:479/3200 train_loss:4.0820 train_time:1911204ms step_avg:4075.06ms step:480/3200 train_loss:3.7414 train_time:1915384ms step_avg:4075.28ms step:481/3200 train_loss:3.7692 train_time:1919564ms step_avg:4075.51ms step:482/3200 train_loss:3.9487 train_time:1923756ms step_avg:4075.75ms step:483/3200 train_loss:3.8781 train_time:1927942ms step_avg:4075.99ms step:484/3200 train_loss:3.9329 train_time:1932139ms step_avg:4076.24ms step:485/3200 train_loss:3.8529 train_time:1936314ms step_avg:4076.45ms step:486/3200 train_loss:3.6453 train_time:1940452ms step_avg:4076.58ms step:487/3200 train_loss:3.7865 train_time:1944568ms step_avg:4076.66ms step:488/3200 train_loss:3.9007 train_time:1948669ms step_avg:4076.71ms step:489/3200 train_loss:3.8836 train_time:1952754ms step_avg:4076.73ms step:490/3200 train_loss:4.1618 train_time:1956832ms 
step_avg:4076.73ms step:491/3200 train_loss:3.7181 train_time:1960905ms step_avg:4076.73ms step:492/3200 train_loss:3.7238 train_time:1964969ms step_avg:4076.70ms step:493/3200 train_loss:3.8603 train_time:1969033ms step_avg:4076.67ms step:494/3200 train_loss:3.6279 train_time:1973100ms step_avg:4076.65ms step:495/3200 train_loss:3.9490 train_time:1977158ms step_avg:4076.62ms step:496/3200 train_loss:4.0921 train_time:1981219ms step_avg:4076.58ms step:497/3200 train_loss:3.8566 train_time:1985279ms step_avg:4076.55ms step:498/3200 train_loss:3.8116 train_time:1989345ms step_avg:4076.53ms step:499/3200 train_loss:3.8227 train_time:1993415ms step_avg:4076.51ms step:500/3200 train_loss:3.6910 train_time:1997488ms step_avg:4076.51ms step:500/3200 val_loss:3.8130 train_time:1997488ms step_avg:4076.51ms step:501/3200 train_loss:3.7603 train_time:2001553ms step_avg:4076.48ms step:502/3200 train_loss:3.7526 train_time:2005610ms step_avg:4076.44ms step:503/3200 train_loss:3.9485 train_time:2009668ms step_avg:4076.41ms step:504/3200 train_loss:3.6912 train_time:2013725ms step_avg:4076.37ms step:505/3200 train_loss:4.0804 train_time:2017788ms step_avg:4076.34ms step:506/3200 train_loss:3.7774 train_time:2021851ms step_avg:4076.31ms step:507/3200 train_loss:4.0306 train_time:2025919ms step_avg:4076.30ms step:508/3200 train_loss:4.1475 train_time:2029994ms step_avg:4076.29ms step:509/3200 train_loss:3.6497 train_time:2034082ms step_avg:4076.32ms step:510/3200 train_loss:3.9593 train_time:2038169ms step_avg:4076.34ms step:511/3200 train_loss:3.7679 train_time:2042267ms step_avg:4076.38ms step:512/3200 train_loss:3.7783 train_time:2046380ms step_avg:4076.45ms step:513/3200 train_loss:3.5584 train_time:2050504ms step_avg:4076.55ms step:514/3200 train_loss:3.7468 train_time:2054647ms step_avg:4076.68ms step:515/3200 train_loss:4.3355 train_time:2058823ms step_avg:4076.88ms step:516/3200 train_loss:3.9006 train_time:2063005ms step_avg:4077.08ms step:517/3200 train_loss:3.8148 
train_time:2067204ms step_avg:4077.33ms step:518/3200 train_loss:3.7867 train_time:2071404ms step_avg:4077.57ms step:519/3200 train_loss:3.7093 train_time:2075600ms step_avg:4077.80ms step:520/3200 train_loss:4.1163 train_time:2079776ms step_avg:4077.99ms step:521/3200 train_loss:3.7919 train_time:2083953ms step_avg:4078.19ms step:522/3200 train_loss:3.6749 train_time:2088098ms step_avg:4078.32ms step:523/3200 train_loss:4.0267 train_time:2092234ms step_avg:4078.43ms step:524/3200 train_loss:3.6350 train_time:2096370ms step_avg:4078.54ms step:525/3200 train_loss:3.7325 train_time:2100491ms step_avg:4078.62ms step:526/3200 train_loss:3.9443 train_time:2104610ms step_avg:4078.70ms step:527/3200 train_loss:3.8942 train_time:2108730ms step_avg:4078.78ms step:528/3200 train_loss:3.7760 train_time:2112858ms step_avg:4078.88ms step:529/3200 train_loss:3.8769 train_time:2116983ms step_avg:4078.97ms step:530/3200 train_loss:3.7776 train_time:2121121ms step_avg:4079.08ms step:531/3200 train_loss:3.8195 train_time:2125263ms step_avg:4079.20ms step:532/3200 train_loss:3.8464 train_time:2129413ms step_avg:4079.34ms step:533/3200 train_loss:3.8471 train_time:2133599ms step_avg:4079.54ms step:534/3200 train_loss:3.8371 train_time:2137782ms step_avg:4079.74ms step:535/3200 train_loss:3.8241 train_time:2141982ms step_avg:4079.97ms step:536/3200 train_loss:3.8009 train_time:2146175ms step_avg:4080.18ms step:537/3200 train_loss:3.9318 train_time:2150297ms step_avg:4080.26ms step:538/3200 train_loss:3.7124 train_time:2154396ms step_avg:4080.29ms step:539/3200 train_loss:3.8581 train_time:2158473ms step_avg:4080.29ms step:540/3200 train_loss:3.8397 train_time:2162532ms step_avg:4080.25ms step:541/3200 train_loss:3.7765 train_time:2166572ms step_avg:4080.17ms step:542/3200 train_loss:3.8848 train_time:2170606ms step_avg:4080.09ms step:543/3200 train_loss:3.8305 train_time:2174633ms step_avg:4079.99ms step:544/3200 train_loss:3.7830 train_time:2178654ms step_avg:4079.88ms step:545/3200 
train_loss:3.8555 train_time:2182670ms step_avg:4079.76ms step:546/3200 train_loss:3.9582 train_time:2186680ms step_avg:4079.63ms step:547/3200 train_loss:3.7848 train_time:2190685ms step_avg:4079.49ms step:548/3200 train_loss:4.1319 train_time:2194691ms step_avg:4079.35ms step:549/3200 train_loss:3.4314 train_time:2198695ms step_avg:4079.21ms step:550/3200 train_loss:3.9066 train_time:2202696ms step_avg:4079.07ms step:551/3200 train_loss:3.9062 train_time:2206697ms step_avg:4078.92ms step:552/3200 train_loss:3.6991 train_time:2210695ms step_avg:4078.77ms step:553/3200 train_loss:3.9045 train_time:2214701ms step_avg:4078.64ms step:554/3200 train_loss:3.7325 train_time:2218709ms step_avg:4078.51ms step:555/3200 train_loss:3.7790 train_time:2222720ms step_avg:4078.38ms step:556/3200 train_loss:4.0916 train_time:2226734ms step_avg:4078.27ms step:557/3200 train_loss:3.7751 train_time:2230751ms step_avg:4078.16ms step:558/3200 train_loss:3.7495 train_time:2234767ms step_avg:4078.04ms step:559/3200 train_loss:3.7444 train_time:2238794ms step_avg:4077.95ms step:560/3200 train_loss:3.7439 train_time:2242825ms step_avg:4077.86ms step:561/3200 train_loss:3.7391 train_time:2246865ms step_avg:4077.79ms step:562/3200 train_loss:3.7029 train_time:2250916ms step_avg:4077.75ms step:563/3200 train_loss:3.5403 train_time:2254975ms step_avg:4077.71ms step:564/3200 train_loss:3.9269 train_time:2259049ms step_avg:4077.71ms step:565/3200 train_loss:3.6902 train_time:2263136ms step_avg:4077.72ms step:566/3200 train_loss:3.8257 train_time:2267231ms step_avg:4077.75ms step:567/3200 train_loss:4.0711 train_time:2271354ms step_avg:4077.84ms step:568/3200 train_loss:3.7835 train_time:2275497ms step_avg:4077.95ms step:569/3200 train_loss:4.5050 train_time:2279653ms step_avg:4078.09ms step:570/3200 train_loss:3.7876 train_time:2283835ms step_avg:4078.28ms step:571/3200 train_loss:3.7644 train_time:2288035ms step_avg:4078.49ms step:572/3200 train_loss:3.7260 train_time:2292233ms 
step_avg:4078.71ms step:573/3200 train_loss:3.8263 train_time:2296549ms step_avg:4079.13ms step:574/3200 train_loss:3.7327 train_time:2300726ms step_avg:4079.30ms step:575/3200 train_loss:3.6171 train_time:2304876ms step_avg:4079.43ms step:576/3200 train_loss:3.6488 train_time:2309008ms step_avg:4079.52ms step:577/3200 train_loss:3.7574 train_time:2313127ms step_avg:4079.59ms step:578/3200 train_loss:3.7844 train_time:2317237ms step_avg:4079.64ms step:579/3200 train_loss:3.6824 train_time:2321341ms step_avg:4079.69ms step:580/3200 train_loss:3.9346 train_time:2325448ms step_avg:4079.73ms step:581/3200 train_loss:3.7952 train_time:2329549ms step_avg:4079.77ms step:582/3200 train_loss:3.7182 train_time:2333652ms step_avg:4079.81ms step:583/3200 train_loss:3.9549 train_time:2337756ms step_avg:4079.85ms step:584/3200 train_loss:3.6274 train_time:2341863ms step_avg:4079.90ms step:585/3200 train_loss:3.8626 train_time:2345977ms step_avg:4079.96ms step:586/3200 train_loss:3.6011 train_time:2350100ms step_avg:4080.04ms step:587/3200 train_loss:3.5959 train_time:2354229ms step_avg:4080.12ms step:588/3200 train_loss:3.6734 train_time:2358373ms step_avg:4080.23ms step:589/3200 train_loss:3.7814 train_time:2362526ms step_avg:4080.36ms step:590/3200 train_loss:3.8467 train_time:2366706ms step_avg:4080.53ms step:591/3200 train_loss:3.8538 train_time:2370887ms step_avg:4080.70ms step:592/3200 train_loss:3.7125 train_time:2375088ms step_avg:4080.91ms step:593/3200 train_loss:3.9345 train_time:2379284ms step_avg:4081.11ms step:594/3200 train_loss:3.7495 train_time:2383460ms step_avg:4081.27ms step:595/3200 train_loss:3.8319 train_time:2387607ms step_avg:4081.38ms step:596/3200 train_loss:4.4130 train_time:2391734ms step_avg:4081.46ms step:597/3200 train_loss:3.5165 train_time:2395839ms step_avg:4081.50ms step:598/3200 train_loss:3.7387 train_time:2399931ms step_avg:4081.52ms step:599/3200 train_loss:3.8324 train_time:2404016ms step_avg:4081.52ms step:600/3200 train_loss:3.7288 
train_time:2408094ms step_avg:4081.52ms step:601/3200 train_loss:3.6419 train_time:2412166ms step_avg:4081.50ms step:602/3200 train_loss:3.7258 train_time:2416232ms step_avg:4081.47ms step:603/3200 train_loss:3.8446 train_time:2420300ms step_avg:4081.45ms step:604/3200 train_loss:3.7053 train_time:2424365ms step_avg:4081.42ms step:605/3200 train_loss:4.1835 train_time:2428426ms step_avg:4081.39ms step:606/3200 train_loss:3.7315 train_time:2432492ms step_avg:4081.36ms step:607/3200 train_loss:3.6782 train_time:2436561ms step_avg:4081.34ms step:608/3200 train_loss:3.8263 train_time:2440628ms step_avg:4081.32ms step:609/3200 train_loss:3.6446 train_time:2444708ms step_avg:4081.32ms step:610/3200 train_loss:3.6194 train_time:2448794ms step_avg:4081.32ms step:611/3200 train_loss:3.8483 train_time:2452882ms step_avg:4081.33ms step:612/3200 train_loss:3.7671 train_time:2456987ms step_avg:4081.37ms step:613/3200 train_loss:3.5444 train_time:2461098ms step_avg:4081.42ms step:614/3200 train_loss:3.7733 train_time:2465233ms step_avg:4081.51ms step:615/3200 train_loss:3.6388 train_time:2469385ms step_avg:4081.63ms step:616/3200 train_loss:4.0457 train_time:2473552ms step_avg:4081.77ms step:617/3200 train_loss:3.5856 train_time:2477733ms step_avg:4081.93ms step:618/3200 train_loss:3.6583 train_time:2481910ms step_avg:4082.09ms step:619/3200 train_loss:3.7739 train_time:2486059ms step_avg:4082.20ms step:620/3200 train_loss:3.6659 train_time:2490166ms step_avg:4082.24ms step:621/3200 train_loss:3.6970 train_time:2494248ms step_avg:4082.24ms step:622/3200 train_loss:3.7681 train_time:2498321ms step_avg:4082.22ms step:623/3200 train_loss:3.5850 train_time:2502377ms step_avg:4082.18ms step:624/3200 train_loss:3.7933 train_time:2506428ms step_avg:4082.13ms step:625/3200 train_loss:3.3688 train_time:2510461ms step_avg:4082.05ms step:625/3200 val_loss:3.7382 train_time:2510461ms step_avg:4082.05ms step:626/3200 train_loss:3.7246 train_time:2514479ms step_avg:4081.95ms step:627/3200 
train_loss:3.8712 train_time:2518496ms step_avg:4081.84ms step:628/3200 train_loss:3.9221 train_time:2522514ms step_avg:4081.74ms step:629/3200 train_loss:3.6581 train_time:2526534ms step_avg:4081.64ms step:630/3200 train_loss:3.6249 train_time:2530560ms step_avg:4081.55ms step:631/3200 train_loss:3.7852 train_time:2534580ms step_avg:4081.45ms step:632/3200 train_loss:3.7886 train_time:2538611ms step_avg:4081.37ms step:633/3200 train_loss:3.7565 train_time:2542640ms step_avg:4081.28ms step:634/3200 train_loss:3.7917 train_time:2546683ms step_avg:4081.22ms step:635/3200 train_loss:3.7813 train_time:2550732ms step_avg:4081.17ms step:636/3200 train_loss:3.2973 train_time:2554793ms step_avg:4081.14ms step:637/3200 train_loss:3.7905 train_time:2558869ms step_avg:4081.13ms step:638/3200 train_loss:3.7854 train_time:2562950ms step_avg:4081.13ms step:639/3200 train_loss:3.6447 train_time:2567048ms step_avg:4081.16ms step:640/3200 train_loss:3.9010 train_time:2571163ms step_avg:4081.21ms step:641/3200 train_loss:3.8775 train_time:2575266ms step_avg:4081.25ms step:642/3200 train_loss:3.7270 train_time:2579405ms step_avg:4081.34ms step:643/3200 train_loss:3.8186 train_time:2583586ms step_avg:4081.49ms step:644/3200 train_loss:3.7589 train_time:2587767ms step_avg:4081.65ms step:645/3200 train_loss:4.0638 train_time:2591961ms step_avg:4081.83ms step:646/3200 train_loss:3.8871 train_time:2596090ms step_avg:4081.90ms step:647/3200 train_loss:3.7905 train_time:2600174ms step_avg:4081.91ms step:648/3200 train_loss:3.7225 train_time:2604229ms step_avg:4081.86ms step:649/3200 train_loss:4.1011 train_time:2608261ms step_avg:4081.79ms step:650/3200 train_loss:3.7121 train_time:2612276ms step_avg:4081.68ms step:651/3200 train_loss:3.7917 train_time:2616277ms step_avg:4081.56ms step:652/3200 train_loss:3.8538 train_time:2620300ms step_avg:4081.46ms step:653/3200 train_loss:3.6834 train_time:2624308ms step_avg:4081.35ms step:654/3200 train_loss:3.6746 train_time:2628301ms 
step_avg:4081.21ms step:655/3200 train_loss:3.6487 train_time:2632294ms step_avg:4081.08ms step:656/3200 train_loss:3.8284 train_time:2636283ms step_avg:4080.93ms step:657/3200 train_loss:3.8116 train_time:2640266ms step_avg:4080.78ms step:658/3200 train_loss:3.7514 train_time:2644245ms step_avg:4080.62ms step:659/3200 train_loss:3.9592 train_time:2648221ms step_avg:4080.46ms step:660/3200 train_loss:3.8091 train_time:2652200ms step_avg:4080.31ms step:661/3200 train_loss:3.7630 train_time:2656175ms step_avg:4080.15ms step:662/3200 train_loss:3.7367 train_time:2660147ms step_avg:4079.98ms step:663/3200 train_loss:3.8553 train_time:2664122ms step_avg:4079.82ms step:664/3200 train_loss:3.6208 train_time:2668098ms step_avg:4079.66ms step:665/3200 train_loss:3.6346 train_time:2672073ms step_avg:4079.50ms step:666/3200 train_loss:3.7809 train_time:2676052ms step_avg:4079.35ms step:667/3200 train_loss:3.6623 train_time:2680026ms step_avg:4079.19ms step:668/3200 train_loss:3.7431 train_time:2684004ms step_avg:4079.03ms step:669/3200 train_loss:3.7613 train_time:2687982ms step_avg:4078.88ms step:670/3200 train_loss:3.7352 train_time:2691962ms step_avg:4078.73ms step:671/3200 train_loss:3.8071 train_time:2695946ms step_avg:4078.59ms step:672/3200 train_loss:3.8818 train_time:2699930ms step_avg:4078.44ms step:673/3200 train_loss:3.8780 train_time:2703924ms step_avg:4078.32ms step:674/3200 train_loss:3.6687 train_time:2707915ms step_avg:4078.19ms step:675/3200 train_loss:3.7702 train_time:2711914ms step_avg:4078.07ms step:676/3200 train_loss:3.5586 train_time:2715913ms step_avg:4077.95ms step:677/3200 train_loss:3.7912 train_time:2719919ms step_avg:4077.84ms step:678/3200 train_loss:3.8087 train_time:2723930ms step_avg:4077.74ms step:679/3200 train_loss:3.9421 train_time:2727949ms step_avg:4077.65ms step:680/3200 train_loss:3.7436 train_time:2731947ms step_avg:4077.53ms step:681/3200 train_loss:3.5907 train_time:2735944ms step_avg:4077.41ms step:682/3200 train_loss:3.6008 
train_time:2739955ms step_avg:4077.31ms step:683/3200 train_loss:3.7651 train_time:2743974ms step_avg:4077.23ms step:684/3200 train_loss:3.7366 train_time:2748005ms step_avg:4077.16ms step:685/3200 train_loss:3.6148 train_time:2752038ms step_avg:4077.09ms step:686/3200 train_loss:3.8123 train_time:2756092ms step_avg:4077.06ms step:687/3200 train_loss:3.6293 train_time:2760158ms step_avg:4077.04ms step:688/3200 train_loss:3.8758 train_time:2764238ms step_avg:4077.05ms step:689/3200 train_loss:3.6358 train_time:2768338ms step_avg:4077.08ms step:690/3200 train_loss:3.8163 train_time:2772463ms step_avg:4077.15ms step:691/3200 train_loss:3.7325 train_time:2776607ms step_avg:4077.25ms step:692/3200 train_loss:3.6129 train_time:2780784ms step_avg:4077.40ms step:693/3200 train_loss:3.6592 train_time:2784965ms step_avg:4077.55ms step:694/3200 train_loss:3.6729 train_time:2789158ms step_avg:4077.72ms step:695/3200 train_loss:3.7895 train_time:2793286ms step_avg:4077.79ms step:696/3200 train_loss:3.6848 train_time:2797366ms step_avg:4077.79ms step:697/3200 train_loss:3.6745 train_time:2801417ms step_avg:4077.75ms step:698/3200 train_loss:3.8854 train_time:2805441ms step_avg:4077.68ms step:699/3200 train_loss:3.6708 train_time:2809452ms step_avg:4077.58ms step:700/3200 train_loss:3.7206 train_time:2813446ms step_avg:4077.46ms step:701/3200 train_loss:3.9868 train_time:2817457ms step_avg:4077.36ms step:702/3200 train_loss:3.6733 train_time:2821461ms step_avg:4077.26ms step:703/3200 train_loss:3.7543 train_time:2825456ms step_avg:4077.14ms step:704/3200 train_loss:3.8161 train_time:2829448ms step_avg:4077.01ms step:705/3200 train_loss:3.8028 train_time:2833432ms step_avg:4076.88ms step:706/3200 train_loss:3.7506 train_time:2837415ms step_avg:4076.75ms step:707/3200 train_loss:3.6362 train_time:2841395ms step_avg:4076.61ms step:708/3200 train_loss:3.6415 train_time:2845373ms step_avg:4076.47ms step:709/3200 train_loss:4.1922 train_time:2849347ms step_avg:4076.32ms step:710/3200 
train_loss:3.7702 train_time:2853320ms step_avg:4076.17ms step:711/3200 train_loss:3.8100 train_time:2857295ms step_avg:4076.03ms step:712/3200 train_loss:3.8916 train_time:2861272ms step_avg:4075.89ms step:713/3200 train_loss:3.7454 train_time:2865242ms step_avg:4075.74ms step:714/3200 train_loss:3.7420 train_time:2869219ms step_avg:4075.59ms step:715/3200 train_loss:4.0057 train_time:2873192ms step_avg:4075.45ms step:716/3200 train_loss:3.8395 train_time:2877165ms step_avg:4075.30ms step:717/3200 train_loss:4.0350 train_time:2881143ms step_avg:4075.17ms step:718/3200 train_loss:3.7551 train_time:2885122ms step_avg:4075.03ms step:719/3200 train_loss:3.7738 train_time:2889098ms step_avg:4074.89ms step:720/3200 train_loss:3.6323 train_time:2893078ms step_avg:4074.76ms step:721/3200 train_loss:3.6793 train_time:2897060ms step_avg:4074.63ms step:722/3200 train_loss:3.6103 train_time:2901045ms step_avg:4074.50ms step:723/3200 train_loss:3.8921 train_time:2905037ms step_avg:4074.39ms step:724/3200 train_loss:3.7629 train_time:2909034ms step_avg:4074.28ms step:725/3200 train_loss:3.5140 train_time:2913036ms step_avg:4074.18ms step:726/3200 train_loss:3.6419 train_time:2917037ms step_avg:4074.07ms step:727/3200 train_loss:3.5200 train_time:2921048ms step_avg:4073.99ms step:728/3200 train_loss:3.6644 train_time:2925061ms step_avg:4073.90ms step:729/3200 train_loss:3.6410 train_time:2929057ms step_avg:4073.79ms step:730/3200 train_loss:3.5909 train_time:2933060ms step_avg:4073.69ms step:731/3200 train_loss:3.7216 train_time:2937073ms step_avg:4073.61ms step:732/3200 train_loss:3.6157 train_time:2941091ms step_avg:4073.53ms step:733/3200 train_loss:3.7641 train_time:2945120ms step_avg:4073.47ms step:734/3200 train_loss:3.7483 train_time:2949154ms step_avg:4073.42ms step:735/3200 train_loss:3.7575 train_time:2953208ms step_avg:4073.39ms step:736/3200 train_loss:3.7004 train_time:2957274ms step_avg:4073.38ms step:737/3200 train_loss:3.7204 train_time:2961363ms 
step_avg:4073.40ms step:738/3200 train_loss:3.5952 train_time:2965463ms step_avg:4073.44ms step:739/3200 train_loss:3.6396 train_time:2969597ms step_avg:4073.52ms step:740/3200 train_loss:3.6480 train_time:2973762ms step_avg:4073.65ms step:741/3200 train_loss:3.7094 train_time:2977966ms step_avg:4073.83ms step:742/3200 train_loss:3.7221 train_time:2982182ms step_avg:4074.02ms step:743/3200 train_loss:3.7536 train_time:2986384ms step_avg:4074.19ms step:744/3200 train_loss:3.6310 train_time:2990579ms step_avg:4074.36ms step:745/3200 train_loss:3.7031 train_time:2994730ms step_avg:4074.46ms step:746/3200 train_loss:3.5315 train_time:2998825ms step_avg:4074.49ms step:747/3200 train_loss:3.6461 train_time:3002899ms step_avg:4074.49ms step:748/3200 train_loss:3.6444 train_time:3006952ms step_avg:4074.46ms step:749/3200 train_loss:3.6904 train_time:3010988ms step_avg:4074.41ms step:750/3200 train_loss:3.6537 train_time:3015010ms step_avg:4074.34ms step:750/3200 val_loss:3.6825 train_time:3015010ms step_avg:4074.34ms step:751/3200 train_loss:3.7307 train_time:3019001ms step_avg:4074.23ms step:752/3200 train_loss:3.7611 train_time:3022986ms step_avg:4074.10ms step:753/3200 train_loss:3.7300 train_time:3026971ms step_avg:4073.98ms step:754/3200 train_loss:3.7887 train_time:3030970ms step_avg:4073.88ms step:755/3200 train_loss:3.8336 train_time:3034984ms step_avg:4073.80ms step:756/3200 train_loss:3.5575 train_time:3038999ms step_avg:4073.73ms step:757/3200 train_loss:3.6716 train_time:3043015ms step_avg:4073.65ms step:758/3200 train_loss:3.8369 train_time:3047037ms step_avg:4073.58ms step:759/3200 train_loss:3.4428 train_time:3051064ms step_avg:4073.52ms step:760/3200 train_loss:3.7298 train_time:3055073ms step_avg:4073.43ms step:761/3200 train_loss:3.7432 train_time:3059073ms step_avg:4073.33ms step:762/3200 train_loss:3.7086 train_time:3063085ms step_avg:4073.25ms step:763/3200 train_loss:3.6171 train_time:3067100ms step_avg:4073.17ms step:764/3200 train_loss:3.6196 
train_time:3071124ms step_avg:4073.11ms step:765/3200 train_loss:3.6525 train_time:3075155ms step_avg:4073.05ms step:766/3200 train_loss:3.7459 train_time:3079194ms step_avg:4073.01ms step:767/3200 train_loss:3.9041 train_time:3083241ms step_avg:4072.97ms step:768/3200 train_loss:4.8675 train_time:3087295ms step_avg:4072.95ms step:769/3200 train_loss:3.9128 train_time:3091364ms step_avg:4072.94ms step:770/3200 train_loss:3.7256 train_time:3095447ms step_avg:4072.96ms step:771/3200 train_loss:3.6486 train_time:3099551ms step_avg:4073.00ms step:772/3200 train_loss:3.6059 train_time:3103677ms step_avg:4073.07ms step:773/3200 train_loss:3.5574 train_time:3107816ms step_avg:4073.15ms step:774/3200 train_loss:3.4946 train_time:3111998ms step_avg:4073.30ms step:775/3200 train_loss:3.7627 train_time:3116181ms step_avg:4073.44ms step:776/3200 train_loss:3.6398 train_time:3120379ms step_avg:4073.60ms step:777/3200 train_loss:3.8875 train_time:3124574ms step_avg:4073.76ms step:778/3200 train_loss:3.7652 train_time:3128734ms step_avg:4073.87ms step:779/3200 train_loss:3.6074 train_time:3132846ms step_avg:4073.92ms step:780/3200 train_loss:3.7236 train_time:3136925ms step_avg:4073.93ms step:781/3200 train_loss:3.7255 train_time:3140979ms step_avg:4073.90ms step:782/3200 train_loss:3.7569 train_time:3145013ms step_avg:4073.85ms step:783/3200 train_loss:3.7107 train_time:3149036ms step_avg:4073.79ms step:784/3200 train_loss:3.6888 train_time:3153045ms step_avg:4073.70ms step:785/3200 train_loss:3.6705 train_time:3157043ms step_avg:4073.60ms step:786/3200 train_loss:3.7153 train_time:3161035ms step_avg:4073.50ms step:787/3200 train_loss:3.5648 train_time:3165019ms step_avg:4073.38ms step:788/3200 train_loss:3.6093 train_time:3169024ms step_avg:4073.30ms step:789/3200 train_loss:3.5688 train_time:3173027ms step_avg:4073.21ms step:790/3200 train_loss:3.6945 train_time:3177026ms step_avg:4073.11ms step:791/3200 train_loss:3.8745 train_time:3181024ms step_avg:4073.01ms step:792/3200 
train_loss:3.6871 train_time:3185015ms step_avg:4072.91ms step:793/3200 train_loss:3.9634 train_time:3189007ms step_avg:4072.81ms step:794/3200 train_loss:3.7142 train_time:3192998ms step_avg:4072.70ms step:795/3200 train_loss:3.9059 train_time:3196991ms step_avg:4072.60ms step:796/3200 train_loss:3.7772 train_time:3200986ms step_avg:4072.50ms step:797/3200 train_loss:3.5733 train_time:3204979ms step_avg:4072.40ms step:798/3200 train_loss:3.6698 train_time:3208975ms step_avg:4072.30ms step:799/3200 train_loss:3.9118 train_time:3212973ms step_avg:4072.21ms step:800/3200 train_loss:3.6919 train_time:3216971ms step_avg:4072.12ms step:801/3200 train_loss:3.7868 train_time:3220974ms step_avg:4072.03ms step:802/3200 train_loss:3.6332 train_time:3224980ms step_avg:4071.94ms step:803/3200 train_loss:3.7987 train_time:3228993ms step_avg:4071.87ms step:804/3200 train_loss:3.6220 train_time:3232989ms step_avg:4071.77ms step:805/3200 train_loss:3.6743 train_time:3236980ms step_avg:4071.67ms step:806/3200 train_loss:3.7742 train_time:3240976ms step_avg:4071.58ms step:807/3200 train_loss:3.9590 train_time:3244977ms step_avg:4071.49ms step:808/3200 train_loss:3.6815 train_time:3248985ms step_avg:4071.41ms step:809/3200 train_loss:3.6671 train_time:3253001ms step_avg:4071.34ms step:810/3200 train_loss:3.6774 train_time:3257026ms step_avg:4071.28ms step:811/3200 train_loss:3.5694 train_time:3261057ms step_avg:4071.23ms step:812/3200 train_loss:3.6098 train_time:3265098ms step_avg:4071.19ms step:813/3200 train_loss:3.6175 train_time:3269149ms step_avg:4071.17ms step:814/3200 train_loss:3.7067 train_time:3273211ms step_avg:4071.16ms step:815/3200 train_loss:3.7299 train_time:3277287ms step_avg:4071.16ms step:816/3200 train_loss:3.5587 train_time:3281392ms step_avg:4071.21ms step:817/3200 train_loss:3.6519 train_time:3285515ms step_avg:4071.27ms step:818/3200 train_loss:3.7034 train_time:3289666ms step_avg:4071.37ms step:819/3200 train_loss:3.6482 train_time:3293840ms 
step_avg:4071.50ms step:820/3200 train_loss:3.5894 train_time:3298022ms step_avg:4071.63ms step:821/3200 train_loss:3.5464 train_time:3302220ms step_avg:4071.79ms step:822/3200 train_loss:3.8758 train_time:3306415ms step_avg:4071.94ms step:823/3200 train_loss:3.6109 train_time:3310572ms step_avg:4072.04ms step:824/3200 train_loss:3.6657 train_time:3314682ms step_avg:4072.09ms step:825/3200 train_loss:3.6087 train_time:3318763ms step_avg:4072.10ms step:826/3200 train_loss:3.6610 train_time:3322821ms step_avg:4072.08ms step:827/3200 train_loss:3.6833 train_time:3326864ms step_avg:4072.05ms step:828/3200 train_loss:3.6456 train_time:3330884ms step_avg:4071.99ms step:829/3200 train_loss:3.7802 train_time:3334897ms step_avg:4071.91ms step:830/3200 train_loss:3.6901 train_time:3338899ms step_avg:4071.83ms step:831/3200 train_loss:3.5344 train_time:3342895ms step_avg:4071.74ms step:832/3200 train_loss:4.0964 train_time:3346882ms step_avg:4071.63ms step:833/3200 train_loss:3.5549 train_time:3350870ms step_avg:4071.53ms step:834/3200 train_loss:3.7640 train_time:3354852ms step_avg:4071.42ms step:835/3200 train_loss:3.6765 train_time:3358835ms step_avg:4071.31ms step:836/3200 train_loss:3.6634 train_time:3362824ms step_avg:4071.22ms step:837/3200 train_loss:3.7799 train_time:3366833ms step_avg:4071.14ms step:838/3200 train_loss:3.7056 train_time:3370834ms step_avg:4071.06ms step:839/3200 train_loss:3.6767 train_time:3374835ms step_avg:4070.97ms step:840/3200 train_loss:3.8318 train_time:3378837ms step_avg:4070.89ms step:841/3200 train_loss:3.6672 train_time:3382838ms step_avg:4070.80ms step:842/3200 train_loss:3.5343 train_time:3386846ms step_avg:4070.73ms step:843/3200 train_loss:3.7606 train_time:3390848ms step_avg:4070.65ms step:844/3200 train_loss:3.7202 train_time:3394859ms step_avg:4070.57ms step:845/3200 train_loss:3.6608 train_time:3398877ms step_avg:4070.51ms step:846/3200 train_loss:3.6254 train_time:3402865ms step_avg:4070.41ms step:847/3200 train_loss:3.5533 
train_time:3406859ms step_avg:4070.32ms step:848/3200 train_loss:3.7805 train_time:3410857ms step_avg:4070.23ms step:849/3200 train_loss:3.6887 train_time:3414853ms step_avg:4070.15ms step:850/3200 train_loss:3.7697 train_time:3418860ms step_avg:4070.07ms step:851/3200 train_loss:3.6601 train_time:3422874ms step_avg:4070.01ms step:852/3200 train_loss:3.9548 train_time:3426895ms step_avg:4069.95ms step:853/3200 train_loss:3.6854 train_time:3430925ms step_avg:4069.90ms step:854/3200 train_loss:3.7461 train_time:3434967ms step_avg:4069.87ms step:855/3200 train_loss:3.5342 train_time:3439018ms step_avg:4069.84ms step:856/3200 train_loss:3.5733 train_time:3443084ms step_avg:4069.84ms step:857/3200 train_loss:3.6734 train_time:3447153ms step_avg:4069.84ms step:858/3200 train_loss:3.7162 train_time:3451244ms step_avg:4069.86ms step:859/3200 train_loss:3.6412 train_time:3455350ms step_avg:4069.91ms step:860/3200 train_loss:3.7285 train_time:3459485ms step_avg:4069.98ms step:861/3200 train_loss:3.5441 train_time:3463645ms step_avg:4070.09ms step:862/3200 train_loss:3.7085 train_time:3467826ms step_avg:4070.22ms step:863/3200 train_loss:3.6038 train_time:3472034ms step_avg:4070.38ms step:864/3200 train_loss:3.9166 train_time:3476250ms step_avg:4070.55ms step:865/3200 train_loss:3.7401 train_time:3480445ms step_avg:4070.70ms step:866/3200 train_loss:3.5873 train_time:3484617ms step_avg:4070.81ms step:867/3200 train_loss:3.6272 train_time:3488730ms step_avg:4070.86ms step:868/3200 train_loss:3.5927 train_time:3492800ms step_avg:4070.86ms step:869/3200 train_loss:3.7753 train_time:3496847ms step_avg:4070.83ms step:870/3200 train_loss:3.6200 train_time:3500870ms step_avg:4070.78ms step:871/3200 train_loss:3.6201 train_time:3504877ms step_avg:4070.71ms step:872/3200 train_loss:3.6109 train_time:3508874ms step_avg:4070.62ms step:873/3200 train_loss:3.8210 train_time:3512863ms step_avg:4070.53ms step:874/3200 train_loss:3.6671 train_time:3516841ms step_avg:4070.42ms step:875/3200 
train_loss:3.7532 train_time:3520825ms step_avg:4070.32ms step:875/3200 val_loss:3.6385 train_time:3520825ms step_avg:4070.32ms step:876/3200 train_loss:3.6981 train_time:3524805ms step_avg:4070.21ms step:877/3200 train_loss:3.6224 train_time:3528780ms step_avg:4070.10ms step:878/3200 train_loss:3.5923 train_time:3532753ms step_avg:4069.99ms step:879/3200 train_loss:3.5658 train_time:3536727ms step_avg:4069.88ms step:880/3200 train_loss:3.5117 train_time:3540703ms step_avg:4069.77ms step:881/3200 train_loss:3.7036 train_time:3544683ms step_avg:4069.67ms step:882/3200 train_loss:3.6621 train_time:3548665ms step_avg:4069.57ms step:883/3200 train_loss:3.5375 train_time:3552645ms step_avg:4069.47ms step:884/3200 train_loss:3.5833 train_time:3556627ms step_avg:4069.37ms step:885/3200 train_loss:3.7035 train_time:3560610ms step_avg:4069.27ms step:886/3200 train_loss:3.5719 train_time:3564596ms step_avg:4069.17ms step:887/3200 train_loss:3.8098 train_time:3568588ms step_avg:4069.09ms step:888/3200 train_loss:3.6451 train_time:3572572ms step_avg:4068.99ms step:889/3200 train_loss:4.4264 train_time:3576570ms step_avg:4068.91ms step:890/3200 train_loss:3.6776 train_time:3580563ms step_avg:4068.82ms step:891/3200 train_loss:3.6972 train_time:3584542ms step_avg:4068.72ms step:892/3200 train_loss:3.5301 train_time:3588529ms step_avg:4068.63ms step:893/3200 train_loss:3.9317 train_time:3592522ms step_avg:4068.54ms step:894/3200 train_loss:3.6996 train_time:3596518ms step_avg:4068.46ms step:895/3200 train_loss:3.5830 train_time:3600522ms step_avg:4068.39ms step:896/3200 train_loss:3.7222 train_time:3604533ms step_avg:4068.32ms step:897/3200 train_loss:3.6472 train_time:3608557ms step_avg:4068.27ms step:898/3200 train_loss:3.5940 train_time:3612587ms step_avg:4068.23ms step:899/3200 train_loss:3.6023 train_time:3616630ms step_avg:4068.20ms step:900/3200 train_loss:3.6187 train_time:3620691ms step_avg:4068.19ms step:901/3200 train_loss:3.7412 train_time:3624764ms step_avg:4068.20ms 
step:902/3200 train_loss:3.5041 train_time:3628855ms step_avg:4068.22ms step:903/3200 train_loss:3.8036 train_time:3632962ms step_avg:4068.27ms step:904/3200 train_loss:4.0371 train_time:3637098ms step_avg:4068.34ms step:905/3200 train_loss:3.5567 train_time:3641248ms step_avg:4068.43ms step:906/3200 train_loss:3.5570 train_time:3645405ms step_avg:4068.53ms step:907/3200 train_loss:3.6991 train_time:3649543ms step_avg:4068.61ms step:908/3200 train_loss:3.4333 train_time:3653664ms step_avg:4068.67ms step:909/3200 train_loss:3.6328 train_time:3657774ms step_avg:4068.71ms step:910/3200 train_loss:3.7476 train_time:3661882ms step_avg:4068.76ms step:911/3200 train_loss:4.0616 train_time:3665983ms step_avg:4068.79ms step:912/3200 train_loss:3.7126 train_time:3670079ms step_avg:4068.82ms step:913/3200 train_loss:3.5875 train_time:3674177ms step_avg:4068.86ms step:914/3200 train_loss:3.4770 train_time:3678278ms step_avg:4068.89ms step:915/3200 train_loss:3.6216 train_time:3682388ms step_avg:4068.94ms step:916/3200 train_loss:3.5861 train_time:3686501ms step_avg:4068.99ms step:917/3200 train_loss:3.6062 train_time:3690622ms step_avg:4069.04ms step:918/3200 train_loss:3.5688 train_time:3694750ms step_avg:4069.11ms step:919/3200 train_loss:3.6375 train_time:3698892ms step_avg:4069.19ms step:920/3200 train_loss:4.1905 train_time:3703044ms step_avg:4069.28ms step:921/3200 train_loss:3.5018 train_time:3707230ms step_avg:4069.41ms step:922/3200 train_loss:3.9385 train_time:3711411ms step_avg:4069.53ms step:923/3200 train_loss:3.6452 train_time:3715629ms step_avg:4069.69ms step:924/3200 train_loss:3.7076 train_time:3719854ms step_avg:4069.86ms step:925/3200 train_loss:3.6994 train_time:3724049ms step_avg:4070.00ms step:926/3200 train_loss:3.5408 train_time:3728218ms step_avg:4070.11ms step:927/3200 train_loss:3.8347 train_time:3732354ms step_avg:4070.18ms step:928/3200 train_loss:3.5385 train_time:3736463ms step_avg:4070.22ms step:929/3200 train_loss:3.6225 train_time:3740558ms 
step_avg:4070.25ms step:930/3200 train_loss:3.7780 train_time:3744639ms step_avg:4070.26ms step:931/3200 train_loss:3.5896 train_time:3748715ms step_avg:4070.27ms step:932/3200 train_loss:3.7671 train_time:3752777ms step_avg:4070.26ms step:933/3200 train_loss:3.6358 train_time:3756842ms step_avg:4070.25ms step:934/3200 train_loss:3.8956 train_time:3760906ms step_avg:4070.24ms step:935/3200 train_loss:3.4895 train_time:3764964ms step_avg:4070.23ms step:936/3200 train_loss:4.3917 train_time:3769024ms step_avg:4070.22ms step:937/3200 train_loss:3.7580 train_time:3773086ms step_avg:4070.21ms step:938/3200 train_loss:3.5549 train_time:3777149ms step_avg:4070.20ms step:939/3200 train_loss:3.7118 train_time:3781220ms step_avg:4070.20ms step:940/3200 train_loss:3.7411 train_time:3785294ms step_avg:4070.21ms step:941/3200 train_loss:3.7106 train_time:3789371ms step_avg:4070.22ms step:942/3200 train_loss:3.6032 train_time:3793448ms step_avg:4070.22ms step:943/3200 train_loss:3.5131 train_time:3797537ms step_avg:4070.24ms step:944/3200 train_loss:3.6205 train_time:3801642ms step_avg:4070.28ms step:945/3200 train_loss:3.5456 train_time:3805759ms step_avg:4070.33ms step:946/3200 train_loss:3.7198 train_time:3809893ms step_avg:4070.40ms step:947/3200 train_loss:3.5625 train_time:3814038ms step_avg:4070.48ms step:948/3200 train_loss:3.6777 train_time:3818216ms step_avg:4070.59ms step:949/3200 train_loss:3.6057 train_time:3822400ms step_avg:4070.71ms step:950/3200 train_loss:3.6870 train_time:3826596ms step_avg:4070.85ms step:951/3200 train_loss:3.4881 train_time:3830732ms step_avg:4070.92ms step:952/3200 train_loss:3.6558 train_time:3834834ms step_avg:4070.95ms step:953/3200 train_loss:3.7239 train_time:3838918ms step_avg:4070.96ms step:954/3200 train_loss:3.6284 train_time:3842986ms step_avg:4070.96ms step:955/3200 train_loss:3.4901 train_time:3847033ms step_avg:4070.93ms step:956/3200 train_loss:3.5697 train_time:3851068ms step_avg:4070.90ms step:957/3200 train_loss:3.7022 
train_time:3855095ms step_avg:4070.85ms step:958/3200 train_loss:3.6505 train_time:3859121ms step_avg:4070.80ms step:959/3200 train_loss:3.4329 train_time:3863148ms step_avg:4070.76ms step:960/3200 train_loss:3.7343 train_time:3867161ms step_avg:4070.70ms step:961/3200 train_loss:3.7559 train_time:3871158ms step_avg:4070.62ms step:962/3200 train_loss:3.7517 train_time:3875175ms step_avg:4070.56ms step:963/3200 train_loss:3.6522 train_time:3879174ms step_avg:4070.49ms step:964/3200 train_loss:3.6915 train_time:3883174ms step_avg:4070.41ms step:965/3200 train_loss:3.7391 train_time:3887176ms step_avg:4070.34ms step:966/3200 train_loss:3.5864 train_time:3891173ms step_avg:4070.27ms step:967/3200 train_loss:3.7042 train_time:3895175ms step_avg:4070.19ms step:968/3200 train_loss:3.6716 train_time:3899187ms step_avg:4070.13ms step:969/3200 train_loss:3.7414 train_time:3903203ms step_avg:4070.08ms step:970/3200 train_loss:3.5473 train_time:3907227ms step_avg:4070.03ms step:971/3200 train_loss:3.7389 train_time:3911252ms step_avg:4069.98ms step:972/3200 train_loss:3.6623 train_time:3915290ms step_avg:4069.95ms step:973/3200 train_loss:3.4777 train_time:3919340ms step_avg:4069.93ms step:974/3200 train_loss:3.4215 train_time:3923392ms step_avg:4069.91ms step:975/3200 train_loss:3.7226 train_time:3927454ms step_avg:4069.90ms step:976/3200 train_loss:3.5836 train_time:3931526ms step_avg:4069.90ms step:977/3200 train_loss:3.7185 train_time:3935611ms step_avg:4069.92ms step:978/3200 train_loss:3.6909 train_time:3939718ms step_avg:4069.96ms step:979/3200 train_loss:3.5413 train_time:3943840ms step_avg:4070.01ms step:980/3200 train_loss:3.6757 train_time:3947980ms step_avg:4070.08ms step:981/3200 train_loss:3.6810 train_time:3952159ms step_avg:4070.19ms step:982/3200 train_loss:3.5050 train_time:3956345ms step_avg:4070.31ms step:983/3200 train_loss:3.9469 train_time:3960543ms step_avg:4070.45ms step:984/3200 train_loss:3.6511 train_time:3964724ms step_avg:4070.56ms step:985/3200 
train_loss:3.6678 train_time:3968834ms step_avg:4070.60ms step:986/3200 train_loss:3.6042 train_time:3972908ms step_avg:4070.60ms step:987/3200 train_loss:3.8545 train_time:3976940ms step_avg:4070.56ms step:988/3200 train_loss:3.7241 train_time:3980963ms step_avg:4070.51ms step:989/3200 train_loss:3.6282 train_time:3984965ms step_avg:4070.44ms step:990/3200 train_loss:3.5516 train_time:3988957ms step_avg:4070.36ms step:991/3200 train_loss:3.3812 train_time:3992940ms step_avg:4070.27ms step:992/3200 train_loss:3.5914 train_time:3996913ms step_avg:4070.18ms step:993/3200 train_loss:3.6526 train_time:4000881ms step_avg:4070.07ms step:994/3200 train_loss:3.5584 train_time:4004854ms step_avg:4069.97ms step:995/3200 train_loss:3.4857 train_time:4008837ms step_avg:4069.89ms step:996/3200 train_loss:3.6503 train_time:4012817ms step_avg:4069.79ms step:997/3200 train_loss:3.4493 train_time:4016795ms step_avg:4069.70ms step:998/3200 train_loss:4.1124 train_time:4020769ms step_avg:4069.60ms step:999/3200 train_loss:3.7055 train_time:4024735ms step_avg:4069.50ms step:1000/3200 train_loss:3.5555 train_time:4028707ms step_avg:4069.40ms step:1000/3200 val_loss:3.6000 train_time:4028707ms step_avg:4069.40ms step:1001/3200 train_loss:3.7659 train_time:4032691ms step_avg:4069.32ms step:1002/3200 train_loss:3.6305 train_time:4036674ms step_avg:4069.23ms step:1003/3200 train_loss:3.6941 train_time:4040660ms step_avg:4069.14ms step:1004/3200 train_loss:3.6056 train_time:4044647ms step_avg:4069.06ms step:1005/3200 train_loss:3.6672 train_time:4048635ms step_avg:4068.98ms step:1006/3200 train_loss:3.3651 train_time:4052625ms step_avg:4068.90ms step:1007/3200 train_loss:3.4732 train_time:4056619ms step_avg:4068.83ms step:1008/3200 train_loss:3.5716 train_time:4060608ms step_avg:4068.75ms step:1009/3200 train_loss:3.6107 train_time:4064595ms step_avg:4068.66ms step:1010/3200 train_loss:3.4991 train_time:4068579ms step_avg:4068.58ms step:1011/3200 train_loss:4.0297 train_time:4072570ms 
step_avg:4068.50ms step:1012/3200 train_loss:3.5553 train_time:4076569ms step_avg:4068.43ms step:1013/3200 train_loss:3.5289 train_time:4080576ms step_avg:4068.37ms step:1014/3200 train_loss:3.6300 train_time:4084591ms step_avg:4068.32ms step:1015/3200 train_loss:3.3730 train_time:4088613ms step_avg:4068.27ms step:1016/3200 train_loss:3.5344 train_time:4092646ms step_avg:4068.24ms step:1017/3200 train_loss:3.7607 train_time:4096683ms step_avg:4068.21ms step:1018/3200 train_loss:3.5773 train_time:4100743ms step_avg:4068.20ms step:1019/3200 train_loss:3.7000 train_time:4104816ms step_avg:4068.20ms step:1020/3200 train_loss:3.6038 train_time:4108901ms step_avg:4068.22ms step:1021/3200 train_loss:3.9128 train_time:4113003ms step_avg:4068.25ms step:1022/3200 train_loss:3.8276 train_time:4117125ms step_avg:4068.31ms step:1023/3200 train_loss:3.6108 train_time:4121272ms step_avg:4068.38ms step:1024/3200 train_loss:3.6602 train_time:4125443ms step_avg:4068.48ms step:1025/3200 train_loss:3.5526 train_time:4129625ms step_avg:4068.60ms step:1026/3200 train_loss:3.6910 train_time:4133823ms step_avg:4068.72ms step:1027/3200 train_loss:3.8052 train_time:4137990ms step_avg:4068.82ms step:1028/3200 train_loss:3.3893 train_time:4142098ms step_avg:4068.86ms step:1029/3200 train_loss:3.5766 train_time:4146179ms step_avg:4068.87ms step:1030/3200 train_loss:3.5693 train_time:4150231ms step_avg:4068.85ms step:1031/3200 train_loss:3.5538 train_time:4154265ms step_avg:4068.82ms step:1032/3200 train_loss:3.6303 train_time:4158282ms step_avg:4068.77ms step:1033/3200 train_loss:3.5544 train_time:4162287ms step_avg:4068.71ms step:1034/3200 train_loss:3.5800 train_time:4166281ms step_avg:4068.63ms step:1035/3200 train_loss:3.6227 train_time:4170269ms step_avg:4068.56ms step:1036/3200 train_loss:3.6887 train_time:4174251ms step_avg:4068.47ms step:1037/3200 train_loss:3.4822 train_time:4178229ms step_avg:4068.38ms step:1038/3200 train_loss:3.6569 train_time:4182202ms step_avg:4068.29ms 
step:1039/3200 train_loss:3.5207 train_time:4186170ms step_avg:4068.19ms step:1040/3200 train_loss:3.6371 train_time:4190157ms step_avg:4068.11ms step:1041/3200 train_loss:3.6285 train_time:4194149ms step_avg:4068.04ms step:1042/3200 train_loss:3.5410 train_time:4198140ms step_avg:4067.97ms step:1043/3200 train_loss:3.6956 train_time:4202127ms step_avg:4067.89ms step:1044/3200 train_loss:3.6696 train_time:4206118ms step_avg:4067.81ms step:1045/3200 train_loss:3.7053 train_time:4210107ms step_avg:4067.74ms step:1046/3200 train_loss:3.6882 train_time:4214096ms step_avg:4067.66ms step:1047/3200 train_loss:3.4419 train_time:4218084ms step_avg:4067.58ms step:1048/3200 train_loss:3.5686 train_time:4222076ms step_avg:4067.51ms step:1049/3200 train_loss:3.7677 train_time:4226070ms step_avg:4067.44ms step:1050/3200 train_loss:3.7556 train_time:4230064ms step_avg:4067.37ms step:1051/3200 train_loss:3.6146 train_time:4234064ms step_avg:4067.30ms step:1052/3200 train_loss:3.5468 train_time:4238047ms step_avg:4067.22ms step:1053/3200 train_loss:5.1001 train_time:4242025ms step_avg:4067.14ms step:1054/3200 train_loss:3.6977 train_time:4246012ms step_avg:4067.06ms step:1055/3200 train_loss:3.5790 train_time:4250004ms step_avg:4066.99ms step:1056/3200 train_loss:3.7632 train_time:4253998ms step_avg:4066.92ms step:1057/3200 train_loss:3.6155 train_time:4258000ms step_avg:4066.86ms step:1058/3200 train_loss:3.6653 train_time:4262006ms step_avg:4066.80ms step:1059/3200 train_loss:3.6249 train_time:4266024ms step_avg:4066.75ms step:1060/3200 train_loss:3.5879 train_time:4270050ms step_avg:4066.71ms step:1061/3200 train_loss:3.6612 train_time:4274078ms step_avg:4066.68ms step:1062/3200 train_loss:3.6911 train_time:4278117ms step_avg:4066.65ms step:1063/3200 train_loss:3.5477 train_time:4282170ms step_avg:4066.64ms step:1064/3200 train_loss:3.8052 train_time:4286241ms step_avg:4066.64ms step:1065/3200 train_loss:3.7281 train_time:4290321ms step_avg:4066.66ms step:1066/3200 
train_loss:3.4635 train_time:4294404ms step_avg:4066.67ms step:1067/3200 train_loss:3.7259 train_time:4298498ms step_avg:4066.70ms step:1068/3200 train_loss:3.6179 train_time:4302605ms step_avg:4066.73ms step:1069/3200 train_loss:3.5029 train_time:4306712ms step_avg:4066.77ms step:1070/3200 train_loss:3.6516 train_time:4310845ms step_avg:4066.84ms step:1071/3200 train_loss:3.6677 train_time:4314997ms step_avg:4066.92ms step:1072/3200 train_loss:3.8058 train_time:4319184ms step_avg:4067.03ms step:1073/3200 train_loss:3.7947 train_time:4323364ms step_avg:4067.13ms step:1074/3200 train_loss:3.5699 train_time:4327541ms step_avg:4067.24ms step:1075/3200 train_loss:3.7086 train_time:4331693ms step_avg:4067.32ms step:1076/3200 train_loss:2.9929 train_time:4335791ms step_avg:4067.35ms step:1077/3200 train_loss:3.6352 train_time:4339860ms step_avg:4067.35ms step:1078/3200 train_loss:3.4763 train_time:4343913ms step_avg:4067.33ms step:1079/3200 train_loss:3.6007 train_time:4347942ms step_avg:4067.30ms step:1080/3200 train_loss:3.9340 train_time:4351964ms step_avg:4067.26ms step:1081/3200 train_loss:3.5536 train_time:4355973ms step_avg:4067.20ms step:1082/3200 train_loss:3.7700 train_time:4359972ms step_avg:4067.14ms step:1083/3200 train_loss:3.7932 train_time:4363965ms step_avg:4067.07ms step:1084/3200 train_loss:3.6721 train_time:4367949ms step_avg:4066.99ms step:1085/3200 train_loss:3.6945 train_time:4371930ms step_avg:4066.91ms step:1086/3200 train_loss:3.6196 train_time:4375910ms step_avg:4066.83ms step:1087/3200 train_loss:3.3977 train_time:4379885ms step_avg:4066.75ms step:1088/3200 train_loss:3.6415 train_time:4383855ms step_avg:4066.66ms step:1089/3200 train_loss:3.5385 train_time:4387825ms step_avg:4066.57ms step:1090/3200 train_loss:3.5723 train_time:4391796ms step_avg:4066.48ms step:1091/3200 train_loss:3.5627 train_time:4395764ms step_avg:4066.39ms step:1092/3200 train_loss:3.5692 train_time:4399733ms step_avg:4066.30ms step:1093/3200 train_loss:3.6486 
train_time:4403701ms step_avg:4066.21ms step:1094/3200 train_loss:3.6814 train_time:4407673ms step_avg:4066.12ms step:1095/3200 train_loss:3.5336 train_time:4411645ms step_avg:4066.03ms step:1096/3200 train_loss:3.6314 train_time:4415619ms step_avg:4065.95ms step:1097/3200 train_loss:3.7952 train_time:4419594ms step_avg:4065.86ms step:1098/3200 train_loss:3.2757 train_time:4423577ms step_avg:4065.79ms step:1099/3200 train_loss:3.4963 train_time:4427561ms step_avg:4065.71ms step:1100/3200 train_loss:3.5831 train_time:4431548ms step_avg:4065.64ms step:1101/3200 train_loss:3.6554 train_time:4435537ms step_avg:4065.57ms step:1102/3200 train_loss:3.7593 train_time:4439531ms step_avg:4065.50ms step:1103/3200 train_loss:4.0282 train_time:4443533ms step_avg:4065.45ms step:1104/3200 train_loss:3.3610 train_time:4447542ms step_avg:4065.39ms step:1105/3200 train_loss:3.6303 train_time:4451553ms step_avg:4065.35ms step:1106/3200 train_loss:3.6848 train_time:4455579ms step_avg:4065.31ms step:1107/3200 train_loss:3.6516 train_time:4459602ms step_avg:4065.27ms step:1108/3200 train_loss:3.5926 train_time:4463642ms step_avg:4065.25ms step:1109/3200 train_loss:4.0139 train_time:4467694ms step_avg:4065.24ms step:1110/3200 train_loss:3.6278 train_time:4471757ms step_avg:4065.23ms step:1111/3200 train_loss:3.5054 train_time:4475838ms step_avg:4065.25ms step:1112/3200 train_loss:3.7382 train_time:4479932ms step_avg:4065.27ms step:1113/3200 train_loss:3.5841 train_time:4484049ms step_avg:4065.32ms step:1114/3200 train_loss:3.5422 train_time:4488189ms step_avg:4065.39ms step:1115/3200 train_loss:3.4569 train_time:4492371ms step_avg:4065.49ms step:1116/3200 train_loss:3.4817 train_time:4496549ms step_avg:4065.60ms step:1117/3200 train_loss:3.3796 train_time:4500725ms step_avg:4065.70ms step:1118/3200 train_loss:3.7595 train_time:4504880ms step_avg:4065.78ms step:1119/3200 train_loss:4.3735 train_time:4509016ms step_avg:4065.84ms step:1120/3200 train_loss:3.6653 train_time:4513138ms 
step_avg:4065.89ms step:1121/3200 train_loss:3.6145 train_time:4517254ms step_avg:4065.94ms step:1122/3200 train_loss:3.4546 train_time:4521365ms step_avg:4065.98ms step:1123/3200 train_loss:3.6729 train_time:4525480ms step_avg:4066.02ms step:1124/3200 train_loss:3.6981 train_time:4529594ms step_avg:4066.06ms step:1125/3200 train_loss:3.3273 train_time:4533710ms step_avg:4066.11ms step:1125/3200 val_loss:3.5718 train_time:4533710ms step_avg:4066.11ms step:1126/3200 train_loss:3.5800 train_time:4537910ms step_avg:4066.23ms step:1127/3200 train_loss:3.5372 train_time:4542109ms step_avg:4066.35ms step:1128/3200 train_loss:3.7717 train_time:4546276ms step_avg:4066.44ms step:1129/3200 train_loss:3.5073 train_time:4550376ms step_avg:4066.47ms step:1130/3200 train_loss:3.7467 train_time:4554441ms step_avg:4066.47ms step:1131/3200 train_loss:3.4975 train_time:4558479ms step_avg:4066.44ms step:1132/3200 train_loss:3.3846 train_time:4562505ms step_avg:4066.40ms step:1133/3200 train_loss:3.5325 train_time:4566533ms step_avg:4066.37ms step:1134/3200 train_loss:3.4820 train_time:4570554ms step_avg:4066.33ms step:1135/3200 train_loss:3.6956 train_time:4574563ms step_avg:4066.28ms step:1136/3200 train_loss:3.5502 train_time:4578567ms step_avg:4066.22ms step:1137/3200 train_loss:3.6089 train_time:4582558ms step_avg:4066.16ms step:1138/3200 train_loss:3.4248 train_time:4586546ms step_avg:4066.09ms step:1139/3200 train_loss:3.6502 train_time:4590534ms step_avg:4066.02ms step:1140/3200 train_loss:3.5569 train_time:4594512ms step_avg:4065.94ms step:1141/3200 train_loss:3.5420 train_time:4598491ms step_avg:4065.86ms step:1142/3200 train_loss:3.5845 train_time:4602471ms step_avg:4065.79ms step:1143/3200 train_loss:3.5329 train_time:4606448ms step_avg:4065.71ms step:1144/3200 train_loss:3.5957 train_time:4610422ms step_avg:4065.63ms step:1145/3200 train_loss:3.5927 train_time:4614395ms step_avg:4065.55ms step:1146/3200 train_loss:3.8060 train_time:4618369ms step_avg:4065.47ms 
step:1147/3200 train_loss:3.5441 train_time:4622337ms step_avg:4065.38ms step:1148/3200 train_loss:3.6613 train_time:4626309ms step_avg:4065.30ms step:1149/3200 train_loss:3.6206 train_time:4630279ms step_avg:4065.21ms step:1150/3200 train_loss:3.6201 train_time:4634251ms step_avg:4065.13ms step:1151/3200 train_loss:3.6071 train_time:4638229ms step_avg:4065.06ms step:1152/3200 train_loss:3.4605 train_time:4642206ms step_avg:4064.98ms step:1153/3200 train_loss:3.5008 train_time:4646188ms step_avg:4064.91ms step:1154/3200 train_loss:3.5086 train_time:4650166ms step_avg:4064.83ms step:1155/3200 train_loss:3.4097 train_time:4654144ms step_avg:4064.75ms step:1156/3200 train_loss:3.6593 train_time:4658129ms step_avg:4064.69ms step:1157/3200 train_loss:3.5773 train_time:4662118ms step_avg:4064.62ms step:1158/3200 train_loss:3.7664 train_time:4666109ms step_avg:4064.55ms step:1159/3200 train_loss:3.5887 train_time:4670107ms step_avg:4064.50ms step:1160/3200 train_loss:3.7072 train_time:4674110ms step_avg:4064.44ms step:1161/3200 train_loss:3.6967 train_time:4678114ms step_avg:4064.39ms step:1162/3200 train_loss:3.5154 train_time:4682126ms step_avg:4064.35ms step:1163/3200 train_loss:3.3670 train_time:4686145ms step_avg:4064.31ms step:1164/3200 train_loss:3.5472 train_time:4690174ms step_avg:4064.28ms step:1165/3200 train_loss:3.4264 train_time:4694206ms step_avg:4064.25ms step:1166/3200 train_loss:3.6866 train_time:4698248ms step_avg:4064.23ms step:1167/3200 train_loss:3.6161 train_time:4702280ms step_avg:4064.20ms step:1168/3200 train_loss:3.5135 train_time:4706310ms step_avg:4064.17ms step:1169/3200 train_loss:3.5027 train_time:4710358ms step_avg:4064.16ms step:1170/3200 train_loss:3.6727 train_time:4714419ms step_avg:4064.15ms step:1171/3200 train_loss:3.6533 train_time:4718499ms step_avg:4064.17ms step:1172/3200 train_loss:3.4612 train_time:4722587ms step_avg:4064.19ms step:1173/3200 train_loss:3.4296 train_time:4726702ms step_avg:4064.23ms step:1174/3200 
train_loss:3.5615 train_time:4730841ms step_avg:4064.30ms step:1175/3200 train_loss:3.4860 train_time:4735022ms step_avg:4064.40ms step:1176/3200 train_loss:3.6768 train_time:4739209ms step_avg:4064.50ms step:1177/3200 train_loss:3.9078 train_time:4743406ms step_avg:4064.62ms step:1178/3200 train_loss:3.5582 train_time:4747600ms step_avg:4064.73ms step:1179/3200 train_loss:3.5288 train_time:4751745ms step_avg:4064.79ms step:1180/3200 train_loss:3.5297 train_time:4755867ms step_avg:4064.84ms step:1181/3200 train_loss:3.5091 train_time:4759967ms step_avg:4064.87ms step:1182/3200 train_loss:3.5708 train_time:4764053ms step_avg:4064.89ms step:1183/3200 train_loss:3.5756 train_time:4768131ms step_avg:4064.90ms step:1184/3200 train_loss:3.6406 train_time:4772200ms step_avg:4064.91ms step:1185/3200 train_loss:3.4547 train_time:4776266ms step_avg:4064.91ms step:1186/3200 train_loss:3.6674 train_time:4780322ms step_avg:4064.90ms step:1187/3200 train_loss:3.5191 train_time:4784378ms step_avg:4064.89ms step:1188/3200 train_loss:3.4427 train_time:4788431ms step_avg:4064.88ms step:1189/3200 train_loss:3.5044 train_time:4792483ms step_avg:4064.87ms step:1190/3200 train_loss:3.4916 train_time:4796537ms step_avg:4064.86ms step:1191/3200 train_loss:3.4925 train_time:4800593ms step_avg:4064.85ms step:1192/3200 train_loss:3.3876 train_time:4804654ms step_avg:4064.85ms step:1193/3200 train_loss:3.4347 train_time:4808718ms step_avg:4064.85ms step:1194/3200 train_loss:3.6128 train_time:4812792ms step_avg:4064.86ms step:1195/3200 train_loss:3.6739 train_time:4816871ms step_avg:4064.87ms step:1196/3200 train_loss:3.7787 train_time:4820958ms step_avg:4064.89ms step:1197/3200 train_loss:3.6789 train_time:4825055ms step_avg:4064.92ms step:1198/3200 train_loss:3.4539 train_time:4829165ms step_avg:4064.95ms step:1199/3200 train_loss:3.4735 train_time:4833291ms step_avg:4065.01ms step:1200/3200 train_loss:3.5161 train_time:4837443ms step_avg:4065.08ms step:1201/3200 train_loss:3.3794 
train_time:4841608ms step_avg:4065.16ms step:1202/3200 train_loss:3.4358 train_time:4845789ms step_avg:4065.26ms step:1203/3200 train_loss:3.4880 train_time:4850004ms step_avg:4065.38ms step:1204/3200 train_loss:3.4299 train_time:4854230ms step_avg:4065.52ms step:1205/3200 train_loss:3.5260 train_time:4858422ms step_avg:4065.63ms step:1206/3200 train_loss:3.5243 train_time:4862552ms step_avg:4065.68ms step:1207/3200 train_loss:3.6280 train_time:4866642ms step_avg:4065.70ms step:1208/3200 train_loss:3.6899 train_time:4870716ms step_avg:4065.71ms step:1209/3200 train_loss:3.6900 train_time:4874759ms step_avg:4065.69ms step:1210/3200 train_loss:3.4072 train_time:4878794ms step_avg:4065.66ms step:1211/3200 train_loss:3.5966 train_time:4882813ms step_avg:4065.62ms step:1212/3200 train_loss:3.5132 train_time:4886846ms step_avg:4065.60ms step:1213/3200 train_loss:3.5027 train_time:4890873ms step_avg:4065.56ms step:1214/3200 train_loss:3.6352 train_time:4894895ms step_avg:4065.53ms step:1215/3200 train_loss:3.5486 train_time:4898918ms step_avg:4065.49ms step:1216/3200 train_loss:3.6006 train_time:4902938ms step_avg:4065.45ms step:1217/3200 train_loss:3.5057 train_time:4906950ms step_avg:4065.41ms step:1218/3200 train_loss:3.6452 train_time:4910961ms step_avg:4065.37ms step:1219/3200 train_loss:3.7006 train_time:4914969ms step_avg:4065.32ms step:1220/3200 train_loss:3.5464 train_time:4918980ms step_avg:4065.27ms step:1221/3200 train_loss:3.4694 train_time:4922987ms step_avg:4065.22ms step:1222/3200 train_loss:3.5476 train_time:4926996ms step_avg:4065.18ms step:1223/3200 train_loss:3.6873 train_time:4931003ms step_avg:4065.13ms step:1224/3200 train_loss:3.5416 train_time:4935008ms step_avg:4065.08ms step:1225/3200 train_loss:3.4678 train_time:4939018ms step_avg:4065.04ms step:1226/3200 train_loss:3.5597 train_time:4943034ms step_avg:4064.99ms step:1227/3200 train_loss:3.7746 train_time:4947051ms step_avg:4064.96ms step:1228/3200 train_loss:3.4427 train_time:4951072ms 
step_avg:4064.92ms step:1229/3200 train_loss:3.5211 train_time:4955098ms step_avg:4064.89ms step:1230/3200 train_loss:3.7302 train_time:4959131ms step_avg:4064.86ms step:1231/3200 train_loss:3.5781 train_time:4963164ms step_avg:4064.84ms step:1232/3200 train_loss:3.4303 train_time:4967196ms step_avg:4064.81ms step:1233/3200 train_loss:3.6498 train_time:4971218ms step_avg:4064.77ms step:1234/3200 train_loss:3.4942 train_time:4975244ms step_avg:4064.74ms step:1235/3200 train_loss:3.5978 train_time:4979277ms step_avg:4064.72ms step:1236/3200 train_loss:3.5855 train_time:4983325ms step_avg:4064.70ms step:1237/3200 train_loss:3.5417 train_time:4987384ms step_avg:4064.70ms step:1238/3200 train_loss:3.5857 train_time:4991456ms step_avg:4064.70ms step:1239/3200 train_loss:3.6451 train_time:4995541ms step_avg:4064.72ms step:1240/3200 train_loss:3.6855 train_time:4999640ms step_avg:4064.75ms step:1241/3200 train_loss:3.4306 train_time:5003755ms step_avg:4064.79ms step:1242/3200 train_loss:3.5043 train_time:5007912ms step_avg:4064.86ms step:1243/3200 train_loss:3.5271 train_time:5012098ms step_avg:4064.96ms step:1244/3200 train_loss:3.4687 train_time:5016302ms step_avg:4065.07ms step:1245/3200 train_loss:3.5629 train_time:5020506ms step_avg:4065.19ms step:1246/3200 train_loss:3.7436 train_time:5024717ms step_avg:4065.30ms step:1247/3200 train_loss:3.5536 train_time:5028887ms step_avg:4065.39ms step:1248/3200 train_loss:3.6004 train_time:5032974ms step_avg:4065.41ms step:1249/3200 train_loss:3.5315 train_time:5037018ms step_avg:4065.39ms step:1250/3200 train_loss:3.6156 train_time:5041036ms step_avg:4065.35ms step:1250/3200 val_loss:3.5436 train_time:5041036ms step_avg:4065.35ms step:1251/3200 train_loss:3.5386 train_time:5045028ms step_avg:4065.29ms step:1252/3200 train_loss:3.7143 train_time:5049011ms step_avg:4065.23ms step:1253/3200 train_loss:3.4554 train_time:5052992ms step_avg:4065.16ms step:1254/3200 train_loss:3.4400 train_time:5056973ms step_avg:4065.09ms 
step:1255/3200 train_loss:3.6098 train_time:5060947ms step_avg:4065.02ms step:1256/3200 train_loss:3.5863 train_time:5064925ms step_avg:4064.95ms step:1257/3200 train_loss:3.5289 train_time:5068897ms step_avg:4064.87ms step:1258/3200 train_loss:3.6730 train_time:5072868ms step_avg:4064.80ms step:1259/3200 train_loss:3.4955 train_time:5076841ms step_avg:4064.72ms step:1260/3200 train_loss:3.3689 train_time:5080813ms step_avg:4064.65ms step:1261/3200 train_loss:3.9749 train_time:5084788ms step_avg:4064.58ms step:1262/3200 train_loss:3.5756 train_time:5088759ms step_avg:4064.50ms step:1263/3200 train_loss:3.6645 train_time:5092728ms step_avg:4064.43ms step:1264/3200 train_loss:3.5223 train_time:5096700ms step_avg:4064.35ms step:1265/3200 train_loss:3.7517 train_time:5100671ms step_avg:4064.28ms step:1266/3200 train_loss:3.7474 train_time:5104646ms step_avg:4064.21ms step:1267/3200 train_loss:3.8510 train_time:5108625ms step_avg:4064.14ms step:1268/3200 train_loss:3.5000 train_time:5112605ms step_avg:4064.07ms step:1269/3200 train_loss:3.4496 train_time:5116583ms step_avg:4064.01ms step:1270/3200 train_loss:3.6162 train_time:5120567ms step_avg:4063.94ms step:1271/3200 train_loss:3.5801 train_time:5124551ms step_avg:4063.88ms step:1272/3200 train_loss:3.4928 train_time:5128536ms step_avg:4063.82ms step:1273/3200 train_loss:3.5538 train_time:5132525ms step_avg:4063.76ms step:1274/3200 train_loss:3.6592 train_time:5136515ms step_avg:4063.70ms step:1275/3200 train_loss:3.5259 train_time:5140507ms step_avg:4063.64ms step:1276/3200 train_loss:3.4346 train_time:5144504ms step_avg:4063.59ms step:1277/3200 train_loss:3.6343 train_time:5148503ms step_avg:4063.54ms step:1278/3200 train_loss:3.5602 train_time:5152505ms step_avg:4063.49ms step:1279/3200 train_loss:3.5103 train_time:5156497ms step_avg:4063.43ms step:1280/3200 train_loss:3.6954 train_time:5160482ms step_avg:4063.37ms step:1281/3200 train_loss:3.4773 train_time:5164474ms step_avg:4063.32ms step:1282/3200 
train_loss:3.4879 train_time:5168465ms step_avg:4063.26ms step:1283/3200 train_loss:3.7926 train_time:5172462ms step_avg:4063.21ms step:1284/3200 train_loss:3.6411 train_time:5176472ms step_avg:4063.17ms step:1285/3200 train_loss:3.5729 train_time:5180488ms step_avg:4063.13ms step:1286/3200 train_loss:3.5385 train_time:5184492ms step_avg:4063.08ms step:1287/3200 train_loss:3.6124 train_time:5188495ms step_avg:4063.03ms step:1288/3200 train_loss:3.4396 train_time:5192516ms step_avg:4063.00ms step:1289/3200 train_loss:3.6787 train_time:5196550ms step_avg:4062.98ms step:1290/3200 train_loss:3.7677 train_time:5200592ms step_avg:4062.96ms step:1291/3200 train_loss:3.5233 train_time:5204648ms step_avg:4062.96ms step:1292/3200 train_loss:3.4210 train_time:5208724ms step_avg:4062.97ms step:1293/3200 train_loss:3.7368 train_time:5212815ms step_avg:4062.99ms step:1294/3200 train_loss:3.4136 train_time:5216925ms step_avg:4063.03ms step:1295/3200 train_loss:3.6647 train_time:5221060ms step_avg:4063.08ms step:1296/3200 train_loss:3.5774 train_time:5225241ms step_avg:4063.17ms step:1297/3200 train_loss:3.6141 train_time:5229422ms step_avg:4063.27ms step:1298/3200 train_loss:3.4632 train_time:5233622ms step_avg:4063.37ms step:1299/3200 train_loss:3.6087 train_time:5237817ms step_avg:4063.47ms step:1300/3200 train_loss:3.5016 train_time:5241998ms step_avg:4063.56ms step:1301/3200 train_loss:3.4306 train_time:5246175ms step_avg:4063.65ms step:1302/3200 train_loss:3.6907 train_time:5250354ms step_avg:4063.74ms step:1303/3200 train_loss:3.3696 train_time:5254532ms step_avg:4063.83ms step:1304/3200 train_loss:3.6383 train_time:5258711ms step_avg:4063.92ms step:1305/3200 train_loss:3.5245 train_time:5262868ms step_avg:4063.99ms step:1306/3200 train_loss:3.7143 train_time:5266987ms step_avg:4064.03ms step:1307/3200 train_loss:3.5372 train_time:5271080ms step_avg:4064.06ms step:1308/3200 train_loss:3.5557 train_time:5275151ms step_avg:4064.06ms step:1309/3200 train_loss:3.2795 
train_time:5279211ms step_avg:4064.06ms step:1310/3200 train_loss:3.6197 train_time:5283256ms step_avg:4064.04ms step:1311/3200 train_loss:3.4216 train_time:5287294ms step_avg:4064.02ms step:1312/3200 train_loss:3.4673 train_time:5291329ms step_avg:4064.00ms step:1313/3200 train_loss:3.7704 train_time:5295355ms step_avg:4063.97ms step:1314/3200 train_loss:3.5236 train_time:5299375ms step_avg:4063.94ms step:1315/3200 train_loss:3.5812 train_time:5303392ms step_avg:4063.90ms step:1316/3200 train_loss:3.5980 train_time:5307412ms step_avg:4063.87ms step:1317/3200 train_loss:3.4654 train_time:5311428ms step_avg:4063.83ms step:1318/3200 train_loss:3.4294 train_time:5315442ms step_avg:4063.79ms step:1319/3200 train_loss:3.3839 train_time:5319460ms step_avg:4063.76ms step:1320/3200 train_loss:3.4272 train_time:5323480ms step_avg:4063.73ms step:1321/3200 train_loss:3.6973 train_time:5327499ms step_avg:4063.69ms step:1322/3200 train_loss:3.4409 train_time:5331525ms step_avg:4063.66ms step:1323/3200 train_loss:3.7050 train_time:5335552ms step_avg:4063.63ms step:1324/3200 train_loss:3.5595 train_time:5339585ms step_avg:4063.61ms step:1325/3200 train_loss:3.7710 train_time:5343618ms step_avg:4063.59ms step:1326/3200 train_loss:3.2703 train_time:5347663ms step_avg:4063.57ms step:1327/3200 train_loss:3.6225 train_time:5351716ms step_avg:4063.57ms step:1328/3200 train_loss:3.7017 train_time:5355781ms step_avg:4063.57ms step:1329/3200 train_loss:3.4486 train_time:5359857ms step_avg:4063.58ms step:1330/3200 train_loss:3.5422 train_time:5363948ms step_avg:4063.60ms step:1331/3200 train_loss:3.5641 train_time:5368044ms step_avg:4063.62ms step:1332/3200 train_loss:3.4764 train_time:5372156ms step_avg:4063.66ms step:1333/3200 train_loss:3.7200 train_time:5376298ms step_avg:4063.72ms step:1334/3200 train_loss:3.4614 train_time:5380472ms step_avg:4063.80ms step:1335/3200 train_loss:3.5798 train_time:5384653ms step_avg:4063.89ms step:1336/3200 train_loss:4.1320 train_time:5388856ms 
step_avg:4063.99ms step:1337/3200 train_loss:3.6293 train_time:5393053ms step_avg:4064.09ms step:1338/3200 train_loss:3.5461 train_time:5397246ms step_avg:4064.19ms step:1339/3200 train_loss:3.5305 train_time:5401385ms step_avg:4064.25ms step:1340/3200 train_loss:3.4042 train_time:5405492ms step_avg:4064.28ms step:1341/3200 train_loss:3.7276 train_time:5409568ms step_avg:4064.29ms step:1342/3200 train_loss:3.7280 train_time:5413630ms step_avg:4064.29ms step:1343/3200 train_loss:3.5735 train_time:5417674ms step_avg:4064.27ms step:1344/3200 train_loss:3.5293 train_time:5421705ms step_avg:4064.25ms step:1345/3200 train_loss:3.7800 train_time:5425726ms step_avg:4064.21ms step:1346/3200 train_loss:3.5161 train_time:5429742ms step_avg:4064.18ms step:1347/3200 train_loss:3.4946 train_time:5433752ms step_avg:4064.14ms step:1348/3200 train_loss:3.5968 train_time:5437752ms step_avg:4064.09ms step:1349/3200 train_loss:3.5522 train_time:5441750ms step_avg:4064.04ms step:1350/3200 train_loss:3.5252 train_time:5445743ms step_avg:4063.99ms step:1351/3200 train_loss:3.4514 train_time:5449734ms step_avg:4063.93ms step:1352/3200 train_loss:3.5059 train_time:5453727ms step_avg:4063.88ms step:1353/3200 train_loss:3.4804 train_time:5457719ms step_avg:4063.83ms step:1354/3200 train_loss:3.5575 train_time:5461711ms step_avg:4063.77ms step:1355/3200 train_loss:3.3538 train_time:5465704ms step_avg:4063.72ms step:1356/3200 train_loss:3.5372 train_time:5469696ms step_avg:4063.67ms step:1357/3200 train_loss:3.6080 train_time:5473690ms step_avg:4063.62ms step:1358/3200 train_loss:3.4224 train_time:5477692ms step_avg:4063.57ms step:1359/3200 train_loss:3.4826 train_time:5481697ms step_avg:4063.53ms step:1360/3200 train_loss:3.8388 train_time:5485700ms step_avg:4063.48ms step:1361/3200 train_loss:3.6096 train_time:5489709ms step_avg:4063.44ms step:1362/3200 train_loss:3.3662 train_time:5493723ms step_avg:4063.40ms step:1363/3200 train_loss:3.5672 train_time:5497746ms step_avg:4063.37ms 
step:1364/3200 train_loss:3.4115 train_time:5501775ms step_avg:4063.35ms step:1365/3200 train_loss:3.4065 train_time:5505814ms step_avg:4063.33ms step:1366/3200 train_loss:3.5911 train_time:5509860ms step_avg:4063.32ms step:1367/3200 train_loss:3.5359 train_time:5513914ms step_avg:4063.31ms step:1368/3200 train_loss:3.5453 train_time:5517984ms step_avg:4063.32ms step:1369/3200 train_loss:3.4561 train_time:5522064ms step_avg:4063.33ms step:1370/3200 train_loss:3.6115 train_time:5526154ms step_avg:4063.35ms step:1371/3200 train_loss:3.6159 train_time:5530275ms step_avg:4063.39ms step:1372/3200 train_loss:3.4182 train_time:5534413ms step_avg:4063.45ms step:1373/3200 train_loss:3.3414 train_time:5538583ms step_avg:4063.52ms step:1374/3200 train_loss:3.7077 train_time:5542761ms step_avg:4063.61ms step:1375/3200 train_loss:3.5552 train_time:5546946ms step_avg:4063.70ms step:1375/3200 val_loss:3.5220 train_time:5546946ms step_avg:4063.70ms step:1376/3200 train_loss:3.6500 train_time:5550967ms step_avg:4063.67ms step:1377/3200 train_loss:3.5665 train_time:5554970ms step_avg:4063.62ms step:1378/3200 train_loss:3.3270 train_time:5558966ms step_avg:4063.57ms step:1379/3200 train_loss:3.6850 train_time:5562980ms step_avg:4063.54ms step:1380/3200 train_loss:3.5483 train_time:5566987ms step_avg:4063.49ms step:1381/3200 train_loss:3.6281 train_time:5570992ms step_avg:4063.45ms step:1382/3200 train_loss:3.6838 train_time:5574994ms step_avg:4063.41ms step:1383/3200 train_loss:3.2286 train_time:5578993ms step_avg:4063.36ms step:1384/3200 train_loss:3.5030 train_time:5582992ms step_avg:4063.31ms step:1385/3200 train_loss:4.3698 train_time:5586985ms step_avg:4063.26ms step:1386/3200 train_loss:3.4461 train_time:5590984ms step_avg:4063.22ms step:1387/3200 train_loss:3.5716 train_time:5594978ms step_avg:4063.16ms step:1388/3200 train_loss:3.6276 train_time:5598975ms step_avg:4063.12ms step:1389/3200 train_loss:3.6885 train_time:5602971ms step_avg:4063.07ms step:1390/3200 
train_loss:3.5299 train_time:5606974ms step_avg:4063.02ms step:1391/3200 train_loss:3.3177 train_time:5610972ms step_avg:4062.98ms step:1392/3200 train_loss:3.6052 train_time:5614974ms step_avg:4062.93ms step:1393/3200 train_loss:3.7861 train_time:5618980ms step_avg:4062.89ms step:1394/3200 train_loss:4.0099 train_time:5622993ms step_avg:4062.86ms step:1395/3200 train_loss:3.5718 train_time:5627006ms step_avg:4062.82ms step:1396/3200 train_loss:3.4395 train_time:5631030ms step_avg:4062.79ms step:1397/3200 train_loss:3.3519 train_time:5635052ms step_avg:4062.76ms step:1398/3200 train_loss:3.8817 train_time:5639057ms step_avg:4062.72ms step:1399/3200 train_loss:3.6855 train_time:5643067ms step_avg:4062.68ms step:1400/3200 train_loss:3.7226 train_time:5647087ms step_avg:4062.65ms step:1401/3200 train_loss:3.6565 train_time:5651113ms step_avg:4062.63ms step:1402/3200 train_loss:3.4254 train_time:5655143ms step_avg:4062.60ms step:1403/3200 train_loss:3.5188 train_time:5659187ms step_avg:4062.59ms step:1404/3200 train_loss:3.5121 train_time:5663240ms step_avg:4062.58ms step:1405/3200 train_loss:3.5875 train_time:5667314ms step_avg:4062.59ms step:1406/3200 train_loss:3.5379 train_time:5671405ms step_avg:4062.61ms step:1407/3200 train_loss:3.9059 train_time:5675507ms step_avg:4062.64ms step:1408/3200 train_loss:3.3952 train_time:5679634ms step_avg:4062.69ms step:1409/3200 train_loss:3.4504 train_time:5683773ms step_avg:4062.74ms step:1410/3200 train_loss:3.5158 train_time:5687953ms step_avg:4062.82ms step:1411/3200 train_loss:3.3051 train_time:5692134ms step_avg:4062.91ms step:1412/3200 train_loss:3.6838 train_time:5696332ms step_avg:4063.00ms step:1413/3200 train_loss:3.4826 train_time:5700528ms step_avg:4063.10ms step:1414/3200 train_loss:3.5086 train_time:5704645ms step_avg:4063.14ms step:1415/3200 train_loss:3.5919 train_time:5708710ms step_avg:4063.14ms step:1416/3200 train_loss:3.5576 train_time:5712741ms step_avg:4063.12ms step:1417/3200 train_loss:3.7311 
train_time:5716752ms step_avg:4063.08ms step:1418/3200 train_loss:3.5149 train_time:5720761ms step_avg:4063.04ms step:1419/3200 train_loss:3.4663 train_time:5724763ms step_avg:4063.00ms step:1420/3200 train_loss:3.4485 train_time:5728755ms step_avg:4062.95ms step:1421/3200 train_loss:3.5601 train_time:5732735ms step_avg:4062.89ms step:1422/3200 train_loss:3.7537 train_time:5736734ms step_avg:4062.84ms step:1423/3200 train_loss:3.7159 train_time:5740723ms step_avg:4062.79ms step:1424/3200 train_loss:3.5900 train_time:5744710ms step_avg:4062.74ms step:1425/3200 train_loss:3.7478 train_time:5748690ms step_avg:4062.68ms step:1426/3200 train_loss:3.5535 train_time:5752666ms step_avg:4062.62ms step:1427/3200 train_loss:3.4346 train_time:5756638ms step_avg:4062.55ms step:1428/3200 train_loss:3.4404 train_time:5760610ms step_avg:4062.49ms step:1429/3200 train_loss:3.6858 train_time:5764581ms step_avg:4062.42ms step:1430/3200 train_loss:3.5113 train_time:5768547ms step_avg:4062.36ms step:1431/3200 train_loss:3.3588 train_time:5772521ms step_avg:4062.29ms step:1432/3200 train_loss:3.5708 train_time:5776489ms step_avg:4062.23ms step:1433/3200 train_loss:3.5888 train_time:5780466ms step_avg:4062.17ms step:1434/3200 train_loss:3.2188 train_time:5784435ms step_avg:4062.10ms step:1435/3200 train_loss:3.4335 train_time:5788399ms step_avg:4062.03ms step:1436/3200 train_loss:3.6137 train_time:5792364ms step_avg:4061.97ms step:1437/3200 train_loss:3.3156 train_time:5796332ms step_avg:4061.90ms step:1438/3200 train_loss:3.6110 train_time:5800300ms step_avg:4061.84ms step:1439/3200 train_loss:3.5693 train_time:5804270ms step_avg:4061.77ms step:1440/3200 train_loss:3.4511 train_time:5808241ms step_avg:4061.71ms step:1441/3200 train_loss:3.6624 train_time:5812212ms step_avg:4061.64ms step:1442/3200 train_loss:3.5770 train_time:5816183ms step_avg:4061.58ms step:1443/3200 train_loss:3.4672 train_time:5820161ms step_avg:4061.52ms step:1444/3200 train_loss:3.3377 train_time:5824137ms 
step_avg:4061.46ms step:1445/3200 train_loss:3.5837 train_time:5828119ms step_avg:4061.41ms step:1446/3200 train_loss:3.4208 train_time:5832102ms step_avg:4061.35ms step:1447/3200 train_loss:3.7936 train_time:5836085ms step_avg:4061.30ms step:1448/3200 train_loss:3.7313 train_time:5840075ms step_avg:4061.25ms step:1449/3200 train_loss:3.5206 train_time:5844067ms step_avg:4061.20ms step:1450/3200 train_loss:3.4373 train_time:5848064ms step_avg:4061.16ms step:1451/3200 train_loss:3.4961 train_time:5852051ms step_avg:4061.10ms step:1452/3200 train_loss:3.5162 train_time:5856034ms step_avg:4061.05ms step:1453/3200 train_loss:3.5372 train_time:5860022ms step_avg:4061.00ms step:1454/3200 train_loss:3.5902 train_time:5864018ms step_avg:4060.95ms step:1455/3200 train_loss:3.4699 train_time:5868014ms step_avg:4060.91ms step:1456/3200 train_loss:3.4408 train_time:5872018ms step_avg:4060.87ms step:1457/3200 train_loss:3.5129 train_time:5876033ms step_avg:4060.84ms step:1458/3200 train_loss:3.4656 train_time:5880063ms step_avg:4060.82ms step:1459/3200 train_loss:3.5143 train_time:5884069ms step_avg:4060.78ms step:1460/3200 train_loss:3.5480 train_time:5888083ms step_avg:4060.75ms step:1461/3200 train_loss:3.3556 train_time:5892106ms step_avg:4060.72ms step:1462/3200 train_loss:3.6672 train_time:5896139ms step_avg:4060.70ms step:1463/3200 train_loss:3.4546 train_time:5900184ms step_avg:4060.69ms step:1464/3200 train_loss:3.6530 train_time:5904244ms step_avg:4060.69ms step:1465/3200 train_loss:3.5399 train_time:5908320ms step_avg:4060.70ms step:1466/3200 train_loss:3.3618 train_time:5912415ms step_avg:4060.72ms step:1467/3200 train_loss:3.5795 train_time:5916533ms step_avg:4060.76ms step:1468/3200 train_loss:3.6074 train_time:5920681ms step_avg:4060.82ms step:1469/3200 train_loss:3.6137 train_time:5924858ms step_avg:4060.90ms step:1470/3200 train_loss:3.5335 train_time:5929042ms step_avg:4060.99ms step:1471/3200 train_loss:3.6477 train_time:5933241ms step_avg:4061.08ms 
step:1472/3200 train_loss:3.4373 train_time:5937435ms step_avg:4061.17ms step:1473/3200 train_loss:3.5611 train_time:5941581ms step_avg:4061.23ms step:1474/3200 train_loss:3.4352 train_time:5945704ms step_avg:4061.27ms step:1475/3200 train_loss:3.4107 train_time:5949803ms step_avg:4061.30ms step:1476/3200 train_loss:3.6428 train_time:5953888ms step_avg:4061.32ms step:1477/3200 train_loss:3.6200 train_time:5957969ms step_avg:4061.33ms step:1478/3200 train_loss:3.5299 train_time:5962042ms step_avg:4061.34ms step:1479/3200 train_loss:3.4646 train_time:5966107ms step_avg:4061.34ms step:1480/3200 train_loss:3.5066 train_time:5970166ms step_avg:4061.34ms step:1481/3200 train_loss:3.5288 train_time:5974224ms step_avg:4061.34ms step:1482/3200 train_loss:3.5131 train_time:5978281ms step_avg:4061.33ms step:1483/3200 train_loss:3.6767 train_time:5982341ms step_avg:4061.33ms step:1484/3200 train_loss:3.3848 train_time:5986401ms step_avg:4061.33ms step:1485/3200 train_loss:3.6408 train_time:5990458ms step_avg:4061.33ms step:1486/3200 train_loss:3.5710 train_time:5994525ms step_avg:4061.33ms step:1487/3200 train_loss:3.5178 train_time:5998598ms step_avg:4061.34ms step:1488/3200 train_loss:3.4698 train_time:6002681ms step_avg:4061.35ms step:1489/3200 train_loss:3.4988 train_time:6006765ms step_avg:4061.37ms step:1490/3200 train_loss:3.4028 train_time:6010854ms step_avg:4061.39ms step:1491/3200 train_loss:3.5803 train_time:6014950ms step_avg:4061.41ms step:1492/3200 train_loss:3.4330 train_time:6019055ms step_avg:4061.44ms step:1493/3200 train_loss:3.4420 train_time:6023186ms step_avg:4061.49ms step:1494/3200 train_loss:3.5198 train_time:6027328ms step_avg:4061.54ms step:1495/3200 train_loss:3.5185 train_time:6031508ms step_avg:4061.62ms step:1496/3200 train_loss:3.3992 train_time:6035687ms step_avg:4061.70ms step:1497/3200 train_loss:3.5675 train_time:6039884ms step_avg:4061.79ms step:1498/3200 train_loss:3.4738 train_time:6044045ms step_avg:4061.86ms step:1499/3200 
train_loss:3.1141 train_time:6048173ms step_avg:4061.90ms step:1500/3200 train_loss:3.6538 train_time:6052275ms step_avg:4061.93ms step:1500/3200 val_loss:3.4984 train_time:6052275ms step_avg:4061.93ms step:1501/3200 train_loss:3.5860 train_time:6056310ms step_avg:4061.91ms step:1502/3200 train_loss:3.6355 train_time:6060336ms step_avg:4061.89ms step:1503/3200 train_loss:3.6102 train_time:6064364ms step_avg:4061.86ms step:1504/3200 train_loss:3.6119 train_time:6068392ms step_avg:4061.84ms step:1505/3200 train_loss:3.1811 train_time:6072420ms step_avg:4061.82ms step:1506/3200 train_loss:3.5670 train_time:6076441ms step_avg:4061.79ms step:1507/3200 train_loss:3.4870 train_time:6080461ms step_avg:4061.76ms step:1508/3200 train_loss:3.4379 train_time:6084480ms step_avg:4061.74ms step:1509/3200 train_loss:3.4391 train_time:6088507ms step_avg:4061.71ms step:1510/3200 train_loss:3.4835 train_time:6092535ms step_avg:4061.69ms step:1511/3200 train_loss:3.4054 train_time:6096570ms step_avg:4061.67ms step:1512/3200 train_loss:3.3757 train_time:6100598ms step_avg:4061.65ms step:1513/3200 train_loss:3.5383 train_time:6104648ms step_avg:4061.64ms step:1514/3200 train_loss:3.4458 train_time:6108707ms step_avg:4061.64ms step:1515/3200 train_loss:3.6757 train_time:6112770ms step_avg:4061.64ms step:1516/3200 train_loss:3.4746 train_time:6116847ms step_avg:4061.65ms step:1517/3200 train_loss:3.5796 train_time:6120933ms step_avg:4061.67ms step:1518/3200 train_loss:3.2614 train_time:6125030ms step_avg:4061.69ms step:1519/3200 train_loss:3.3762 train_time:6129154ms step_avg:4061.73ms step:1520/3200 train_loss:3.4843 train_time:6133287ms step_avg:4061.78ms step:1521/3200 train_loss:3.4538 train_time:6137464ms step_avg:4061.86ms step:1522/3200 train_loss:3.4494 train_time:6141645ms step_avg:4061.93ms step:1523/3200 train_loss:3.8406 train_time:6145846ms step_avg:4062.03ms step:1524/3200 train_loss:3.3345 train_time:6150044ms step_avg:4062.12ms step:1525/3200 train_loss:3.4760 
train_time:6154238ms step_avg:4062.20ms step:1526/3200 train_loss:3.4386 train_time:6158417ms step_avg:4062.28ms step:1527/3200 train_loss:3.3482 train_time:6162573ms step_avg:4062.34ms step:1528/3200 train_loss:3.6779 train_time:6166672ms step_avg:4062.37ms step:1529/3200 train_loss:3.7015 train_time:6170736ms step_avg:4062.37ms step:1530/3200 train_loss:3.4301 train_time:6174773ms step_avg:4062.35ms step:1531/3200 train_loss:3.5612 train_time:6178797ms step_avg:4062.33ms step:1532/3200 train_loss:3.3188 train_time:6182805ms step_avg:4062.29ms step:1533/3200 train_loss:3.6622 train_time:6186815ms step_avg:4062.26ms step:1534/3200 train_loss:3.3916 train_time:6190827ms step_avg:4062.22ms step:1535/3200 train_loss:3.4345 train_time:6194836ms step_avg:4062.19ms step:1536/3200 train_loss:3.4967 train_time:6198834ms step_avg:4062.15ms step:1537/3200 train_loss:4.1809 train_time:6202827ms step_avg:4062.10ms step:1538/3200 train_loss:3.4742 train_time:6206818ms step_avg:4062.05ms step:1539/3200 train_loss:3.5103 train_time:6210801ms step_avg:4062.00ms step:1540/3200 train_loss:3.4457 train_time:6214786ms step_avg:4061.95ms step:1541/3200 train_loss:3.4542 train_time:6218769ms step_avg:4061.90ms step:1542/3200 train_loss:3.3877 train_time:6222748ms step_avg:4061.85ms step:1543/3200 train_loss:3.9717 train_time:6226732ms step_avg:4061.80ms step:1544/3200 train_loss:3.3336 train_time:6230706ms step_avg:4061.74ms step:1545/3200 train_loss:3.4715 train_time:6234683ms step_avg:4061.68ms step:1546/3200 train_loss:3.4150 train_time:6238660ms step_avg:4061.63ms step:1547/3200 train_loss:3.5113 train_time:6242638ms step_avg:4061.57ms step:1548/3200 train_loss:3.5318 train_time:6246619ms step_avg:4061.52ms step:1549/3200 train_loss:3.4755 train_time:6250596ms step_avg:4061.47ms step:1550/3200 train_loss:3.0196 train_time:6254580ms step_avg:4061.42ms step:1551/3200 train_loss:3.4952 train_time:6258563ms step_avg:4061.36ms step:1552/3200 train_loss:3.5687 train_time:6262546ms 
step_avg:4061.31ms step:1553/3200 train_loss:3.3991 train_time:6266535ms step_avg:4061.27ms step:1554/3200 train_loss:3.4461 train_time:6270527ms step_avg:4061.22ms step:1555/3200 train_loss:3.6675 train_time:6274522ms step_avg:4061.18ms step:1556/3200 train_loss:3.3947 train_time:6278515ms step_avg:4061.14ms step:1557/3200 train_loss:3.4177 train_time:6282516ms step_avg:4061.10ms step:1558/3200 train_loss:3.5224 train_time:6286529ms step_avg:4061.07ms step:1559/3200 train_loss:3.5199 train_time:6290542ms step_avg:4061.03ms step:1560/3200 train_loss:3.4867 train_time:6294563ms step_avg:4061.01ms step:1561/3200 train_loss:3.4032 train_time:6298563ms step_avg:4060.97ms step:1562/3200 train_loss:3.4790 train_time:6302568ms step_avg:4060.93ms step:1563/3200 train_loss:3.3253 train_time:6306580ms step_avg:4060.90ms step:1564/3200 train_loss:3.2886 train_time:6310596ms step_avg:4060.87ms step:1565/3200 train_loss:3.5047 train_time:6314631ms step_avg:4060.86ms step:1566/3200 train_loss:3.4926 train_time:6318677ms step_avg:4060.85ms step:1567/3200 train_loss:3.3705 train_time:6322730ms step_avg:4060.84ms step:1568/3200 train_loss:3.4714 train_time:6326808ms step_avg:4060.85ms step:1569/3200 train_loss:3.4930 train_time:6330902ms step_avg:4060.87ms step:1570/3200 train_loss:3.6669 train_time:6335007ms step_avg:4060.90ms step:1571/3200 train_loss:3.4920 train_time:6339140ms step_avg:4060.95ms step:1572/3200 train_loss:3.4358 train_time:6343301ms step_avg:4061.01ms step:1573/3200 train_loss:3.6599 train_time:6347483ms step_avg:4061.09ms step:1574/3200 train_loss:3.5260 train_time:6351683ms step_avg:4061.18ms step:1575/3200 train_loss:3.2938 train_time:6355878ms step_avg:4061.26ms step:1576/3200 train_loss:3.4368 train_time:6360016ms step_avg:4061.31ms step:1577/3200 train_loss:3.5297 train_time:6364105ms step_avg:4061.33ms step:1578/3200 train_loss:3.6295 train_time:6368155ms step_avg:4061.32ms step:1579/3200 train_loss:3.6359 train_time:6372184ms step_avg:4061.30ms 
step:1580/3200 train_loss:3.5061 train_time:6376190ms step_avg:4061.27ms step:1581/3200 train_loss:3.5486 train_time:6380203ms step_avg:4061.24ms step:1582/3200 train_loss:3.3651 train_time:6384209ms step_avg:4061.20ms step:1583/3200 train_loss:3.6835 train_time:6388207ms step_avg:4061.16ms step:1584/3200 train_loss:3.6480 train_time:6392198ms step_avg:4061.12ms step:1585/3200 train_loss:3.4096 train_time:6396177ms step_avg:4061.06ms step:1586/3200 train_loss:3.7768 train_time:6400151ms step_avg:4061.01ms step:1587/3200 train_loss:3.4040 train_time:6404136ms step_avg:4060.96ms step:1588/3200 train_loss:3.6347 train_time:6408124ms step_avg:4060.91ms step:1589/3200 train_loss:3.5746 train_time:6412108ms step_avg:4060.87ms step:1590/3200 train_loss:3.6108 train_time:6416094ms step_avg:4060.82ms step:1591/3200 train_loss:3.5178 train_time:6420076ms step_avg:4060.77ms step:1592/3200 train_loss:3.4039 train_time:6424053ms step_avg:4060.72ms step:1593/3200 train_loss:3.2334 train_time:6428030ms step_avg:4060.66ms step:1594/3200 train_loss:3.4701 train_time:6432004ms step_avg:4060.61ms step:1595/3200 train_loss:3.2050 train_time:6435982ms step_avg:4060.56ms step:1596/3200 train_loss:3.4632 train_time:6439960ms step_avg:4060.50ms step:1597/3200 train_loss:3.5076 train_time:6443939ms step_avg:4060.45ms step:1598/3200 train_loss:3.1572 train_time:6447920ms step_avg:4060.40ms step:1599/3200 train_loss:3.4715 train_time:6451899ms step_avg:4060.35ms step:1600/3200 train_loss:3.2953 train_time:6455878ms step_avg:4060.30ms step:1601/3200 train_loss:3.4144 train_time:6459845ms step_avg:4060.24ms step:1602/3200 train_loss:3.4803 train_time:6463822ms step_avg:4060.19ms step:1603/3200 train_loss:3.6872 train_time:6467804ms step_avg:4060.14ms step:1604/3200 train_loss:3.4672 train_time:6471784ms step_avg:4060.09ms step:1605/3200 train_loss:3.5095 train_time:6475770ms step_avg:4060.04ms step:1606/3200 train_loss:3.6753 train_time:6479757ms step_avg:4060.00ms step:1607/3200 
train_loss:3.4893 train_time:6483748ms step_avg:4059.96ms step:1608/3200 train_loss:3.2664 train_time:6487739ms step_avg:4059.91ms step:1609/3200 train_loss:2.9772 train_time:6491718ms step_avg:4059.86ms step:1610/3200 train_loss:3.4630 train_time:6495699ms step_avg:4059.81ms step:1611/3200 train_loss:3.3433 train_time:6499687ms step_avg:4059.77ms step:1612/3200 train_loss:3.4785 train_time:6503682ms step_avg:4059.73ms step:1613/3200 train_loss:3.1795 train_time:6507675ms step_avg:4059.69ms step:1614/3200 train_loss:3.5586 train_time:6511678ms step_avg:4059.65ms step:1615/3200 train_loss:3.4127 train_time:6515684ms step_avg:4059.62ms step:1616/3200 train_loss:3.6317 train_time:6519699ms step_avg:4059.59ms step:1617/3200 train_loss:3.2819 train_time:6523716ms step_avg:4059.56ms step:1618/3200 train_loss:3.3167 train_time:6527716ms step_avg:4059.53ms step:1619/3200 train_loss:3.3786 train_time:6531730ms step_avg:4059.50ms step:1620/3200 train_loss:4.3465 train_time:6535753ms step_avg:4059.47ms step:1621/3200 train_loss:3.5322 train_time:6539786ms step_avg:4059.46ms step:1622/3200 train_loss:3.1459 train_time:6543825ms step_avg:4059.45ms step:1623/3200 train_loss:3.3872 train_time:6547886ms step_avg:4059.45ms step:1624/3200 train_loss:3.0661 train_time:6551966ms step_avg:4059.46ms step:1625/3200 train_loss:3.2503 train_time:6556069ms step_avg:4059.49ms step:1625/3200 val_loss:3.4844 train_time:6556069ms step_avg:4059.49ms step:1626/3200 train_loss:3.5223 train_time:6560116ms step_avg:4059.48ms step:1627/3200 train_loss:3.3498 train_time:6564135ms step_avg:4059.45ms step:1628/3200 train_loss:2.9737 train_time:6568141ms step_avg:4059.42ms step:1629/3200 train_loss:3.4891 train_time:6572150ms step_avg:4059.39ms step:1630/3200 train_loss:3.6047 train_time:6576159ms step_avg:4059.36ms step:1631/3200 train_loss:3.5923 train_time:6580164ms step_avg:4059.32ms step:1632/3200 train_loss:3.5146 train_time:6584158ms step_avg:4059.28ms step:1633/3200 train_loss:3.3140 
train_time:6588145ms step_avg:4059.24ms step:1634/3200 train_loss:3.2649 train_time:6592127ms step_avg:4059.19ms step:1635/3200 train_loss:3.4774 train_time:6596105ms step_avg:4059.14ms step:1636/3200 train_loss:3.4241 train_time:6600077ms step_avg:4059.09ms step:1637/3200 train_loss:3.4376 train_time:6604050ms step_avg:4059.04ms step:1638/3200 train_loss:3.5152 train_time:6608023ms step_avg:4058.98ms step:1639/3200 train_loss:3.7285 train_time:6611996ms step_avg:4058.93ms step:1640/3200 train_loss:3.4807 train_time:6615983ms step_avg:4058.89ms step:1641/3200 train_loss:3.4839 train_time:6619976ms step_avg:4058.84ms step:1642/3200 train_loss:3.4815 train_time:6623966ms step_avg:4058.80ms step:1643/3200 train_loss:3.3872 train_time:6627956ms step_avg:4058.76ms step:1644/3200 train_loss:3.5047 train_time:6631951ms step_avg:4058.72ms step:1645/3200 train_loss:3.6615 train_time:6635946ms step_avg:4058.68ms step:1646/3200 train_loss:3.4835 train_time:6639944ms step_avg:4058.65ms step:1647/3200 train_loss:3.5195 train_time:6643927ms step_avg:4058.60ms step:1648/3200 train_loss:3.3298 train_time:6647908ms step_avg:4058.55ms step:1649/3200 train_loss:3.4119 train_time:6651889ms step_avg:4058.50ms step:1650/3200 train_loss:3.4954 train_time:6655872ms step_avg:4058.46ms step:1651/3200 train_loss:3.5794 train_time:6659862ms step_avg:4058.42ms step:1652/3200 train_loss:3.6754 train_time:6663855ms step_avg:4058.38ms step:1653/3200 train_loss:3.6016 train_time:6667850ms step_avg:4058.34ms step:1654/3200 train_loss:3.6284 train_time:6671852ms step_avg:4058.30ms step:1655/3200 train_loss:3.7534 train_time:6675858ms step_avg:4058.27ms step:1656/3200 train_loss:3.3817 train_time:6679870ms step_avg:4058.24ms step:1657/3200 train_loss:3.3843 train_time:6683888ms step_avg:4058.22ms step:1658/3200 train_loss:3.4839 train_time:6687907ms step_avg:4058.20ms step:1659/3200 train_loss:3.4944 train_time:6691913ms step_avg:4058.16ms step:1660/3200 train_loss:3.5450 train_time:6695925ms 
step_avg:4058.14ms step:1661/3200 train_loss:3.4677 train_time:6699950ms step_avg:4058.12ms step:1662/3200 train_loss:3.4870 train_time:6703982ms step_avg:4058.10ms step:1663/3200 train_loss:3.6424 train_time:6708025ms step_avg:4058.09ms step:1664/3200 train_loss:3.3087 train_time:6712086ms step_avg:4058.09ms step:1665/3200 train_loss:3.8370 train_time:6716158ms step_avg:4058.10ms step:1666/3200 train_loss:3.4848 train_time:6720246ms step_avg:4058.12ms step:1667/3200 train_loss:3.5355 train_time:6724354ms step_avg:4058.15ms step:1668/3200 train_loss:3.5784 train_time:6728493ms step_avg:4058.20ms step:1669/3200 train_loss:3.4777 train_time:6732663ms step_avg:4058.27ms step:1670/3200 train_loss:3.5019 train_time:6736844ms step_avg:4058.34ms step:1671/3200 train_loss:3.5850 train_time:6741040ms step_avg:4058.42ms step:1672/3200 train_loss:3.5624 train_time:6745185ms step_avg:4058.47ms step:1673/3200 train_loss:3.4394 train_time:6749279ms step_avg:4058.50ms step:1674/3200 train_loss:3.8391 train_time:6753342ms step_avg:4058.50ms step:1675/3200 train_loss:3.4773 train_time:6757374ms step_avg:4058.48ms step:1676/3200 train_loss:3.5410 train_time:6761390ms step_avg:4058.46ms step:1677/3200 train_loss:3.4972 train_time:6765389ms step_avg:4058.42ms step:1678/3200 train_loss:3.5582 train_time:6769406ms step_avg:4058.40ms step:1679/3200 train_loss:3.3766 train_time:6773412ms step_avg:4058.37ms step:1680/3200 train_loss:3.5964 train_time:6777409ms step_avg:4058.33ms step:1681/3200 train_loss:3.6400 train_time:6781400ms step_avg:4058.29ms step:1682/3200 train_loss:3.7195 train_time:6785386ms step_avg:4058.25ms step:1683/3200 train_loss:3.2509 train_time:6789368ms step_avg:4058.20ms step:1684/3200 train_loss:3.4292 train_time:6793348ms step_avg:4058.15ms step:1685/3200 train_loss:3.4985 train_time:6797320ms step_avg:4058.10ms step:1686/3200 train_loss:3.4867 train_time:6801295ms step_avg:4058.05ms step:1687/3200 train_loss:3.6969 train_time:6805269ms step_avg:4058.00ms 
step:1688/3200 train_loss:3.6772 train_time:6809245ms step_avg:4057.95ms step:1689/3200 train_loss:3.2566 train_time:6813218ms step_avg:4057.90ms step:1690/3200 train_loss:3.4254 train_time:6817190ms step_avg:4057.85ms step:1691/3200 train_loss:3.3211 train_time:6821161ms step_avg:4057.80ms step:1692/3200 train_loss:3.5094 train_time:6825133ms step_avg:4057.75ms step:1693/3200 train_loss:3.4305 train_time:6829112ms step_avg:4057.70ms step:1694/3200 train_loss:3.5443 train_time:6833079ms step_avg:4057.65ms step:1695/3200 train_loss:3.4377 train_time:6837056ms step_avg:4057.60ms step:1696/3200 train_loss:3.3684 train_time:6841032ms step_avg:4057.55ms step:1697/3200 train_loss:3.2849 train_time:6845011ms step_avg:4057.51ms step:1698/3200 train_loss:3.5371 train_time:6848995ms step_avg:4057.46ms step:1699/3200 train_loss:3.4689 train_time:6852981ms step_avg:4057.42ms step:1700/3200 train_loss:3.4213 train_time:6856970ms step_avg:4057.38ms step:1701/3200 train_loss:3.1410 train_time:6860964ms step_avg:4057.34ms step:1702/3200 train_loss:3.1200 train_time:6864962ms step_avg:4057.31ms step:1703/3200 train_loss:3.4045 train_time:6868965ms step_avg:4057.27ms step:1704/3200 train_loss:3.5079 train_time:6872973ms step_avg:4057.25ms step:1705/3200 train_loss:3.4696 train_time:6876985ms step_avg:4057.22ms step:1706/3200 train_loss:3.4356 train_time:6880992ms step_avg:4057.19ms step:1707/3200 train_loss:3.4225 train_time:6884992ms step_avg:4057.16ms step:1708/3200 train_loss:3.3428 train_time:6888999ms step_avg:4057.13ms step:1709/3200 train_loss:3.4006 train_time:6893018ms step_avg:4057.10ms step:1710/3200 train_loss:3.3514 train_time:6897050ms step_avg:4057.09ms step:1711/3200 train_loss:3.4218 train_time:6901084ms step_avg:4057.07ms step:1712/3200 train_loss:3.5388 train_time:6905136ms step_avg:4057.07ms step:1713/3200 train_loss:3.4514 train_time:6909199ms step_avg:4057.08ms step:1714/3200 train_loss:3.5120 train_time:6913283ms step_avg:4057.09ms step:1715/3200 
train_loss:3.5305 train_time:6917382ms step_avg:4057.12ms step:1716/3200 train_loss:3.4545 train_time:6921496ms step_avg:4057.15ms step:1717/3200 train_loss:3.5293 train_time:6925657ms step_avg:4057.21ms step:1718/3200 train_loss:3.5206 train_time:6929843ms step_avg:4057.29ms step:1719/3200 train_loss:3.4809 train_time:6934038ms step_avg:4057.37ms step:1720/3200 train_loss:3.4211 train_time:6938220ms step_avg:4057.44ms step:1721/3200 train_loss:3.7220 train_time:6942384ms step_avg:4057.50ms step:1722/3200 train_loss:3.3587 train_time:6946516ms step_avg:4057.54ms step:1723/3200 train_loss:3.4684 train_time:6950626ms step_avg:4057.58ms step:1724/3200 train_loss:3.5827 train_time:6954717ms step_avg:4057.59ms step:1725/3200 train_loss:3.7583 train_time:6958800ms step_avg:4057.61ms step:1726/3200 train_loss:3.7359 train_time:6962874ms step_avg:4057.62ms step:1727/3200 train_loss:3.6399 train_time:6966939ms step_avg:4057.62ms step:1728/3200 train_loss:3.4010 train_time:6971000ms step_avg:4057.63ms step:1729/3200 train_loss:3.4455 train_time:6975058ms step_avg:4057.63ms step:1730/3200 train_loss:3.3377 train_time:6979108ms step_avg:4057.62ms step:1731/3200 train_loss:3.6174 train_time:6983162ms step_avg:4057.62ms step:1732/3200 train_loss:3.5266 train_time:6987212ms step_avg:4057.61ms step:1733/3200 train_loss:3.4865 train_time:6991265ms step_avg:4057.61ms step:1734/3200 train_loss:3.4719 train_time:6995320ms step_avg:4057.61ms step:1735/3200 train_loss:3.4568 train_time:6999376ms step_avg:4057.61ms step:1736/3200 train_loss:3.4382 train_time:7003440ms step_avg:4057.61ms step:1737/3200 train_loss:3.4927 train_time:7007517ms step_avg:4057.62ms step:1738/3200 train_loss:3.5228 train_time:7011598ms step_avg:4057.64ms step:1739/3200 train_loss:3.4174 train_time:7015692ms step_avg:4057.66ms step:1740/3200 train_loss:3.4596 train_time:7019793ms step_avg:4057.68ms step:1741/3200 train_loss:3.5923 train_time:7023911ms step_avg:4057.72ms step:1742/3200 train_loss:3.5128 
train_time:7028045ms step_avg:4057.76ms step:1743/3200 train_loss:3.3948 train_time:7032190ms step_avg:4057.81ms step:1744/3200 train_loss:3.3469 train_time:7036369ms step_avg:4057.88ms step:1745/3200 train_loss:3.2561 train_time:7040550ms step_avg:4057.95ms step:1746/3200 train_loss:3.4497 train_time:7044749ms step_avg:4058.04ms step:1747/3200 train_loss:3.4208 train_time:7048944ms step_avg:4058.11ms step:1748/3200 train_loss:3.4923 train_time:7053102ms step_avg:4058.17ms step:1749/3200 train_loss:3.4760 train_time:7057232ms step_avg:4058.21ms step:1750/3200 train_loss:3.5399 train_time:7061340ms step_avg:4058.24ms step:1750/3200 val_loss:3.4667 train_time:7061340ms step_avg:4058.24ms step:1751/3200 train_loss:3.2749 train_time:7065414ms step_avg:4058.25ms step:1752/3200 train_loss:3.6455 train_time:7069482ms step_avg:4058.26ms step:1753/3200 train_loss:3.7111 train_time:7073547ms step_avg:4058.26ms step:1754/3200 train_loss:3.4542 train_time:7077613ms step_avg:4058.26ms step:1755/3200 train_loss:3.5058 train_time:7081683ms step_avg:4058.27ms step:1756/3200 train_loss:3.5544 train_time:7085757ms step_avg:4058.28ms step:1757/3200 train_loss:3.4711 train_time:7089841ms step_avg:4058.29ms step:1758/3200 train_loss:3.5682 train_time:7093930ms step_avg:4058.31ms step:1759/3200 train_loss:3.4518 train_time:7098034ms step_avg:4058.34ms step:1760/3200 train_loss:3.4086 train_time:7102145ms step_avg:4058.37ms step:1761/3200 train_loss:3.8230 train_time:7106276ms step_avg:4058.41ms step:1762/3200 train_loss:3.4316 train_time:7110420ms step_avg:4058.46ms step:1763/3200 train_loss:3.3847 train_time:7114580ms step_avg:4058.52ms step:1764/3200 train_loss:3.5772 train_time:7118760ms step_avg:4058.59ms step:1765/3200 train_loss:3.5610 train_time:7122962ms step_avg:4058.67ms step:1766/3200 train_loss:3.4819 train_time:7127161ms step_avg:4058.75ms step:1767/3200 train_loss:3.5444 train_time:7131354ms step_avg:4058.82ms step:1768/3200 train_loss:3.5944 train_time:7135495ms 
step_avg:4058.87ms step:1769/3200 train_loss:3.3319 train_time:7139603ms step_avg:4058.90ms step:1770/3200 train_loss:3.5652 train_time:7143685ms step_avg:4058.91ms step:1771/3200 train_loss:3.4001 train_time:7147747ms step_avg:4058.91ms step:1772/3200 train_loss:3.2799 train_time:7151791ms step_avg:4058.91ms step:1773/3200 train_loss:3.5544 train_time:7155828ms step_avg:4058.89ms step:1774/3200 train_loss:3.5641 train_time:7159853ms step_avg:4058.87ms step:1775/3200 train_loss:3.4535 train_time:7163872ms step_avg:4058.85ms step:1776/3200 train_loss:3.4956 train_time:7167879ms step_avg:4058.82ms step:1777/3200 train_loss:3.5049 train_time:7171884ms step_avg:4058.79ms step:1778/3200 train_loss:3.3615 train_time:7175889ms step_avg:4058.76ms step:1779/3200 train_loss:3.6350 train_time:7179889ms step_avg:4058.73ms step:1780/3200 train_loss:3.6174 train_time:7183882ms step_avg:4058.69ms step:1781/3200 train_loss:3.4543 train_time:7187882ms step_avg:4058.66ms step:1782/3200 train_loss:3.6677 train_time:7191881ms step_avg:4058.62ms step:1783/3200 train_loss:3.4469 train_time:7195878ms step_avg:4058.59ms step:1784/3200 train_loss:3.6330 train_time:7199878ms step_avg:4058.56ms step:1785/3200 train_loss:3.3994 train_time:7203877ms step_avg:4058.52ms step:1786/3200 train_loss:3.4326 train_time:7207877ms step_avg:4058.49ms step:1787/3200 train_loss:3.4943 train_time:7211884ms step_avg:4058.46ms step:1788/3200 train_loss:3.5058 train_time:7215889ms step_avg:4058.43ms step:1789/3200 train_loss:3.5145 train_time:7219894ms step_avg:4058.40ms step:1790/3200 train_loss:3.3423 train_time:7223908ms step_avg:4058.38ms step:1791/3200 train_loss:3.5136 train_time:7227929ms step_avg:4058.35ms step:1792/3200 train_loss:3.5071 train_time:7231956ms step_avg:4058.34ms step:1793/3200 train_loss:3.3371 train_time:7235992ms step_avg:4058.32ms step:1794/3200 train_loss:3.4963 train_time:7240027ms step_avg:4058.31ms step:1795/3200 train_loss:3.6314 train_time:7244078ms step_avg:4058.31ms 
step:1796/3200 train_loss:3.5808 train_time:7248139ms step_avg:4058.31ms step:1797/3200 train_loss:3.4151 train_time:7252218ms step_avg:4058.32ms step:1798/3200 train_loss:3.4327 train_time:7256300ms step_avg:4058.33ms step:1799/3200 train_loss:3.2620 train_time:7260402ms step_avg:4058.36ms step:1800/3200 train_loss:3.5201 train_time:7264522ms step_avg:4058.39ms step:1801/3200 train_loss:3.7042 train_time:7268669ms step_avg:4058.44ms step:1802/3200 train_loss:3.4746 train_time:7272851ms step_avg:4058.51ms step:1803/3200 train_loss:3.4613 train_time:7277034ms step_avg:4058.58ms step:1804/3200 train_loss:3.6708 train_time:7281234ms step_avg:4058.66ms step:1805/3200 train_loss:3.4801 train_time:7285427ms step_avg:4058.73ms step:1806/3200 train_loss:3.3834 train_time:7289549ms step_avg:4058.77ms step:1807/3200 train_loss:3.7371 train_time:7293602ms step_avg:4058.77ms step:1808/3200 train_loss:3.4536 train_time:7297617ms step_avg:4058.74ms step:1809/3200 train_loss:3.8314 train_time:7301621ms step_avg:4058.71ms step:1810/3200 train_loss:3.4448 train_time:7305603ms step_avg:4058.67ms step:1811/3200 train_loss:3.4751 train_time:7309593ms step_avg:4058.63ms step:1812/3200 train_loss:3.4249 train_time:7313569ms step_avg:4058.58ms step:1813/3200 train_loss:3.3918 train_time:7317535ms step_avg:4058.53ms step:1814/3200 train_loss:3.7910 train_time:7321493ms step_avg:4058.48ms step:1815/3200 train_loss:3.5112 train_time:7325442ms step_avg:4058.42ms step:1816/3200 train_loss:3.4713 train_time:7329389ms step_avg:4058.35ms step:1817/3200 train_loss:3.5056 train_time:7333330ms step_avg:4058.29ms step:1818/3200 train_loss:3.4603 train_time:7337267ms step_avg:4058.22ms step:1819/3200 train_loss:3.4224 train_time:7341204ms step_avg:4058.16ms step:1820/3200 train_loss:3.5273 train_time:7345140ms step_avg:4058.09ms step:1821/3200 train_loss:3.7760 train_time:7349071ms step_avg:4058.02ms step:1822/3200 train_loss:3.3705 train_time:7353003ms step_avg:4057.95ms step:1823/3200 
train_loss:3.4696 train_time:7356932ms step_avg:4057.88ms step:1824/3200 train_loss:2.8919 train_time:7360862ms step_avg:4057.81ms step:1825/3200 train_loss:3.4634 train_time:7364788ms step_avg:4057.73ms step:1826/3200 train_loss:3.5063 train_time:7368717ms step_avg:4057.66ms step:1827/3200 train_loss:3.7085 train_time:7372645ms step_avg:4057.59ms step:1828/3200 train_loss:3.4396 train_time:7376573ms step_avg:4057.52ms step:1829/3200 train_loss:3.2739 train_time:7380498ms step_avg:4057.45ms step:1830/3200 train_loss:3.5231 train_time:7384423ms step_avg:4057.38ms step:1831/3200 train_loss:3.4654 train_time:7388350ms step_avg:4057.30ms step:1832/3200 train_loss:3.3454 train_time:7392276ms step_avg:4057.23ms step:1833/3200 train_loss:3.2773 train_time:7396205ms step_avg:4057.16ms step:1834/3200 train_loss:3.4800 train_time:7400131ms step_avg:4057.09ms step:1835/3200 train_loss:3.5348 train_time:7404059ms step_avg:4057.02ms step:1836/3200 train_loss:3.4354 train_time:7407987ms step_avg:4056.95ms step:1837/3200 train_loss:3.4225 train_time:7411916ms step_avg:4056.88ms step:1838/3200 train_loss:3.4050 train_time:7415846ms step_avg:4056.81ms step:1839/3200 train_loss:3.4400 train_time:7419775ms step_avg:4056.74ms step:1840/3200 train_loss:3.6280 train_time:7423706ms step_avg:4056.67ms step:1841/3200 train_loss:3.5201 train_time:7427639ms step_avg:4056.60ms step:1842/3200 train_loss:3.5896 train_time:7431573ms step_avg:4056.54ms step:1843/3200 train_loss:3.6226 train_time:7435507ms step_avg:4056.47ms step:1844/3200 train_loss:3.6753 train_time:7439444ms step_avg:4056.40ms step:1845/3200 train_loss:3.3823 train_time:7443380ms step_avg:4056.34ms step:1846/3200 train_loss:3.3841 train_time:7447319ms step_avg:4056.27ms step:1847/3200 train_loss:3.5533 train_time:7451259ms step_avg:4056.21ms step:1848/3200 train_loss:3.7915 train_time:7455202ms step_avg:4056.15ms step:1849/3200 train_loss:3.3148 train_time:7459143ms step_avg:4056.09ms step:1850/3200 train_loss:3.4212 
train_time:7463089ms step_avg:4056.03ms step:1851/3200 train_loss:3.4181 train_time:7467029ms step_avg:4055.96ms step:1852/3200 train_loss:3.3728 train_time:7470971ms step_avg:4055.90ms step:1853/3200 train_loss:3.4615 train_time:7474920ms step_avg:4055.84ms step:1854/3200 train_loss:3.2985 train_time:7478871ms step_avg:4055.79ms step:1855/3200 train_loss:3.5117 train_time:7482824ms step_avg:4055.73ms step:1856/3200 train_loss:3.4787 train_time:7486777ms step_avg:4055.68ms step:1857/3200 train_loss:3.4103 train_time:7490734ms step_avg:4055.62ms step:1858/3200 train_loss:3.4690 train_time:7494692ms step_avg:4055.57ms step:1859/3200 train_loss:3.5638 train_time:7498654ms step_avg:4055.52ms step:1860/3200 train_loss:3.5801 train_time:7502619ms step_avg:4055.47ms step:1861/3200 train_loss:3.4296 train_time:7506586ms step_avg:4055.42ms step:1862/3200 train_loss:3.5203 train_time:7510554ms step_avg:4055.37ms step:1863/3200 train_loss:3.4034 train_time:7514525ms step_avg:4055.33ms step:1864/3200 train_loss:3.6328 train_time:7518500ms step_avg:4055.29ms step:1865/3200 train_loss:3.4684 train_time:7522481ms step_avg:4055.25ms step:1866/3200 train_loss:3.3952 train_time:7526463ms step_avg:4055.21ms step:1867/3200 train_loss:3.4990 train_time:7530430ms step_avg:4055.16ms step:1868/3200 train_loss:3.5631 train_time:7534398ms step_avg:4055.11ms step:1869/3200 train_loss:3.5628 train_time:7538376ms step_avg:4055.07ms step:1870/3200 train_loss:3.5422 train_time:7542357ms step_avg:4055.03ms step:1871/3200 train_loss:3.3919 train_time:7546341ms step_avg:4054.99ms step:1872/3200 train_loss:3.5694 train_time:7550329ms step_avg:4054.96ms step:1873/3200 train_loss:3.4180 train_time:7554329ms step_avg:4054.93ms step:1874/3200 train_loss:3.7350 train_time:7558335ms step_avg:4054.90ms step:1875/3200 train_loss:3.3914 train_time:7562338ms step_avg:4054.87ms step:1875/3200 val_loss:3.4520 train_time:7562338ms step_avg:4054.87ms step:1876/3200 train_loss:3.5701 train_time:7566437ms 
step_avg:4054.90ms step:1877/3200 train_loss:3.4618 train_time:7570553ms step_avg:4054.93ms step:1878/3200 train_loss:3.5161 train_time:7574706ms step_avg:4054.98ms step:1879/3200 train_loss:3.4716 train_time:7578888ms step_avg:4055.05ms step:1880/3200 train_loss:3.4955 train_time:7583069ms step_avg:4055.12ms step:1881/3200 train_loss:3.3035 train_time:7587292ms step_avg:4055.21ms step:1882/3200 train_loss:4.4456 train_time:7591487ms step_avg:4055.28ms step:1883/3200 train_loss:3.4028 train_time:7595661ms step_avg:4055.34ms step:1884/3200 train_loss:3.7950 train_time:7599811ms step_avg:4055.40ms step:1885/3200 train_loss:3.4604 train_time:7603942ms step_avg:4055.44ms step:1886/3200 train_loss:3.3834 train_time:7608059ms step_avg:4055.47ms step:1887/3200 train_loss:3.4544 train_time:7612174ms step_avg:4055.50ms step:1888/3200 train_loss:3.3784 train_time:7616286ms step_avg:4055.53ms step:1889/3200 train_loss:3.4150 train_time:7620392ms step_avg:4055.56ms step:1890/3200 train_loss:3.4421 train_time:7624496ms step_avg:4055.58ms step:1891/3200 train_loss:3.4529 train_time:7628607ms step_avg:4055.61ms step:1892/3200 train_loss:3.4212 train_time:7632712ms step_avg:4055.64ms step:1893/3200 train_loss:3.5473 train_time:7636826ms step_avg:4055.67ms step:1894/3200 train_loss:3.5618 train_time:7640947ms step_avg:4055.70ms step:1895/3200 train_loss:3.3862 train_time:7645082ms step_avg:4055.75ms step:1896/3200 train_loss:3.6281 train_time:7649225ms step_avg:4055.79ms step:1897/3200 train_loss:3.4758 train_time:7653384ms step_avg:4055.85ms step:1898/3200 train_loss:3.5436 train_time:7657563ms step_avg:4055.91ms step:1899/3200 train_loss:3.4260 train_time:7661744ms step_avg:4055.98ms step:1900/3200 train_loss:3.3736 train_time:7665950ms step_avg:4056.06ms step:1901/3200 train_loss:3.4002 train_time:7670172ms step_avg:4056.15ms step:1902/3200 train_loss:3.5591 train_time:7674365ms step_avg:4056.22ms step:1903/3200 train_loss:3.4785 train_time:7678557ms step_avg:4056.29ms 
step:1904/3200 train_loss:3.4393 train_time:7682717ms step_avg:4056.34ms step:1905/3200 train_loss:3.2620 train_time:7686861ms step_avg:4056.39ms step:1906/3200 train_loss:3.3316 train_time:7690992ms step_avg:4056.43ms step:1907/3200 train_loss:3.3854 train_time:7695112ms step_avg:4056.46ms step:1908/3200 train_loss:3.4993 train_time:7699230ms step_avg:4056.50ms step:1909/3200 train_loss:3.3833 train_time:7703348ms step_avg:4056.53ms step:1910/3200 train_loss:3.5961 train_time:7707465ms step_avg:4056.56ms step:1911/3200 train_loss:3.4514 train_time:7711582ms step_avg:4056.59ms step:1912/3200 train_loss:3.4733 train_time:7715703ms step_avg:4056.63ms step:1913/3200 train_loss:3.4902 train_time:7719833ms step_avg:4056.66ms step:1914/3200 train_loss:3.2732 train_time:7723970ms step_avg:4056.71ms step:1915/3200 train_loss:3.4490 train_time:7728120ms step_avg:4056.76ms step:1916/3200 train_loss:3.5106 train_time:7732278ms step_avg:4056.81ms step:1917/3200 train_loss:3.4282 train_time:7736458ms step_avg:4056.87ms step:1918/3200 train_loss:3.3984 train_time:7740639ms step_avg:4056.94ms step:1919/3200 train_loss:2.9326 train_time:7744839ms step_avg:4057.01ms step:1920/3200 train_loss:3.3371 train_time:7749041ms step_avg:4057.09ms step:1921/3200 train_loss:3.5798 train_time:7753205ms step_avg:4057.15ms step:1922/3200 train_loss:3.6270 train_time:7757360ms step_avg:4057.20ms step:1923/3200 train_loss:3.4664 train_time:7761501ms step_avg:4057.24ms step:1924/3200 train_loss:3.4303 train_time:7765620ms step_avg:4057.27ms step:1925/3200 train_loss:3.6808 train_time:7769747ms step_avg:4057.31ms step:1926/3200 train_loss:3.3796 train_time:7773856ms step_avg:4057.34ms step:1927/3200 train_loss:3.4482 train_time:7777956ms step_avg:4057.36ms step:1928/3200 train_loss:3.4641 train_time:7782051ms step_avg:4057.38ms step:1929/3200 train_loss:3.5280 train_time:7786146ms step_avg:4057.40ms step:1930/3200 train_loss:3.1670 train_time:7790244ms step_avg:4057.42ms step:1931/3200 
train_loss:3.3429 train_time:7794343ms step_avg:4057.44ms step:1932/3200 train_loss:3.2915 train_time:7798443ms step_avg:4057.46ms step:1933/3200 train_loss:3.3826 train_time:7802552ms step_avg:4057.49ms step:1934/3200 train_loss:3.1555 train_time:7806669ms step_avg:4057.52ms step:1935/3200 train_loss:3.3734 train_time:7810803ms step_avg:4057.56ms step:1936/3200 train_loss:3.6182 train_time:7814943ms step_avg:4057.60ms step:1937/3200 train_loss:3.4790 train_time:7819097ms step_avg:4057.65ms step:1938/3200 train_loss:3.5797 train_time:7823271ms step_avg:4057.71ms step:1939/3200 train_loss:3.4104 train_time:7827398ms step_avg:4057.75ms step:1940/3200 train_loss:3.4020 train_time:7831507ms step_avg:4057.78ms step:1941/3200 train_loss:3.4892 train_time:7835597ms step_avg:4057.79ms step:1942/3200 train_loss:3.4250 train_time:7839674ms step_avg:4057.80ms step:1943/3200 train_loss:3.5280 train_time:7843744ms step_avg:4057.81ms step:1944/3200 train_loss:3.3815 train_time:7847802ms step_avg:4057.81ms step:1945/3200 train_loss:3.5636 train_time:7851856ms step_avg:4057.81ms step:1946/3200 train_loss:3.6998 train_time:7855907ms step_avg:4057.80ms step:1947/3200 train_loss:3.4671 train_time:7859953ms step_avg:4057.80ms step:1948/3200 train_loss:3.4053 train_time:7864001ms step_avg:4057.79ms step:1949/3200 train_loss:3.6212 train_time:7868045ms step_avg:4057.78ms step:1950/3200 train_loss:3.2981 train_time:7872090ms step_avg:4057.78ms step:1951/3200 train_loss:3.4861 train_time:7876141ms step_avg:4057.77ms step:1952/3200 train_loss:3.4601 train_time:7880194ms step_avg:4057.77ms step:1953/3200 train_loss:3.3507 train_time:7884252ms step_avg:4057.77ms step:1954/3200 train_loss:3.3756 train_time:7888314ms step_avg:4057.77ms step:1955/3200 train_loss:3.3320 train_time:7892379ms step_avg:4057.78ms step:1956/3200 train_loss:3.3687 train_time:7896454ms step_avg:4057.79ms step:1957/3200 train_loss:3.3561 train_time:7900536ms step_avg:4057.80ms step:1958/3200 train_loss:3.5785 
train_time:7904628ms step_avg:4057.82ms step:1959/3200 train_loss:3.5726 train_time:7908730ms step_avg:4057.84ms step:1960/3200 train_loss:3.4269 train_time:7912852ms step_avg:4057.87ms step:1961/3200 train_loss:3.3047 train_time:7916990ms step_avg:4057.91ms step:1962/3200 train_loss:3.2580 train_time:7921151ms step_avg:4057.97ms step:1963/3200 train_loss:3.6863 train_time:7925331ms step_avg:4058.03ms step:1964/3200 train_loss:3.4592 train_time:7929513ms step_avg:4058.09ms step:1965/3200 train_loss:3.5648 train_time:7933712ms step_avg:4058.16ms step:1966/3200 train_loss:3.3487 train_time:7937904ms step_avg:4058.23ms step:1967/3200 train_loss:3.5411 train_time:7942028ms step_avg:4058.27ms step:1968/3200 train_loss:3.5188 train_time:7946119ms step_avg:4058.28ms step:1969/3200 train_loss:3.4538 train_time:7950185ms step_avg:4058.29ms step:1970/3200 train_loss:3.3830 train_time:7954226ms step_avg:4058.28ms step:1971/3200 train_loss:3.6444 train_time:7958257ms step_avg:4058.26ms step:1972/3200 train_loss:3.3647 train_time:7962276ms step_avg:4058.24ms step:1973/3200 train_loss:3.4048 train_time:7966280ms step_avg:4058.22ms step:1974/3200 train_loss:3.4826 train_time:7970296ms step_avg:4058.20ms step:1975/3200 train_loss:3.2916 train_time:7974315ms step_avg:4058.18ms step:1976/3200 train_loss:3.0657 train_time:7978329ms step_avg:4058.15ms step:1977/3200 train_loss:3.1363 train_time:7982338ms step_avg:4058.13ms step:1978/3200 train_loss:3.4380 train_time:7986347ms step_avg:4058.10ms step:1979/3200 train_loss:3.3560 train_time:7990350ms step_avg:4058.08ms step:1980/3200 train_loss:3.2574 train_time:7994348ms step_avg:4058.04ms step:1981/3200 train_loss:3.5272 train_time:7998349ms step_avg:4058.02ms step:1982/3200 train_loss:3.3854 train_time:8002344ms step_avg:4057.98ms step:1983/3200 train_loss:3.9703 train_time:8006342ms step_avg:4057.95ms step:1984/3200 train_loss:3.4051 train_time:8010343ms step_avg:4057.92ms step:1985/3200 train_loss:3.4275 train_time:8014342ms 
step_avg:4057.89ms step:1986/3200 train_loss:3.4435 train_time:8018342ms step_avg:4057.87ms step:1987/3200 train_loss:3.4889 train_time:8022346ms step_avg:4057.84ms step:1988/3200 train_loss:3.4409 train_time:8026355ms step_avg:4057.81ms step:1989/3200 train_loss:3.3236 train_time:8030362ms step_avg:4057.79ms step:1990/3200 train_loss:3.5261 train_time:8034375ms step_avg:4057.77ms step:1991/3200 train_loss:3.5743 train_time:8038391ms step_avg:4057.74ms step:1992/3200 train_loss:3.3391 train_time:8042413ms step_avg:4057.73ms step:1993/3200 train_loss:3.4068 train_time:8046442ms step_avg:4057.71ms step:1994/3200 train_loss:3.4841 train_time:8050452ms step_avg:4057.69ms step:1995/3200 train_loss:3.5529 train_time:8054461ms step_avg:4057.66ms step:1996/3200 train_loss:3.3447 train_time:8058481ms step_avg:4057.64ms step:1997/3200 train_loss:3.3128 train_time:8062506ms step_avg:4057.63ms step:1998/3200 train_loss:3.6420 train_time:8066541ms step_avg:4057.62ms step:1999/3200 train_loss:3.3435 train_time:8070587ms step_avg:4057.61ms step:2000/3200 train_loss:3.3870 train_time:8074639ms step_avg:4057.61ms step:2000/3200 val_loss:3.4389 train_time:8074639ms step_avg:4057.61ms step:2001/3200 train_loss:3.4752 train_time:8078839ms step_avg:4057.68ms step:2002/3200 train_loss:3.3871 train_time:8083035ms step_avg:4057.75ms step:2003/3200 train_loss:3.3558 train_time:8087193ms step_avg:4057.80ms step:2004/3200 train_loss:3.3470 train_time:8091335ms step_avg:4057.84ms step:2005/3200 train_loss:3.4304 train_time:8095464ms step_avg:4057.88ms step:2006/3200 train_loss:3.5459 train_time:8099586ms step_avg:4057.91ms step:2007/3200 train_loss:3.4521 train_time:8103698ms step_avg:4057.94ms step:2008/3200 train_loss:3.8053 train_time:8107805ms step_avg:4057.96ms step:2009/3200 train_loss:3.5096 train_time:8111910ms step_avg:4057.98ms step:2010/3200 train_loss:3.3451 train_time:8116020ms step_avg:4058.01ms step:2011/3200 train_loss:3.3993 train_time:8120129ms step_avg:4058.04ms 
step:2012/3200 train_loss:3.2412 train_time:8124243ms step_avg:4058.06ms step:2013/3200 train_loss:3.5408 train_time:8128361ms step_avg:4058.09ms step:2014/3200 train_loss:3.4490 train_time:8132485ms step_avg:4058.13ms step:2015/3200 train_loss:3.4192 train_time:8136623ms step_avg:4058.17ms step:2016/3200 train_loss:3.4766 train_time:8140774ms step_avg:4058.21ms step:2017/3200 train_loss:3.5199 train_time:8144955ms step_avg:4058.27ms step:2018/3200 train_loss:3.3516 train_time:8149135ms step_avg:4058.33ms step:2019/3200 train_loss:3.4459 train_time:8153318ms step_avg:4058.40ms step:2020/3200 train_loss:3.3908 train_time:8157517ms step_avg:4058.47ms step:2021/3200 train_loss:3.3872 train_time:8161715ms step_avg:4058.54ms step:2022/3200 train_loss:3.6917 train_time:8165908ms step_avg:4058.60ms step:2023/3200 train_loss:3.3654 train_time:8170033ms step_avg:4058.64ms step:2024/3200 train_loss:3.6373 train_time:8174102ms step_avg:4058.64ms step:2025/3200 train_loss:3.4115 train_time:8178135ms step_avg:4058.63ms step:2026/3200 train_loss:3.1209 train_time:8182145ms step_avg:4058.60ms step:2027/3200 train_loss:3.3959 train_time:8186149ms step_avg:4058.58ms step:2028/3200 train_loss:3.3901 train_time:8190156ms step_avg:4058.55ms step:2029/3200 train_loss:3.4682 train_time:8194149ms step_avg:4058.52ms step:2030/3200 train_loss:3.5708 train_time:8198130ms step_avg:4058.48ms step:2031/3200 train_loss:3.3879 train_time:8202105ms step_avg:4058.44ms step:2032/3200 train_loss:2.8679 train_time:8206087ms step_avg:4058.40ms step:2033/3200 train_loss:3.3498 train_time:8210067ms step_avg:4058.36ms step:2034/3200 train_loss:3.5026 train_time:8214043ms step_avg:4058.32ms step:2035/3200 train_loss:3.3824 train_time:8218016ms step_avg:4058.28ms step:2036/3200 train_loss:3.4885 train_time:8221988ms step_avg:4058.24ms step:2037/3200 train_loss:3.2651 train_time:8225955ms step_avg:4058.19ms step:2038/3200 train_loss:3.4974 train_time:8229924ms step_avg:4058.15ms step:2039/3200 
train_loss:3.3137 train_time:8233892ms step_avg:4058.10ms step:2040/3200 train_loss:3.4878 train_time:8237858ms step_avg:4058.06ms step:2041/3200 train_loss:3.5011 train_time:8241822ms step_avg:4058.01ms step:2042/3200 train_loss:3.5266 train_time:8245784ms step_avg:4057.96ms step:2043/3200 train_loss:3.3185 train_time:8249747ms step_avg:4057.92ms step:2044/3200 train_loss:3.6581 train_time:8253713ms step_avg:4057.87ms step:2045/3200 train_loss:3.4826 train_time:8257664ms step_avg:4057.82ms step:2046/3200 train_loss:3.5415 train_time:8261616ms step_avg:4057.77ms step:2047/3200 train_loss:3.3589 train_time:8265580ms step_avg:4057.72ms step:2048/3200 train_loss:3.4243 train_time:8269547ms step_avg:4057.68ms step:2049/3200 train_loss:3.5012 train_time:8273514ms step_avg:4057.63ms step:2050/3200 train_loss:3.4742 train_time:8277482ms step_avg:4057.59ms step:2051/3200 train_loss:3.2047 train_time:8281450ms step_avg:4057.55ms step:2052/3200 train_loss:3.3301 train_time:8285418ms step_avg:4057.50ms step:2053/3200 train_loss:3.4538 train_time:8289391ms step_avg:4057.46ms step:2054/3200 train_loss:3.3628 train_time:8293366ms step_avg:4057.42ms step:2055/3200 train_loss:3.2851 train_time:8297343ms step_avg:4057.38ms step:2056/3200 train_loss:3.5549 train_time:8301319ms step_avg:4057.34ms step:2057/3200 train_loss:3.4766 train_time:8305297ms step_avg:4057.30ms step:2058/3200 train_loss:3.4059 train_time:8309281ms step_avg:4057.27ms step:2059/3200 train_loss:3.4187 train_time:8313266ms step_avg:4057.23ms step:2060/3200 train_loss:3.4538 train_time:8317255ms step_avg:4057.20ms step:2061/3200 train_loss:3.3783 train_time:8321241ms step_avg:4057.16ms step:2062/3200 train_loss:3.7186 train_time:8325214ms step_avg:4057.12ms step:2063/3200 train_loss:3.6435 train_time:8329194ms step_avg:4057.08ms step:2064/3200 train_loss:3.4086 train_time:8333172ms step_avg:4057.05ms step:2065/3200 train_loss:3.5467 train_time:8337159ms step_avg:4057.01ms step:2066/3200 train_loss:3.4972 
train_time:8341155ms step_avg:4056.98ms step:2067/3200 train_loss:3.5100 train_time:8345152ms step_avg:4056.95ms step:2068/3200 train_loss:3.4487 train_time:8349157ms step_avg:4056.93ms step:2069/3200 train_loss:3.7411 train_time:8353173ms step_avg:4056.91ms step:2070/3200 train_loss:3.4789 train_time:8357173ms step_avg:4056.88ms step:2071/3200 train_loss:3.5572 train_time:8361174ms step_avg:4056.85ms step:2072/3200 train_loss:3.3943 train_time:8365185ms step_avg:4056.83ms step:2073/3200 train_loss:3.3335 train_time:8369204ms step_avg:4056.81ms step:2074/3200 train_loss:3.3885 train_time:8373236ms step_avg:4056.80ms step:2075/3200 train_loss:3.6372 train_time:8377279ms step_avg:4056.79ms step:2076/3200 train_loss:3.5900 train_time:8381336ms step_avg:4056.79ms step:2077/3200 train_loss:3.4857 train_time:8385413ms step_avg:4056.80ms step:2078/3200 train_loss:3.4435 train_time:8389504ms step_avg:4056.82ms step:2079/3200 train_loss:3.4638 train_time:8393618ms step_avg:4056.85ms step:2080/3200 train_loss:3.4486 train_time:8397759ms step_avg:4056.89ms step:2081/3200 train_loss:3.1976 train_time:8401937ms step_avg:4056.95ms step:2082/3200 train_loss:3.5860 train_time:8406114ms step_avg:4057.00ms step:2083/3200 train_loss:3.2715 train_time:8410265ms step_avg:4057.05ms step:2084/3200 train_loss:3.2740 train_time:8414374ms step_avg:4057.08ms step:2085/3200 train_loss:3.3515 train_time:8418458ms step_avg:4057.09ms step:2086/3200 train_loss:3.4205 train_time:8422524ms step_avg:4057.09ms step:2087/3200 train_loss:3.2931 train_time:8426576ms step_avg:4057.09ms step:2088/3200 train_loss:3.5340 train_time:8430608ms step_avg:4057.08ms step:2089/3200 train_loss:3.3350 train_time:8434636ms step_avg:4057.06ms step:2090/3200 train_loss:3.2848 train_time:8438656ms step_avg:4057.05ms step:2091/3200 train_loss:3.4617 train_time:8442674ms step_avg:4057.03ms step:2092/3200 train_loss:3.5196 train_time:8446689ms step_avg:4057.01ms step:2093/3200 train_loss:3.4470 train_time:8450699ms 
step_avg:4056.98ms step:2094/3200 train_loss:3.7018 train_time:8454707ms step_avg:4056.96ms step:2095/3200 train_loss:3.5396 train_time:8458713ms step_avg:4056.94ms step:2096/3200 train_loss:3.5453 train_time:8462720ms step_avg:4056.91ms step:2097/3200 train_loss:3.4662 train_time:8466729ms step_avg:4056.89ms step:2098/3200 train_loss:3.4161 train_time:8470868ms step_avg:4056.93ms step:2099/3200 train_loss:3.3715 train_time:8474876ms step_avg:4056.91ms step:2100/3200 train_loss:3.2878 train_time:8478890ms step_avg:4056.89ms step:2101/3200 train_loss:3.5437 train_time:8482905ms step_avg:4056.87ms step:2102/3200 train_loss:3.4439 train_time:8486926ms step_avg:4056.85ms step:2103/3200 train_loss:3.2567 train_time:8490955ms step_avg:4056.83ms step:2104/3200 train_loss:3.5386 train_time:8494998ms step_avg:4056.83ms step:2105/3200 train_loss:3.2186 train_time:8499040ms step_avg:4056.82ms step:2106/3200 train_loss:3.6175 train_time:8503086ms step_avg:4056.82ms step:2107/3200 train_loss:3.3505 train_time:8507141ms step_avg:4056.81ms step:2108/3200 train_loss:3.3198 train_time:8511204ms step_avg:4056.82ms step:2109/3200 train_loss:3.5051 train_time:8515289ms step_avg:4056.83ms step:2110/3200 train_loss:3.4042 train_time:8519381ms step_avg:4056.85ms step:2111/3200 train_loss:3.2925 train_time:8523489ms step_avg:4056.87ms step:2112/3200 train_loss:3.4357 train_time:8527614ms step_avg:4056.90ms step:2113/3200 train_loss:3.4382 train_time:8531758ms step_avg:4056.95ms step:2114/3200 train_loss:3.4592 train_time:8535934ms step_avg:4057.00ms step:2115/3200 train_loss:3.5192 train_time:8540110ms step_avg:4057.06ms step:2116/3200 train_loss:3.4773 train_time:8544258ms step_avg:4057.10ms step:2117/3200 train_loss:3.5167 train_time:8548390ms step_avg:4057.14ms step:2118/3200 train_loss:3.3530 train_time:8552516ms step_avg:4057.17ms step:2119/3200 train_loss:3.5405 train_time:8556640ms step_avg:4057.20ms step:2120/3200 train_loss:3.3915 train_time:8560768ms step_avg:4057.24ms 
step:2121/3200 train_loss:3.4632 train_time:8564897ms step_avg:4057.27ms step:2122/3200 train_loss:3.3418 train_time:8569023ms step_avg:4057.30ms step:2123/3200 train_loss:3.3131 train_time:8573155ms step_avg:4057.34ms step:2124/3200 train_loss:3.3046 train_time:8577287ms step_avg:4057.37ms step:2125/3200 train_loss:3.4443 train_time:8581431ms step_avg:4057.41ms step:2125/3200 val_loss:3.4284 train_time:8581431ms step_avg:4057.41ms step:2126/3200 train_loss:3.3338 train_time:8585594ms step_avg:4057.46ms step:2127/3200 train_loss:3.6837 train_time:8589709ms step_avg:4057.49ms step:2128/3200 train_loss:3.3922 train_time:8593791ms step_avg:4057.50ms step:2129/3200 train_loss:3.3419 train_time:8597853ms step_avg:4057.50ms step:2130/3200 train_loss:3.3969 train_time:8601893ms step_avg:4057.50ms step:2131/3200 train_loss:3.2358 train_time:8605925ms step_avg:4057.48ms step:2132/3200 train_loss:3.3825 train_time:8609942ms step_avg:4057.47ms step:2133/3200 train_loss:3.5101 train_time:8613950ms step_avg:4057.44ms step:2134/3200 train_loss:3.6333 train_time:8617975ms step_avg:4057.43ms step:2135/3200 train_loss:3.5424 train_time:8621998ms step_avg:4057.41ms step:2136/3200 train_loss:3.4420 train_time:8626017ms step_avg:4057.39ms step:2137/3200 train_loss:3.7022 train_time:8630029ms step_avg:4057.37ms step:2138/3200 train_loss:3.7574 train_time:8634043ms step_avg:4057.35ms step:2139/3200 train_loss:3.3290 train_time:8638056ms step_avg:4057.33ms step:2140/3200 train_loss:3.5005 train_time:8642064ms step_avg:4057.31ms step:2141/3200 train_loss:3.3878 train_time:8646074ms step_avg:4057.28ms step:2142/3200 train_loss:3.3892 train_time:8650079ms step_avg:4057.26ms step:2143/3200 train_loss:3.3150 train_time:8654089ms step_avg:4057.24ms step:2144/3200 train_loss:3.3026 train_time:8658093ms step_avg:4057.21ms step:2145/3200 train_loss:3.2183 train_time:8662101ms step_avg:4057.19ms step:2146/3200 train_loss:3.5672 train_time:8666114ms step_avg:4057.17ms step:2147/3200 
train_loss:3.9332 train_time:8670128ms step_avg:4057.15ms step:2148/3200 train_loss:3.4223 train_time:8674145ms step_avg:4057.13ms step:2149/3200 train_loss:3.4062 train_time:8678163ms step_avg:4057.11ms step:2150/3200 train_loss:3.3332 train_time:8682185ms step_avg:4057.10ms step:2151/3200 train_loss:3.5473 train_time:8686211ms step_avg:4057.08ms step:2152/3200 train_loss:3.7331 train_time:8690240ms step_avg:4057.07ms step:2153/3200 train_loss:3.5096 train_time:8694256ms step_avg:4057.05ms step:2154/3200 train_loss:3.5605 train_time:8698268ms step_avg:4057.03ms step:2155/3200 train_loss:3.3080 train_time:8702289ms step_avg:4057.01ms step:2156/3200 train_loss:3.5455 train_time:8706316ms step_avg:4057.00ms step:2157/3200 train_loss:3.4312 train_time:8710349ms step_avg:4056.99ms step:2158/3200 train_loss:3.4934 train_time:8714389ms step_avg:4056.98ms step:2159/3200 train_loss:4.7540 train_time:8718442ms step_avg:4056.98ms step:2160/3200 train_loss:3.4825 train_time:8722509ms step_avg:4056.98ms step:2161/3200 train_loss:3.6834 train_time:8726584ms step_avg:4056.99ms step:2162/3200 train_loss:3.3495 train_time:8730673ms step_avg:4057.00ms step:2163/3200 train_loss:3.6316 train_time:8734780ms step_avg:4057.03ms step:2164/3200 train_loss:3.4654 train_time:8738903ms step_avg:4057.06ms step:2165/3200 train_loss:3.3488 train_time:8743045ms step_avg:4057.10ms step:2166/3200 train_loss:3.3115 train_time:8747225ms step_avg:4057.15ms step:2167/3200 train_loss:3.4129 train_time:8751405ms step_avg:4057.21ms step:2168/3200 train_loss:3.3647 train_time:8755601ms step_avg:4057.28ms step:2169/3200 train_loss:3.1150 train_time:8759753ms step_avg:4057.32ms step:2170/3200 train_loss:3.3418 train_time:8763875ms step_avg:4057.35ms step:2171/3200 train_loss:3.4121 train_time:8767964ms step_avg:4057.36ms step:2172/3200 train_loss:3.6913 train_time:8772042ms step_avg:4057.37ms step:2173/3200 train_loss:3.4307 train_time:8776110ms step_avg:4057.38ms step:2174/3200 train_loss:3.3361 
train_time:8780162ms step_avg:4057.38ms step:2175/3200 train_loss:3.6013 train_time:8784206ms step_avg:4057.37ms step:2176/3200 train_loss:3.4665 train_time:8788242ms step_avg:4057.36ms step:2177/3200 train_loss:3.2486 train_time:8792268ms step_avg:4057.35ms step:2178/3200 train_loss:3.4306 train_time:8796296ms step_avg:4057.33ms step:2179/3200 train_loss:3.2317 train_time:8800323ms step_avg:4057.32ms step:2180/3200 train_loss:3.5944 train_time:8804345ms step_avg:4057.30ms step:2181/3200 train_loss:3.4792 train_time:8808365ms step_avg:4057.28ms step:2182/3200 train_loss:3.4389 train_time:8812390ms step_avg:4057.27ms step:2183/3200 train_loss:3.4544 train_time:8816419ms step_avg:4057.26ms step:2184/3200 train_loss:3.4398 train_time:8820447ms step_avg:4057.24ms step:2185/3200 train_loss:3.3762 train_time:8824476ms step_avg:4057.23ms step:2186/3200 train_loss:3.3257 train_time:8828500ms step_avg:4057.22ms step:2187/3200 train_loss:3.5377 train_time:8832528ms step_avg:4057.20ms step:2188/3200 train_loss:3.4048 train_time:8836557ms step_avg:4057.19ms step:2189/3200 train_loss:3.4781 train_time:8840599ms step_avg:4057.18ms step:2190/3200 train_loss:3.3094 train_time:8844648ms step_avg:4057.18ms step:2191/3200 train_loss:3.0704 train_time:8848704ms step_avg:4057.18ms step:2192/3200 train_loss:3.4335 train_time:8852765ms step_avg:4057.18ms step:2193/3200 train_loss:3.4281 train_time:8856832ms step_avg:4057.18ms step:2194/3200 train_loss:3.4710 train_time:8860911ms step_avg:4057.19ms step:2195/3200 train_loss:3.3863 train_time:8865005ms step_avg:4057.21ms step:2196/3200 train_loss:3.4370 train_time:8869111ms step_avg:4057.23ms step:2197/3200 train_loss:3.4065 train_time:8873231ms step_avg:4057.26ms step:2198/3200 train_loss:3.2618 train_time:8877363ms step_avg:4057.30ms step:2199/3200 train_loss:3.4059 train_time:8881518ms step_avg:4057.34ms step:2200/3200 train_loss:3.5630 train_time:8885702ms step_avg:4057.40ms step:2201/3200 train_loss:3.4023 train_time:8889883ms 
step_avg:4057.45ms step:2202/3200 train_loss:3.3992 train_time:8894083ms step_avg:4057.52ms step:2203/3200 train_loss:3.4448 train_time:8898276ms step_avg:4057.58ms step:2204/3200 train_loss:3.5314 train_time:8902415ms step_avg:4057.62ms step:2205/3200 train_loss:3.2796 train_time:8906511ms step_avg:4057.64ms step:2206/3200 train_loss:3.3728 train_time:8910578ms step_avg:4057.64ms step:2207/3200 train_loss:3.5585 train_time:8914620ms step_avg:4057.63ms step:2208/3200 train_loss:3.5512 train_time:8918647ms step_avg:4057.62ms step:2209/3200 train_loss:3.5638 train_time:8922661ms step_avg:4057.60ms step:2210/3200 train_loss:3.3847 train_time:8926696ms step_avg:4057.59ms step:2211/3200 train_loss:3.4871 train_time:8930716ms step_avg:4057.57ms step:2212/3200 train_loss:3.4900 train_time:8934726ms step_avg:4057.55ms step:2213/3200 train_loss:3.5354 train_time:8938731ms step_avg:4057.53ms step:2214/3200 train_loss:3.4761 train_time:8942728ms step_avg:4057.50ms step:2215/3200 train_loss:3.7530 train_time:8946723ms step_avg:4057.47ms step:2216/3200 train_loss:3.4270 train_time:8950714ms step_avg:4057.44ms step:2217/3200 train_loss:3.3760 train_time:8954704ms step_avg:4057.41ms step:2218/3200 train_loss:3.4151 train_time:8958697ms step_avg:4057.38ms step:2219/3200 train_loss:3.4823 train_time:8962685ms step_avg:4057.35ms step:2220/3200 train_loss:3.6389 train_time:8966672ms step_avg:4057.32ms step:2221/3200 train_loss:3.4645 train_time:8970653ms step_avg:4057.28ms step:2222/3200 train_loss:3.3574 train_time:8974637ms step_avg:4057.25ms step:2223/3200 train_loss:3.1612 train_time:8978624ms step_avg:4057.22ms step:2224/3200 train_loss:3.3585 train_time:8982607ms step_avg:4057.18ms step:2225/3200 train_loss:3.3716 train_time:8986593ms step_avg:4057.15ms step:2226/3200 train_loss:3.4794 train_time:8990581ms step_avg:4057.12ms step:2227/3200 train_loss:3.3577 train_time:8994572ms step_avg:4057.09ms step:2228/3200 train_loss:3.4276 train_time:8998564ms step_avg:4057.06ms 
step:2229/3200 train_loss:3.3412 train_time:9002558ms step_avg:4057.03ms step:2230/3200 train_loss:3.3816 train_time:9006556ms step_avg:4057.01ms step:2231/3200 train_loss:3.2490 train_time:9010556ms step_avg:4056.98ms step:2232/3200 train_loss:3.3475 train_time:9014563ms step_avg:4056.96ms step:2233/3200 train_loss:3.4234 train_time:9018571ms step_avg:4056.94ms step:2234/3200 train_loss:4.1996 train_time:9022580ms step_avg:4056.92ms step:2235/3200 train_loss:3.5143 train_time:9026595ms step_avg:4056.90ms step:2236/3200 train_loss:3.4804 train_time:9030614ms step_avg:4056.88ms step:2237/3200 train_loss:3.4066 train_time:9034644ms step_avg:4056.87ms step:2238/3200 train_loss:3.6672 train_time:9038659ms step_avg:4056.85ms step:2239/3200 train_loss:3.2371 train_time:9042674ms step_avg:4056.83ms step:2240/3200 train_loss:3.4592 train_time:9046699ms step_avg:4056.82ms step:2241/3200 train_loss:3.5244 train_time:9050732ms step_avg:4056.80ms step:2242/3200 train_loss:3.3044 train_time:9054764ms step_avg:4056.79ms step:2243/3200 train_loss:3.5418 train_time:9058808ms step_avg:4056.79ms step:2244/3200 train_loss:3.3424 train_time:9062865ms step_avg:4056.79ms step:2245/3200 train_loss:3.2445 train_time:9066939ms step_avg:4056.80ms step:2246/3200 train_loss:3.3316 train_time:9071033ms step_avg:4056.81ms step:2247/3200 train_loss:3.4031 train_time:9075146ms step_avg:4056.84ms step:2248/3200 train_loss:3.2310 train_time:9079270ms step_avg:4056.87ms step:2249/3200 train_loss:3.4192 train_time:9083416ms step_avg:4056.91ms step:2250/3200 train_loss:3.5346 train_time:9087582ms step_avg:4056.96ms step:2250/3200 val_loss:3.4176 train_time:9087583ms step_avg:4056.96ms step:2251/3200 train_loss:3.3285 train_time:9091650ms step_avg:4056.96ms step:2252/3200 train_loss:3.3758 train_time:9095695ms step_avg:4056.96ms step:2253/3200 train_loss:3.3588 train_time:9099730ms step_avg:4056.95ms step:2254/3200 train_loss:3.4797 train_time:9103756ms step_avg:4056.93ms step:2255/3200 
train_loss:3.4028 train_time:9107774ms step_avg:4056.91ms step:2256/3200 train_loss:3.4109 train_time:9111784ms step_avg:4056.89ms step:2257/3200 train_loss:3.4310 train_time:9115816ms step_avg:4056.88ms step:2258/3200 train_loss:3.4919 train_time:9119843ms step_avg:4056.87ms step:2259/3200 train_loss:3.7200 train_time:9123870ms step_avg:4056.86ms step:2260/3200 train_loss:3.3991 train_time:9127890ms step_avg:4056.84ms step:2261/3200 train_loss:3.4254 train_time:9131909ms step_avg:4056.82ms step:2262/3200 train_loss:3.5245 train_time:9135931ms step_avg:4056.81ms step:2263/3200 train_loss:3.1317 train_time:9139953ms step_avg:4056.79ms step:2264/3200 train_loss:3.4740 train_time:9143976ms step_avg:4056.78ms step:2265/3200 train_loss:3.2904 train_time:9148002ms step_avg:4056.76ms step:2266/3200 train_loss:3.2963 train_time:9152025ms step_avg:4056.75ms step:2267/3200 train_loss:3.3356 train_time:9156052ms step_avg:4056.74ms step:2268/3200 train_loss:3.4403 train_time:9160082ms step_avg:4056.72ms step:2269/3200 train_loss:3.4178 train_time:9164113ms step_avg:4056.71ms step:2270/3200 train_loss:3.3384 train_time:9168140ms step_avg:4056.70ms step:2271/3200 train_loss:3.3805 train_time:9172154ms step_avg:4056.68ms step:2272/3200 train_loss:3.3094 train_time:9176174ms step_avg:4056.66ms step:2273/3200 train_loss:3.4774 train_time:9180204ms step_avg:4056.65ms step:2274/3200 train_loss:3.4538 train_time:9184236ms step_avg:4056.64ms step:2275/3200 train_loss:3.3500 train_time:9188274ms step_avg:4056.63ms step:2276/3200 train_loss:3.2156 train_time:9192325ms step_avg:4056.63ms step:2277/3200 train_loss:3.2808 train_time:9196385ms step_avg:4056.63ms step:2278/3200 train_loss:3.7566 train_time:9200451ms step_avg:4056.64ms step:2279/3200 train_loss:3.3053 train_time:9204539ms step_avg:4056.65ms step:2280/3200 train_loss:3.3943 train_time:9208641ms step_avg:4056.67ms step:2281/3200 train_loss:3.4058 train_time:9212758ms step_avg:4056.70ms step:2282/3200 train_loss:3.4196 
train_time:9216901ms step_avg:4056.73ms step:2283/3200 train_loss:3.3369 train_time:9221066ms step_avg:4056.78ms step:2284/3200 train_loss:3.4093 train_time:9225245ms step_avg:4056.84ms step:2285/3200 train_loss:3.4255 train_time:9229447ms step_avg:4056.90ms step:2286/3200 train_loss:3.4093 train_time:9233644ms step_avg:4056.96ms step:2287/3200 train_loss:3.3820 train_time:9237839ms step_avg:4057.02ms step:2288/3200 train_loss:3.5667 train_time:9242004ms step_avg:4057.07ms step:2289/3200 train_loss:3.4340 train_time:9246142ms step_avg:4057.10ms step:2290/3200 train_loss:3.4076 train_time:9250258ms step_avg:4057.13ms step:2291/3200 train_loss:3.3453 train_time:9254368ms step_avg:4057.15ms step:2292/3200 train_loss:3.4338 train_time:9258463ms step_avg:4057.17ms step:2293/3200 train_loss:3.2828 train_time:9262546ms step_avg:4057.18ms step:2294/3200 train_loss:3.5165 train_time:9266623ms step_avg:4057.19ms step:2295/3200 train_loss:3.4635 train_time:9270690ms step_avg:4057.19ms step:2296/3200 train_loss:3.4712 train_time:9274760ms step_avg:4057.20ms step:2297/3200 train_loss:3.2704 train_time:9278826ms step_avg:4057.20ms step:2298/3200 train_loss:3.6150 train_time:9282897ms step_avg:4057.21ms step:2299/3200 train_loss:3.3746 train_time:9286970ms step_avg:4057.22ms step:2300/3200 train_loss:3.4723 train_time:9291046ms step_avg:4057.23ms step:2301/3200 train_loss:3.2240 train_time:9295123ms step_avg:4057.23ms step:2302/3200 train_loss:3.5508 train_time:9299208ms step_avg:4057.25ms step:2303/3200 train_loss:3.4065 train_time:9303295ms step_avg:4057.26ms step:2304/3200 train_loss:3.4277 train_time:9307389ms step_avg:4057.27ms step:2305/3200 train_loss:3.3233 train_time:9311488ms step_avg:4057.29ms step:2306/3200 train_loss:3.4873 train_time:9315605ms step_avg:4057.32ms step:2307/3200 train_loss:3.2974 train_time:9319727ms step_avg:4057.35ms step:2308/3200 train_loss:3.3981 train_time:9323871ms step_avg:4057.39ms step:2309/3200 train_loss:3.5079 train_time:9328035ms 
step_avg:4057.43ms step:2310/3200 train_loss:3.4196 train_time:9332229ms step_avg:4057.49ms step:2311/3200 train_loss:3.2629 train_time:9336407ms step_avg:4057.54ms step:2312/3200 train_loss:3.2720 train_time:9340564ms step_avg:4057.59ms step:2313/3200 train_loss:3.7653 train_time:9344695ms step_avg:4057.62ms step:2314/3200 train_loss:3.3945 train_time:9348808ms step_avg:4057.64ms step:2315/3200 train_loss:3.4831 train_time:9352914ms step_avg:4057.66ms step:2316/3200 train_loss:3.7172 train_time:9357003ms step_avg:4057.68ms step:2317/3200 train_loss:3.3770 train_time:9361091ms step_avg:4057.69ms step:2318/3200 train_loss:3.4507 train_time:9365176ms step_avg:4057.70ms step:2319/3200 train_loss:3.2553 train_time:9369254ms step_avg:4057.71ms step:2320/3200 train_loss:3.3022 train_time:9373336ms step_avg:4057.72ms step:2321/3200 train_loss:3.4278 train_time:9377412ms step_avg:4057.73ms step:2322/3200 train_loss:3.3133 train_time:9381493ms step_avg:4057.74ms step:2323/3200 train_loss:3.3506 train_time:9385577ms step_avg:4057.75ms step:2324/3200 train_loss:3.3618 train_time:9389660ms step_avg:4057.76ms step:2325/3200 train_loss:3.5358 train_time:9393748ms step_avg:4057.77ms step:2326/3200 train_loss:3.4487 train_time:9397846ms step_avg:4057.79ms step:2327/3200 train_loss:3.3408 train_time:9401952ms step_avg:4057.81ms step:2328/3200 train_loss:3.4910 train_time:9406066ms step_avg:4057.84ms step:2329/3200 train_loss:3.5077 train_time:9410187ms step_avg:4057.86ms step:2330/3200 train_loss:3.5271 train_time:9414324ms step_avg:4057.90ms step:2331/3200 train_loss:3.3612 train_time:9418481ms step_avg:4057.94ms step:2332/3200 train_loss:3.5302 train_time:9422660ms step_avg:4057.99ms step:2333/3200 train_loss:3.4429 train_time:9426841ms step_avg:4058.05ms step:2334/3200 train_loss:3.4131 train_time:9431040ms step_avg:4058.11ms step:2335/3200 train_loss:3.2814 train_time:9435242ms step_avg:4058.17ms step:2336/3200 train_loss:3.3442 train_time:9439440ms step_avg:4058.23ms 
step:2337/3200 train_loss:3.3902 train_time:9443638ms step_avg:4058.29ms step:2338/3200 train_loss:3.4020 train_time:9447833ms step_avg:4058.35ms step:2339/3200 train_loss:3.3339 train_time:9451984ms step_avg:4058.39ms step:2340/3200 train_loss:3.5910 train_time:9456088ms step_avg:4058.41ms step:2341/3200 train_loss:3.3954 train_time:9460162ms step_avg:4058.41ms step:2342/3200 train_loss:3.3978 train_time:9464215ms step_avg:4058.41ms step:2343/3200 train_loss:3.3721 train_time:9468254ms step_avg:4058.40ms step:2344/3200 train_loss:3.3276 train_time:9472279ms step_avg:4058.39ms step:2345/3200 train_loss:3.4067 train_time:9476290ms step_avg:4058.37ms step:2346/3200 train_loss:3.1880 train_time:9480296ms step_avg:4058.35ms step:2347/3200 train_loss:3.2909 train_time:9484319ms step_avg:4058.33ms step:2348/3200 train_loss:3.3981 train_time:9488337ms step_avg:4058.31ms step:2349/3200 train_loss:3.4152 train_time:9492348ms step_avg:4058.29ms step:2350/3200 train_loss:3.4390 train_time:9496356ms step_avg:4058.27ms step:2351/3200 train_loss:3.1841 train_time:9500361ms step_avg:4058.25ms step:2352/3200 train_loss:3.1232 train_time:9504367ms step_avg:4058.23ms step:2353/3200 train_loss:3.3825 train_time:9508369ms step_avg:4058.20ms step:2354/3200 train_loss:3.5201 train_time:9512370ms step_avg:4058.18ms step:2355/3200 train_loss:3.3196 train_time:9516368ms step_avg:4058.15ms step:2356/3200 train_loss:3.2238 train_time:9520368ms step_avg:4058.13ms step:2357/3200 train_loss:3.3884 train_time:9524365ms step_avg:4058.10ms step:2358/3200 train_loss:3.4289 train_time:9528364ms step_avg:4058.08ms step:2359/3200 train_loss:3.3247 train_time:9532365ms step_avg:4058.05ms step:2360/3200 train_loss:3.5023 train_time:9536365ms step_avg:4058.03ms step:2361/3200 train_loss:3.4723 train_time:9540371ms step_avg:4058.01ms step:2362/3200 train_loss:3.3577 train_time:9544377ms step_avg:4057.98ms step:2363/3200 train_loss:3.2958 train_time:9548386ms step_avg:4057.96ms step:2364/3200 
train_loss:3.4356 train_time:9552401ms step_avg:4057.94ms step:2365/3200 train_loss:3.3985 train_time:9556418ms step_avg:4057.93ms step:2366/3200 train_loss:3.2349 train_time:9560437ms step_avg:4057.91ms step:2367/3200 train_loss:3.2689 train_time:9564464ms step_avg:4057.90ms step:2368/3200 train_loss:3.5782 train_time:9568472ms step_avg:4057.88ms step:2369/3200 train_loss:3.3388 train_time:9572479ms step_avg:4057.85ms step:2370/3200 train_loss:3.4109 train_time:9576493ms step_avg:4057.84ms step:2371/3200 train_loss:3.3254 train_time:9580518ms step_avg:4057.82ms step:2372/3200 train_loss:3.5543 train_time:9584550ms step_avg:4057.81ms step:2373/3200 train_loss:3.3610 train_time:9588590ms step_avg:4057.80ms step:2374/3200 train_loss:3.3714 train_time:9592643ms step_avg:4057.80ms step:2375/3200 train_loss:3.5824 train_time:9596701ms step_avg:4057.80ms step:2375/3200 val_loss:3.4015 train_time:9596701ms step_avg:4057.80ms step:2376/3200 train_loss:3.3249 train_time:9600898ms step_avg:4057.86ms step:2377/3200 train_loss:3.4636 train_time:9605092ms step_avg:4057.92ms step:2378/3200 train_loss:3.3053 train_time:9609251ms step_avg:4057.96ms step:2379/3200 train_loss:3.6702 train_time:9613386ms step_avg:4057.99ms step:2380/3200 train_loss:3.4978 train_time:9617500ms step_avg:4058.02ms step:2381/3200 train_loss:3.5555 train_time:9621601ms step_avg:4058.03ms step:2382/3200 train_loss:5.1514 train_time:9625690ms step_avg:4058.05ms step:2383/3200 train_loss:3.1199 train_time:9629774ms step_avg:4058.06ms step:2384/3200 train_loss:3.2403 train_time:9633851ms step_avg:4058.07ms step:2385/3200 train_loss:3.2819 train_time:9637922ms step_avg:4058.07ms step:2386/3200 train_loss:3.3130 train_time:9641996ms step_avg:4058.08ms step:2387/3200 train_loss:3.4327 train_time:9646062ms step_avg:4058.08ms step:2388/3200 train_loss:3.5369 train_time:9650134ms step_avg:4058.09ms step:2389/3200 train_loss:3.3036 train_time:9654205ms step_avg:4058.09ms step:2390/3200 train_loss:3.7327 
train_time:9658283ms step_avg:4058.10ms step:2391/3200 train_loss:3.4989 train_time:9662363ms step_avg:4058.11ms step:2392/3200 train_loss:3.4747 train_time:9666444ms step_avg:4058.12ms step:2393/3200 train_loss:3.6915 train_time:9670529ms step_avg:4058.13ms step:2394/3200 train_loss:3.3532 train_time:9674624ms step_avg:4058.15ms step:2395/3200 train_loss:3.4836 train_time:9678733ms step_avg:4058.17ms step:2396/3200 train_loss:3.3158 train_time:9682848ms step_avg:4058.19ms step:2397/3200 train_loss:3.3378 train_time:9686981ms step_avg:4058.22ms step:2398/3200 train_loss:3.3249 train_time:9691129ms step_avg:4058.26ms step:2399/3200 train_loss:3.5178 train_time:9695305ms step_avg:4058.31ms step:2400/3200 train_loss:3.1560 train_time:9699485ms step_avg:4058.36ms step:2401/3200 train_loss:3.4895 train_time:9703676ms step_avg:4058.42ms step:2402/3200 train_loss:3.1294 train_time:9707862ms step_avg:4058.47ms step:2403/3200 train_loss:3.4131 train_time:9712042ms step_avg:4058.52ms step:2404/3200 train_loss:3.2546 train_time:9716238ms step_avg:4058.58ms step:2405/3200 train_loss:3.3292 train_time:9720415ms step_avg:4058.63ms step:2406/3200 train_loss:3.4924 train_time:9724594ms step_avg:4058.68ms step:2407/3200 train_loss:3.4499 train_time:9728773ms step_avg:4058.73ms step:2408/3200 train_loss:3.2339 train_time:9732953ms step_avg:4058.78ms step:2409/3200 train_loss:3.6228 train_time:9737130ms step_avg:4058.83ms step:2410/3200 train_loss:3.4218 train_time:9741309ms step_avg:4058.88ms step:2411/3200 train_loss:3.3645 train_time:9745488ms step_avg:4058.93ms step:2412/3200 train_loss:3.3719 train_time:9749668ms step_avg:4058.98ms step:2413/3200 train_loss:3.4283 train_time:9753846ms step_avg:4059.03ms step:2414/3200 train_loss:3.3220 train_time:9758028ms step_avg:4059.08ms step:2415/3200 train_loss:3.2532 train_time:9762228ms step_avg:4059.14ms step:2416/3200 train_loss:3.4771 train_time:9766430ms step_avg:4059.20ms step:2417/3200 train_loss:3.2386 train_time:9770627ms 
step_avg:4059.26ms step:2418/3200 train_loss:3.4687 train_time:9774822ms step_avg:4059.31ms step:2419/3200 train_loss:3.4003 train_time:9779018ms step_avg:4059.37ms step:2420/3200 train_loss:3.2990 train_time:9783193ms step_avg:4059.42ms step:2421/3200 train_loss:3.4489 train_time:9787337ms step_avg:4059.45ms step:2422/3200 train_loss:3.3557 train_time:9791473ms step_avg:4059.48ms step:2423/3200 train_loss:3.4262 train_time:9795596ms step_avg:4059.51ms step:2424/3200 train_loss:3.3055 train_time:9799710ms step_avg:4059.53ms step:2425/3200 train_loss:3.4580 train_time:9803820ms step_avg:4059.55ms step:2426/3200 train_loss:3.3248 train_time:9807928ms step_avg:4059.57ms step:2427/3200 train_loss:3.2928 train_time:9812031ms step_avg:4059.59ms step:2428/3200 train_loss:3.3164 train_time:9816139ms step_avg:4059.61ms step:2429/3200 train_loss:3.3488 train_time:9820248ms step_avg:4059.63ms step:2430/3200 train_loss:3.2676 train_time:9824359ms step_avg:4059.65ms step:2431/3200 train_loss:3.4746 train_time:9828476ms step_avg:4059.68ms step:2432/3200 train_loss:3.4300 train_time:9832601ms step_avg:4059.70ms step:2433/3200 train_loss:3.4473 train_time:9836738ms step_avg:4059.74ms step:2434/3200 train_loss:3.1991 train_time:9840887ms step_avg:4059.77ms step:2435/3200 train_loss:3.2618 train_time:9845040ms step_avg:4059.81ms step:2436/3200 train_loss:3.3765 train_time:9849216ms step_avg:4059.86ms step:2437/3200 train_loss:3.3975 train_time:9853396ms step_avg:4059.91ms step:2438/3200 train_loss:3.4586 train_time:9857597ms step_avg:4059.97ms step:2439/3200 train_loss:3.2705 train_time:9861799ms step_avg:4060.02ms step:2440/3200 train_loss:3.5790 train_time:9866002ms step_avg:4060.08ms step:2441/3200 train_loss:3.4592 train_time:9870197ms step_avg:4060.14ms step:2442/3200 train_loss:3.4334 train_time:9874382ms step_avg:4060.19ms step:2443/3200 train_loss:3.6357 train_time:9878499ms step_avg:4060.21ms step:2444/3200 train_loss:3.3240 train_time:9882564ms step_avg:4060.22ms 
step:2445/3200 train_loss:3.3942 train_time:9886594ms step_avg:4060.20ms step:2446/3200 train_loss:3.4252 train_time:9890609ms step_avg:4060.18ms step:2447/3200 train_loss:3.3718 train_time:9894636ms step_avg:4060.17ms step:2448/3200 train_loss:3.3594 train_time:9898644ms step_avg:4060.15ms step:2449/3200 train_loss:3.5080 train_time:9902640ms step_avg:4060.12ms step:2450/3200 train_loss:3.5108 train_time:9906624ms step_avg:4060.09ms step:2451/3200 train_loss:3.5132 train_time:9910625ms step_avg:4060.07ms step:2452/3200 train_loss:3.3975 train_time:9914620ms step_avg:4060.04ms step:2453/3200 train_loss:3.3080 train_time:9918612ms step_avg:4060.01ms step:2454/3200 train_loss:3.3126 train_time:9922596ms step_avg:4059.98ms step:2455/3200 train_loss:3.4563 train_time:9926580ms step_avg:4059.95ms step:2456/3200 train_loss:3.4074 train_time:9930561ms step_avg:4059.92ms step:2457/3200 train_loss:3.2547 train_time:9934536ms step_avg:4059.88ms step:2458/3200 train_loss:3.2787 train_time:9938511ms step_avg:4059.85ms step:2459/3200 train_loss:3.2074 train_time:9942484ms step_avg:4059.81ms step:2460/3200 train_loss:3.4412 train_time:9946457ms step_avg:4059.78ms step:2461/3200 train_loss:3.2260 train_time:9950429ms step_avg:4059.74ms step:2462/3200 train_loss:3.4803 train_time:9954401ms step_avg:4059.71ms step:2463/3200 train_loss:3.4137 train_time:9958371ms step_avg:4059.67ms step:2464/3200 train_loss:3.2426 train_time:9962341ms step_avg:4059.63ms step:2465/3200 train_loss:3.4284 train_time:9966313ms step_avg:4059.60ms step:2466/3200 train_loss:3.3386 train_time:9970281ms step_avg:4059.56ms step:2467/3200 train_loss:3.4140 train_time:9974252ms step_avg:4059.52ms step:2468/3200 train_loss:3.5834 train_time:9978224ms step_avg:4059.49ms step:2469/3200 train_loss:3.3547 train_time:9982198ms step_avg:4059.45ms step:2470/3200 train_loss:3.5177 train_time:9986172ms step_avg:4059.42ms step:2471/3200 train_loss:3.4615 train_time:9990148ms step_avg:4059.39ms step:2472/3200 
train_loss:3.2724 train_time:9994123ms step_avg:4059.35ms step:2473/3200 train_loss:3.3183 train_time:9998100ms step_avg:4059.32ms step:2474/3200 train_loss:3.4017 train_time:10002080ms step_avg:4059.29ms step:2475/3200 train_loss:3.4510 train_time:10006062ms step_avg:4059.25ms step:2476/3200 train_loss:3.4038 train_time:10010045ms step_avg:4059.22ms step:2477/3200 train_loss:3.4672 train_time:10014028ms step_avg:4059.19ms step:2478/3200 train_loss:3.5251 train_time:10018020ms step_avg:4059.17ms step:2479/3200 train_loss:3.2943 train_time:10022015ms step_avg:4059.14ms step:2480/3200 train_loss:3.2690 train_time:10026008ms step_avg:4059.11ms step:2481/3200 train_loss:3.5528 train_time:10030001ms step_avg:4059.09ms step:2482/3200 train_loss:3.4192 train_time:10033983ms step_avg:4059.05ms step:2483/3200 train_loss:3.3814 train_time:10037969ms step_avg:4059.02ms step:2484/3200 train_loss:3.6304 train_time:10041956ms step_avg:4059.00ms step:2485/3200 train_loss:3.3717 train_time:10045946ms step_avg:4058.97ms step:2486/3200 train_loss:3.3686 train_time:10049944ms step_avg:4058.94ms step:2487/3200 train_loss:3.3445 train_time:10053948ms step_avg:4058.92ms step:2488/3200 train_loss:3.4763 train_time:10057958ms step_avg:4058.90ms step:2489/3200 train_loss:3.2003 train_time:10061974ms step_avg:4058.88ms step:2490/3200 train_loss:3.3564 train_time:10065981ms step_avg:4058.86ms step:2491/3200 train_loss:3.4330 train_time:10069984ms step_avg:4058.84ms step:2492/3200 train_loss:3.4228 train_time:10073994ms step_avg:4058.82ms step:2493/3200 train_loss:3.3082 train_time:10078016ms step_avg:4058.81ms step:2494/3200 train_loss:3.3191 train_time:10082043ms step_avg:4058.79ms step:2495/3200 train_loss:3.5453 train_time:10086082ms step_avg:4058.79ms step:2496/3200 train_loss:3.4141 train_time:10090128ms step_avg:4058.78ms step:2497/3200 train_loss:3.2858 train_time:10094197ms step_avg:4058.78ms step:2498/3200 train_loss:3.5647 train_time:10098277ms step_avg:4058.79ms step:2499/3200 
train_loss:3.4448 train_time:10102376ms step_avg:4058.81ms step:2500/3200 train_loss:3.5576 train_time:10106494ms step_avg:4058.83ms step:2500/3200 val_loss:3.3777 train_time:10106494ms step_avg:4058.83ms step:2501/3200 train_loss:3.4493 train_time:10110616ms step_avg:4058.86ms step:2502/3200 train_loss:3.5347 train_time:10114716ms step_avg:4058.87ms step:2503/3200 train_loss:3.3241 train_time:10118804ms step_avg:4058.89ms step:2504/3200 train_loss:3.2083 train_time:10122891ms step_avg:4058.90ms step:2505/3200 train_loss:3.3234 train_time:10126958ms step_avg:4058.90ms step:2506/3200 train_loss:3.3631 train_time:10131028ms step_avg:4058.91ms step:2507/3200 train_loss:3.3692 train_time:10135100ms step_avg:4058.91ms step:2508/3200 train_loss:3.3420 train_time:10139170ms step_avg:4058.92ms step:2509/3200 train_loss:3.3588 train_time:10143244ms step_avg:4058.92ms step:2510/3200 train_loss:3.4195 train_time:10147315ms step_avg:4058.93ms step:2511/3200 train_loss:3.2814 train_time:10151389ms step_avg:4058.93ms step:2512/3200 train_loss:3.2840 train_time:10155466ms step_avg:4058.94ms step:2513/3200 train_loss:3.5048 train_time:10159547ms step_avg:4058.95ms step:2514/3200 train_loss:3.6114 train_time:10163637ms step_avg:4058.96ms step:2515/3200 train_loss:3.3441 train_time:10167737ms step_avg:4058.98ms step:2516/3200 train_loss:3.5098 train_time:10171841ms step_avg:4058.99ms step:2517/3200 train_loss:3.3591 train_time:10175962ms step_avg:4059.02ms step:2518/3200 train_loss:3.2013 train_time:10180090ms step_avg:4059.05ms step:2519/3200 train_loss:3.3527 train_time:10184233ms step_avg:4059.08ms step:2520/3200 train_loss:3.3551 train_time:10188401ms step_avg:4059.12ms step:2521/3200 train_loss:3.2220 train_time:10192580ms step_avg:4059.17ms step:2522/3200 train_loss:3.3159 train_time:10196758ms step_avg:4059.22ms step:2523/3200 train_loss:3.4207 train_time:10200911ms step_avg:4059.26ms step:2524/3200 train_loss:3.3316 train_time:10205064ms step_avg:4059.29ms step:2525/3200 
train_loss:3.3923 train_time:10209204ms step_avg:4059.33ms step:2526/3200 train_loss:3.3499 train_time:10213331ms step_avg:4059.35ms step:2527/3200 train_loss:3.2704 train_time:10217457ms step_avg:4059.38ms step:2528/3200 train_loss:3.3322 train_time:10221585ms step_avg:4059.41ms step:2529/3200 train_loss:3.3672 train_time:10225709ms step_avg:4059.43ms step:2530/3200 train_loss:3.3832 train_time:10229843ms step_avg:4059.46ms step:2531/3200 train_loss:3.2913 train_time:10233982ms step_avg:4059.49ms step:2532/3200 train_loss:3.3662 train_time:10238119ms step_avg:4059.52ms step:2533/3200 train_loss:3.4288 train_time:10242267ms step_avg:4059.56ms step:2534/3200 train_loss:3.4924 train_time:10246422ms step_avg:4059.60ms step:2535/3200 train_loss:3.5784 train_time:10250601ms step_avg:4059.64ms step:2536/3200 train_loss:3.6049 train_time:10254780ms step_avg:4059.69ms step:2537/3200 train_loss:3.2217 train_time:10258964ms step_avg:4059.74ms step:2538/3200 train_loss:3.3484 train_time:10263163ms step_avg:4059.80ms step:2539/3200 train_loss:3.2836 train_time:10267359ms step_avg:4059.85ms step:2540/3200 train_loss:3.3676 train_time:10271557ms step_avg:4059.90ms step:2541/3200 train_loss:3.3174 train_time:10275752ms step_avg:4059.96ms step:2542/3200 train_loss:3.2420 train_time:10279911ms step_avg:4060.00ms step:2543/3200 train_loss:3.3997 train_time:10284051ms step_avg:4060.03ms step:2544/3200 train_loss:3.2176 train_time:10288175ms step_avg:4060.05ms step:2545/3200 train_loss:3.4084 train_time:10292289ms step_avg:4060.07ms step:2546/3200 train_loss:3.4808 train_time:10296401ms step_avg:4060.09ms step:2547/3200 train_loss:3.3694 train_time:10300503ms step_avg:4060.11ms step:2548/3200 train_loss:3.3920 train_time:10304599ms step_avg:4060.13ms step:2549/3200 train_loss:3.5679 train_time:10308694ms step_avg:4060.14ms step:2550/3200 train_loss:3.0195 train_time:10312785ms step_avg:4060.15ms step:2551/3200 train_loss:3.4543 train_time:10316877ms step_avg:4060.16ms step:2552/3200 
train_loss:3.6724 train_time:10320973ms step_avg:4060.18ms step:2553/3200 train_loss:3.4178 train_time:10325075ms step_avg:4060.19ms step:2554/3200 train_loss:3.6365 train_time:10329177ms step_avg:4060.21ms step:2555/3200 train_loss:3.2472 train_time:10333290ms step_avg:4060.23ms step:2556/3200 train_loss:3.4818 train_time:10337410ms step_avg:4060.26ms step:2557/3200 train_loss:3.2483 train_time:10341541ms step_avg:4060.28ms step:2558/3200 train_loss:3.2924 train_time:10345671ms step_avg:4060.31ms step:2559/3200 train_loss:3.4047 train_time:10349828ms step_avg:4060.35ms step:2560/3200 train_loss:3.3337 train_time:10354008ms step_avg:4060.40ms step:2561/3200 train_loss:3.2239 train_time:10358175ms step_avg:4060.44ms step:2562/3200 train_loss:3.3124 train_time:10362375ms step_avg:4060.49ms step:2563/3200 train_loss:3.2335 train_time:10366573ms step_avg:4060.55ms step:2564/3200 train_loss:3.6032 train_time:10370773ms step_avg:4060.60ms step:2565/3200 train_loss:3.3268 train_time:10374970ms step_avg:4060.65ms step:2566/3200 train_loss:3.4444 train_time:10379167ms step_avg:4060.71ms step:2567/3200 train_loss:3.4047 train_time:10383340ms step_avg:4060.75ms step:2568/3200 train_loss:3.3399 train_time:10387479ms step_avg:4060.78ms step:2569/3200 train_loss:3.3669 train_time:10391588ms step_avg:4060.80ms step:2570/3200 train_loss:3.4509 train_time:10395682ms step_avg:4060.81ms step:2571/3200 train_loss:3.5510 train_time:10399760ms step_avg:4060.82ms step:2572/3200 train_loss:3.3220 train_time:10403831ms step_avg:4060.82ms step:2573/3200 train_loss:3.4300 train_time:10407891ms step_avg:4060.82ms step:2574/3200 train_loss:3.3466 train_time:10411940ms step_avg:4060.82ms step:2575/3200 train_loss:3.3847 train_time:10415985ms step_avg:4060.81ms step:2576/3200 train_loss:3.3104 train_time:10420025ms step_avg:4060.80ms step:2577/3200 train_loss:3.3682 train_time:10424062ms step_avg:4060.80ms step:2578/3200 train_loss:3.3915 train_time:10428090ms step_avg:4060.78ms step:2579/3200 
train_loss:3.2333 train_time:10432121ms step_avg:4060.77ms step:2580/3200 train_loss:3.4965 train_time:10436160ms step_avg:4060.76ms step:2581/3200 train_loss:3.2306 train_time:10440200ms step_avg:4060.75ms step:2582/3200 train_loss:3.1757 train_time:10444240ms step_avg:4060.75ms step:2583/3200 train_loss:3.2757 train_time:10448277ms step_avg:4060.74ms step:2584/3200 train_loss:3.3052 train_time:10452317ms step_avg:4060.73ms step:2585/3200 train_loss:3.4442 train_time:10456362ms step_avg:4060.72ms step:2586/3200 train_loss:3.4045 train_time:10460420ms step_avg:4060.72ms step:2587/3200 train_loss:3.3209 train_time:10464481ms step_avg:4060.72ms step:2588/3200 train_loss:3.3316 train_time:10468551ms step_avg:4060.73ms step:2589/3200 train_loss:3.4323 train_time:10472631ms step_avg:4060.73ms step:2590/3200 train_loss:3.3273 train_time:10476713ms step_avg:4060.74ms step:2591/3200 train_loss:3.2897 train_time:10480812ms step_avg:4060.76ms step:2592/3200 train_loss:3.4567 train_time:10484929ms step_avg:4060.78ms step:2593/3200 train_loss:3.3493 train_time:10489057ms step_avg:4060.80ms step:2594/3200 train_loss:3.4339 train_time:10493210ms step_avg:4060.84ms step:2595/3200 train_loss:3.1807 train_time:10497388ms step_avg:4060.89ms step:2596/3200 train_loss:3.3888 train_time:10501569ms step_avg:4060.93ms step:2597/3200 train_loss:3.4133 train_time:10505771ms step_avg:4060.99ms step:2598/3200 train_loss:3.1155 train_time:10509973ms step_avg:4061.04ms step:2599/3200 train_loss:3.3366 train_time:10514169ms step_avg:4061.09ms step:2600/3200 train_loss:3.3463 train_time:10518363ms step_avg:4061.14ms step:2601/3200 train_loss:3.3717 train_time:10522541ms step_avg:4061.19ms step:2602/3200 train_loss:3.3501 train_time:10526718ms step_avg:4061.23ms step:2603/3200 train_loss:3.2884 train_time:10530871ms step_avg:4061.27ms step:2604/3200 train_loss:3.2898 train_time:10535014ms step_avg:4061.30ms step:2605/3200 train_loss:3.2570 train_time:10539142ms step_avg:4061.33ms step:2606/3200 
train_loss:3.5752 train_time:10543270ms step_avg:4061.35ms step:2607/3200 train_loss:3.3472 train_time:10547394ms step_avg:4061.38ms step:2608/3200 train_loss:3.3084 train_time:10551517ms step_avg:4061.40ms step:2609/3200 train_loss:3.2959 train_time:10555645ms step_avg:4061.43ms step:2610/3200 train_loss:3.2320 train_time:10559768ms step_avg:4061.45ms step:2611/3200 train_loss:3.2393 train_time:10563905ms step_avg:4061.48ms step:2612/3200 train_loss:3.3330 train_time:10568043ms step_avg:4061.51ms step:2613/3200 train_loss:3.5816 train_time:10572186ms step_avg:4061.54ms step:2614/3200 train_loss:3.3430 train_time:10576321ms step_avg:4061.57ms step:2615/3200 train_loss:3.2735 train_time:10580502ms step_avg:4061.61ms step:2616/3200 train_loss:3.3720 train_time:10584683ms step_avg:4061.66ms step:2617/3200 train_loss:3.4099 train_time:10588864ms step_avg:4061.70ms step:2618/3200 train_loss:3.2591 train_time:10593066ms step_avg:4061.76ms step:2619/3200 train_loss:3.3468 train_time:10597261ms step_avg:4061.81ms step:2620/3200 train_loss:3.1820 train_time:10601454ms step_avg:4061.86ms step:2621/3200 train_loss:3.4269 train_time:10605577ms step_avg:4061.88ms step:2622/3200 train_loss:3.2630 train_time:10609676ms step_avg:4061.90ms step:2623/3200 train_loss:3.4800 train_time:10613756ms step_avg:4061.90ms step:2624/3200 train_loss:3.3520 train_time:10617819ms step_avg:4061.90ms step:2625/3200 train_loss:3.4069 train_time:10621862ms step_avg:4061.90ms step:2625/3200 val_loss:3.3554 train_time:10621863ms step_avg:4061.90ms step:2626/3200 train_loss:3.3806 train_time:10625873ms step_avg:4061.88ms step:2627/3200 train_loss:3.5322 train_time:10629877ms step_avg:4061.86ms step:2628/3200 train_loss:3.4438 train_time:10633882ms step_avg:4061.83ms step:2629/3200 train_loss:3.4878 train_time:10637886ms step_avg:4061.81ms step:2630/3200 train_loss:3.4569 train_time:10641892ms step_avg:4061.79ms step:2631/3200 train_loss:3.3234 train_time:10645900ms step_avg:4061.77ms step:2632/3200 
train_loss:3.4018 train_time:10649908ms step_avg:4061.75ms step:2633/3200 train_loss:3.2214 train_time:10653918ms step_avg:4061.73ms step:2634/3200 train_loss:3.3347 train_time:10657929ms step_avg:4061.71ms step:2635/3200 train_loss:3.4491 train_time:10661944ms step_avg:4061.69ms step:2636/3200 train_loss:3.2479 train_time:10665963ms step_avg:4061.68ms step:2637/3200 train_loss:3.4513 train_time:10669985ms step_avg:4061.66ms step:2638/3200 train_loss:3.4422 train_time:10674011ms step_avg:4061.65ms step:2639/3200 train_loss:3.3962 train_time:10678043ms step_avg:4061.64ms step:2640/3200 train_loss:3.2136 train_time:10682087ms step_avg:4061.63ms step:2641/3200 train_loss:3.2869 train_time:10686138ms step_avg:4061.63ms step:2642/3200 train_loss:3.4248 train_time:10690203ms step_avg:4061.63ms step:2643/3200 train_loss:3.3729 train_time:10694278ms step_avg:4061.63ms step:2644/3200 train_loss:3.2804 train_time:10698363ms step_avg:4061.64ms step:2645/3200 train_loss:3.4318 train_time:10702459ms step_avg:4061.65ms step:2646/3200 train_loss:3.3225 train_time:10706575ms step_avg:4061.67ms step:2647/3200 train_loss:3.3556 train_time:10710702ms step_avg:4061.70ms step:2648/3200 train_loss:3.3499 train_time:10714860ms step_avg:4061.74ms step:2649/3200 train_loss:3.2407 train_time:10719022ms step_avg:4061.77ms step:2650/3200 train_loss:3.3732 train_time:10723219ms step_avg:4061.83ms step:2651/3200 train_loss:3.2817 train_time:10727393ms step_avg:4061.87ms step:2652/3200 train_loss:3.4510 train_time:10731527ms step_avg:4061.90ms step:2653/3200 train_loss:3.2900 train_time:10735632ms step_avg:4061.91ms step:2654/3200 train_loss:3.5331 train_time:10739723ms step_avg:4061.92ms step:2655/3200 train_loss:3.2434 train_time:10743795ms step_avg:4061.93ms step:2656/3200 train_loss:3.3375 train_time:10747861ms step_avg:4061.93ms step:2657/3200 train_loss:3.1945 train_time:10751910ms step_avg:4061.92ms step:2658/3200 train_loss:3.4964 train_time:10755950ms step_avg:4061.91ms step:2659/3200 
train_loss:3.3534 train_time:10759987ms step_avg:4061.91ms step:2660/3200 train_loss:3.3745 train_time:10764015ms step_avg:4061.89ms step:2661/3200 train_loss:3.4735 train_time:10768042ms step_avg:4061.88ms step:2662/3200 train_loss:3.3805 train_time:10772069ms step_avg:4061.87ms step:2663/3200 train_loss:3.5153 train_time:10776099ms step_avg:4061.85ms step:2664/3200 train_loss:3.3701 train_time:10780125ms step_avg:4061.84ms step:2665/3200 train_loss:3.3667 train_time:10784155ms step_avg:4061.83ms step:2666/3200 train_loss:3.2914 train_time:10788185ms step_avg:4061.82ms step:2667/3200 train_loss:3.4158 train_time:10792217ms step_avg:4061.81ms step:2668/3200 train_loss:3.4236 train_time:10796251ms step_avg:4061.80ms step:2669/3200 train_loss:3.3443 train_time:10800286ms step_avg:4061.78ms step:2670/3200 train_loss:3.2495 train_time:10804327ms step_avg:4061.78ms step:2671/3200 train_loss:3.3240 train_time:10808369ms step_avg:4061.77ms step:2672/3200 train_loss:3.4914 train_time:10812418ms step_avg:4061.76ms step:2673/3200 train_loss:3.3533 train_time:10816472ms step_avg:4061.76ms step:2674/3200 train_loss:3.3507 train_time:10820539ms step_avg:4061.76ms step:2675/3200 train_loss:3.5666 train_time:10824618ms step_avg:4061.77ms step:2676/3200 train_loss:2.9321 train_time:10828704ms step_avg:4061.78ms step:2677/3200 train_loss:3.2702 train_time:10832806ms step_avg:4061.79ms step:2678/3200 train_loss:3.4944 train_time:10836924ms step_avg:4061.82ms step:2679/3200 train_loss:3.5273 train_time:10841058ms step_avg:4061.84ms step:2680/3200 train_loss:3.2380 train_time:10845207ms step_avg:4061.88ms step:2681/3200 train_loss:3.2341 train_time:10849379ms step_avg:4061.92ms step:2682/3200 train_loss:3.3546 train_time:10853561ms step_avg:4061.96ms step:2683/3200 train_loss:3.5419 train_time:10857761ms step_avg:4062.01ms step:2684/3200 train_loss:3.3256 train_time:10861959ms step_avg:4062.06ms step:2685/3200 train_loss:3.3564 train_time:10866157ms step_avg:4062.11ms step:2686/3200 
train_loss:3.3057 train_time:10870352ms step_avg:4062.16ms step:2687/3200 train_loss:3.2730 train_time:10874511ms step_avg:4062.20ms step:2688/3200 train_loss:3.2898 train_time:10878658ms step_avg:4062.23ms step:2689/3200 train_loss:3.5000 train_time:10882786ms step_avg:4062.26ms step:2690/3200 train_loss:3.4730 train_time:10886905ms step_avg:4062.28ms step:2691/3200 train_loss:3.3312 train_time:10891011ms step_avg:4062.29ms step:2692/3200 train_loss:3.4927 train_time:10895116ms step_avg:4062.31ms step:2693/3200 train_loss:3.4185 train_time:10899214ms step_avg:4062.32ms step:2694/3200 train_loss:3.4943 train_time:10903305ms step_avg:4062.33ms step:2695/3200 train_loss:3.2917 train_time:10907398ms step_avg:4062.35ms step:2696/3200 train_loss:3.2503 train_time:10911496ms step_avg:4062.36ms step:2697/3200 train_loss:3.2674 train_time:10915596ms step_avg:4062.37ms step:2698/3200 train_loss:3.6958 train_time:10919695ms step_avg:4062.39ms step:2699/3200 train_loss:3.4018 train_time:10923805ms step_avg:4062.40ms step:2700/3200 train_loss:3.7062 train_time:10927920ms step_avg:4062.42ms step:2701/3200 train_loss:3.5733 train_time:10932048ms step_avg:4062.45ms step:2702/3200 train_loss:3.1915 train_time:10936184ms step_avg:4062.48ms step:2703/3200 train_loss:3.3021 train_time:10940336ms step_avg:4062.51ms step:2704/3200 train_loss:3.4656 train_time:10944508ms step_avg:4062.55ms step:2705/3200 train_loss:3.5136 train_time:10948688ms step_avg:4062.59ms step:2706/3200 train_loss:3.2626 train_time:10952869ms step_avg:4062.64ms step:2707/3200 train_loss:3.1593 train_time:10957067ms step_avg:4062.69ms step:2708/3200 train_loss:3.3737 train_time:10961223ms step_avg:4062.72ms step:2709/3200 train_loss:3.3244 train_time:10965360ms step_avg:4062.75ms step:2710/3200 train_loss:3.4313 train_time:10969474ms step_avg:4062.77ms step:2711/3200 train_loss:3.3737 train_time:10973562ms step_avg:4062.78ms step:2712/3200 train_loss:3.3525 train_time:10977639ms step_avg:4062.78ms step:2713/3200 
train_loss:3.4511 train_time:10981704ms step_avg:4062.78ms step:2714/3200 train_loss:3.3390 train_time:10985758ms step_avg:4062.78ms step:2715/3200 train_loss:3.3366 train_time:10989806ms step_avg:4062.77ms step:2716/3200 train_loss:3.1433 train_time:10993845ms step_avg:4062.77ms step:2717/3200 train_loss:3.4409 train_time:10997883ms step_avg:4062.76ms step:2718/3200 train_loss:3.4432 train_time:11001911ms step_avg:4062.74ms step:2719/3200 train_loss:3.3779 train_time:11005940ms step_avg:4062.73ms step:2720/3200 train_loss:3.3455 train_time:11009967ms step_avg:4062.72ms step:2721/3200 train_loss:3.2939 train_time:11013996ms step_avg:4062.71ms step:2722/3200 train_loss:3.0961 train_time:11018025ms step_avg:4062.69ms step:2723/3200 train_loss:3.3586 train_time:11022055ms step_avg:4062.68ms step:2724/3200 train_loss:3.3268 train_time:11026083ms step_avg:4062.67ms step:2725/3200 train_loss:3.4968 train_time:11030114ms step_avg:4062.66ms step:2726/3200 train_loss:3.3923 train_time:11034153ms step_avg:4062.65ms step:2727/3200 train_loss:3.3366 train_time:11038193ms step_avg:4062.64ms step:2728/3200 train_loss:3.3008 train_time:11042237ms step_avg:4062.63ms step:2729/3200 train_loss:3.3544 train_time:11046293ms step_avg:4062.63ms step:2730/3200 train_loss:3.2694 train_time:11050353ms step_avg:4062.63ms step:2731/3200 train_loss:3.4433 train_time:11054424ms step_avg:4062.63ms step:2732/3200 train_loss:3.3159 train_time:11058502ms step_avg:4062.64ms step:2733/3200 train_loss:3.3714 train_time:11062584ms step_avg:4062.65ms step:2734/3200 train_loss:3.4888 train_time:11066689ms step_avg:4062.66ms step:2735/3200 train_loss:3.3949 train_time:11070815ms step_avg:4062.68ms step:2736/3200 train_loss:3.4582 train_time:11074950ms step_avg:4062.71ms step:2737/3200 train_loss:3.0776 train_time:11079105ms step_avg:4062.74ms step:2738/3200 train_loss:3.3640 train_time:11083290ms step_avg:4062.79ms step:2739/3200 train_loss:3.3704 train_time:11087498ms step_avg:4062.84ms step:2740/3200 
train_loss:3.0495 train_time:11091726ms step_avg:4062.90ms step:2741/3200 train_loss:3.3712 train_time:11095951ms step_avg:4062.96ms step:2742/3200 train_loss:3.3870 train_time:11100169ms step_avg:4063.02ms step:2743/3200 train_loss:3.3840 train_time:11104361ms step_avg:4063.07ms step:2744/3200 train_loss:3.2282 train_time:11108501ms step_avg:4063.09ms step:2745/3200 train_loss:3.3315 train_time:11112594ms step_avg:4063.11ms step:2746/3200 train_loss:3.2975 train_time:11116656ms step_avg:4063.11ms step:2747/3200 train_loss:3.1738 train_time:11120705ms step_avg:4063.10ms step:2748/3200 train_loss:3.3201 train_time:11124739ms step_avg:4063.09ms step:2749/3200 train_loss:3.3911 train_time:11128758ms step_avg:4063.07ms step:2750/3200 train_loss:3.3808 train_time:11132789ms step_avg:4063.06ms step:2750/3200 val_loss:3.3350 train_time:11132790ms step_avg:4063.06ms step:2751/3200 train_loss:3.2459 train_time:11136795ms step_avg:4063.04ms step:2752/3200 train_loss:3.3606 train_time:11140794ms step_avg:4063.02ms step:2753/3200 train_loss:3.3835 train_time:11144792ms step_avg:4062.99ms step:2754/3200 train_loss:3.2321 train_time:11148789ms step_avg:4062.97ms step:2755/3200 train_loss:3.2735 train_time:11152788ms step_avg:4062.95ms step:2756/3200 train_loss:3.2765 train_time:11156785ms step_avg:4062.92ms step:2757/3200 train_loss:3.3717 train_time:11160783ms step_avg:4062.90ms step:2758/3200 train_loss:3.3036 train_time:11164782ms step_avg:4062.88ms step:2759/3200 train_loss:3.3780 train_time:11168783ms step_avg:4062.85ms step:2760/3200 train_loss:3.4560 train_time:11172788ms step_avg:4062.83ms step:2761/3200 train_loss:3.4390 train_time:11176797ms step_avg:4062.81ms step:2762/3200 train_loss:3.3525 train_time:11180805ms step_avg:4062.79ms step:2763/3200 train_loss:3.0142 train_time:11184820ms step_avg:4062.78ms step:2764/3200 train_loss:3.3641 train_time:11188836ms step_avg:4062.76ms step:2765/3200 train_loss:3.2158 train_time:11192858ms step_avg:4062.74ms step:2766/3200 
train_loss:3.5870 train_time:11196886ms step_avg:4062.73ms step:2767/3200 train_loss:3.2144 train_time:11200912ms step_avg:4062.72ms step:2768/3200 train_loss:3.3277 train_time:11204945ms step_avg:4062.71ms step:2769/3200 train_loss:3.3111 train_time:11208965ms step_avg:4062.69ms step:2770/3200 train_loss:3.1391 train_time:11212988ms step_avg:4062.68ms step:2771/3200 train_loss:3.3220 train_time:11217020ms step_avg:4062.67ms step:2772/3200 train_loss:3.4012 train_time:11221060ms step_avg:4062.66ms step:2773/3200 train_loss:3.3275 train_time:11225112ms step_avg:4062.65ms step:2774/3200 train_loss:3.4167 train_time:11229177ms step_avg:4062.65ms step:2775/3200 train_loss:3.4466 train_time:11233259ms step_avg:4062.66ms step:2776/3200 train_loss:3.3065 train_time:11237350ms step_avg:4062.67ms step:2777/3200 train_loss:3.3256 train_time:11241459ms step_avg:4062.69ms step:2778/3200 train_loss:3.1963 train_time:11245588ms step_avg:4062.71ms step:2779/3200 train_loss:3.2760 train_time:11249737ms step_avg:4062.74ms step:2780/3200 train_loss:3.3139 train_time:11253917ms step_avg:4062.79ms step:2781/3200 train_loss:3.2821 train_time:11258120ms step_avg:4062.84ms step:2782/3200 train_loss:3.3218 train_time:11262321ms step_avg:4062.89ms step:2783/3200 train_loss:3.3052 train_time:11266517ms step_avg:4062.93ms step:2784/3200 train_loss:3.5970 train_time:11270714ms step_avg:4062.98ms step:2785/3200 train_loss:3.4277 train_time:11274910ms step_avg:4063.03ms step:2786/3200 train_loss:3.3353 train_time:11279086ms step_avg:4063.07ms step:2787/3200 train_loss:3.4313 train_time:11283242ms step_avg:4063.10ms step:2788/3200 train_loss:3.4730 train_time:11287384ms step_avg:4063.13ms step:2789/3200 train_loss:3.4181 train_time:11291524ms step_avg:4063.16ms step:2790/3200 train_loss:3.1955 train_time:11295661ms step_avg:4063.19ms step:2791/3200 train_loss:3.3456 train_time:11299795ms step_avg:4063.21ms step:2792/3200 train_loss:3.3182 train_time:11303922ms step_avg:4063.24ms step:2793/3200 
train_loss:3.5164 train_time:11308055ms step_avg:4063.26ms step:2794/3200 train_loss:3.3269 train_time:11312192ms step_avg:4063.29ms step:2795/3200 train_loss:3.4465 train_time:11316345ms step_avg:4063.32ms step:2796/3200 train_loss:3.3649 train_time:11320497ms step_avg:4063.35ms step:2797/3200 train_loss:3.3545 train_time:11324674ms step_avg:4063.39ms step:2798/3200 train_loss:3.3209 train_time:11328853ms step_avg:4063.43ms step:2799/3200 train_loss:3.3593 train_time:11333035ms step_avg:4063.48ms step:2800/3200 train_loss:3.2656 train_time:11337232ms step_avg:4063.52ms step:2801/3200 train_loss:3.2818 train_time:11341408ms step_avg:4063.56ms step:2802/3200 train_loss:3.4521 train_time:11345564ms step_avg:4063.60ms step:2803/3200 train_loss:3.5998 train_time:11349681ms step_avg:4063.62ms step:2804/3200 train_loss:3.3303 train_time:11353771ms step_avg:4063.63ms step:2805/3200 train_loss:3.2770 train_time:11357846ms step_avg:4063.63ms step:2806/3200 train_loss:3.4660 train_time:11361910ms step_avg:4063.63ms step:2807/3200 train_loss:3.2383 train_time:11365970ms step_avg:4063.63ms step:2808/3200 train_loss:3.2659 train_time:11370014ms step_avg:4063.62ms step:2809/3200 train_loss:3.1766 train_time:11374060ms step_avg:4063.62ms step:2810/3200 train_loss:3.2287 train_time:11378098ms step_avg:4063.61ms step:2811/3200 train_loss:3.1807 train_time:11382126ms step_avg:4063.59ms step:2812/3200 train_loss:3.5952 train_time:11386153ms step_avg:4063.58ms step:2813/3200 train_loss:3.6611 train_time:11390180ms step_avg:4063.57ms step:2814/3200 train_loss:3.3663 train_time:11394205ms step_avg:4063.55ms step:2815/3200 train_loss:3.3152 train_time:11398229ms step_avg:4063.54ms step:2816/3200 train_loss:3.4843 train_time:11402253ms step_avg:4063.53ms step:2817/3200 train_loss:3.4261 train_time:11406278ms step_avg:4063.51ms step:2818/3200 train_loss:3.3734 train_time:11410305ms step_avg:4063.50ms step:2819/3200 train_loss:3.4767 train_time:11414343ms step_avg:4063.49ms step:2820/3200 
train_loss:3.3769 train_time:11418378ms step_avg:4063.48ms step:2821/3200 train_loss:3.3321 train_time:11422418ms step_avg:4063.47ms step:2822/3200 train_loss:3.2594 train_time:11426462ms step_avg:4063.46ms step:2823/3200 train_loss:3.2099 train_time:11430512ms step_avg:4063.46ms step:2824/3200 train_loss:3.2239 train_time:11434571ms step_avg:4063.46ms step:2825/3200 train_loss:3.4607 train_time:11438639ms step_avg:4063.46ms step:2826/3200 train_loss:3.4989 train_time:11442717ms step_avg:4063.46ms step:2827/3200 train_loss:3.3649 train_time:11446800ms step_avg:4063.47ms step:2828/3200 train_loss:3.6627 train_time:11450905ms step_avg:4063.49ms step:2829/3200 train_loss:3.2798 train_time:11455018ms step_avg:4063.50ms step:2830/3200 train_loss:3.4605 train_time:11459153ms step_avg:4063.53ms step:2831/3200 train_loss:3.3934 train_time:11463311ms step_avg:4063.56ms step:2832/3200 train_loss:3.5089 train_time:11467491ms step_avg:4063.60ms step:2833/3200 train_loss:3.3663 train_time:11471672ms step_avg:4063.65ms step:2834/3200 train_loss:3.5226 train_time:11475875ms step_avg:4063.70ms step:2835/3200 train_loss:3.2159 train_time:11480095ms step_avg:4063.75ms step:2836/3200 train_loss:3.4270 train_time:11484287ms step_avg:4063.80ms step:2837/3200 train_loss:3.4593 train_time:11488428ms step_avg:4063.82ms step:2838/3200 train_loss:3.5741 train_time:11492513ms step_avg:4063.83ms step:2839/3200 train_loss:3.2784 train_time:11496554ms step_avg:4063.82ms step:2840/3200 train_loss:3.4034 train_time:11500575ms step_avg:4063.81ms step:2841/3200 train_loss:3.2682 train_time:11504595ms step_avg:4063.79ms step:2842/3200 train_loss:3.3196 train_time:11508606ms step_avg:4063.77ms step:2843/3200 train_loss:3.3487 train_time:11512604ms step_avg:4063.75ms step:2844/3200 train_loss:3.4333 train_time:11516599ms step_avg:4063.73ms step:2845/3200 train_loss:3.3937 train_time:11520600ms step_avg:4063.70ms step:2846/3200 train_loss:3.3105 train_time:11524592ms step_avg:4063.68ms step:2847/3200 
train_loss:3.3715 train_time:11528584ms step_avg:4063.65ms step:2848/3200 train_loss:3.3214 train_time:11532570ms step_avg:4063.63ms step:2849/3200 train_loss:3.3216 train_time:11536551ms step_avg:4063.60ms step:2850/3200 train_loss:3.2957 train_time:11540530ms step_avg:4063.57ms step:2851/3200 train_loss:3.5752 train_time:11544507ms step_avg:4063.54ms step:2852/3200 train_loss:3.2416 train_time:11548482ms step_avg:4063.51ms step:2853/3200 train_loss:3.2713 train_time:11552455ms step_avg:4063.47ms step:2854/3200 train_loss:3.4145 train_time:11556425ms step_avg:4063.44ms step:2855/3200 train_loss:3.2491 train_time:11560395ms step_avg:4063.41ms step:2856/3200 train_loss:3.2846 train_time:11564365ms step_avg:4063.38ms step:2857/3200 train_loss:3.2114 train_time:11568336ms step_avg:4063.34ms step:2858/3200 train_loss:3.4049 train_time:11572305ms step_avg:4063.31ms step:2859/3200 train_loss:3.6401 train_time:11576276ms step_avg:4063.28ms step:2860/3200 train_loss:3.4039 train_time:11580248ms step_avg:4063.24ms step:2861/3200 train_loss:3.3114 train_time:11584221ms step_avg:4063.21ms step:2862/3200 train_loss:3.5613 train_time:11588190ms step_avg:4063.18ms step:2863/3200 train_loss:3.3239 train_time:11592163ms step_avg:4063.15ms step:2864/3200 train_loss:3.4840 train_time:11596135ms step_avg:4063.12ms step:2865/3200 train_loss:3.3589 train_time:11600108ms step_avg:4063.09ms step:2866/3200 train_loss:3.3179 train_time:11604083ms step_avg:4063.05ms step:2867/3200 train_loss:3.4287 train_time:11608059ms step_avg:4063.02ms step:2868/3200 train_loss:3.2807 train_time:11612039ms step_avg:4062.99ms step:2869/3200 train_loss:3.4137 train_time:11616020ms step_avg:4062.97ms step:2870/3200 train_loss:3.3850 train_time:11620002ms step_avg:4062.94ms step:2871/3200 train_loss:3.3768 train_time:11623986ms step_avg:4062.91ms step:2872/3200 train_loss:3.3902 train_time:11627976ms step_avg:4062.88ms step:2873/3200 train_loss:3.4247 train_time:11631964ms step_avg:4062.86ms step:2874/3200 
train_loss:3.3437 train_time:11635957ms step_avg:4062.83ms step:2875/3200 train_loss:3.7695 train_time:11639952ms step_avg:4062.81ms step:2875/3200 val_loss:3.3144 train_time:11639952ms step_avg:4062.81ms step:2876/3200 train_loss:3.5406 train_time:11643970ms step_avg:4062.79ms step:2877/3200 train_loss:3.2966 train_time:11647991ms step_avg:4062.78ms step:2878/3200 train_loss:3.4915 train_time:11652015ms step_avg:4062.77ms step:2879/3200 train_loss:3.1108 train_time:11656021ms step_avg:4062.75ms step:2880/3200 train_loss:3.1828 train_time:11660034ms step_avg:4062.73ms step:2881/3200 train_loss:3.3797 train_time:11664050ms step_avg:4062.71ms step:2882/3200 train_loss:3.5051 train_time:11668072ms step_avg:4062.70ms step:2883/3200 train_loss:3.2755 train_time:11672120ms step_avg:4062.69ms step:2884/3200 train_loss:3.4295 train_time:11676174ms step_avg:4062.69ms step:2885/3200 train_loss:3.3576 train_time:11680243ms step_avg:4062.69ms step:2886/3200 train_loss:3.4253 train_time:11684330ms step_avg:4062.70ms step:2887/3200 train_loss:3.2213 train_time:11688438ms step_avg:4062.72ms step:2888/3200 train_loss:3.4841 train_time:11692561ms step_avg:4062.74ms step:2889/3200 train_loss:3.4187 train_time:11696722ms step_avg:4062.77ms step:2890/3200 train_loss:3.2880 train_time:11700903ms step_avg:4062.81ms step:2891/3200 train_loss:3.3038 train_time:11705084ms step_avg:4062.85ms step:2892/3200 train_loss:3.3752 train_time:11709279ms step_avg:4062.90ms step:2893/3200 train_loss:3.3311 train_time:11713423ms step_avg:4062.93ms step:2894/3200 train_loss:3.2863 train_time:11717518ms step_avg:4062.94ms step:2895/3200 train_loss:3.0579 train_time:11721576ms step_avg:4062.94ms step:2896/3200 train_loss:3.3738 train_time:11725610ms step_avg:4062.93ms step:2897/3200 train_loss:3.3121 train_time:11729625ms step_avg:4062.91ms step:2898/3200 train_loss:3.8653 train_time:11733637ms step_avg:4062.89ms step:2899/3200 train_loss:3.2756 train_time:11737651ms step_avg:4062.88ms step:2900/3200 
train_loss:3.1112 train_time:11741656ms step_avg:4062.86ms step:2901/3200 train_loss:3.3070 train_time:11745653ms step_avg:4062.83ms step:2902/3200 train_loss:3.3292 train_time:11749645ms step_avg:4062.81ms step:2903/3200 train_loss:3.1731 train_time:11753630ms step_avg:4062.78ms step:2904/3200 train_loss:3.1808 train_time:11757614ms step_avg:4062.76ms step:2905/3200 train_loss:3.4323 train_time:11761611ms step_avg:4062.73ms step:2906/3200 train_loss:3.2104 train_time:11765607ms step_avg:4062.71ms step:2907/3200 train_loss:3.4198 train_time:11769600ms step_avg:4062.69ms step:2908/3200 train_loss:4.8759 train_time:11773592ms step_avg:4062.66ms step:2909/3200 train_loss:3.3917 train_time:11777581ms step_avg:4062.64ms step:2910/3200 train_loss:3.2596 train_time:11781568ms step_avg:4062.61ms step:2911/3200 train_loss:3.3882 train_time:11785553ms step_avg:4062.58ms step:2912/3200 train_loss:3.1609 train_time:11789539ms step_avg:4062.56ms step:2913/3200 train_loss:3.4298 train_time:11793528ms step_avg:4062.53ms step:2914/3200 train_loss:3.3452 train_time:11797517ms step_avg:4062.51ms step:2915/3200 train_loss:3.2736 train_time:11801510ms step_avg:4062.48ms step:2916/3200 train_loss:3.6235 train_time:11805502ms step_avg:4062.46ms step:2917/3200 train_loss:3.4053 train_time:11809491ms step_avg:4062.43ms step:2918/3200 train_loss:3.4642 train_time:11813485ms step_avg:4062.41ms step:2919/3200 train_loss:3.3239 train_time:11817480ms step_avg:4062.39ms step:2920/3200 train_loss:3.3569 train_time:11821477ms step_avg:4062.36ms step:2921/3200 train_loss:3.1696 train_time:11825477ms step_avg:4062.34ms step:2922/3200 train_loss:3.2928 train_time:11829476ms step_avg:4062.32ms step:2923/3200 train_loss:3.3043 train_time:11833458ms step_avg:4062.29ms step:2924/3200 train_loss:3.3434 train_time:11837447ms step_avg:4062.27ms step:2925/3200 train_loss:3.4227 train_time:11841438ms step_avg:4062.24ms step:2926/3200 train_loss:3.3066 train_time:11845429ms step_avg:4062.22ms step:2927/3200 
train_loss:3.2146 train_time:11849424ms step_avg:4062.20ms step:2928/3200 train_loss:3.7189 train_time:11853430ms step_avg:4062.18ms step:2929/3200 train_loss:3.5131 train_time:11857438ms step_avg:4062.16ms step:2930/3200 train_loss:3.2987 train_time:11861453ms step_avg:4062.14ms step:2931/3200 train_loss:3.2878 train_time:11865478ms step_avg:4062.13ms step:2932/3200 train_loss:3.1972 train_time:11869508ms step_avg:4062.12ms step:2933/3200 train_loss:3.1955 train_time:11873526ms step_avg:4062.10ms step:2934/3200 train_loss:3.4232 train_time:11877542ms step_avg:4062.09ms step:2935/3200 train_loss:3.1504 train_time:11881566ms step_avg:4062.07ms step:2936/3200 train_loss:3.3624 train_time:11885595ms step_avg:4062.06ms step:2937/3200 train_loss:3.3253 train_time:11889641ms step_avg:4062.06ms step:2938/3200 train_loss:3.3639 train_time:11893696ms step_avg:4062.05ms step:2939/3200 train_loss:3.2906 train_time:11897769ms step_avg:4062.06ms step:2940/3200 train_loss:3.2134 train_time:11901856ms step_avg:4062.07ms step:2941/3200 train_loss:3.2497 train_time:11905960ms step_avg:4062.08ms step:2942/3200 train_loss:3.4140 train_time:11910087ms step_avg:4062.10ms step:2943/3200 train_loss:3.3506 train_time:11914235ms step_avg:4062.13ms step:2944/3200 train_loss:3.3465 train_time:11918414ms step_avg:4062.17ms step:2945/3200 train_loss:4.4135 train_time:11922596ms step_avg:4062.21ms step:2946/3200 train_loss:3.0978 train_time:11926797ms step_avg:4062.26ms step:2947/3200 train_loss:3.6317 train_time:11930993ms step_avg:4062.31ms step:2948/3200 train_loss:3.3484 train_time:11935191ms step_avg:4062.35ms step:2949/3200 train_loss:3.4191 train_time:11939367ms step_avg:4062.39ms step:2950/3200 train_loss:3.4471 train_time:11943523ms step_avg:4062.42ms step:2951/3200 train_loss:3.3860 train_time:11947669ms step_avg:4062.45ms step:2952/3200 train_loss:3.1378 train_time:11951803ms step_avg:4062.48ms step:2953/3200 train_loss:3.2452 train_time:11955934ms step_avg:4062.50ms step:2954/3200 
train_loss:3.1327 train_time:11960060ms step_avg:4062.52ms step:2955/3200 train_loss:3.2916 train_time:11964181ms step_avg:4062.54ms step:2956/3200 train_loss:3.2650 train_time:11968302ms step_avg:4062.56ms step:2957/3200 train_loss:3.1457 train_time:11972430ms step_avg:4062.58ms step:2958/3200 train_loss:3.1815 train_time:11976569ms step_avg:4062.61ms step:2959/3200 train_loss:3.2101 train_time:11980713ms step_avg:4062.64ms step:2960/3200 train_loss:3.2634 train_time:11984871ms step_avg:4062.67ms step:2961/3200 train_loss:3.2912 train_time:11989051ms step_avg:4062.71ms step:2962/3200 train_loss:3.3170 train_time:11993231ms step_avg:4062.75ms step:2963/3200 train_loss:3.3405 train_time:11997410ms step_avg:4062.79ms step:2964/3200 train_loss:3.4804 train_time:12001592ms step_avg:4062.83ms step:2965/3200 train_loss:3.3814 train_time:12005794ms step_avg:4062.87ms step:2966/3200 train_loss:3.3570 train_time:12009993ms step_avg:4062.92ms step:2967/3200 train_loss:3.5660 train_time:12014182ms step_avg:4062.96ms step:2968/3200 train_loss:3.2127 train_time:12018305ms step_avg:4062.98ms step:2969/3200 train_loss:3.2325 train_time:12022382ms step_avg:4062.99ms step:2970/3200 train_loss:3.2149 train_time:12026420ms step_avg:4062.98ms step:2971/3200 train_loss:3.3449 train_time:12030437ms step_avg:4062.96ms step:2972/3200 train_loss:3.3608 train_time:12034463ms step_avg:4062.95ms step:2973/3200 train_loss:3.1804 train_time:12038477ms step_avg:4062.94ms step:2974/3200 train_loss:3.2054 train_time:12042475ms step_avg:4062.91ms step:2975/3200 train_loss:3.3030 train_time:12046468ms step_avg:4062.89ms step:2976/3200 train_loss:3.2335 train_time:12050470ms step_avg:4062.87ms step:2977/3200 train_loss:3.2568 train_time:12054468ms step_avg:4062.85ms step:2978/3200 train_loss:3.1787 train_time:12058460ms step_avg:4062.82ms step:2979/3200 train_loss:3.4856 train_time:12062448ms step_avg:4062.80ms step:2980/3200 train_loss:3.3368 train_time:12066432ms step_avg:4062.77ms step:2981/3200 
train_loss:3.1808 train_time:12070415ms step_avg:4062.74ms step:2982/3200 train_loss:3.3102 train_time:12074395ms step_avg:4062.72ms step:2983/3200 train_loss:3.3221 train_time:12078376ms step_avg:4062.69ms step:2984/3200 train_loss:3.2251 train_time:12082352ms step_avg:4062.66ms step:2985/3200 train_loss:3.1932 train_time:12086328ms step_avg:4062.63ms step:2986/3200 train_loss:3.4249 train_time:12090302ms step_avg:4062.60ms step:2987/3200 train_loss:3.2442 train_time:12094276ms step_avg:4062.57ms step:2988/3200 train_loss:3.4216 train_time:12098250ms step_avg:4062.54ms step:2989/3200 train_loss:3.3251 train_time:12102224ms step_avg:4062.51ms step:2990/3200 train_loss:3.3676 train_time:12106200ms step_avg:4062.48ms step:2991/3200 train_loss:3.3758 train_time:12110175ms step_avg:4062.45ms step:2992/3200 train_loss:3.3802 train_time:12114149ms step_avg:4062.42ms step:2993/3200 train_loss:3.4602 train_time:12118123ms step_avg:4062.39ms step:2994/3200 train_loss:3.3027 train_time:12122100ms step_avg:4062.37ms step:2995/3200 train_loss:3.1408 train_time:12126080ms step_avg:4062.34ms step:2996/3200 train_loss:3.2875 train_time:12130058ms step_avg:4062.31ms step:2997/3200 train_loss:3.0913 train_time:12134038ms step_avg:4062.28ms step:2998/3200 train_loss:3.1683 train_time:12138024ms step_avg:4062.26ms step:2999/3200 train_loss:3.3960 train_time:12142008ms step_avg:4062.23ms step:3000/3200 train_loss:3.3834 train_time:12145992ms step_avg:4062.20ms step:3000/3200 val_loss:3.2964 train_time:12145992ms step_avg:4062.20ms step:3001/3200 train_loss:3.3633 train_time:12149988ms step_avg:4062.18ms step:3002/3200 train_loss:3.4673 train_time:12153978ms step_avg:4062.16ms step:3003/3200 train_loss:3.0215 train_time:12157975ms step_avg:4062.14ms step:3004/3200 train_loss:3.2895 train_time:12161974ms step_avg:4062.12ms step:3005/3200 train_loss:3.4708 train_time:12165980ms step_avg:4062.10ms step:3006/3200 train_loss:3.3300 train_time:12169993ms step_avg:4062.08ms step:3007/3200 
train_loss:3.1284 train_time:12174013ms step_avg:4062.07ms step:3008/3200 train_loss:3.1658 train_time:12178043ms step_avg:4062.06ms step:3009/3200 train_loss:3.3142 train_time:12182074ms step_avg:4062.05ms step:3010/3200 train_loss:3.2396 train_time:12186094ms step_avg:4062.03ms step:3011/3200 train_loss:3.2998 train_time:12190121ms step_avg:4062.02ms step:3012/3200 train_loss:3.2621 train_time:12194154ms step_avg:4062.01ms step:3013/3200 train_loss:3.3704 train_time:12198202ms step_avg:4062.01ms step:3014/3200 train_loss:3.3325 train_time:12202266ms step_avg:4062.01ms step:3015/3200 train_loss:3.2749 train_time:12206343ms step_avg:4062.01ms step:3016/3200 train_loss:3.2837 train_time:12210435ms step_avg:4062.02ms step:3017/3200 train_loss:3.6119 train_time:12214556ms step_avg:4062.04ms step:3018/3200 train_loss:3.2328 train_time:12218700ms step_avg:4062.07ms step:3019/3200 train_loss:3.3380 train_time:12222866ms step_avg:4062.10ms step:3020/3200 train_loss:3.3519 train_time:12227060ms step_avg:4062.15ms step:3021/3200 train_loss:3.3120 train_time:12231259ms step_avg:4062.19ms step:3022/3200 train_loss:3.1970 train_time:12235457ms step_avg:4062.24ms step:3023/3200 train_loss:3.3253 train_time:12239636ms step_avg:4062.28ms step:3024/3200 train_loss:3.3515 train_time:12243814ms step_avg:4062.31ms step:3025/3200 train_loss:3.3634 train_time:12247993ms step_avg:4062.35ms step:3026/3200 train_loss:3.4113 train_time:12252172ms step_avg:4062.39ms step:3027/3200 train_loss:3.2726 train_time:12256373ms step_avg:4062.44ms step:3028/3200 train_loss:3.2343 train_time:12260572ms step_avg:4062.48ms step:3029/3200 train_loss:3.2927 train_time:12264770ms step_avg:4062.53ms step:3030/3200 train_loss:3.2868 train_time:12268971ms step_avg:4062.57ms step:3031/3200 train_loss:3.3683 train_time:12273170ms step_avg:4062.62ms step:3032/3200 train_loss:3.3921 train_time:12277366ms step_avg:4062.66ms step:3033/3200 train_loss:3.2430 train_time:12281564ms step_avg:4062.71ms step:3034/3200 
train_loss:3.3294 train_time:12285752ms step_avg:4062.75ms step:3035/3200 train_loss:3.1627 train_time:12289874ms step_avg:4062.77ms step:3036/3200 train_loss:3.2085 train_time:12293957ms step_avg:4062.78ms step:3037/3200 train_loss:3.3020 train_time:12298016ms step_avg:4062.77ms step:3038/3200 train_loss:3.1045 train_time:12302056ms step_avg:4062.77ms step:3039/3200 train_loss:3.3245 train_time:12306077ms step_avg:4062.75ms step:3040/3200 train_loss:3.2311 train_time:12310093ms step_avg:4062.74ms step:3041/3200 train_loss:3.2921 train_time:12314120ms step_avg:4062.73ms step:3042/3200 train_loss:3.3868 train_time:12318146ms step_avg:4062.71ms step:3043/3200 train_loss:3.2819 train_time:12322160ms step_avg:4062.70ms step:3044/3200 train_loss:3.1885 train_time:12326170ms step_avg:4062.68ms step:3045/3200 train_loss:3.3162 train_time:12330174ms step_avg:4062.66ms step:3046/3200 train_loss:3.3065 train_time:12334176ms step_avg:4062.64ms step:3047/3200 train_loss:3.6225 train_time:12338173ms step_avg:4062.62ms step:3048/3200 train_loss:3.3067 train_time:12342170ms step_avg:4062.60ms step:3049/3200 train_loss:3.2539 train_time:12346167ms step_avg:4062.58ms step:3050/3200 train_loss:3.2916 train_time:12350161ms step_avg:4062.55ms step:3051/3200 train_loss:3.3141 train_time:12354158ms step_avg:4062.53ms step:3052/3200 train_loss:3.0032 train_time:12358154ms step_avg:4062.51ms step:3053/3200 train_loss:3.3782 train_time:12362148ms step_avg:4062.49ms step:3054/3200 train_loss:3.2867 train_time:12366142ms step_avg:4062.46ms step:3055/3200 train_loss:3.3377 train_time:12370139ms step_avg:4062.44ms step:3056/3200 train_loss:3.2042 train_time:12374138ms step_avg:4062.42ms step:3057/3200 train_loss:3.2223 train_time:12378138ms step_avg:4062.40ms step:3058/3200 train_loss:3.1398 train_time:12382140ms step_avg:4062.38ms step:3059/3200 train_loss:3.1824 train_time:12386146ms step_avg:4062.36ms step:3060/3200 train_loss:3.2570 train_time:12390153ms step_avg:4062.35ms step:3061/3200 
train_loss:3.2829 train_time:12394164ms step_avg:4062.33ms step:3062/3200 train_loss:3.2659 train_time:12398173ms step_avg:4062.31ms step:3063/3200 train_loss:3.3213 train_time:12402189ms step_avg:4062.30ms step:3064/3200 train_loss:3.1708 train_time:12406219ms step_avg:4062.29ms step:3065/3200 train_loss:3.2247 train_time:12410251ms step_avg:4062.28ms step:3066/3200 train_loss:3.4957 train_time:12414277ms step_avg:4062.26ms step:3067/3200 train_loss:3.4346 train_time:12418290ms step_avg:4062.25ms step:3068/3200 train_loss:3.3366 train_time:12422312ms step_avg:4062.23ms step:3069/3200 train_loss:3.3925 train_time:12426348ms step_avg:4062.23ms step:3070/3200 train_loss:3.2823 train_time:12430388ms step_avg:4062.22ms step:3071/3200 train_loss:3.4476 train_time:12434436ms step_avg:4062.21ms step:3072/3200 train_loss:3.0844 train_time:12438501ms step_avg:4062.21ms step:3073/3200 train_loss:3.3185 train_time:12442571ms step_avg:4062.22ms step:3074/3200 train_loss:3.4090 train_time:12446665ms step_avg:4062.23ms step:3075/3200 train_loss:3.3169 train_time:12450769ms step_avg:4062.24ms step:3076/3200 train_loss:3.4249 train_time:12454895ms step_avg:4062.26ms step:3077/3200 train_loss:3.2612 train_time:12459045ms step_avg:4062.29ms step:3078/3200 train_loss:3.3531 train_time:12463224ms step_avg:4062.33ms step:3079/3200 train_loss:3.2815 train_time:12467402ms step_avg:4062.37ms step:3080/3200 train_loss:3.4218 train_time:12471574ms step_avg:4062.40ms step:3081/3200 train_loss:3.2248 train_time:12475723ms step_avg:4062.43ms step:3082/3200 train_loss:3.2901 train_time:12479869ms step_avg:4062.46ms step:3083/3200 train_loss:3.5664 train_time:12484007ms step_avg:4062.48ms step:3084/3200 train_loss:3.3837 train_time:12488136ms step_avg:4062.50ms step:3085/3200 train_loss:3.4017 train_time:12492262ms step_avg:4062.52ms step:3086/3200 train_loss:3.3844 train_time:12496393ms step_avg:4062.55ms step:3087/3200 train_loss:3.2541 train_time:12500528ms step_avg:4062.57ms step:3088/3200 
train_loss:3.2286 train_time:12504668ms step_avg:4062.60ms step:3089/3200 train_loss:3.3119 train_time:12508812ms step_avg:4062.62ms step:3090/3200 train_loss:3.2733 train_time:12512966ms step_avg:4062.65ms step:3091/3200 train_loss:3.4626 train_time:12517147ms step_avg:4062.69ms step:3092/3200 train_loss:3.3196 train_time:12521326ms step_avg:4062.73ms step:3093/3200 train_loss:3.1447 train_time:12525506ms step_avg:4062.77ms step:3094/3200 train_loss:3.2691 train_time:12529687ms step_avg:4062.80ms step:3095/3200 train_loss:3.2601 train_time:12533889ms step_avg:4062.85ms step:3096/3200 train_loss:3.3286 train_time:12538084ms step_avg:4062.89ms step:3097/3200 train_loss:3.1376 train_time:12542283ms step_avg:4062.94ms step:3098/3200 train_loss:3.2764 train_time:12546445ms step_avg:4062.97ms step:3099/3200 train_loss:3.3151 train_time:12550581ms step_avg:4062.99ms step:3100/3200 train_loss:3.4665 train_time:12554698ms step_avg:4063.01ms step:3101/3200 train_loss:3.1834 train_time:12558805ms step_avg:4063.02ms step:3102/3200 train_loss:3.3702 train_time:12562901ms step_avg:4063.03ms step:3103/3200 train_loss:3.4066 train_time:12566990ms step_avg:4063.04ms step:3104/3200 train_loss:3.0497 train_time:12571074ms step_avg:4063.05ms step:3105/3200 train_loss:3.1937 train_time:12575158ms step_avg:4063.06ms step:3106/3200 train_loss:3.2780 train_time:12579242ms step_avg:4063.06ms step:3107/3200 train_loss:3.3024 train_time:12583322ms step_avg:4063.07ms step:3108/3200 train_loss:3.1158 train_time:12587406ms step_avg:4063.07ms step:3109/3200 train_loss:3.2744 train_time:12591488ms step_avg:4063.08ms step:3110/3200 train_loss:3.2494 train_time:12595574ms step_avg:4063.09ms step:3111/3200 train_loss:3.1663 train_time:12599664ms step_avg:4063.10ms step:3112/3200 train_loss:3.2110 train_time:12603758ms step_avg:4063.11ms step:3113/3200 train_loss:3.2798 train_time:12607856ms step_avg:4063.12ms step:3114/3200 train_loss:3.3276 train_time:12611970ms step_avg:4063.13ms step:3115/3200 
train_loss:3.0836 train_time:12616094ms step_avg:4063.15ms step:3116/3200 train_loss:3.3369 train_time:12620225ms step_avg:4063.18ms step:3117/3200 train_loss:3.2976 train_time:12624373ms step_avg:4063.20ms step:3118/3200 train_loss:2.9873 train_time:12628553ms step_avg:4063.24ms step:3119/3200 train_loss:3.1199 train_time:12632735ms step_avg:4063.28ms step:3120/3200 train_loss:3.6130 train_time:12636945ms step_avg:4063.33ms step:3121/3200 train_loss:3.2442 train_time:12641185ms step_avg:4063.38ms step:3122/3200 train_loss:3.3017 train_time:12645442ms step_avg:4063.45ms step:3123/3200 train_loss:3.3695 train_time:12649651ms step_avg:4063.49ms step:3124/3200 train_loss:3.1735 train_time:12653823ms step_avg:4063.53ms step:3125/3200 train_loss:3.5270 train_time:12657938ms step_avg:4063.54ms step:3125/3200 val_loss:3.2818 train_time:12657939ms step_avg:4063.54ms step:3126/3200 train_loss:3.2041 train_time:12661945ms step_avg:4063.53ms step:3127/3200 train_loss:3.3411 train_time:12665974ms step_avg:4063.51ms step:3128/3200 train_loss:3.2161 train_time:12669994ms step_avg:4063.50ms step:3129/3200 train_loss:3.3537 train_time:12674013ms step_avg:4063.49ms step:3130/3200 train_loss:3.3561 train_time:12678029ms step_avg:4063.47ms step:3131/3200 train_loss:3.3506 train_time:12682042ms step_avg:4063.45ms step:3132/3200 train_loss:3.4475 train_time:12686056ms step_avg:4063.44ms step:3133/3200 train_loss:3.1907 train_time:12690071ms step_avg:4063.42ms step:3134/3200 train_loss:3.2718 train_time:12694080ms step_avg:4063.41ms step:3135/3200 train_loss:3.1363 train_time:12698093ms step_avg:4063.39ms step:3136/3200 train_loss:3.3176 train_time:12702111ms step_avg:4063.38ms step:3137/3200 train_loss:3.2646 train_time:12706133ms step_avg:4063.36ms step:3138/3200 train_loss:4.0743 train_time:12710156ms step_avg:4063.35ms step:3139/3200 train_loss:3.2549 train_time:12714179ms step_avg:4063.34ms step:3140/3200 train_loss:3.2240 train_time:12718200ms step_avg:4063.32ms step:3141/3200 
train_loss:3.2783 train_time:12722226ms step_avg:4063.31ms step:3142/3200 train_loss:3.2858 train_time:12726258ms step_avg:4063.30ms step:3143/3200 train_loss:3.1776 train_time:12730280ms step_avg:4063.29ms step:3144/3200 train_loss:3.2381 train_time:12734297ms step_avg:4063.27ms step:3145/3200 train_loss:3.1025 train_time:12738319ms step_avg:4063.26ms step:3146/3200 train_loss:3.3950 train_time:12742350ms step_avg:4063.25ms step:3147/3200 train_loss:3.1410 train_time:12746384ms step_avg:4063.24ms step:3148/3200 train_loss:3.1364 train_time:12750425ms step_avg:4063.23ms step:3149/3200 train_loss:3.2105 train_time:12754478ms step_avg:4063.23ms step:3150/3200 train_loss:3.3658 train_time:12758537ms step_avg:4063.23ms step:3151/3200 train_loss:3.3006 train_time:12762618ms step_avg:4063.23ms step:3152/3200 train_loss:3.3348 train_time:12766705ms step_avg:4063.24ms step:3153/3200 train_loss:3.0852 train_time:12770808ms step_avg:4063.25ms step:3154/3200 train_loss:3.2523 train_time:12774925ms step_avg:4063.27ms step:3155/3200 train_loss:3.2149 train_time:12779062ms step_avg:4063.29ms step:3156/3200 train_loss:3.2861 train_time:12783241ms step_avg:4063.33ms step:3157/3200 train_loss:3.1056 train_time:12787424ms step_avg:4063.37ms step:3158/3200 train_loss:3.3254 train_time:12791626ms step_avg:4063.41ms step:3159/3200 train_loss:3.1966 train_time:12795873ms step_avg:4063.47ms step:3160/3200 train_loss:3.2418 train_time:12800076ms step_avg:4063.52ms step:3161/3200 train_loss:3.2229 train_time:12804231ms step_avg:4063.55ms step:3162/3200 train_loss:3.2656 train_time:12808349ms step_avg:4063.56ms step:3163/3200 train_loss:3.3862 train_time:12812442ms step_avg:4063.57ms step:3164/3200 train_loss:3.2510 train_time:12816509ms step_avg:4063.57ms step:3165/3200 train_loss:3.1640 train_time:12820562ms step_avg:4063.57ms step:3166/3200 train_loss:3.3202 train_time:12824600ms step_avg:4063.56ms step:3167/3200 train_loss:3.3827 train_time:12828631ms step_avg:4063.55ms step:3168/3200 
train_loss:3.2633 train_time:12832654ms step_avg:4063.54ms step:3169/3200 train_loss:3.2346 train_time:12836672ms step_avg:4063.52ms step:3170/3200 train_loss:3.2515 train_time:12840682ms step_avg:4063.51ms step:3171/3200 train_loss:3.2059 train_time:12844691ms step_avg:4063.49ms step:3172/3200 train_loss:3.1092 train_time:12848696ms step_avg:4063.47ms step:3173/3200 train_loss:3.3215 train_time:12852718ms step_avg:4063.46ms step:3174/3200 train_loss:3.0788 train_time:12856746ms step_avg:4063.45ms step:3175/3200 train_loss:3.4591 train_time:12860773ms step_avg:4063.44ms step:3176/3200 train_loss:3.3341 train_time:12864794ms step_avg:4063.42ms step:3177/3200 train_loss:3.2206 train_time:12868822ms step_avg:4063.41ms step:3178/3200 train_loss:3.1259 train_time:12872850ms step_avg:4063.40ms step:3179/3200 train_loss:3.2598 train_time:12876879ms step_avg:4063.39ms step:3180/3200 train_loss:3.1457 train_time:12880910ms step_avg:4063.38ms step:3181/3200 train_loss:3.2866 train_time:12884941ms step_avg:4063.37ms step:3182/3200 train_loss:3.4432 train_time:12888976ms step_avg:4063.36ms step:3183/3200 train_loss:3.4723 train_time:12892989ms step_avg:4063.34ms step:3184/3200 train_loss:3.4146 train_time:12897008ms step_avg:4063.33ms step:3185/3200 train_loss:3.1887 train_time:12901033ms step_avg:4063.32ms step:3186/3200 train_loss:3.3239 train_time:12905064ms step_avg:4063.31ms step:3187/3200 train_loss:3.2777 train_time:12909094ms step_avg:4063.30ms step:3188/3200 train_loss:3.6387 train_time:12913139ms step_avg:4063.29ms step:3189/3200 train_loss:3.3323 train_time:12917190ms step_avg:4063.29ms step:3190/3200 train_loss:3.1519 train_time:12921257ms step_avg:4063.29ms step:3191/3200 train_loss:3.3583 train_time:12925329ms step_avg:4063.29ms step:3192/3200 train_loss:3.3599 train_time:12929416ms step_avg:4063.30ms step:3193/3200 train_loss:3.1016 train_time:12933517ms step_avg:4063.31ms step:3194/3200 train_loss:3.3017 train_time:12937636ms step_avg:4063.33ms step:3195/3200 
train_loss:3.2009 train_time:12941771ms step_avg:4063.35ms step:3196/3200 train_loss:3.2175 train_time:12945943ms step_avg:4063.38ms step:3197/3200 train_loss:3.2819 train_time:12950131ms step_avg:4063.42ms step:3198/3200 train_loss:3.1602 train_time:12954327ms step_avg:4063.47ms step:3199/3200 train_loss:3.1988 train_time:12958486ms step_avg:4063.50ms step:3200/3200 train_loss:3.3662 train_time:12962609ms step_avg:4063.51ms step:3200/3200 val_loss:3.2775 train_time:12962609ms step_avg:4063.51ms