diff --git "a/abba5381-1376-415f-a331-a869506e243d.txt" "b/abba5381-1376-415f-a331-a869506e243d.txt" new file mode 100644--- /dev/null +++ "b/abba5381-1376-415f-a331-a869506e243d.txt" @@ -0,0 +1,3869 @@ +==================================================================================================== +import os +import sys +with open(sys.argv[0]) as f: + code = f.read() # read the code of this file ASAP, for logging +import uuid +import glob +import time +from dataclasses import dataclass + +import numpy as np +import torch +from torch import nn +import torch.nn.functional as F +import torch.distributed as dist +import torch._inductor.config as config +from torch.nn.parallel import DistributedDataParallel as DDP + +# ----------------------------------------------------------------------------- +# Muon optimizer + +def zeropower_via_svd(G, steps=None): + U, S, V = G.svd() + return U @ V.T + +@torch.compile +def zeropower_via_newtonschulz5(G, steps=10, eps=1e-7): + """ + Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. We opt to use a + quintic iteration whose coefficients are selected to maximize the slope at zero. For the purpose + of minimizing steps, it turns out to be empirically effective to keep increasing the slope at + zero even beyond the point where the iteration no longer converges all the way to one everywhere + on the interval. This iteration therefore does not produce UV^T but rather something like US'V^T + where S' is diagonal with S_{ii}' \sim Uniform(0.5, 1.5), which turns out not to hurt model + performance at all relative to UV^T, where USV^T = G is the SVD. + """ + assert len(G.shape) == 2 + a, b, c = (3.4445, -4.7750, 2.0315) + X = G.bfloat16() + X /= (X.norm() + eps) # ensure top singular value <= 1 + if G.size(0) > G.size(1): + X = X.T + for _ in range(steps): + A = X @ X.T + B = A @ X + X = a * X + b * B + c * A @ B + if G.size(0) > G.size(1): + X = X.T + return X + +zeropower_backends = dict(svd=zeropower_via_svd, newtonschulz5=zeropower_via_newtonschulz5) + +class Muon(torch.optim.Optimizer): + """ + Muon - MomentUm Orthogonalized by Newton-schulz + + Muon internally runs standard SGD-momentum, and then performs an orthogonalization post- + processing step, in which each 2D parameter's update is replaced with the nearest orthogonal + matrix. To efficiently orthogonalize each update, we use a Newton-Schulz iteration, which has + the advantage that it can be stably run in bfloat16 on the GPU. + + Some warnings: + - This optimizer assumes that all parameters passed in are 2D. + - It should not be used for the embedding layer, the final fully connected layer, or any {0,1}-D + parameters; those should all be optimized by a standard method (e.g., AdamW). + - To use it with 4D convolutional filters, it works well to just flatten their last 3 dimensions. + - We believe it is unlikely to work well for training with small batch size. + - We believe it may not work well for finetuning pretrained models, but we haven't tested this. + - We have not yet tried this optimizer for training scenarios larger than NanoGPT (124M). + + Arguments: + lr: The learning rate used by the internal SGD. + momentum: The momentum used by the internal SGD. + nesterov: Whether to use Nesterov-style momentum in the internal SGD. (recommended) + backend: The chosen backend for the orthogonalization step. (recommended: 'newtonschulz5') + backend_steps: The number of iteration steps to use in the backend, if it is iterative. + """ + def __init__(self, params, lr=0.02, momentum=0.95, nesterov=True, + backend='newtonschulz5', backend_steps=5): + defaults = dict(lr=lr, momentum=momentum, nesterov=nesterov, backend=backend, backend_steps=backend_steps) + super().__init__(params, defaults) + + def step(self): + + for group in self.param_groups: + + lr = group['lr'] + momentum = group['momentum'] + zeropower_backend = zeropower_backends[group['backend']] + + # generate weight updates in distributed fashion + total_params = sum(p.numel() for p in group['params']) + updates_flat = torch.zeros(total_params, device='cuda', dtype=torch.bfloat16) + curr_idx = 0 + for i, p in enumerate(group['params']): + # luckily this will perfectly distribute a transformer with multiple of 4 layers to 8 GPUs + if i % int(os.environ['WORLD_SIZE']) == int(os.environ['RANK']): + g = p.grad + assert g is not None + state = self.state[p] + if 'momentum_buffer' not in state: + state['momentum_buffer'] = torch.zeros_like(g) + buf = state['momentum_buffer'] + buf.mul_(momentum).add_(g) + if group['nesterov']: + g = g.add(buf, alpha=momentum) + g = zeropower_backend(g, steps=group['backend_steps']) + g *= max(1, g.size(0)/g.size(1))**0.5 + updates_flat[curr_idx:curr_idx+p.numel()] = g.flatten() + curr_idx += p.numel() + + # sync updates across devices. we are not memory-constrained so can do this simple deserialization + dist.all_reduce(updates_flat, op=dist.ReduceOp.SUM) + + # deserialize and apply updates + curr_idx = 0 + for p in group['params']: + g = updates_flat[curr_idx:curr_idx+p.numel()].view_as(p.data).type_as(p.data) + p.data.add_(g, alpha=-lr) + curr_idx += p.numel() + +# ----------------------------------------------------------------------------- +# PyTorch nn.Module definitions for the GPT-2 model + +class Rotary(torch.nn.Module): + + def __init__(self, dim, base=10000): + super().__init__() + self.inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim)) + self.seq_len_cached = None + self.cos_cached = None + self.sin_cached = None + + def forward(self, x): + seq_len = x.shape[1] + if seq_len != self.seq_len_cached: + self.seq_len_cached = seq_len + t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq) + freqs = torch.outer(t, self.inv_freq).to(x.device) + self.cos_cached = freqs.cos().bfloat16() + self.sin_cached = freqs.sin().bfloat16() + return self.cos_cached[None, :, None, :], self.sin_cached[None, :, None, :] + +def apply_rotary_emb(x, cos, sin): + assert x.ndim == 4 # multihead attention + d = x.shape[3]//2 + x1 = x[..., :d] + x2 = x[..., d:] + y1 = x1 * cos + x2 * sin + y2 = x1 * (-sin) + x2 * cos + return torch.cat([y1, y2], 3).type_as(x) + +class CastedLinear(nn.Linear): + def forward(self, x): + return F.linear(x, self.weight.to(x.dtype)) + +class CausalSelfAttention(nn.Module): + + def __init__(self, config): + super().__init__() + self.n_head = config.n_head + self.n_embd = config.n_embd + self.head_dim = self.n_embd // self.n_head + assert self.n_embd % self.n_head == 0 + self.c_q = CastedLinear(self.n_embd, self.n_embd, bias=False) + self.c_k = CastedLinear(self.n_embd, self.n_embd, bias=False) + self.c_v = CastedLinear(self.n_embd, self.n_embd, bias=False) + # output projection + self.c_proj = CastedLinear(self.n_embd, self.n_embd, bias=False) + self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 + self.rotary = Rotary(self.head_dim) + self.lamb = nn.Parameter(torch.tensor(0.5)) # @Grad62304977 + + def forward(self, x, v1=None): + B, T, C = x.size() # batch size, sequence length, embedding dimensionality (n_embd) + q = self.c_q(x).view(B, T, self.n_head, self.head_dim) + k = self.c_k(x).view(B, T, self.n_head, self.head_dim) + v = self.c_v(x).view(B, T, self.n_head, self.head_dim) + if v1 is None: + v1 = v # This happens if we are in the first block. v needs to be accessed by subsequent blocks + v = (1 - self.lamb) * v + self.lamb * v1.view_as(v) # @Grad62304977 + cos, sin = self.rotary(q) + q, k = F.rms_norm(q, (q.size(-1),)), F.rms_norm(k, (k.size(-1),)) # QK norm suggested by @Grad62304977 + q, k = apply_rotary_emb(q, cos, sin), apply_rotary_emb(k, cos, sin) + y = F.scaled_dot_product_attention(q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), is_causal=True) + y = y.transpose(1, 2).contiguous().view_as(x) # re-assemble all head outputs side by side + y = self.c_proj(y) + return y, v1 + +class MLP(nn.Module): + + def __init__(self, config): + super().__init__() + self.c_fc = CastedLinear(config.n_embd, 4 * config.n_embd, bias=False) + self.c_proj = CastedLinear(4 * config.n_embd, config.n_embd, bias=False) + self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 + + def forward(self, x): + x = self.c_fc(x) + x = F.relu(x).square() # https://arxiv.org/abs/2109.08668v2; ~1-2% better than GELU; suggested by @SKYLINEZ007 and @Grad62304977 + x = self.c_proj(x) + return x + +class Block(nn.Module): + + def __init__(self, config): + super().__init__() + self.attn = CausalSelfAttention(config) + self.mlp = MLP(config) + self.lambdas = nn.Parameter(torch.tensor([1., 0.])) + + def forward(self, x, v1, x0): + x = self.lambdas[0] * x + self.lambdas[1] * x0 + x1, v1 = self.attn(F.rms_norm(x, (x.size(-1),)), v1) + x = x + x1 + x = x + self.mlp(F.rms_norm(x, (x.size(-1),))) + return x, v1 + +# ----------------------------------------------------------------------------- +# The main GPT-2 model + +@dataclass +class GPTConfig: + vocab_size : int = 50304 + n_layer : int = 12 + n_head : int = 6 # head dim 128 suggested by @Grad62304977 + n_embd : int = 768 + +class GPT(nn.Module): + + def __init__(self, config): + super().__init__() + self.config = config + + self.transformer = nn.ModuleDict(dict( + wte = nn.Embedding(config.vocab_size, config.n_embd), + h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]), + )) + self.lm_head = CastedLinear(config.n_embd, config.vocab_size, bias=False) + self.lm_head.weight.data.zero_() # @Grad62304977 + + def forward(self, idx, target): + + # forward the GPT model itself + x = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + x = F.rms_norm(x, (x.size(-1),)) # @Grad62304977 + x0 = x + v1 = None + for block in self.transformer.h: + x, v1 = block(x, v1, x0) + x = F.rms_norm(x, (x.size(-1),)) + + logits = self.lm_head(x) + logits = 30 * torch.tanh(logits / 30) # @Grad62304977 + logits = logits.float() + loss = F.cross_entropy(logits.view(-1, logits.size(-1)), target.view(-1)) + return loss.float() + +# ----------------------------------------------------------------------------- +# Our own simple Distributed Data Loader + +def _peek_data_shard(filename): + # only reads the header, returns header data + with open(filename, "rb") as f: + # first read the header, which is 256 int32 integers (4 bytes each) + header = np.frombuffer(f.read(256*4), dtype=np.int32) + if header[0] != 20240520: + print("ERROR: magic number mismatch in the data .bin file!") + print("---> HINT: Are you passing in a correct file with --input_bin?") + print("---> HINT: Dataset encoding changed recently, re-run data prepro or refer again to README") + print("---> HINT: For example re-run: `python dev/data/tinyshakespeare.py`, then re-try") + exit(1) + assert header[1] == 1, "unsupported version" + ntok = header[2] # number of tokens (claimed) + return ntok # for now just return the number of tokens + +def _load_data_shard(filename): + with open(filename, "rb") as f: + # first read the header, which is 256 int32 integers (4 bytes each) + header = np.frombuffer(f.read(256*4), dtype=np.int32) + assert header[0] == 20240520, "magic number mismatch in the data .bin file" + assert header[1] == 1, "unsupported version" + ntok = header[2] # number of tokens (claimed) + # the rest of it are tokens, stored as uint16 + tokens = np.frombuffer(f.read(), dtype=np.uint16) + assert len(tokens) == ntok, "number of tokens read does not match header?" + return tokens + +class DistributedDataLoader: + def __init__(self, filename_pattern, B, T, process_rank, num_processes): + self.process_rank = process_rank + self.num_processes = num_processes + self.B = B + self.T = T + + # glob files that match the pattern + self.files = sorted(glob.glob(filename_pattern)) + assert len(self.files) > 0, f"did not find any files that match the pattern {filename_pattern}" + + # load and validate all data shards, count number of tokens in total + ntok_total = 0 + for fname in self.files: + shard_ntok = _peek_data_shard(fname) + assert shard_ntok >= num_processes * B * T + 1 + ntok_total += int(shard_ntok) + self.ntok_total = ntok_total + + # kick things off + self.reset() + + def reset(self): + self.current_shard = 0 + self.current_position = self.process_rank * self.B * self.T + self.tokens = _load_data_shard(self.files[self.current_shard]) + + def advance(self): # advance to next data shard + self.current_shard = (self.current_shard + 1) % len(self.files) + self.current_position = self.process_rank * self.B * self.T + self.tokens = _load_data_shard(self.files[self.current_shard]) + + def next_batch(self): + B = self.B + T = self.T + buf = self.tokens[self.current_position : self.current_position+B*T+1] + buf = torch.tensor(buf.astype(np.int32), dtype=torch.long) + x = (buf[:-1]).view(B, T) # inputs + y = (buf[1:]).view(B, T) # targets + # advance current position and load next shard if necessary + self.current_position += B * T * self.num_processes + if self.current_position + (B * T * self.num_processes + 1) > len(self.tokens): + self.advance() + return x.cuda(), y.cuda() + +# ----------------------------------------------------------------------------- +# int main + +@dataclass +class Hyperparameters: + # data hyperparams + input_bin : str = 'data/fineweb10B/fineweb_train_*.bin' # input .bin to train on + input_val_bin : str = 'data/fineweb10B/fineweb_val_*.bin' # input .bin to eval validation loss on + # optimization hyperparams + batch_size : int = 8*64 # batch size, in sequences, across all devices + device_batch_size : int = 16 # batch size, in sequences, per device + sequence_length : int = 1024 # sequence length, in tokens + num_iterations : int = 3242 # number of iterations to run + warmup_iters : int = 0 + warmdown_iters : int = 926 # number of iterations of linear warmup/warmdown for triangular or trapezoidal schedule + weight_decay : float = 0 + # evaluation and logging hyperparams + val_loss_every : int = 125 # every how many steps to evaluate val loss? 0 for only at the end + val_tokens : int = 10485760 # how many tokens of validation data? it's important to keep this fixed for consistent comparisons + save_every : int = 0 # every how many steps to save the checkpoint? 0 for only at the end +args = Hyperparameters() + +# set up DDP (distributed data parallel). torchrun sets this env variable +assert torch.cuda.is_available() +dist.init_process_group(backend='nccl') +ddp_rank = int(os.environ['RANK']) +ddp_local_rank = int(os.environ['LOCAL_RANK']) +ddp_world_size = int(os.environ['WORLD_SIZE']) +device = f'cuda:{ddp_local_rank}' +torch.cuda.set_device(device) +print(f"using device: {device}") +master_process = (ddp_rank == 0) # this process will do logging, checkpointing etc. + +# convenience variables +B, T = args.device_batch_size, args.sequence_length +# calculate the number of steps to take in the val loop. +assert args.val_tokens % (B * T * ddp_world_size) == 0 +val_steps = args.val_tokens // (B * T * ddp_world_size) +# calculate the steps of gradient accumulation required to attain the desired global batch size. +assert args.batch_size % (B * ddp_world_size) == 0 +train_accumulation_steps = args.batch_size // (B * ddp_world_size) + +# load tokens +train_loader = DistributedDataLoader(args.input_bin, B, T, ddp_rank, ddp_world_size) +val_loader = DistributedDataLoader(args.input_val_bin, B, T, ddp_rank, ddp_world_size) +if master_process: + print(f"Training DataLoader: total number of tokens: {train_loader.ntok_total} across {len(train_loader.files)} files") + print(f"Validation DataLoader: total number of tokens: {val_loader.ntok_total} across {len(val_loader.files)} files") +x, y = train_loader.next_batch() + +# there are only 50257 unique GPT-2 tokens; we extend to nearest multiple of 128 for efficiency. suggested to me by @Grad62304977. +# this originates from Karpathy's experiments. +num_vocab = 50304 +model = GPT(GPTConfig(vocab_size=num_vocab, n_layer=12, n_head=6, n_embd=768)) +model = model.cuda().bfloat16() +for m in model.modules(): + if isinstance(m, CastedLinear): + m.float() +if hasattr(config, "coordinate_descent_tuning"): + config.coordinate_descent_tuning = True # suggested by @Chillee +model = torch.compile(model) +# here we wrap model into DDP container +model = DDP(model, device_ids=[ddp_local_rank]) +raw_model = model.module # always contains the "raw" unwrapped model + +# CUDNN attention is ~4ms faster than Flash, but doesn't get selected by default in PyTorch 2.5.1 +from torch.backends.cuda import enable_cudnn_sdp, enable_flash_sdp, enable_math_sdp, enable_mem_efficient_sdp +enable_cudnn_sdp(True) +enable_flash_sdp(False) +enable_mem_efficient_sdp(False) +enable_math_sdp(False) + +# init the optimizer(s) +optimizer1 = torch.optim.Adam([raw_model.transformer.wte.weight], lr=0.3, betas=(0.9, 0.95), fused=True) +optimizer2 = torch.optim.Adam([raw_model.lm_head.weight], lr=0.002, betas=(0.9, 0.95), fused=True) +params = list(raw_model.transformer.h.parameters()) +matrix_params = [p for p in params if p.ndim == 2] +scalar_params = [p for p in params if p.ndim < 2] +optimizer3 = Muon(matrix_params, lr=0.02, momentum=0.95) +optimizer4 = torch.optim.Adam(scalar_params, lr=0.02, betas=(0.9, 0.95), fused=True) # note that this learning rate is neither sensitive nor tuned +optimizers = [optimizer1, optimizer2, optimizer3, optimizer4] +# learning rate decay scheduler (linear warmup and warmdown) +def get_lr(it): + assert it <= args.num_iterations + # 1) linear warmup for warmup_iters steps + if it < args.warmup_iters: + return (it+1) / args.warmup_iters + # 2) constant lr for a while + elif it < args.num_iterations - args.warmdown_iters: + return 1.0 + # 3) linear warmdown + else: + decay_ratio = (args.num_iterations - it) / args.warmdown_iters + return decay_ratio +schedulers = [torch.optim.lr_scheduler.LambdaLR(opt, get_lr) for opt in optimizers] + +# begin logging +if master_process: + run_id = str(uuid.uuid4()) + logdir = 'logs/%s/' % run_id + os.makedirs(logdir, exist_ok=True) + logfile = 'logs/%s.txt' % run_id + # create the log file + with open(logfile, "w") as f: + # begin the log by printing this file (the Python code) + f.write('='*100 + '\n') + f.write(code) + f.write('='*100 + '\n') + # log information about the hardware/software environment this is running on + # and print the full `nvidia-smi` to file + f.write(f"Running pytorch {torch.version.__version__} compiled for CUDA {torch.version.cuda}\nnvidia-smi:\n") + import subprocess + result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + f.write(f'{result.stdout}\n') + f.write('='*100 + '\n') + +training_time_ms = 0 +# start the clock +torch.cuda.synchronize() +t0 = time.time() +# begin training +train_loader.reset() +for step in range(args.num_iterations + 1): + last_step = (step == args.num_iterations) + # This effectively ignores timing first 10 steps, which are slower for weird reasons. + # Alternately, and slightly more correctly in terms of benchmarking, we could do 10 + # steps with dummy data first, and then re-initialize the model and reset the loader. + if step == 10: + training_time_ms = 0 + t0 = time.time() + timed_steps = float('nan') if step <= 11 else (step - 10) + 1 # <= 11 to avoid bug in val + + # once in a while evaluate the validation dataset + if (last_step or (args.val_loss_every > 0 and step % args.val_loss_every == 0)): + # stop the clock + torch.cuda.synchronize() + training_time_ms += 1000 * (time.time() - t0) + # run validation batches + model.eval() + val_loader.reset() + val_loss = 0.0 + for _ in range(val_steps): + with torch.no_grad(): + x_val, y_val = val_loader.next_batch() + val_loss += model(x_val, y_val) + dist.all_reduce(val_loss, op=dist.ReduceOp.AVG) + val_loss /= val_steps + # log val loss to console and to logfile + if master_process: + print(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms') + with open(logfile, "a") as f: + f.write(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms\n') + # start the clock again + torch.cuda.synchronize() + t0 = time.time() + + if master_process and (last_step or (args.save_every > 0 and step % args.save_every == 0)): + # stop the clock + torch.cuda.synchronize() + training_time_ms += 1000 * (time.time() - t0) + # save the state of the training process + log = dict(step=step, code=code, model=raw_model.state_dict(), optimizers=[opt.state_dict() for opt in optimizers]) + torch.save(log, 'logs/%s/state_step%06d.pt' % (run_id, step)) + # start the clock again + torch.cuda.synchronize() + t0 = time.time() + + # bit confusing: we want to make sure to eval on 0th iteration + # but also after the very last iteration. so we loop for step <= num_iterations + # instead of just < num_iterations (one extra due to <=), only to do + # the validation/sampling one last time, and then we break right here as we're done. + if last_step: + break + + # --------------- TRAINING SECTION BEGIN ----------------- + model.train() + for i in range(1, train_accumulation_steps+1): + # forward pass + loss = model(x, y) + train_loss = loss.detach() + # advance the dataset for the next batch + x, y = train_loader.next_batch() + # backward pass + if i < train_accumulation_steps: + with model.no_sync(): # there's no need to sync gradients every accumulation step + loss.backward() + else: + loss.backward() # just sync on the last step + for p in model.parameters(): + p.grad /= train_accumulation_steps + # momentum warmup for Muon + frac = min(step/500, 1) + optimizer3.param_groups[0]['momentum'] = (1 - frac) * 0.85 + frac * 0.95 + # step the optimizers and schedulers + for opt, sched in zip(optimizers, schedulers): + opt.step() + sched.step() + # null the gradients + model.zero_grad(set_to_none=True) + # --------------- TRAINING SECTION END ------------------- + # everything that follows now is just diagnostics, prints, logging, etc. + + #dist.all_reduce(train_loss, op=dist.ReduceOp.AVG) # all-reducing the training loss would be more correct in terms of logging, but slower + if master_process: + approx_time = training_time_ms + 1000 * (time.time() - t0) + print(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms") + with open(logfile, "a") as f: + f.write(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms\n") + +if master_process: + print(f"peak memory consumption: {torch.cuda.max_memory_allocated() // 1024 // 1024} MiB") + +# ------------------------------------------------------------------------- +# clean up nice +dist.destroy_process_group() +==================================================================================================== +Running pytorch 2.5.1+cu124 compiled for CUDA 12.4 +nvidia-smi: +Sun Nov 10 00:59:47 2024 ++-----------------------------------------------------------------------------------------+ +| NVIDIA-SMI 550.90.07 Driver Version: 550.90.07 CUDA Version: 12.4 | +|-----------------------------------------+------------------------+----------------------+ +| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | +| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | +| | | MIG M. | +|=========================================+========================+======================| +| 0 NVIDIA GeForce RTX 3090 On | 00000000:01:00.0 Off | N/A | +| 0% 36C P2 108W / 250W | 2239MiB / 24576MiB | 0% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ +| 1 NVIDIA GeForce RTX 3090 On | 00000000:02:00.0 Off | N/A | +| 0% 37C P2 118W / 250W | 2239MiB / 24576MiB | 31% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ +| 2 NVIDIA GeForce RTX 3090 On | 00000000:03:00.0 Off | N/A | +| 0% 39C P2 132W / 250W | 2239MiB / 24576MiB | 0% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ +| 3 NVIDIA GeForce RTX 3090 On | 00000000:04:00.0 Off | N/A | +| 33% 35C P2 130W / 250W | 2239MiB / 24576MiB | 0% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ +| 4 NVIDIA GeForce RTX 3090 On | 00000000:05:00.0 Off | N/A | +| 34% 37C P2 130W / 250W | 2239MiB / 24576MiB | 0% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ +| 5 NVIDIA GeForce RTX 3090 On | 00000000:81:00.0 Off | N/A | +| 0% 38C P2 133W / 250W | 2239MiB / 24576MiB | 8% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ +| 6 NVIDIA GeForce RTX 3090 On | 00000000:82:00.0 Off | N/A | +| 0% 35C P2 136W / 250W | 2239MiB / 24576MiB | 0% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ +| 7 NVIDIA GeForce RTX 3090 On | 00000000:83:00.0 Off | N/A | +| 0% 35C P2 119W / 250W | 2239MiB / 24576MiB | 16% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ + ++-----------------------------------------------------------------------------------------+ +| Processes: | +| GPU GI CI PID Type Process name GPU Memory | +| ID ID Usage | +|=========================================================================================| ++-----------------------------------------------------------------------------------------+ + +==================================================================================================== +step:0/3242 val_loss:10.8258 train_time:382ms step_avg:nanms +step:1/3242 train_loss:10.8258 train_time:101423ms step_avg:nanms +step:2/3242 train_loss:10.5774 train_time:103367ms step_avg:nanms +step:3/3242 train_loss:9.9378 train_time:105482ms step_avg:nanms +step:4/3242 train_loss:8.9455 train_time:107581ms step_avg:nanms +step:5/3242 train_loss:8.1827 train_time:109692ms step_avg:nanms +step:6/3242 train_loss:7.3693 train_time:111802ms step_avg:nanms +step:7/3242 train_loss:7.0055 train_time:113915ms step_avg:nanms +step:8/3242 train_loss:5.9723 train_time:116026ms step_avg:nanms +step:9/3242 train_loss:6.9704 train_time:118142ms step_avg:nanms +step:10/3242 train_loss:6.7147 train_time:120252ms step_avg:nanms +step:11/3242 train_loss:6.6959 train_time:1703ms step_avg:nanms +step:12/3242 train_loss:6.5138 train_time:3816ms step_avg:nanms +step:13/3242 train_loss:6.5906 train_time:5924ms step_avg:1974.58ms +step:14/3242 train_loss:6.5442 train_time:8039ms step_avg:2009.84ms +step:15/3242 train_loss:6.4742 train_time:10153ms step_avg:2030.66ms +step:16/3242 train_loss:6.3138 train_time:12271ms step_avg:2045.20ms +step:17/3242 train_loss:6.1143 train_time:14378ms step_avg:2054.00ms +step:18/3242 train_loss:6.5037 train_time:16499ms step_avg:2062.40ms +step:19/3242 train_loss:6.0189 train_time:18613ms step_avg:2068.09ms +step:20/3242 train_loss:6.3428 train_time:20722ms step_avg:2072.21ms +step:21/3242 train_loss:6.2252 train_time:22841ms step_avg:2076.50ms +step:22/3242 train_loss:6.1434 train_time:24957ms step_avg:2079.73ms +step:23/3242 train_loss:6.2748 train_time:27075ms step_avg:2082.68ms +step:24/3242 train_loss:5.9630 train_time:29194ms step_avg:2085.27ms +step:25/3242 train_loss:6.0908 train_time:31305ms step_avg:2086.99ms +step:26/3242 train_loss:6.1666 train_time:33412ms step_avg:2088.24ms +step:27/3242 train_loss:6.1281 train_time:35527ms step_avg:2089.83ms +step:28/3242 train_loss:6.0608 train_time:37644ms step_avg:2091.34ms +step:29/3242 train_loss:5.9877 train_time:39759ms step_avg:2092.59ms +step:30/3242 train_loss:6.3199 train_time:41870ms step_avg:2093.51ms +step:31/3242 train_loss:6.1545 train_time:43988ms step_avg:2094.66ms +step:32/3242 train_loss:5.8008 train_time:46112ms step_avg:2096.02ms +step:33/3242 train_loss:5.8494 train_time:48237ms step_avg:2097.25ms +step:34/3242 train_loss:5.8021 train_time:50354ms step_avg:2098.07ms +step:35/3242 train_loss:6.1004 train_time:52471ms step_avg:2098.85ms +step:36/3242 train_loss:5.9750 train_time:54588ms step_avg:2099.53ms +step:37/3242 train_loss:5.9919 train_time:56697ms step_avg:2099.89ms +step:38/3242 train_loss:6.1530 train_time:58813ms step_avg:2100.46ms +step:39/3242 train_loss:6.1604 train_time:60926ms step_avg:2100.89ms +step:40/3242 train_loss:5.5553 train_time:63039ms step_avg:2101.31ms +step:41/3242 train_loss:6.0810 train_time:65155ms step_avg:2101.78ms +step:42/3242 train_loss:5.7906 train_time:67276ms step_avg:2102.38ms +step:43/3242 train_loss:5.9363 train_time:69389ms step_avg:2102.69ms +step:44/3242 train_loss:5.6669 train_time:71498ms step_avg:2102.87ms +step:45/3242 train_loss:5.5522 train_time:73616ms step_avg:2103.31ms +step:46/3242 train_loss:5.6694 train_time:75736ms step_avg:2103.77ms +step:47/3242 train_loss:5.6245 train_time:77857ms step_avg:2104.24ms +step:48/3242 train_loss:5.7115 train_time:79973ms step_avg:2104.55ms +step:49/3242 train_loss:5.5189 train_time:82086ms step_avg:2104.77ms +step:50/3242 train_loss:5.5554 train_time:84202ms step_avg:2105.04ms +step:51/3242 train_loss:5.4456 train_time:86319ms step_avg:2105.34ms +step:52/3242 train_loss:5.7622 train_time:88435ms step_avg:2105.60ms +step:53/3242 train_loss:5.5252 train_time:90554ms step_avg:2105.90ms +step:54/3242 train_loss:5.5044 train_time:92670ms step_avg:2106.14ms +step:55/3242 train_loss:5.4659 train_time:94795ms step_avg:2106.56ms +step:56/3242 train_loss:5.4679 train_time:96915ms step_avg:2106.85ms +step:57/3242 train_loss:5.3641 train_time:99031ms step_avg:2107.04ms +step:58/3242 train_loss:5.4351 train_time:101153ms step_avg:2107.36ms +step:59/3242 train_loss:5.3387 train_time:103276ms step_avg:2107.67ms +step:60/3242 train_loss:5.4442 train_time:105385ms step_avg:2107.70ms +step:61/3242 train_loss:5.4330 train_time:107508ms step_avg:2108.00ms +step:62/3242 train_loss:5.3028 train_time:109627ms step_avg:2108.21ms +step:63/3242 train_loss:5.3533 train_time:111741ms step_avg:2108.32ms +step:64/3242 train_loss:5.2518 train_time:113865ms step_avg:2108.61ms +step:65/3242 train_loss:5.1106 train_time:115980ms step_avg:2108.72ms +step:66/3242 train_loss:5.2589 train_time:118091ms step_avg:2108.77ms +step:67/3242 train_loss:5.2065 train_time:120209ms step_avg:2108.92ms +step:68/3242 train_loss:5.3377 train_time:122325ms step_avg:2109.06ms +step:69/3242 train_loss:5.2252 train_time:124440ms step_avg:2109.15ms +step:70/3242 train_loss:5.2604 train_time:126562ms step_avg:2109.36ms +step:71/3242 train_loss:5.2228 train_time:128678ms step_avg:2109.48ms +step:72/3242 train_loss:5.1890 train_time:130798ms step_avg:2109.64ms +step:73/3242 train_loss:5.2928 train_time:132917ms step_avg:2109.79ms +step:74/3242 train_loss:5.0079 train_time:135039ms step_avg:2109.98ms +step:75/3242 train_loss:5.2693 train_time:137157ms step_avg:2110.10ms +step:76/3242 train_loss:5.2311 train_time:139271ms step_avg:2110.16ms +step:77/3242 train_loss:5.3761 train_time:141395ms step_avg:2110.37ms +step:78/3242 train_loss:5.1247 train_time:143516ms step_avg:2110.53ms +step:79/3242 train_loss:5.0737 train_time:145630ms step_avg:2110.57ms +step:80/3242 train_loss:5.1913 train_time:147750ms step_avg:2110.71ms +step:81/3242 train_loss:5.0469 train_time:149873ms step_avg:2110.89ms +step:82/3242 train_loss:5.3454 train_time:151992ms step_avg:2111.00ms +step:83/3242 train_loss:5.1707 train_time:154111ms step_avg:2111.11ms +step:84/3242 train_loss:4.9371 train_time:156225ms step_avg:2111.15ms +step:85/3242 train_loss:5.0544 train_time:158344ms step_avg:2111.25ms +step:86/3242 train_loss:5.1726 train_time:160463ms step_avg:2111.35ms +step:87/3242 train_loss:5.1411 train_time:162589ms step_avg:2111.55ms +step:88/3242 train_loss:4.9101 train_time:164704ms step_avg:2111.59ms +step:89/3242 train_loss:5.0530 train_time:166824ms step_avg:2111.69ms +step:90/3242 train_loss:4.9850 train_time:168946ms step_avg:2111.83ms +step:91/3242 train_loss:5.0930 train_time:171064ms step_avg:2111.90ms +step:92/3242 train_loss:5.0010 train_time:173183ms step_avg:2111.98ms +step:93/3242 train_loss:5.2176 train_time:175296ms step_avg:2112.01ms +step:94/3242 train_loss:4.9870 train_time:177417ms step_avg:2112.11ms +step:95/3242 train_loss:4.7550 train_time:179540ms step_avg:2112.24ms +step:96/3242 train_loss:4.8897 train_time:181655ms step_avg:2112.27ms +step:97/3242 train_loss:5.1393 train_time:183777ms step_avg:2112.38ms +step:98/3242 train_loss:4.9002 train_time:185904ms step_avg:2112.54ms +step:99/3242 train_loss:4.8429 train_time:188019ms step_avg:2112.57ms +step:100/3242 train_loss:4.9818 train_time:190137ms step_avg:2112.64ms +step:101/3242 train_loss:4.8077 train_time:192259ms step_avg:2112.73ms +step:102/3242 train_loss:4.9317 train_time:194374ms step_avg:2112.76ms +step:103/3242 train_loss:4.6536 train_time:196496ms step_avg:2112.86ms +step:104/3242 train_loss:4.6406 train_time:198617ms step_avg:2112.95ms +step:105/3242 train_loss:5.3221 train_time:200731ms step_avg:2112.96ms +step:106/3242 train_loss:5.9453 train_time:202851ms step_avg:2113.04ms +step:107/3242 train_loss:4.7554 train_time:204972ms step_avg:2113.12ms +step:108/3242 train_loss:4.7332 train_time:207091ms step_avg:2113.17ms +step:109/3242 train_loss:5.3456 train_time:209218ms step_avg:2113.31ms +step:110/3242 train_loss:5.0092 train_time:211331ms step_avg:2113.31ms +step:111/3242 train_loss:4.6998 train_time:213455ms step_avg:2113.42ms +step:112/3242 train_loss:5.0955 train_time:215570ms step_avg:2113.43ms +step:113/3242 train_loss:4.3955 train_time:217689ms step_avg:2113.49ms +step:114/3242 train_loss:4.9148 train_time:219809ms step_avg:2113.55ms +step:115/3242 train_loss:4.8144 train_time:221919ms step_avg:2113.51ms +step:116/3242 train_loss:4.8479 train_time:224043ms step_avg:2113.62ms +step:117/3242 train_loss:4.7231 train_time:226165ms step_avg:2113.69ms +step:118/3242 train_loss:4.7948 train_time:228290ms step_avg:2113.79ms +step:119/3242 train_loss:4.5434 train_time:230405ms step_avg:2113.81ms +step:120/3242 train_loss:4.7632 train_time:232523ms step_avg:2113.85ms +step:121/3242 train_loss:4.6667 train_time:234652ms step_avg:2113.98ms +step:122/3242 train_loss:4.7445 train_time:236762ms step_avg:2113.95ms +step:123/3242 train_loss:4.6341 train_time:238884ms step_avg:2114.02ms +step:124/3242 train_loss:4.7462 train_time:240999ms step_avg:2114.03ms +step:125/3242 train_loss:4.4827 train_time:243121ms step_avg:2114.09ms +step:125/3242 val_loss:4.7203 train_time:243532ms step_avg:2117.67ms +step:126/3242 train_loss:4.5589 train_time:245241ms step_avg:2114.15ms +step:127/3242 train_loss:4.7844 train_time:247360ms step_avg:2114.19ms +step:128/3242 train_loss:4.7880 train_time:249477ms step_avg:2114.21ms +step:129/3242 train_loss:4.7758 train_time:251594ms step_avg:2114.23ms +step:130/3242 train_loss:4.7504 train_time:253710ms step_avg:2114.25ms +step:131/3242 train_loss:4.5175 train_time:255832ms step_avg:2114.31ms +step:132/3242 train_loss:4.6470 train_time:257952ms step_avg:2114.36ms +step:133/3242 train_loss:4.7635 train_time:260078ms step_avg:2114.46ms +step:134/3242 train_loss:4.6395 train_time:262194ms step_avg:2114.47ms +step:135/3242 train_loss:4.4371 train_time:264316ms step_avg:2114.52ms +step:136/3242 train_loss:4.5697 train_time:266435ms step_avg:2114.57ms +step:137/3242 train_loss:4.7636 train_time:268551ms step_avg:2114.57ms +step:138/3242 train_loss:4.5428 train_time:270670ms step_avg:2114.61ms +step:139/3242 train_loss:4.7854 train_time:272784ms step_avg:2114.61ms +step:140/3242 train_loss:4.5826 train_time:274903ms step_avg:2114.64ms +step:141/3242 train_loss:4.4881 train_time:277023ms step_avg:2114.68ms +step:142/3242 train_loss:4.5546 train_time:279144ms step_avg:2114.73ms +step:143/3242 train_loss:4.7156 train_time:281263ms step_avg:2114.76ms +step:144/3242 train_loss:4.6157 train_time:283390ms step_avg:2114.85ms +step:145/3242 train_loss:4.6010 train_time:285505ms step_avg:2114.86ms +step:146/3242 train_loss:4.6429 train_time:287624ms step_avg:2114.88ms +step:147/3242 train_loss:4.4391 train_time:289742ms step_avg:2114.90ms +step:148/3242 train_loss:4.4604 train_time:291865ms step_avg:2114.96ms +step:149/3242 train_loss:4.5877 train_time:293981ms step_avg:2114.97ms +step:150/3242 train_loss:4.4921 train_time:296103ms step_avg:2115.02ms +step:151/3242 train_loss:4.5512 train_time:298219ms step_avg:2115.02ms +step:152/3242 train_loss:4.4796 train_time:300342ms step_avg:2115.08ms +step:153/3242 train_loss:4.5415 train_time:302457ms step_avg:2115.08ms +step:154/3242 train_loss:4.4985 train_time:304578ms step_avg:2115.12ms +step:155/3242 train_loss:4.2095 train_time:306693ms step_avg:2115.12ms +step:156/3242 train_loss:4.3933 train_time:308813ms step_avg:2115.16ms +step:157/3242 train_loss:4.6887 train_time:310935ms step_avg:2115.20ms +step:158/3242 train_loss:4.4396 train_time:313059ms step_avg:2115.26ms +step:159/3242 train_loss:4.5147 train_time:315179ms step_avg:2115.29ms +step:160/3242 train_loss:4.3278 train_time:317294ms step_avg:2115.29ms +step:161/3242 train_loss:4.2670 train_time:319415ms step_avg:2115.33ms +step:162/3242 train_loss:4.4822 train_time:321540ms step_avg:2115.39ms +step:163/3242 train_loss:4.4199 train_time:323654ms step_avg:2115.38ms +step:164/3242 train_loss:4.3540 train_time:325778ms step_avg:2115.44ms +step:165/3242 train_loss:4.3405 train_time:327894ms step_avg:2115.45ms +step:166/3242 train_loss:4.5672 train_time:330017ms step_avg:2115.49ms +step:167/3242 train_loss:4.5475 train_time:332137ms step_avg:2115.53ms +step:168/3242 train_loss:4.5596 train_time:334259ms step_avg:2115.57ms +step:169/3242 train_loss:4.3608 train_time:336368ms step_avg:2115.52ms +step:170/3242 train_loss:4.3476 train_time:338487ms step_avg:2115.55ms +step:171/3242 train_loss:3.6764 train_time:340611ms step_avg:2115.59ms +step:172/3242 train_loss:4.3171 train_time:342725ms step_avg:2115.59ms +step:173/3242 train_loss:4.2759 train_time:344850ms step_avg:2115.64ms +step:174/3242 train_loss:4.7857 train_time:346972ms step_avg:2115.69ms +step:175/3242 train_loss:4.3976 train_time:349086ms step_avg:2115.67ms +step:176/3242 train_loss:4.4331 train_time:351206ms step_avg:2115.70ms +step:177/3242 train_loss:4.7170 train_time:353318ms step_avg:2115.68ms +step:178/3242 train_loss:4.4232 train_time:355440ms step_avg:2115.72ms +step:179/3242 train_loss:4.2649 train_time:357559ms step_avg:2115.73ms +step:180/3242 train_loss:4.2347 train_time:359680ms step_avg:2115.77ms +step:181/3242 train_loss:4.2388 train_time:361806ms step_avg:2115.82ms +step:182/3242 train_loss:4.2478 train_time:363924ms step_avg:2115.84ms +step:183/3242 train_loss:4.1559 train_time:366037ms step_avg:2115.82ms +step:184/3242 train_loss:4.7181 train_time:368164ms step_avg:2115.88ms +step:185/3242 train_loss:4.3235 train_time:370281ms step_avg:2115.89ms +step:186/3242 train_loss:4.3490 train_time:372395ms step_avg:2115.88ms +step:187/3242 train_loss:4.2133 train_time:374516ms step_avg:2115.91ms +step:188/3242 train_loss:4.3264 train_time:376635ms step_avg:2115.93ms +step:189/3242 train_loss:4.2206 train_time:378760ms step_avg:2115.98ms +step:190/3242 train_loss:4.2502 train_time:380878ms step_avg:2115.99ms +step:191/3242 train_loss:4.1693 train_time:383116ms step_avg:2116.66ms +step:192/3242 train_loss:4.2480 train_time:385233ms step_avg:2116.66ms +step:193/3242 train_loss:4.4002 train_time:387358ms step_avg:2116.71ms +step:194/3242 train_loss:4.2867 train_time:389476ms step_avg:2116.72ms +step:195/3242 train_loss:4.9303 train_time:391593ms step_avg:2116.72ms +step:196/3242 train_loss:4.2364 train_time:393720ms step_avg:2116.78ms +step:197/3242 train_loss:4.1538 train_time:395846ms step_avg:2116.82ms +step:198/3242 train_loss:4.2131 train_time:397959ms step_avg:2116.80ms +step:199/3242 train_loss:4.1908 train_time:400080ms step_avg:2116.83ms +step:200/3242 train_loss:4.2337 train_time:402197ms step_avg:2116.83ms +step:201/3242 train_loss:4.0140 train_time:404316ms step_avg:2116.84ms +step:202/3242 train_loss:4.3344 train_time:406437ms step_avg:2116.86ms +step:203/3242 train_loss:4.2586 train_time:408555ms step_avg:2116.87ms +step:204/3242 train_loss:4.2029 train_time:410673ms step_avg:2116.87ms +step:205/3242 train_loss:4.2030 train_time:412793ms step_avg:2116.89ms +step:206/3242 train_loss:4.1450 train_time:414910ms step_avg:2116.89ms +step:207/3242 train_loss:4.1685 train_time:417030ms step_avg:2116.91ms +step:208/3242 train_loss:4.4303 train_time:419148ms step_avg:2116.91ms +step:209/3242 train_loss:4.3124 train_time:421264ms step_avg:2116.90ms +step:210/3242 train_loss:4.3942 train_time:423391ms step_avg:2116.96ms +step:211/3242 train_loss:4.2261 train_time:425511ms step_avg:2116.97ms +step:212/3242 train_loss:4.2953 train_time:427629ms step_avg:2116.98ms +step:213/3242 train_loss:4.1563 train_time:429753ms step_avg:2117.01ms +step:214/3242 train_loss:4.2233 train_time:431873ms step_avg:2117.02ms +step:215/3242 train_loss:4.0192 train_time:433989ms step_avg:2117.02ms +step:216/3242 train_loss:4.0711 train_time:436105ms step_avg:2117.01ms +step:217/3242 train_loss:4.1623 train_time:438223ms step_avg:2117.02ms +step:218/3242 train_loss:4.0778 train_time:440339ms step_avg:2117.02ms +step:219/3242 train_loss:4.2189 train_time:442467ms step_avg:2117.07ms +step:220/3242 train_loss:4.2668 train_time:444585ms step_avg:2117.07ms +step:221/3242 train_loss:4.2517 train_time:446705ms step_avg:2117.09ms +step:222/3242 train_loss:4.0999 train_time:448825ms step_avg:2117.10ms +step:223/3242 train_loss:3.8366 train_time:450943ms step_avg:2117.10ms +step:224/3242 train_loss:4.6873 train_time:453057ms step_avg:2117.09ms +step:225/3242 train_loss:4.1361 train_time:455189ms step_avg:2117.16ms +step:226/3242 train_loss:3.8550 train_time:457316ms step_avg:2117.21ms +step:227/3242 train_loss:4.1910 train_time:459434ms step_avg:2117.21ms +step:228/3242 train_loss:4.2577 train_time:461558ms step_avg:2117.24ms +step:229/3242 train_loss:4.0672 train_time:463673ms step_avg:2117.23ms +step:230/3242 train_loss:4.0982 train_time:465792ms step_avg:2117.24ms +step:231/3242 train_loss:4.1485 train_time:467910ms step_avg:2117.24ms +step:232/3242 train_loss:3.8542 train_time:470035ms step_avg:2117.27ms +step:233/3242 train_loss:4.2951 train_time:472149ms step_avg:2117.26ms +step:234/3242 train_loss:4.0259 train_time:474274ms step_avg:2117.30ms +step:235/3242 train_loss:3.7392 train_time:476397ms step_avg:2117.32ms +step:236/3242 train_loss:4.2177 train_time:478514ms step_avg:2117.32ms +step:237/3242 train_loss:4.2257 train_time:480627ms step_avg:2117.30ms +step:238/3242 train_loss:3.9698 train_time:482746ms step_avg:2117.31ms +step:239/3242 train_loss:4.3302 train_time:484866ms step_avg:2117.32ms +step:240/3242 train_loss:4.1833 train_time:486986ms step_avg:2117.33ms +step:241/3242 train_loss:3.9999 train_time:489100ms step_avg:2117.32ms +step:242/3242 train_loss:3.7538 train_time:491219ms step_avg:2117.32ms +step:243/3242 train_loss:4.1753 train_time:493343ms step_avg:2117.35ms +step:244/3242 train_loss:3.9924 train_time:495454ms step_avg:2117.33ms +step:245/3242 train_loss:4.3998 train_time:497577ms step_avg:2117.35ms +step:246/3242 train_loss:4.4477 train_time:499694ms step_avg:2117.35ms +step:247/3242 train_loss:4.1146 train_time:501822ms step_avg:2117.39ms +step:248/3242 train_loss:4.1786 train_time:503942ms step_avg:2117.40ms +step:249/3242 train_loss:4.0115 train_time:506059ms step_avg:2117.40ms +step:250/3242 train_loss:3.9028 train_time:508181ms step_avg:2117.42ms +step:250/3242 val_loss:4.1020 train_time:508593ms step_avg:2119.14ms +step:251/3242 train_loss:4.2248 train_time:510306ms step_avg:2117.45ms +step:252/3242 train_loss:4.1979 train_time:512423ms step_avg:2117.45ms +step:253/3242 train_loss:3.7065 train_time:514536ms step_avg:2117.43ms +step:254/3242 train_loss:4.1741 train_time:516660ms step_avg:2117.46ms +step:255/3242 train_loss:4.0405 train_time:518780ms step_avg:2117.47ms +step:256/3242 train_loss:4.1912 train_time:520898ms step_avg:2117.47ms +step:257/3242 train_loss:3.9918 train_time:523017ms step_avg:2117.48ms +step:258/3242 train_loss:3.8470 train_time:525134ms step_avg:2117.48ms +step:259/3242 train_loss:4.2802 train_time:527257ms step_avg:2117.50ms +step:260/3242 train_loss:4.0913 train_time:529374ms step_avg:2117.49ms +step:261/3242 train_loss:4.4239 train_time:531491ms step_avg:2117.50ms +step:262/3242 train_loss:4.1597 train_time:533609ms step_avg:2117.50ms +step:263/3242 train_loss:4.0886 train_time:535736ms step_avg:2117.53ms +step:264/3242 train_loss:4.0720 train_time:537853ms step_avg:2117.53ms +step:265/3242 train_loss:3.9197 train_time:539974ms step_avg:2117.54ms +step:266/3242 train_loss:4.0690 train_time:542083ms step_avg:2117.51ms +step:267/3242 train_loss:3.8499 train_time:544211ms step_avg:2117.55ms +step:268/3242 train_loss:4.1209 train_time:546330ms step_avg:2117.56ms +step:269/3242 train_loss:3.8296 train_time:548444ms step_avg:2117.54ms +step:270/3242 train_loss:4.2143 train_time:550568ms step_avg:2117.57ms +step:271/3242 train_loss:4.0743 train_time:552688ms step_avg:2117.58ms +step:272/3242 train_loss:3.9091 train_time:554806ms step_avg:2117.58ms +step:273/3242 train_loss:4.1477 train_time:556929ms step_avg:2117.60ms +step:274/3242 train_loss:4.1245 train_time:559051ms step_avg:2117.62ms +step:275/3242 train_loss:4.2860 train_time:561167ms step_avg:2117.61ms +step:276/3242 train_loss:3.9515 train_time:563286ms step_avg:2117.62ms +step:277/3242 train_loss:3.8861 train_time:565401ms step_avg:2117.61ms +step:278/3242 train_loss:4.0836 train_time:567523ms step_avg:2117.62ms +step:279/3242 train_loss:3.9254 train_time:569642ms step_avg:2117.63ms +step:280/3242 train_loss:4.4883 train_time:571759ms step_avg:2117.62ms +step:281/3242 train_loss:4.0926 train_time:573878ms step_avg:2117.63ms +step:282/3242 train_loss:4.0321 train_time:575995ms step_avg:2117.63ms +step:283/3242 train_loss:3.9302 train_time:578110ms step_avg:2117.62ms +step:284/3242 train_loss:3.9619 train_time:580233ms step_avg:2117.64ms +step:285/3242 train_loss:4.1222 train_time:582359ms step_avg:2117.67ms +step:286/3242 train_loss:4.3761 train_time:584480ms step_avg:2117.68ms +step:287/3242 train_loss:3.8850 train_time:586595ms step_avg:2117.67ms +step:288/3242 train_loss:4.2305 train_time:588714ms step_avg:2117.67ms +step:289/3242 train_loss:4.0367 train_time:590833ms step_avg:2117.68ms +step:290/3242 train_loss:4.0351 train_time:592957ms step_avg:2117.70ms +step:291/3242 train_loss:4.1703 train_time:595072ms step_avg:2117.69ms +step:292/3242 train_loss:3.9372 train_time:597196ms step_avg:2117.72ms +step:293/3242 train_loss:3.9109 train_time:599312ms step_avg:2117.71ms +step:294/3242 train_loss:4.1557 train_time:601425ms step_avg:2117.69ms +step:295/3242 train_loss:3.9323 train_time:603540ms step_avg:2117.68ms +step:296/3242 train_loss:4.0279 train_time:605658ms step_avg:2117.68ms +step:297/3242 train_loss:3.9861 train_time:607776ms step_avg:2117.68ms +step:298/3242 train_loss:4.1105 train_time:609898ms step_avg:2117.70ms +step:299/3242 train_loss:4.0187 train_time:612016ms step_avg:2117.70ms +step:300/3242 train_loss:4.0672 train_time:614141ms step_avg:2117.73ms +step:301/3242 train_loss:4.0092 train_time:616260ms step_avg:2117.73ms +step:302/3242 train_loss:3.8668 train_time:618380ms step_avg:2117.74ms +step:303/3242 train_loss:3.8429 train_time:620510ms step_avg:2117.78ms +step:304/3242 train_loss:3.9806 train_time:622621ms step_avg:2117.76ms +step:305/3242 train_loss:5.6433 train_time:624746ms step_avg:2117.78ms +step:306/3242 train_loss:3.9400 train_time:626866ms step_avg:2117.79ms +step:307/3242 train_loss:3.9069 train_time:628984ms step_avg:2117.79ms +step:308/3242 train_loss:4.2430 train_time:631101ms step_avg:2117.79ms +step:309/3242 train_loss:3.8919 train_time:633217ms step_avg:2117.78ms +step:310/3242 train_loss:4.1163 train_time:635337ms step_avg:2117.79ms +step:311/3242 train_loss:3.7945 train_time:637460ms step_avg:2117.81ms +step:312/3242 train_loss:3.7561 train_time:639577ms step_avg:2117.81ms +step:313/3242 train_loss:4.1396 train_time:641700ms step_avg:2117.82ms +step:314/3242 train_loss:4.2161 train_time:643816ms step_avg:2117.82ms +step:315/3242 train_loss:3.7615 train_time:645932ms step_avg:2117.81ms +step:316/3242 train_loss:3.8338 train_time:648047ms step_avg:2117.80ms +step:317/3242 train_loss:4.1245 train_time:650173ms step_avg:2117.83ms +step:318/3242 train_loss:3.9248 train_time:652296ms step_avg:2117.84ms +step:319/3242 train_loss:4.0056 train_time:654412ms step_avg:2117.84ms +step:320/3242 train_loss:4.4600 train_time:656533ms step_avg:2117.85ms +step:321/3242 train_loss:3.8870 train_time:658651ms step_avg:2117.85ms +step:322/3242 train_loss:4.1026 train_time:660768ms step_avg:2117.85ms +step:323/3242 train_loss:4.3769 train_time:662896ms step_avg:2117.88ms +step:324/3242 train_loss:4.1011 train_time:665015ms step_avg:2117.88ms +step:325/3242 train_loss:4.1338 train_time:667130ms step_avg:2117.87ms +step:326/3242 train_loss:3.7518 train_time:669252ms step_avg:2117.89ms +step:327/3242 train_loss:3.9554 train_time:671367ms step_avg:2117.88ms +step:328/3242 train_loss:4.1045 train_time:673491ms step_avg:2117.90ms +step:329/3242 train_loss:3.9930 train_time:675609ms step_avg:2117.90ms +step:330/3242 train_loss:3.8239 train_time:677726ms step_avg:2117.89ms +step:331/3242 train_loss:3.9220 train_time:679850ms step_avg:2117.91ms +step:332/3242 train_loss:4.3190 train_time:681969ms step_avg:2117.92ms +step:333/3242 train_loss:3.9020 train_time:684087ms step_avg:2117.92ms +step:334/3242 train_loss:3.9874 train_time:686206ms step_avg:2117.92ms +step:335/3242 train_loss:4.0978 train_time:688330ms step_avg:2117.94ms +step:336/3242 train_loss:4.6110 train_time:690454ms step_avg:2117.96ms +step:337/3242 train_loss:4.7980 train_time:692568ms step_avg:2117.95ms +step:338/3242 train_loss:4.0533 train_time:694683ms step_avg:2117.94ms +step:339/3242 train_loss:3.8356 train_time:696794ms step_avg:2117.91ms +step:340/3242 train_loss:4.0280 train_time:698919ms step_avg:2117.94ms +step:341/3242 train_loss:4.2618 train_time:701035ms step_avg:2117.93ms +step:342/3242 train_loss:4.0636 train_time:703147ms step_avg:2117.91ms +step:343/3242 train_loss:3.7989 train_time:705270ms step_avg:2117.93ms +step:344/3242 train_loss:3.9297 train_time:707380ms step_avg:2117.90ms +step:345/3242 train_loss:3.8577 train_time:709509ms step_avg:2117.94ms +step:346/3242 train_loss:3.6504 train_time:711624ms step_avg:2117.93ms +step:347/3242 train_loss:3.7984 train_time:713736ms step_avg:2117.91ms +step:348/3242 train_loss:3.9380 train_time:715849ms step_avg:2117.90ms +step:349/3242 train_loss:3.8435 train_time:717969ms step_avg:2117.90ms +step:350/3242 train_loss:3.5025 train_time:720081ms step_avg:2117.88ms +step:351/3242 train_loss:3.6244 train_time:722200ms step_avg:2117.89ms +step:352/3242 train_loss:4.0811 train_time:724321ms step_avg:2117.90ms +step:353/3242 train_loss:3.9925 train_time:726442ms step_avg:2117.91ms +step:354/3242 train_loss:3.9196 train_time:728559ms step_avg:2117.90ms +step:355/3242 train_loss:4.1272 train_time:730672ms step_avg:2117.89ms +step:356/3242 train_loss:4.1040 train_time:732796ms step_avg:2117.91ms +step:357/3242 train_loss:4.0914 train_time:734913ms step_avg:2117.90ms +step:358/3242 train_loss:3.7532 train_time:737038ms step_avg:2117.93ms +step:359/3242 train_loss:4.1236 train_time:739153ms step_avg:2117.92ms +step:360/3242 train_loss:3.4113 train_time:741272ms step_avg:2117.92ms +step:361/3242 train_loss:3.9321 train_time:743388ms step_avg:2117.91ms +step:362/3242 train_loss:3.9368 train_time:745502ms step_avg:2117.90ms +step:363/3242 train_loss:3.8811 train_time:747619ms step_avg:2117.90ms +step:364/3242 train_loss:3.8121 train_time:749733ms step_avg:2117.89ms +step:365/3242 train_loss:4.0553 train_time:751857ms step_avg:2117.91ms +step:366/3242 train_loss:4.2510 train_time:753975ms step_avg:2117.91ms +step:367/3242 train_loss:3.9510 train_time:756090ms step_avg:2117.90ms +step:368/3242 train_loss:3.8420 train_time:758216ms step_avg:2117.92ms +step:369/3242 train_loss:3.4702 train_time:760324ms step_avg:2117.89ms +step:370/3242 train_loss:3.9619 train_time:762442ms step_avg:2117.89ms +step:371/3242 train_loss:3.7599 train_time:764562ms step_avg:2117.90ms +step:372/3242 train_loss:3.7512 train_time:766684ms step_avg:2117.91ms +step:373/3242 train_loss:4.0426 train_time:768805ms step_avg:2117.92ms +step:374/3242 train_loss:3.9685 train_time:770924ms step_avg:2117.92ms +step:375/3242 train_loss:3.8829 train_time:773049ms step_avg:2117.94ms +step:375/3242 val_loss:3.9142 train_time:773463ms step_avg:2119.08ms +step:376/3242 train_loss:3.7470 train_time:775175ms step_avg:2117.97ms +step:377/3242 train_loss:4.1128 train_time:777289ms step_avg:2117.95ms +step:378/3242 train_loss:3.8625 train_time:779410ms step_avg:2117.96ms +step:379/3242 train_loss:3.9692 train_time:781527ms step_avg:2117.96ms +step:380/3242 train_loss:4.0500 train_time:783648ms step_avg:2117.97ms +step:381/3242 train_loss:3.8479 train_time:785906ms step_avg:2118.34ms +step:382/3242 train_loss:3.8645 train_time:788024ms step_avg:2118.34ms +step:383/3242 train_loss:3.9453 train_time:790137ms step_avg:2118.33ms +step:384/3242 train_loss:3.8317 train_time:792257ms step_avg:2118.33ms +step:385/3242 train_loss:3.6312 train_time:794374ms step_avg:2118.33ms +step:386/3242 train_loss:4.0761 train_time:796499ms step_avg:2118.35ms +step:387/3242 train_loss:3.9211 train_time:798627ms step_avg:2118.37ms +step:388/3242 train_loss:3.8425 train_time:800742ms step_avg:2118.37ms +step:389/3242 train_loss:4.0243 train_time:802865ms step_avg:2118.38ms +step:390/3242 train_loss:4.0887 train_time:804976ms step_avg:2118.36ms +step:391/3242 train_loss:3.7857 train_time:807093ms step_avg:2118.36ms +step:392/3242 train_loss:3.6253 train_time:809215ms step_avg:2118.37ms +step:393/3242 train_loss:3.7436 train_time:811337ms step_avg:2118.37ms +step:394/3242 train_loss:4.0610 train_time:813454ms step_avg:2118.37ms +step:395/3242 train_loss:4.0215 train_time:815568ms step_avg:2118.36ms +step:396/3242 train_loss:3.8991 train_time:817691ms step_avg:2118.37ms +step:397/3242 train_loss:3.6278 train_time:819815ms step_avg:2118.39ms +step:398/3242 train_loss:3.9545 train_time:821933ms step_avg:2118.38ms +step:399/3242 train_loss:3.7301 train_time:824048ms step_avg:2118.37ms +step:400/3242 train_loss:3.7133 train_time:826173ms step_avg:2118.39ms +step:401/3242 train_loss:3.8536 train_time:828283ms step_avg:2118.37ms +step:402/3242 train_loss:3.7598 train_time:830410ms step_avg:2118.39ms +step:403/3242 train_loss:3.6522 train_time:832523ms step_avg:2118.38ms +step:404/3242 train_loss:3.9222 train_time:834644ms step_avg:2118.39ms +step:405/3242 train_loss:4.0379 train_time:836760ms step_avg:2118.38ms +step:406/3242 train_loss:3.9107 train_time:838877ms step_avg:2118.38ms +step:407/3242 train_loss:3.8909 train_time:840998ms step_avg:2118.38ms +step:408/3242 train_loss:3.8627 train_time:843117ms step_avg:2118.38ms +step:409/3242 train_loss:3.7306 train_time:845228ms step_avg:2118.37ms +step:410/3242 train_loss:3.8566 train_time:847352ms step_avg:2118.38ms +step:411/3242 train_loss:3.8901 train_time:849465ms step_avg:2118.37ms +step:412/3242 train_loss:4.0862 train_time:851590ms step_avg:2118.38ms +step:413/3242 train_loss:3.8795 train_time:853711ms step_avg:2118.39ms +step:414/3242 train_loss:3.8173 train_time:855827ms step_avg:2118.38ms +step:415/3242 train_loss:3.7327 train_time:857944ms step_avg:2118.38ms +step:416/3242 train_loss:4.0668 train_time:860061ms step_avg:2118.38ms +step:417/3242 train_loss:4.0715 train_time:862185ms step_avg:2118.39ms +step:418/3242 train_loss:3.8128 train_time:864298ms step_avg:2118.38ms +step:419/3242 train_loss:3.9232 train_time:866416ms step_avg:2118.38ms +step:420/3242 train_loss:4.2602 train_time:868535ms step_avg:2118.38ms +step:421/3242 train_loss:3.8809 train_time:870656ms step_avg:2118.38ms +step:422/3242 train_loss:4.0996 train_time:872771ms step_avg:2118.38ms +step:423/3242 train_loss:3.6233 train_time:874895ms step_avg:2118.39ms +step:424/3242 train_loss:3.7318 train_time:877006ms step_avg:2118.37ms +step:425/3242 train_loss:3.5978 train_time:879128ms step_avg:2118.38ms +step:426/3242 train_loss:3.9806 train_time:881248ms step_avg:2118.39ms +step:427/3242 train_loss:3.8205 train_time:883369ms step_avg:2118.39ms +step:428/3242 train_loss:4.0169 train_time:885478ms step_avg:2118.37ms +step:429/3242 train_loss:3.8754 train_time:887600ms step_avg:2118.38ms +step:430/3242 train_loss:3.6380 train_time:889715ms step_avg:2118.37ms +step:431/3242 train_loss:3.5258 train_time:891833ms step_avg:2118.37ms +step:432/3242 train_loss:4.0724 train_time:893951ms step_avg:2118.37ms +step:433/3242 train_loss:3.9582 train_time:896077ms step_avg:2118.39ms +step:434/3242 train_loss:3.8647 train_time:898196ms step_avg:2118.39ms +step:435/3242 train_loss:3.6718 train_time:900311ms step_avg:2118.38ms +step:436/3242 train_loss:3.9849 train_time:902429ms step_avg:2118.38ms +step:437/3242 train_loss:3.8118 train_time:904553ms step_avg:2118.39ms +step:438/3242 train_loss:3.8202 train_time:906676ms step_avg:2118.40ms +step:439/3242 train_loss:3.9013 train_time:908790ms step_avg:2118.39ms +step:440/3242 train_loss:3.7535 train_time:910911ms step_avg:2118.40ms +step:441/3242 train_loss:3.7087 train_time:913033ms step_avg:2118.41ms +step:442/3242 train_loss:3.9244 train_time:915144ms step_avg:2118.39ms +step:443/3242 train_loss:3.6299 train_time:917266ms step_avg:2118.40ms +step:444/3242 train_loss:3.7332 train_time:919385ms step_avg:2118.40ms +step:445/3242 train_loss:4.0070 train_time:921502ms step_avg:2118.39ms +step:446/3242 train_loss:3.7163 train_time:923626ms step_avg:2118.41ms +step:447/3242 train_loss:3.9368 train_time:925748ms step_avg:2118.42ms +step:448/3242 train_loss:4.1343 train_time:927867ms step_avg:2118.42ms +step:449/3242 train_loss:3.8438 train_time:929985ms step_avg:2118.42ms +step:450/3242 train_loss:4.0468 train_time:932098ms step_avg:2118.41ms +step:451/3242 train_loss:3.8482 train_time:934223ms step_avg:2118.42ms +step:452/3242 train_loss:3.9933 train_time:936336ms step_avg:2118.41ms +step:453/3242 train_loss:4.6770 train_time:938459ms step_avg:2118.42ms +step:454/3242 train_loss:3.4557 train_time:940577ms step_avg:2118.42ms +step:455/3242 train_loss:3.8344 train_time:942690ms step_avg:2118.40ms +step:456/3242 train_loss:3.7808 train_time:944811ms step_avg:2118.41ms +step:457/3242 train_loss:3.7976 train_time:946933ms step_avg:2118.42ms +step:458/3242 train_loss:3.9440 train_time:949056ms step_avg:2118.43ms +step:459/3242 train_loss:4.1026 train_time:951176ms step_avg:2118.43ms +step:460/3242 train_loss:3.4696 train_time:953295ms step_avg:2118.43ms +step:461/3242 train_loss:3.7719 train_time:955417ms step_avg:2118.44ms +step:462/3242 train_loss:4.0181 train_time:957537ms step_avg:2118.44ms +step:463/3242 train_loss:3.8066 train_time:959652ms step_avg:2118.44ms +step:464/3242 train_loss:3.8973 train_time:961776ms step_avg:2118.45ms +step:465/3242 train_loss:3.9107 train_time:963891ms step_avg:2118.44ms +step:466/3242 train_loss:4.1850 train_time:966007ms step_avg:2118.44ms +step:467/3242 train_loss:3.7691 train_time:968133ms step_avg:2118.45ms +step:468/3242 train_loss:3.8400 train_time:970251ms step_avg:2118.45ms +step:469/3242 train_loss:4.4968 train_time:972364ms step_avg:2118.44ms +step:470/3242 train_loss:3.9215 train_time:974486ms step_avg:2118.45ms +step:471/3242 train_loss:3.7842 train_time:976603ms step_avg:2118.45ms +step:472/3242 train_loss:3.9005 train_time:978721ms step_avg:2118.44ms +step:473/3242 train_loss:3.8083 train_time:980853ms step_avg:2118.47ms +step:474/3242 train_loss:3.8041 train_time:982972ms step_avg:2118.47ms +step:475/3242 train_loss:3.8949 train_time:985088ms step_avg:2118.47ms +step:476/3242 train_loss:3.8910 train_time:987208ms step_avg:2118.47ms +step:477/3242 train_loss:4.1878 train_time:989337ms step_avg:2118.50ms +step:478/3242 train_loss:3.7721 train_time:991453ms step_avg:2118.49ms +step:479/3242 train_loss:3.7424 train_time:993568ms step_avg:2118.48ms +step:480/3242 train_loss:3.7242 train_time:995693ms step_avg:2118.50ms +step:481/3242 train_loss:3.6070 train_time:997815ms step_avg:2118.50ms +step:482/3242 train_loss:4.0192 train_time:999935ms step_avg:2118.51ms +step:483/3242 train_loss:3.8160 train_time:1002049ms step_avg:2118.50ms +step:484/3242 train_loss:3.7993 train_time:1004166ms step_avg:2118.49ms +step:485/3242 train_loss:3.7625 train_time:1006289ms step_avg:2118.50ms +step:486/3242 train_loss:3.6103 train_time:1008410ms step_avg:2118.51ms +step:487/3242 train_loss:3.7331 train_time:1010531ms step_avg:2118.51ms +step:488/3242 train_loss:3.7760 train_time:1012647ms step_avg:2118.51ms +step:489/3242 train_loss:3.8418 train_time:1014762ms step_avg:2118.50ms +step:490/3242 train_loss:3.9339 train_time:1016882ms step_avg:2118.50ms +step:491/3242 train_loss:3.7080 train_time:1019002ms step_avg:2118.51ms +step:492/3242 train_loss:3.8125 train_time:1021121ms step_avg:2118.51ms +step:493/3242 train_loss:3.8932 train_time:1023245ms step_avg:2118.52ms +step:494/3242 train_loss:3.5008 train_time:1025365ms step_avg:2118.52ms +step:495/3242 train_loss:4.0759 train_time:1027485ms step_avg:2118.53ms +step:496/3242 train_loss:4.3423 train_time:1029609ms step_avg:2118.54ms +step:497/3242 train_loss:3.8541 train_time:1031730ms step_avg:2118.54ms +step:498/3242 train_loss:3.9692 train_time:1033843ms step_avg:2118.53ms +step:499/3242 train_loss:3.8420 train_time:1035966ms step_avg:2118.54ms +step:500/3242 train_loss:3.7026 train_time:1038080ms step_avg:2118.53ms +step:500/3242 val_loss:3.8119 train_time:1038494ms step_avg:2119.37ms +step:501/3242 train_loss:3.7384 train_time:1040200ms step_avg:2118.53ms +step:502/3242 train_loss:3.7140 train_time:1042326ms step_avg:2118.55ms +step:503/3242 train_loss:3.9052 train_time:1044446ms step_avg:2118.55ms +step:504/3242 train_loss:3.6578 train_time:1046560ms step_avg:2118.54ms +step:505/3242 train_loss:3.8164 train_time:1048679ms step_avg:2118.54ms +step:506/3242 train_loss:3.7533 train_time:1050806ms step_avg:2118.56ms +step:507/3242 train_loss:4.0485 train_time:1052924ms step_avg:2118.56ms +step:508/3242 train_loss:4.5439 train_time:1055039ms step_avg:2118.55ms +step:509/3242 train_loss:3.5567 train_time:1057156ms step_avg:2118.55ms +step:510/3242 train_loss:3.9899 train_time:1059271ms step_avg:2118.54ms +step:511/3242 train_loss:3.9503 train_time:1061394ms step_avg:2118.55ms +step:512/3242 train_loss:3.8122 train_time:1063511ms step_avg:2118.55ms +step:513/3242 train_loss:3.4330 train_time:1065633ms step_avg:2118.55ms +step:514/3242 train_loss:3.6137 train_time:1067751ms step_avg:2118.55ms +step:515/3242 train_loss:4.7736 train_time:1069869ms step_avg:2118.55ms +step:516/3242 train_loss:3.9751 train_time:1071985ms step_avg:2118.55ms +step:517/3242 train_loss:3.6027 train_time:1074104ms step_avg:2118.55ms +step:518/3242 train_loss:3.6072 train_time:1076228ms step_avg:2118.56ms +step:519/3242 train_loss:3.5344 train_time:1078346ms step_avg:2118.56ms +step:520/3242 train_loss:3.9334 train_time:1080471ms step_avg:2118.57ms +step:521/3242 train_loss:3.7828 train_time:1082586ms step_avg:2118.56ms +step:522/3242 train_loss:3.4865 train_time:1084702ms step_avg:2118.56ms +step:523/3242 train_loss:3.9828 train_time:1086825ms step_avg:2118.57ms +step:524/3242 train_loss:3.6079 train_time:1088942ms step_avg:2118.56ms +step:525/3242 train_loss:3.6735 train_time:1091063ms step_avg:2118.57ms +step:526/3242 train_loss:4.1020 train_time:1093179ms step_avg:2118.56ms +step:527/3242 train_loss:3.8160 train_time:1095300ms step_avg:2118.57ms +step:528/3242 train_loss:3.6639 train_time:1097423ms step_avg:2118.58ms +step:529/3242 train_loss:3.8906 train_time:1099542ms step_avg:2118.58ms +step:530/3242 train_loss:3.7188 train_time:1101662ms step_avg:2118.58ms +step:531/3242 train_loss:3.8101 train_time:1103783ms step_avg:2118.58ms +step:532/3242 train_loss:3.7843 train_time:1105903ms step_avg:2118.59ms +step:533/3242 train_loss:3.7617 train_time:1108019ms step_avg:2118.58ms +step:534/3242 train_loss:3.8737 train_time:1110133ms step_avg:2118.57ms +step:535/3242 train_loss:3.9150 train_time:1112262ms step_avg:2118.59ms +step:536/3242 train_loss:3.8247 train_time:1114374ms step_avg:2118.58ms +step:537/3242 train_loss:4.0140 train_time:1116495ms step_avg:2118.59ms +step:538/3242 train_loss:3.7400 train_time:1118610ms step_avg:2118.58ms +step:539/3242 train_loss:3.9439 train_time:1120736ms step_avg:2118.59ms +step:540/3242 train_loss:3.8747 train_time:1122855ms step_avg:2118.59ms +step:541/3242 train_loss:3.6532 train_time:1124970ms step_avg:2118.59ms +step:542/3242 train_loss:3.9227 train_time:1127095ms step_avg:2118.60ms +step:543/3242 train_loss:3.7919 train_time:1129216ms step_avg:2118.60ms +step:544/3242 train_loss:3.7534 train_time:1131328ms step_avg:2118.59ms +step:545/3242 train_loss:3.8738 train_time:1133447ms step_avg:2118.59ms +step:546/3242 train_loss:3.8975 train_time:1135568ms step_avg:2118.60ms +step:547/3242 train_loss:3.8268 train_time:1137681ms step_avg:2118.59ms +step:548/3242 train_loss:4.1118 train_time:1139804ms step_avg:2118.59ms +step:549/3242 train_loss:3.8821 train_time:1141926ms step_avg:2118.60ms +step:550/3242 train_loss:3.9455 train_time:1144043ms step_avg:2118.60ms +step:551/3242 train_loss:3.9233 train_time:1146169ms step_avg:2118.61ms +step:552/3242 train_loss:3.8216 train_time:1148283ms step_avg:2118.60ms +step:553/3242 train_loss:3.7325 train_time:1150403ms step_avg:2118.61ms +step:554/3242 train_loss:3.6724 train_time:1152530ms step_avg:2118.62ms +step:555/3242 train_loss:3.5968 train_time:1154641ms step_avg:2118.61ms +step:556/3242 train_loss:4.2825 train_time:1156757ms step_avg:2118.60ms +step:557/3242 train_loss:3.6901 train_time:1158881ms step_avg:2118.61ms +step:558/3242 train_loss:3.6417 train_time:1161003ms step_avg:2118.62ms +step:559/3242 train_loss:3.7899 train_time:1163121ms step_avg:2118.62ms +step:560/3242 train_loss:3.6639 train_time:1165243ms step_avg:2118.62ms +step:561/3242 train_loss:3.7117 train_time:1167354ms step_avg:2118.61ms +step:562/3242 train_loss:3.7856 train_time:1169481ms step_avg:2118.63ms +step:563/3242 train_loss:3.5296 train_time:1171595ms step_avg:2118.62ms +step:564/3242 train_loss:3.9145 train_time:1173715ms step_avg:2118.62ms +step:565/3242 train_loss:3.6417 train_time:1175838ms step_avg:2118.63ms +step:566/3242 train_loss:3.8054 train_time:1177951ms step_avg:2118.62ms +step:567/3242 train_loss:4.2167 train_time:1180074ms step_avg:2118.62ms +step:568/3242 train_loss:3.5455 train_time:1182182ms step_avg:2118.61ms +step:569/3242 train_loss:5.1336 train_time:1184296ms step_avg:2118.60ms +step:570/3242 train_loss:3.7611 train_time:1186416ms step_avg:2118.60ms +step:571/3242 train_loss:3.7616 train_time:1188535ms step_avg:2118.60ms +step:572/3242 train_loss:3.9105 train_time:1190781ms step_avg:2118.83ms +step:573/3242 train_loss:3.8324 train_time:1192905ms step_avg:2118.84ms +step:574/3242 train_loss:3.8950 train_time:1195028ms step_avg:2118.84ms +step:575/3242 train_loss:3.9357 train_time:1197148ms step_avg:2118.85ms +step:576/3242 train_loss:3.8152 train_time:1199275ms step_avg:2118.86ms +step:577/3242 train_loss:3.9320 train_time:1201393ms step_avg:2118.86ms +step:578/3242 train_loss:3.8301 train_time:1203512ms step_avg:2118.86ms +step:579/3242 train_loss:3.7461 train_time:1205633ms step_avg:2118.86ms +step:580/3242 train_loss:3.8559 train_time:1207752ms step_avg:2118.86ms +step:581/3242 train_loss:3.6740 train_time:1209866ms step_avg:2118.85ms +step:582/3242 train_loss:3.7821 train_time:1211992ms step_avg:2118.87ms +step:583/3242 train_loss:3.8755 train_time:1214110ms step_avg:2118.87ms +step:584/3242 train_loss:3.7245 train_time:1216228ms step_avg:2118.86ms +step:585/3242 train_loss:3.6337 train_time:1218349ms step_avg:2118.87ms +step:586/3242 train_loss:4.2185 train_time:1220468ms step_avg:2118.87ms +step:587/3242 train_loss:3.6264 train_time:1222581ms step_avg:2118.86ms +step:588/3242 train_loss:3.9267 train_time:1224692ms step_avg:2118.84ms +step:589/3242 train_loss:3.7613 train_time:1226821ms step_avg:2118.86ms +step:590/3242 train_loss:3.8700 train_time:1228933ms step_avg:2118.85ms +step:591/3242 train_loss:3.8172 train_time:1231045ms step_avg:2118.84ms +step:592/3242 train_loss:3.4108 train_time:1233164ms step_avg:2118.84ms +step:593/3242 train_loss:3.6593 train_time:1235283ms step_avg:2118.84ms +step:594/3242 train_loss:3.7285 train_time:1237407ms step_avg:2118.85ms +step:595/3242 train_loss:3.5505 train_time:1239521ms step_avg:2118.84ms +step:596/3242 train_loss:4.1679 train_time:1241639ms step_avg:2118.84ms +step:597/3242 train_loss:3.8371 train_time:1243751ms step_avg:2118.83ms +step:598/3242 train_loss:3.6571 train_time:1245873ms step_avg:2118.83ms +step:599/3242 train_loss:3.8592 train_time:1247990ms step_avg:2118.83ms +step:600/3242 train_loss:3.5726 train_time:1250113ms step_avg:2118.84ms +step:601/3242 train_loss:3.6495 train_time:1252237ms step_avg:2118.84ms +step:602/3242 train_loss:3.7433 train_time:1254356ms step_avg:2118.84ms +step:603/3242 train_loss:3.8725 train_time:1256474ms step_avg:2118.84ms +step:604/3242 train_loss:3.8502 train_time:1258593ms step_avg:2118.84ms +step:605/3242 train_loss:3.7869 train_time:1260719ms step_avg:2118.85ms +step:606/3242 train_loss:3.9089 train_time:1262838ms step_avg:2118.85ms +step:607/3242 train_loss:3.8233 train_time:1264960ms step_avg:2118.86ms +step:608/3242 train_loss:4.1625 train_time:1267075ms step_avg:2118.85ms +step:609/3242 train_loss:3.8682 train_time:1269196ms step_avg:2118.86ms +step:610/3242 train_loss:3.7094 train_time:1271321ms step_avg:2118.87ms +step:611/3242 train_loss:4.2520 train_time:1273449ms step_avg:2118.88ms +step:612/3242 train_loss:3.6964 train_time:1275570ms step_avg:2118.89ms +step:613/3242 train_loss:3.7720 train_time:1277689ms step_avg:2118.89ms +step:614/3242 train_loss:4.0351 train_time:1279795ms step_avg:2118.87ms +step:615/3242 train_loss:3.7720 train_time:1281915ms step_avg:2118.87ms +step:616/3242 train_loss:3.9128 train_time:1284031ms step_avg:2118.86ms +step:617/3242 train_loss:3.7813 train_time:1286158ms step_avg:2118.88ms +step:618/3242 train_loss:3.5229 train_time:1288276ms step_avg:2118.87ms +step:619/3242 train_loss:3.6977 train_time:1290392ms step_avg:2118.87ms +step:620/3242 train_loss:3.8328 train_time:1292514ms step_avg:2118.88ms +step:621/3242 train_loss:3.5818 train_time:1294630ms step_avg:2118.87ms +step:622/3242 train_loss:3.6986 train_time:1296750ms step_avg:2118.87ms +step:623/3242 train_loss:3.5433 train_time:1298870ms step_avg:2118.87ms +step:624/3242 train_loss:3.5832 train_time:1300987ms step_avg:2118.87ms +step:625/3242 train_loss:3.9648 train_time:1303107ms step_avg:2118.87ms +step:625/3242 val_loss:3.7386 train_time:1303521ms step_avg:2119.55ms +step:626/3242 train_loss:3.8766 train_time:1305237ms step_avg:2118.89ms +step:627/3242 train_loss:4.0635 train_time:1307348ms step_avg:2118.88ms +step:628/3242 train_loss:3.3542 train_time:1309471ms step_avg:2118.88ms +step:629/3242 train_loss:3.6572 train_time:1311592ms step_avg:2118.89ms +step:630/3242 train_loss:3.6358 train_time:1313706ms step_avg:2118.88ms +step:631/3242 train_loss:3.6483 train_time:1315830ms step_avg:2118.89ms +step:632/3242 train_loss:3.7638 train_time:1317947ms step_avg:2118.89ms +step:633/3242 train_loss:3.8934 train_time:1320067ms step_avg:2118.89ms +step:634/3242 train_loss:3.5919 train_time:1322188ms step_avg:2118.89ms +step:635/3242 train_loss:3.7517 train_time:1324304ms step_avg:2118.89ms +step:636/3242 train_loss:4.0722 train_time:1326423ms step_avg:2118.89ms +step:637/3242 train_loss:3.7775 train_time:1328542ms step_avg:2118.89ms +step:638/3242 train_loss:3.7313 train_time:1330660ms step_avg:2118.89ms +step:639/3242 train_loss:3.9998 train_time:1332783ms step_avg:2118.89ms +step:640/3242 train_loss:3.8059 train_time:1334901ms step_avg:2118.89ms +step:641/3242 train_loss:3.7499 train_time:1337024ms step_avg:2118.90ms +step:642/3242 train_loss:3.6923 train_time:1339142ms step_avg:2118.90ms +step:643/3242 train_loss:3.6974 train_time:1341261ms step_avg:2118.90ms +step:644/3242 train_loss:3.8138 train_time:1343378ms step_avg:2118.89ms +step:645/3242 train_loss:3.4699 train_time:1345497ms step_avg:2118.89ms +step:646/3242 train_loss:3.6405 train_time:1347616ms step_avg:2118.89ms +step:647/3242 train_loss:3.5775 train_time:1349734ms step_avg:2118.89ms +step:648/3242 train_loss:3.6689 train_time:1351856ms step_avg:2118.90ms +step:649/3242 train_loss:3.8802 train_time:1353976ms step_avg:2118.90ms +step:650/3242 train_loss:3.8721 train_time:1356094ms step_avg:2118.90ms +step:651/3242 train_loss:3.7345 train_time:1358216ms step_avg:2118.90ms +step:652/3242 train_loss:4.2421 train_time:1360336ms step_avg:2118.90ms +step:653/3242 train_loss:3.7323 train_time:1362456ms step_avg:2118.90ms +step:654/3242 train_loss:3.7623 train_time:1364569ms step_avg:2118.90ms +step:655/3242 train_loss:3.3911 train_time:1366694ms step_avg:2118.91ms +step:656/3242 train_loss:3.8237 train_time:1368810ms step_avg:2118.90ms +step:657/3242 train_loss:3.6289 train_time:1370934ms step_avg:2118.91ms +step:658/3242 train_loss:3.6276 train_time:1373048ms step_avg:2118.90ms +step:659/3242 train_loss:3.8488 train_time:1375171ms step_avg:2118.91ms +step:660/3242 train_loss:3.6827 train_time:1377285ms step_avg:2118.90ms +step:661/3242 train_loss:3.7240 train_time:1379414ms step_avg:2118.91ms +step:662/3242 train_loss:3.6563 train_time:1381524ms step_avg:2118.90ms +step:663/3242 train_loss:3.5233 train_time:1383648ms step_avg:2118.91ms +step:664/3242 train_loss:3.8173 train_time:1385766ms step_avg:2118.91ms +step:665/3242 train_loss:3.9551 train_time:1387887ms step_avg:2118.91ms +step:666/3242 train_loss:3.6096 train_time:1390003ms step_avg:2118.91ms +step:667/3242 train_loss:4.0008 train_time:1392119ms step_avg:2118.90ms +step:668/3242 train_loss:3.8018 train_time:1394239ms step_avg:2118.90ms +step:669/3242 train_loss:3.7060 train_time:1396360ms step_avg:2118.91ms +step:670/3242 train_loss:3.7076 train_time:1398476ms step_avg:2118.90ms +step:671/3242 train_loss:3.4086 train_time:1400600ms step_avg:2118.91ms +step:672/3242 train_loss:3.5696 train_time:1402721ms step_avg:2118.91ms +step:673/3242 train_loss:3.6125 train_time:1404839ms step_avg:2118.91ms +step:674/3242 train_loss:5.1121 train_time:1406961ms step_avg:2118.92ms +step:675/3242 train_loss:3.8448 train_time:1409080ms step_avg:2118.92ms +step:676/3242 train_loss:3.9923 train_time:1411196ms step_avg:2118.91ms +step:677/3242 train_loss:3.6071 train_time:1413321ms step_avg:2118.92ms +step:678/3242 train_loss:3.7271 train_time:1415440ms step_avg:2118.92ms +step:679/3242 train_loss:3.6525 train_time:1417553ms step_avg:2118.91ms +step:680/3242 train_loss:3.8132 train_time:1419680ms step_avg:2118.93ms +step:681/3242 train_loss:3.7473 train_time:1421792ms step_avg:2118.92ms +step:682/3242 train_loss:3.5674 train_time:1423912ms step_avg:2118.92ms +step:683/3242 train_loss:3.7040 train_time:1426031ms step_avg:2118.92ms +step:684/3242 train_loss:3.9680 train_time:1428152ms step_avg:2118.92ms +step:685/3242 train_loss:3.7906 train_time:1430267ms step_avg:2118.91ms +step:686/3242 train_loss:3.9428 train_time:1432390ms step_avg:2118.92ms +step:687/3242 train_loss:3.7634 train_time:1434513ms step_avg:2118.93ms +step:688/3242 train_loss:3.8702 train_time:1436633ms step_avg:2118.93ms +step:689/3242 train_loss:3.3538 train_time:1438745ms step_avg:2118.92ms +step:690/3242 train_loss:3.5795 train_time:1440865ms step_avg:2118.92ms +step:691/3242 train_loss:3.8415 train_time:1442989ms step_avg:2118.93ms +step:692/3242 train_loss:3.4846 train_time:1445105ms step_avg:2118.92ms +step:693/3242 train_loss:3.7378 train_time:1447224ms step_avg:2118.92ms +step:694/3242 train_loss:3.5844 train_time:1449348ms step_avg:2118.93ms +step:695/3242 train_loss:3.4938 train_time:1451468ms step_avg:2118.93ms +step:696/3242 train_loss:3.4708 train_time:1453583ms step_avg:2118.93ms +step:697/3242 train_loss:3.6120 train_time:1455702ms step_avg:2118.93ms +step:698/3242 train_loss:3.6661 train_time:1457817ms step_avg:2118.92ms +step:699/3242 train_loss:3.6707 train_time:1459939ms step_avg:2118.93ms +step:700/3242 train_loss:3.6038 train_time:1462060ms step_avg:2118.93ms +step:701/3242 train_loss:3.6463 train_time:1464176ms step_avg:2118.92ms +step:702/3242 train_loss:3.6292 train_time:1466297ms step_avg:2118.93ms +step:703/3242 train_loss:3.6929 train_time:1468415ms step_avg:2118.93ms +step:704/3242 train_loss:3.6039 train_time:1470541ms step_avg:2118.93ms +step:705/3242 train_loss:3.9366 train_time:1472656ms step_avg:2118.93ms +step:706/3242 train_loss:3.6484 train_time:1474780ms step_avg:2118.94ms +step:707/3242 train_loss:3.7288 train_time:1476897ms step_avg:2118.93ms +step:708/3242 train_loss:3.8192 train_time:1479015ms step_avg:2118.93ms +step:709/3242 train_loss:3.6428 train_time:1481133ms step_avg:2118.93ms +step:710/3242 train_loss:3.7615 train_time:1483255ms step_avg:2118.94ms +step:711/3242 train_loss:3.4999 train_time:1485380ms step_avg:2118.94ms +step:712/3242 train_loss:3.5517 train_time:1487491ms step_avg:2118.93ms +step:713/3242 train_loss:3.9526 train_time:1489617ms step_avg:2118.94ms +step:714/3242 train_loss:3.7035 train_time:1491735ms step_avg:2118.94ms +step:715/3242 train_loss:3.7158 train_time:1493854ms step_avg:2118.94ms +step:716/3242 train_loss:3.8474 train_time:1495975ms step_avg:2118.94ms +step:717/3242 train_loss:3.8125 train_time:1498089ms step_avg:2118.94ms +step:718/3242 train_loss:3.8668 train_time:1500205ms step_avg:2118.93ms +step:719/3242 train_loss:3.5099 train_time:1502321ms step_avg:2118.93ms +step:720/3242 train_loss:3.5830 train_time:1504439ms step_avg:2118.93ms +step:721/3242 train_loss:4.5190 train_time:1506548ms step_avg:2118.91ms +step:722/3242 train_loss:3.4701 train_time:1508666ms step_avg:2118.91ms +step:723/3242 train_loss:3.9272 train_time:1510787ms step_avg:2118.92ms +step:724/3242 train_loss:3.7091 train_time:1512906ms step_avg:2118.92ms +step:725/3242 train_loss:3.6869 train_time:1515023ms step_avg:2118.91ms +step:726/3242 train_loss:3.8555 train_time:1517144ms step_avg:2118.92ms +step:727/3242 train_loss:3.6990 train_time:1519261ms step_avg:2118.91ms +step:728/3242 train_loss:3.5758 train_time:1521383ms step_avg:2118.92ms +step:729/3242 train_loss:3.7827 train_time:1523502ms step_avg:2118.92ms +step:730/3242 train_loss:3.4947 train_time:1525620ms step_avg:2118.92ms +step:731/3242 train_loss:3.7213 train_time:1527740ms step_avg:2118.92ms +step:732/3242 train_loss:3.6748 train_time:1529862ms step_avg:2118.92ms +step:733/3242 train_loss:3.7098 train_time:1531983ms step_avg:2118.92ms +step:734/3242 train_loss:4.6685 train_time:1534097ms step_avg:2118.92ms +step:735/3242 train_loss:3.4273 train_time:1536216ms step_avg:2118.92ms +step:736/3242 train_loss:3.7451 train_time:1538344ms step_avg:2118.93ms +step:737/3242 train_loss:3.9106 train_time:1540452ms step_avg:2118.92ms +step:738/3242 train_loss:3.7309 train_time:1542572ms step_avg:2118.92ms +step:739/3242 train_loss:3.6285 train_time:1544689ms step_avg:2118.92ms +step:740/3242 train_loss:3.9105 train_time:1546802ms step_avg:2118.91ms +step:741/3242 train_loss:3.8443 train_time:1548913ms step_avg:2118.90ms +step:742/3242 train_loss:3.6588 train_time:1551038ms step_avg:2118.90ms +step:743/3242 train_loss:3.9055 train_time:1553154ms step_avg:2118.90ms +step:744/3242 train_loss:3.6383 train_time:1555275ms step_avg:2118.90ms +step:745/3242 train_loss:4.0997 train_time:1557393ms step_avg:2118.90ms +step:746/3242 train_loss:3.7361 train_time:1559513ms step_avg:2118.90ms +step:747/3242 train_loss:3.7533 train_time:1561628ms step_avg:2118.90ms +step:748/3242 train_loss:3.6236 train_time:1563763ms step_avg:2118.92ms +step:749/3242 train_loss:3.6607 train_time:1565879ms step_avg:2118.92ms +step:750/3242 train_loss:3.6014 train_time:1567990ms step_avg:2118.91ms +step:750/3242 val_loss:3.6826 train_time:1568401ms step_avg:2119.46ms +step:751/3242 train_loss:3.7381 train_time:1570113ms step_avg:2118.91ms +step:752/3242 train_loss:3.6551 train_time:1572239ms step_avg:2118.92ms +step:753/3242 train_loss:3.6096 train_time:1574353ms step_avg:2118.91ms +step:754/3242 train_loss:3.5306 train_time:1576476ms step_avg:2118.92ms +step:755/3242 train_loss:3.9260 train_time:1578597ms step_avg:2118.92ms +step:756/3242 train_loss:3.9206 train_time:1580714ms step_avg:2118.92ms +step:757/3242 train_loss:3.4670 train_time:1582840ms step_avg:2118.93ms +step:758/3242 train_loss:3.7984 train_time:1584960ms step_avg:2118.93ms +step:759/3242 train_loss:3.8210 train_time:1587076ms step_avg:2118.93ms +step:760/3242 train_loss:3.6895 train_time:1589189ms step_avg:2118.92ms +step:761/3242 train_loss:3.5270 train_time:1591317ms step_avg:2118.93ms +step:762/3242 train_loss:3.8252 train_time:1593568ms step_avg:2119.11ms +step:763/3242 train_loss:3.4613 train_time:1595686ms step_avg:2119.10ms +step:764/3242 train_loss:3.6317 train_time:1597810ms step_avg:2119.11ms +step:765/3242 train_loss:3.6732 train_time:1599937ms step_avg:2119.12ms +step:766/3242 train_loss:3.5302 train_time:1602052ms step_avg:2119.12ms +step:767/3242 train_loss:4.0467 train_time:1604170ms step_avg:2119.12ms +step:768/3242 train_loss:3.5627 train_time:1606288ms step_avg:2119.11ms +step:769/3242 train_loss:3.6076 train_time:1608415ms step_avg:2119.12ms +step:770/3242 train_loss:3.7531 train_time:1610534ms step_avg:2119.12ms +step:771/3242 train_loss:5.9626 train_time:1612648ms step_avg:2119.12ms +step:772/3242 train_loss:3.8307 train_time:1614767ms step_avg:2119.12ms +step:773/3242 train_loss:3.7725 train_time:1616884ms step_avg:2119.11ms +step:774/3242 train_loss:3.6626 train_time:1619005ms step_avg:2119.12ms +step:775/3242 train_loss:3.8145 train_time:1621127ms step_avg:2119.12ms +step:776/3242 train_loss:3.4875 train_time:1623251ms step_avg:2119.13ms +step:777/3242 train_loss:3.7407 train_time:1625365ms step_avg:2119.12ms +step:778/3242 train_loss:3.6795 train_time:1627485ms step_avg:2119.12ms +step:779/3242 train_loss:3.7152 train_time:1629594ms step_avg:2119.11ms +step:780/3242 train_loss:3.5284 train_time:1631713ms step_avg:2119.11ms +step:781/3242 train_loss:3.7206 train_time:1633834ms step_avg:2119.11ms +step:782/3242 train_loss:3.5649 train_time:1635954ms step_avg:2119.11ms +step:783/3242 train_loss:3.5517 train_time:1638072ms step_avg:2119.11ms +step:784/3242 train_loss:3.7292 train_time:1640196ms step_avg:2119.12ms +step:785/3242 train_loss:3.8327 train_time:1642318ms step_avg:2119.12ms +step:786/3242 train_loss:3.5584 train_time:1644437ms step_avg:2119.12ms +step:787/3242 train_loss:3.5813 train_time:1646555ms step_avg:2119.12ms +step:788/3242 train_loss:3.9217 train_time:1648671ms step_avg:2119.11ms +step:789/3242 train_loss:3.7451 train_time:1650798ms step_avg:2119.12ms +step:790/3242 train_loss:3.7431 train_time:1652912ms step_avg:2119.12ms +step:791/3242 train_loss:3.7097 train_time:1655030ms step_avg:2119.12ms +step:792/3242 train_loss:3.7566 train_time:1657144ms step_avg:2119.11ms +step:793/3242 train_loss:3.7214 train_time:1659269ms step_avg:2119.12ms +step:794/3242 train_loss:3.7614 train_time:1661387ms step_avg:2119.12ms +step:795/3242 train_loss:3.9856 train_time:1663504ms step_avg:2119.11ms +step:796/3242 train_loss:3.7015 train_time:1665623ms step_avg:2119.11ms +step:797/3242 train_loss:3.7368 train_time:1667746ms step_avg:2119.12ms +step:798/3242 train_loss:3.2765 train_time:1669866ms step_avg:2119.12ms +step:799/3242 train_loss:3.5997 train_time:1671982ms step_avg:2119.12ms +step:800/3242 train_loss:3.4316 train_time:1674105ms step_avg:2119.12ms +step:801/3242 train_loss:3.6414 train_time:1676228ms step_avg:2119.13ms +step:802/3242 train_loss:3.6790 train_time:1678350ms step_avg:2119.13ms +step:803/3242 train_loss:3.7899 train_time:1680473ms step_avg:2119.13ms +step:804/3242 train_loss:3.4976 train_time:1682588ms step_avg:2119.13ms +step:805/3242 train_loss:3.6011 train_time:1684705ms step_avg:2119.13ms +step:806/3242 train_loss:3.3165 train_time:1686820ms step_avg:2119.12ms +step:807/3242 train_loss:3.6856 train_time:1688939ms step_avg:2119.12ms +step:808/3242 train_loss:3.9066 train_time:1691063ms step_avg:2119.13ms +step:809/3242 train_loss:3.7004 train_time:1693176ms step_avg:2119.12ms +step:810/3242 train_loss:4.1148 train_time:1695295ms step_avg:2119.12ms +step:811/3242 train_loss:3.6744 train_time:1697421ms step_avg:2119.13ms +step:812/3242 train_loss:3.5135 train_time:1699537ms step_avg:2119.12ms +step:813/3242 train_loss:3.9939 train_time:1701653ms step_avg:2119.12ms +step:814/3242 train_loss:3.6150 train_time:1703781ms step_avg:2119.13ms +step:815/3242 train_loss:3.5419 train_time:1705901ms step_avg:2119.13ms +step:816/3242 train_loss:3.9362 train_time:1708025ms step_avg:2119.14ms +step:817/3242 train_loss:3.7196 train_time:1710140ms step_avg:2119.13ms +step:818/3242 train_loss:3.6351 train_time:1712254ms step_avg:2119.13ms +step:819/3242 train_loss:3.5823 train_time:1714376ms step_avg:2119.13ms +step:820/3242 train_loss:3.6506 train_time:1716492ms step_avg:2119.13ms +step:821/3242 train_loss:3.6776 train_time:1718614ms step_avg:2119.13ms +step:822/3242 train_loss:3.7419 train_time:1720738ms step_avg:2119.14ms +step:823/3242 train_loss:3.8604 train_time:1722855ms step_avg:2119.13ms +step:824/3242 train_loss:3.3909 train_time:1724968ms step_avg:2119.13ms +step:825/3242 train_loss:3.9530 train_time:1727088ms step_avg:2119.13ms +step:826/3242 train_loss:3.7382 train_time:1729207ms step_avg:2119.13ms +step:827/3242 train_loss:3.4732 train_time:1731331ms step_avg:2119.13ms +step:828/3242 train_loss:3.6096 train_time:1733453ms step_avg:2119.14ms +step:829/3242 train_loss:3.6116 train_time:1735572ms step_avg:2119.14ms +step:830/3242 train_loss:3.6287 train_time:1737684ms step_avg:2119.13ms +step:831/3242 train_loss:3.5840 train_time:1739802ms step_avg:2119.13ms +step:832/3242 train_loss:3.9486 train_time:1741933ms step_avg:2119.14ms +step:833/3242 train_loss:3.7739 train_time:1744047ms step_avg:2119.13ms +step:834/3242 train_loss:3.7480 train_time:1746166ms step_avg:2119.13ms +step:835/3242 train_loss:3.4619 train_time:1748290ms step_avg:2119.14ms +step:836/3242 train_loss:3.9138 train_time:1750405ms step_avg:2119.13ms +step:837/3242 train_loss:3.5909 train_time:1752529ms step_avg:2119.14ms +step:838/3242 train_loss:3.4625 train_time:1754647ms step_avg:2119.14ms +step:839/3242 train_loss:3.5817 train_time:1756752ms step_avg:2119.12ms +step:840/3242 train_loss:3.8352 train_time:1758876ms step_avg:2119.13ms +step:841/3242 train_loss:3.6303 train_time:1760995ms step_avg:2119.13ms +step:842/3242 train_loss:3.4852 train_time:1763113ms step_avg:2119.13ms +step:843/3242 train_loss:3.6157 train_time:1765236ms step_avg:2119.13ms +step:844/3242 train_loss:3.6932 train_time:1767355ms step_avg:2119.13ms +step:845/3242 train_loss:3.6260 train_time:1769475ms step_avg:2119.13ms +step:846/3242 train_loss:3.7208 train_time:1771594ms step_avg:2119.13ms +step:847/3242 train_loss:3.8745 train_time:1773707ms step_avg:2119.12ms +step:848/3242 train_loss:3.4237 train_time:1775830ms step_avg:2119.13ms +step:849/3242 train_loss:3.3852 train_time:1777951ms step_avg:2119.13ms +step:850/3242 train_loss:3.6767 train_time:1780065ms step_avg:2119.13ms +step:851/3242 train_loss:3.6376 train_time:1782187ms step_avg:2119.13ms +step:852/3242 train_loss:3.3484 train_time:1784313ms step_avg:2119.14ms +step:853/3242 train_loss:3.2029 train_time:1786433ms step_avg:2119.14ms +step:854/3242 train_loss:3.7095 train_time:1788552ms step_avg:2119.14ms +step:855/3242 train_loss:3.5882 train_time:1790669ms step_avg:2119.14ms +step:856/3242 train_loss:3.6204 train_time:1792793ms step_avg:2119.14ms +step:857/3242 train_loss:3.8076 train_time:1794914ms step_avg:2119.14ms +step:858/3242 train_loss:3.7177 train_time:1797029ms step_avg:2119.14ms +step:859/3242 train_loss:3.1544 train_time:1799144ms step_avg:2119.13ms +step:860/3242 train_loss:3.6541 train_time:1801266ms step_avg:2119.14ms +step:861/3242 train_loss:4.0811 train_time:1803388ms step_avg:2119.14ms +step:862/3242 train_loss:3.6032 train_time:1805505ms step_avg:2119.14ms +step:863/3242 train_loss:3.5280 train_time:1807626ms step_avg:2119.14ms +step:864/3242 train_loss:3.9452 train_time:1809742ms step_avg:2119.14ms +step:865/3242 train_loss:3.7644 train_time:1811856ms step_avg:2119.13ms +step:866/3242 train_loss:3.7481 train_time:1813982ms step_avg:2119.14ms +step:867/3242 train_loss:3.5593 train_time:1816098ms step_avg:2119.13ms +step:868/3242 train_loss:3.7106 train_time:1818218ms step_avg:2119.14ms +step:869/3242 train_loss:3.5596 train_time:1820342ms step_avg:2119.14ms +step:870/3242 train_loss:3.5849 train_time:1822458ms step_avg:2119.14ms +step:871/3242 train_loss:3.8406 train_time:1824581ms step_avg:2119.14ms +step:872/3242 train_loss:3.5656 train_time:1826698ms step_avg:2119.14ms +step:873/3242 train_loss:3.6454 train_time:1828812ms step_avg:2119.13ms +step:874/3242 train_loss:3.6456 train_time:1830933ms step_avg:2119.14ms +step:875/3242 train_loss:3.6999 train_time:1833054ms step_avg:2119.14ms +step:875/3242 val_loss:3.6368 train_time:1833464ms step_avg:2119.61ms +step:876/3242 train_loss:3.8489 train_time:1835179ms step_avg:2119.14ms +step:877/3242 train_loss:3.4515 train_time:1837307ms step_avg:2119.15ms +step:878/3242 train_loss:3.7479 train_time:1839423ms step_avg:2119.15ms +step:879/3242 train_loss:3.5248 train_time:1841547ms step_avg:2119.16ms +step:880/3242 train_loss:4.3258 train_time:1843664ms step_avg:2119.15ms +step:881/3242 train_loss:3.7996 train_time:1845787ms step_avg:2119.16ms +step:882/3242 train_loss:3.2603 train_time:1847904ms step_avg:2119.16ms +step:883/3242 train_loss:3.7454 train_time:1850032ms step_avg:2119.17ms +step:884/3242 train_loss:3.5391 train_time:1852144ms step_avg:2119.16ms +step:885/3242 train_loss:3.6985 train_time:1854267ms step_avg:2119.16ms +step:886/3242 train_loss:3.7800 train_time:1856380ms step_avg:2119.16ms +step:887/3242 train_loss:3.6781 train_time:1858495ms step_avg:2119.15ms +step:888/3242 train_loss:3.7759 train_time:1860611ms step_avg:2119.15ms +step:889/3242 train_loss:3.7485 train_time:1862737ms step_avg:2119.15ms +step:890/3242 train_loss:3.7977 train_time:1864851ms step_avg:2119.15ms +step:891/3242 train_loss:3.5247 train_time:1866961ms step_avg:2119.14ms +step:892/3242 train_loss:3.7121 train_time:1869083ms step_avg:2119.14ms +step:893/3242 train_loss:3.8060 train_time:1871206ms step_avg:2119.15ms +step:894/3242 train_loss:3.4144 train_time:1873321ms step_avg:2119.14ms +step:895/3242 train_loss:3.5476 train_time:1875438ms step_avg:2119.14ms +step:896/3242 train_loss:3.2692 train_time:1877562ms step_avg:2119.14ms +step:897/3242 train_loss:3.5302 train_time:1879673ms step_avg:2119.14ms +step:898/3242 train_loss:3.7952 train_time:1881794ms step_avg:2119.14ms +step:899/3242 train_loss:3.6947 train_time:1883918ms step_avg:2119.14ms +step:900/3242 train_loss:3.6534 train_time:1886040ms step_avg:2119.15ms +step:901/3242 train_loss:3.6628 train_time:1888163ms step_avg:2119.15ms +step:902/3242 train_loss:3.5626 train_time:1890279ms step_avg:2119.15ms +step:903/3242 train_loss:3.4684 train_time:1892391ms step_avg:2119.14ms +step:904/3242 train_loss:3.6092 train_time:1894510ms step_avg:2119.14ms +step:905/3242 train_loss:3.7492 train_time:1896630ms step_avg:2119.14ms +step:906/3242 train_loss:3.8053 train_time:1898753ms step_avg:2119.14ms +step:907/3242 train_loss:3.6172 train_time:1900872ms step_avg:2119.14ms +step:908/3242 train_loss:3.6221 train_time:1902989ms step_avg:2119.14ms +step:909/3242 train_loss:3.8157 train_time:1905113ms step_avg:2119.15ms +step:910/3242 train_loss:4.0093 train_time:1907237ms step_avg:2119.15ms +step:911/3242 train_loss:4.0682 train_time:1909351ms step_avg:2119.15ms +step:912/3242 train_loss:3.3300 train_time:1911469ms step_avg:2119.14ms +step:913/3242 train_loss:3.6845 train_time:1913588ms step_avg:2119.15ms +step:914/3242 train_loss:3.7294 train_time:1915709ms step_avg:2119.15ms +step:915/3242 train_loss:3.7000 train_time:1917825ms step_avg:2119.14ms +step:916/3242 train_loss:3.7761 train_time:1919947ms step_avg:2119.15ms +step:917/3242 train_loss:3.7747 train_time:1922058ms step_avg:2119.14ms +step:918/3242 train_loss:3.5547 train_time:1924183ms step_avg:2119.14ms +step:919/3242 train_loss:3.8700 train_time:1926298ms step_avg:2119.14ms +step:920/3242 train_loss:3.5460 train_time:1928407ms step_avg:2119.13ms +step:921/3242 train_loss:3.5663 train_time:1930535ms step_avg:2119.14ms +step:922/3242 train_loss:3.5128 train_time:1932647ms step_avg:2119.13ms +step:923/3242 train_loss:3.5409 train_time:1934764ms step_avg:2119.13ms +step:924/3242 train_loss:4.0640 train_time:1936883ms step_avg:2119.13ms +step:925/3242 train_loss:3.4224 train_time:1939001ms step_avg:2119.13ms +step:926/3242 train_loss:3.7857 train_time:1941121ms step_avg:2119.13ms +step:927/3242 train_loss:3.6433 train_time:1943238ms step_avg:2119.13ms +step:928/3242 train_loss:3.7448 train_time:1945357ms step_avg:2119.13ms +step:929/3242 train_loss:3.6791 train_time:1947470ms step_avg:2119.12ms +step:930/3242 train_loss:3.8031 train_time:1949596ms step_avg:2119.13ms +step:931/3242 train_loss:3.5792 train_time:1951718ms step_avg:2119.13ms +step:932/3242 train_loss:3.3525 train_time:1953834ms step_avg:2119.13ms +step:933/3242 train_loss:3.7457 train_time:1955962ms step_avg:2119.14ms +step:934/3242 train_loss:3.9442 train_time:1958080ms step_avg:2119.13ms +step:935/3242 train_loss:3.8255 train_time:1960202ms step_avg:2119.14ms +step:936/3242 train_loss:3.6037 train_time:1962325ms step_avg:2119.14ms +step:937/3242 train_loss:3.5558 train_time:1964440ms step_avg:2119.14ms +step:938/3242 train_loss:3.7448 train_time:1966563ms step_avg:2119.14ms +step:939/3242 train_loss:3.0129 train_time:1968671ms step_avg:2119.13ms +step:940/3242 train_loss:3.7738 train_time:1970791ms step_avg:2119.13ms +step:941/3242 train_loss:3.5820 train_time:1972913ms step_avg:2119.13ms +step:942/3242 train_loss:3.5165 train_time:1975029ms step_avg:2119.13ms +step:943/3242 train_loss:3.8134 train_time:1977151ms step_avg:2119.13ms +step:944/3242 train_loss:3.4498 train_time:1979272ms step_avg:2119.14ms +step:945/3242 train_loss:3.4466 train_time:1981389ms step_avg:2119.13ms +step:946/3242 train_loss:3.7803 train_time:1983514ms step_avg:2119.14ms +step:947/3242 train_loss:3.7372 train_time:1985631ms step_avg:2119.14ms +step:948/3242 train_loss:3.6639 train_time:1987744ms step_avg:2119.13ms +step:949/3242 train_loss:3.4474 train_time:1989863ms step_avg:2119.13ms +step:950/3242 train_loss:3.4633 train_time:1991993ms step_avg:2119.14ms +step:951/3242 train_loss:3.3629 train_time:1994116ms step_avg:2119.15ms +step:952/3242 train_loss:3.8853 train_time:1996230ms step_avg:2119.14ms +step:953/3242 train_loss:3.6088 train_time:1998480ms step_avg:2119.28ms +step:954/3242 train_loss:3.5677 train_time:2000598ms step_avg:2119.28ms +step:955/3242 train_loss:3.6925 train_time:2002727ms step_avg:2119.29ms +step:956/3242 train_loss:3.6513 train_time:2004838ms step_avg:2119.28ms +step:957/3242 train_loss:3.6888 train_time:2006961ms step_avg:2119.28ms +step:958/3242 train_loss:3.4481 train_time:2009078ms step_avg:2119.28ms +step:959/3242 train_loss:3.5983 train_time:2011194ms step_avg:2119.28ms +step:960/3242 train_loss:3.6472 train_time:2013317ms step_avg:2119.28ms +step:961/3242 train_loss:3.8033 train_time:2015438ms step_avg:2119.28ms +step:962/3242 train_loss:3.4564 train_time:2017560ms step_avg:2119.29ms +step:963/3242 train_loss:3.8745 train_time:2019675ms step_avg:2119.28ms +step:964/3242 train_loss:3.8707 train_time:2021792ms step_avg:2119.28ms +step:965/3242 train_loss:3.5790 train_time:2023911ms step_avg:2119.28ms +step:966/3242 train_loss:3.2378 train_time:2026031ms step_avg:2119.28ms +step:967/3242 train_loss:3.4905 train_time:2028151ms step_avg:2119.28ms +step:968/3242 train_loss:3.9701 train_time:2030264ms step_avg:2119.27ms +step:969/3242 train_loss:3.6169 train_time:2032385ms step_avg:2119.28ms +step:970/3242 train_loss:3.6093 train_time:2034506ms step_avg:2119.28ms +step:971/3242 train_loss:3.6253 train_time:2036631ms step_avg:2119.28ms +step:972/3242 train_loss:3.5647 train_time:2038748ms step_avg:2119.28ms +step:973/3242 train_loss:3.5781 train_time:2040864ms step_avg:2119.28ms +step:974/3242 train_loss:3.5818 train_time:2042982ms step_avg:2119.28ms +step:975/3242 train_loss:3.7726 train_time:2045106ms step_avg:2119.28ms +step:976/3242 train_loss:3.7153 train_time:2047223ms step_avg:2119.28ms +step:977/3242 train_loss:3.7021 train_time:2049351ms step_avg:2119.29ms +step:978/3242 train_loss:3.5797 train_time:2051469ms step_avg:2119.29ms +step:979/3242 train_loss:3.9803 train_time:2053593ms step_avg:2119.29ms +step:980/3242 train_loss:3.3476 train_time:2055708ms step_avg:2119.29ms +step:981/3242 train_loss:3.8635 train_time:2057827ms step_avg:2119.29ms +step:982/3242 train_loss:3.6590 train_time:2059960ms step_avg:2119.30ms +step:983/3242 train_loss:3.6624 train_time:2062075ms step_avg:2119.30ms +step:984/3242 train_loss:3.5812 train_time:2064188ms step_avg:2119.29ms +step:985/3242 train_loss:3.6035 train_time:2066314ms step_avg:2119.30ms +step:986/3242 train_loss:3.6834 train_time:2068422ms step_avg:2119.28ms +step:987/3242 train_loss:3.8319 train_time:2070536ms step_avg:2119.28ms +step:988/3242 train_loss:3.5075 train_time:2072656ms step_avg:2119.28ms +step:989/3242 train_loss:3.6882 train_time:2074776ms step_avg:2119.28ms +step:990/3242 train_loss:3.5506 train_time:2076903ms step_avg:2119.29ms +step:991/3242 train_loss:3.6064 train_time:2079027ms step_avg:2119.29ms +step:992/3242 train_loss:3.7008 train_time:2081146ms step_avg:2119.29ms +step:993/3242 train_loss:3.4962 train_time:2083264ms step_avg:2119.29ms +step:994/3242 train_loss:3.7199 train_time:2085378ms step_avg:2119.29ms +step:995/3242 train_loss:3.5677 train_time:2087503ms step_avg:2119.29ms +step:996/3242 train_loss:3.7368 train_time:2089621ms step_avg:2119.29ms +step:997/3242 train_loss:3.5208 train_time:2091738ms step_avg:2119.29ms +step:998/3242 train_loss:3.5799 train_time:2093847ms step_avg:2119.28ms +step:999/3242 train_loss:3.9395 train_time:2095967ms step_avg:2119.28ms +step:1000/3242 train_loss:3.5988 train_time:2098088ms step_avg:2119.28ms +step:1000/3242 val_loss:3.5980 train_time:2098502ms step_avg:2119.70ms +step:1001/3242 train_loss:3.8411 train_time:2100215ms step_avg:2119.29ms +step:1002/3242 train_loss:3.5147 train_time:2102334ms step_avg:2119.29ms +step:1003/3242 train_loss:3.3870 train_time:2104460ms step_avg:2119.29ms +step:1004/3242 train_loss:3.5166 train_time:2106578ms step_avg:2119.29ms +step:1005/3242 train_loss:3.9956 train_time:2108693ms step_avg:2119.29ms +step:1006/3242 train_loss:3.4823 train_time:2110815ms step_avg:2119.29ms +step:1007/3242 train_loss:3.5388 train_time:2112936ms step_avg:2119.29ms +step:1008/3242 train_loss:3.4550 train_time:2115055ms step_avg:2119.29ms +step:1009/3242 train_loss:3.5324 train_time:2117171ms step_avg:2119.29ms +step:1010/3242 train_loss:3.8212 train_time:2119297ms step_avg:2119.30ms +step:1011/3242 train_loss:3.5941 train_time:2121414ms step_avg:2119.30ms +step:1012/3242 train_loss:3.8212 train_time:2123532ms step_avg:2119.29ms +step:1013/3242 train_loss:3.6926 train_time:2125648ms step_avg:2119.29ms +step:1014/3242 train_loss:3.4950 train_time:2127769ms step_avg:2119.29ms +step:1015/3242 train_loss:3.6326 train_time:2129892ms step_avg:2119.30ms +step:1016/3242 train_loss:3.6714 train_time:2132002ms step_avg:2119.29ms +step:1017/3242 train_loss:3.3955 train_time:2134128ms step_avg:2119.29ms +step:1018/3242 train_loss:3.5802 train_time:2136246ms step_avg:2119.29ms +step:1019/3242 train_loss:3.5629 train_time:2138367ms step_avg:2119.29ms +step:1020/3242 train_loss:3.4960 train_time:2140502ms step_avg:2119.31ms +step:1021/3242 train_loss:3.4992 train_time:2142615ms step_avg:2119.30ms +step:1022/3242 train_loss:3.4279 train_time:2144736ms step_avg:2119.30ms +step:1023/3242 train_loss:3.2140 train_time:2146853ms step_avg:2119.30ms +step:1024/3242 train_loss:3.7650 train_time:2148969ms step_avg:2119.30ms +step:1025/3242 train_loss:3.4986 train_time:2151090ms step_avg:2119.30ms +step:1026/3242 train_loss:3.7588 train_time:2153210ms step_avg:2119.30ms +step:1027/3242 train_loss:3.5994 train_time:2155332ms step_avg:2119.30ms +step:1028/3242 train_loss:3.4610 train_time:2157441ms step_avg:2119.29ms +step:1029/3242 train_loss:3.6863 train_time:2159569ms step_avg:2119.30ms +step:1030/3242 train_loss:3.5643 train_time:2161685ms step_avg:2119.30ms +step:1031/3242 train_loss:3.4863 train_time:2163804ms step_avg:2119.30ms +step:1032/3242 train_loss:3.5189 train_time:2165923ms step_avg:2119.30ms +step:1033/3242 train_loss:3.4365 train_time:2168048ms step_avg:2119.30ms +step:1034/3242 train_loss:3.6910 train_time:2170165ms step_avg:2119.30ms +step:1035/3242 train_loss:3.4874 train_time:2172280ms step_avg:2119.30ms +step:1036/3242 train_loss:3.5787 train_time:2174401ms step_avg:2119.30ms +step:1037/3242 train_loss:3.6124 train_time:2176526ms step_avg:2119.30ms +step:1038/3242 train_loss:3.8208 train_time:2178644ms step_avg:2119.30ms +step:1039/3242 train_loss:4.6145 train_time:2180755ms step_avg:2119.30ms +step:1040/3242 train_loss:3.5697 train_time:2182877ms step_avg:2119.30ms +step:1041/3242 train_loss:3.5242 train_time:2184997ms step_avg:2119.30ms +step:1042/3242 train_loss:3.5934 train_time:2187121ms step_avg:2119.30ms +step:1043/3242 train_loss:3.4588 train_time:2189241ms step_avg:2119.30ms +step:1044/3242 train_loss:3.4520 train_time:2191359ms step_avg:2119.30ms +step:1045/3242 train_loss:3.4741 train_time:2193475ms step_avg:2119.30ms +step:1046/3242 train_loss:3.5180 train_time:2195593ms step_avg:2119.30ms +step:1047/3242 train_loss:3.8758 train_time:2197713ms step_avg:2119.30ms +step:1048/3242 train_loss:3.7143 train_time:2199839ms step_avg:2119.31ms +step:1049/3242 train_loss:4.0948 train_time:2201952ms step_avg:2119.30ms +step:1050/3242 train_loss:3.6880 train_time:2204073ms step_avg:2119.30ms +step:1051/3242 train_loss:3.6486 train_time:2206190ms step_avg:2119.30ms +step:1052/3242 train_loss:3.4506 train_time:2208310ms step_avg:2119.30ms +step:1053/3242 train_loss:3.6238 train_time:2210428ms step_avg:2119.30ms +step:1054/3242 train_loss:3.4204 train_time:2212554ms step_avg:2119.30ms +step:1055/3242 train_loss:3.6798 train_time:2214671ms step_avg:2119.30ms +step:1056/3242 train_loss:3.4259 train_time:2216790ms step_avg:2119.30ms +step:1057/3242 train_loss:3.5434 train_time:2218900ms step_avg:2119.29ms +step:1058/3242 train_loss:3.8643 train_time:2221018ms step_avg:2119.29ms +step:1059/3242 train_loss:3.6451 train_time:2223137ms step_avg:2119.29ms +step:1060/3242 train_loss:3.3732 train_time:2225260ms step_avg:2119.30ms +step:1061/3242 train_loss:3.6146 train_time:2227380ms step_avg:2119.30ms +step:1062/3242 train_loss:3.6235 train_time:2229495ms step_avg:2119.29ms +step:1063/3242 train_loss:3.4575 train_time:2231619ms step_avg:2119.30ms +step:1064/3242 train_loss:3.4760 train_time:2233731ms step_avg:2119.29ms +step:1065/3242 train_loss:3.5589 train_time:2235853ms step_avg:2119.29ms +step:1066/3242 train_loss:3.4660 train_time:2237976ms step_avg:2119.30ms +step:1067/3242 train_loss:3.4169 train_time:2240101ms step_avg:2119.30ms +step:1068/3242 train_loss:3.6305 train_time:2242216ms step_avg:2119.30ms +step:1069/3242 train_loss:3.2561 train_time:2244337ms step_avg:2119.30ms +step:1070/3242 train_loss:3.5729 train_time:2246462ms step_avg:2119.30ms +step:1071/3242 train_loss:3.0296 train_time:2248579ms step_avg:2119.30ms +step:1072/3242 train_loss:3.8631 train_time:2250696ms step_avg:2119.30ms +step:1073/3242 train_loss:3.8382 train_time:2252821ms step_avg:2119.30ms +step:1074/3242 train_loss:3.5071 train_time:2254934ms step_avg:2119.30ms +step:1075/3242 train_loss:3.8450 train_time:2257057ms step_avg:2119.30ms +step:1076/3242 train_loss:3.6455 train_time:2259177ms step_avg:2119.30ms +step:1077/3242 train_loss:3.5085 train_time:2261299ms step_avg:2119.31ms +step:1078/3242 train_loss:3.6840 train_time:2263416ms step_avg:2119.30ms +step:1079/3242 train_loss:3.6105 train_time:2265536ms step_avg:2119.30ms +step:1080/3242 train_loss:3.3952 train_time:2267649ms step_avg:2119.30ms +step:1081/3242 train_loss:3.4100 train_time:2269770ms step_avg:2119.30ms +step:1082/3242 train_loss:3.7363 train_time:2271888ms step_avg:2119.30ms +step:1083/3242 train_loss:3.5851 train_time:2274014ms step_avg:2119.31ms +step:1084/3242 train_loss:3.7832 train_time:2276130ms step_avg:2119.30ms +step:1085/3242 train_loss:3.8225 train_time:2278247ms step_avg:2119.30ms +step:1086/3242 train_loss:3.5429 train_time:2280367ms step_avg:2119.30ms +step:1087/3242 train_loss:3.6127 train_time:2282485ms step_avg:2119.30ms +step:1088/3242 train_loss:3.3568 train_time:2284606ms step_avg:2119.30ms +step:1089/3242 train_loss:3.5967 train_time:2286727ms step_avg:2119.30ms +step:1090/3242 train_loss:3.4874 train_time:2288849ms step_avg:2119.30ms +step:1091/3242 train_loss:3.5156 train_time:2290964ms step_avg:2119.30ms +step:1092/3242 train_loss:3.8633 train_time:2293085ms step_avg:2119.30ms +step:1093/3242 train_loss:3.4986 train_time:2295203ms step_avg:2119.30ms +step:1094/3242 train_loss:3.5968 train_time:2297322ms step_avg:2119.30ms +step:1095/3242 train_loss:3.4326 train_time:2299442ms step_avg:2119.30ms +step:1096/3242 train_loss:3.7871 train_time:2301559ms step_avg:2119.30ms +step:1097/3242 train_loss:3.5727 train_time:2303681ms step_avg:2119.30ms +step:1098/3242 train_loss:3.5455 train_time:2305797ms step_avg:2119.30ms +step:1099/3242 train_loss:3.5770 train_time:2307921ms step_avg:2119.30ms +step:1100/3242 train_loss:3.3614 train_time:2310042ms step_avg:2119.30ms +step:1101/3242 train_loss:3.4800 train_time:2312161ms step_avg:2119.30ms +step:1102/3242 train_loss:3.6338 train_time:2314281ms step_avg:2119.31ms +step:1103/3242 train_loss:3.6006 train_time:2316395ms step_avg:2119.30ms +step:1104/3242 train_loss:3.4288 train_time:2318513ms step_avg:2119.30ms +step:1105/3242 train_loss:3.5449 train_time:2320636ms step_avg:2119.30ms +step:1106/3242 train_loss:3.5065 train_time:2322753ms step_avg:2119.30ms +step:1107/3242 train_loss:3.7078 train_time:2324882ms step_avg:2119.31ms +step:1108/3242 train_loss:3.3299 train_time:2326995ms step_avg:2119.30ms +step:1109/3242 train_loss:3.6357 train_time:2329112ms step_avg:2119.30ms +step:1110/3242 train_loss:3.5643 train_time:2331237ms step_avg:2119.31ms +step:1111/3242 train_loss:3.5196 train_time:2333356ms step_avg:2119.31ms +step:1112/3242 train_loss:3.5331 train_time:2335474ms step_avg:2119.30ms +step:1113/3242 train_loss:3.6045 train_time:2337591ms step_avg:2119.30ms +step:1114/3242 train_loss:3.9145 train_time:2339711ms step_avg:2119.30ms +step:1115/3242 train_loss:3.5937 train_time:2341828ms step_avg:2119.30ms +step:1116/3242 train_loss:3.6061 train_time:2343947ms step_avg:2119.30ms +step:1117/3242 train_loss:3.4123 train_time:2346070ms step_avg:2119.30ms +step:1118/3242 train_loss:3.5052 train_time:2348187ms step_avg:2119.30ms +step:1119/3242 train_loss:3.5586 train_time:2350310ms step_avg:2119.31ms +step:1120/3242 train_loss:3.5619 train_time:2352428ms step_avg:2119.30ms +step:1121/3242 train_loss:3.5678 train_time:2354541ms step_avg:2119.30ms +step:1122/3242 train_loss:3.6390 train_time:2356663ms step_avg:2119.30ms +step:1123/3242 train_loss:3.7446 train_time:2358783ms step_avg:2119.30ms +step:1124/3242 train_loss:3.5192 train_time:2360901ms step_avg:2119.30ms +step:1125/3242 train_loss:3.7237 train_time:2363023ms step_avg:2119.30ms +step:1125/3242 val_loss:3.5712 train_time:2363435ms step_avg:2119.67ms +step:1126/3242 train_loss:3.4222 train_time:2365155ms step_avg:2119.31ms +step:1127/3242 train_loss:3.3171 train_time:2367272ms step_avg:2119.31ms +step:1128/3242 train_loss:3.5963 train_time:2369392ms step_avg:2119.31ms +step:1129/3242 train_loss:4.5737 train_time:2371513ms step_avg:2119.31ms +step:1130/3242 train_loss:3.4964 train_time:2373630ms step_avg:2119.31ms +step:1131/3242 train_loss:3.5247 train_time:2375744ms step_avg:2119.31ms +step:1132/3242 train_loss:3.8017 train_time:2377865ms step_avg:2119.31ms +step:1133/3242 train_loss:3.6554 train_time:2379990ms step_avg:2119.31ms +step:1134/3242 train_loss:3.6900 train_time:2382110ms step_avg:2119.31ms +step:1135/3242 train_loss:3.8141 train_time:2384228ms step_avg:2119.31ms +step:1136/3242 train_loss:3.6174 train_time:2386348ms step_avg:2119.31ms +step:1137/3242 train_loss:3.7580 train_time:2388464ms step_avg:2119.31ms +step:1138/3242 train_loss:3.7507 train_time:2390586ms step_avg:2119.31ms +step:1139/3242 train_loss:3.7239 train_time:2392701ms step_avg:2119.31ms +step:1140/3242 train_loss:3.5857 train_time:2394822ms step_avg:2119.31ms +step:1141/3242 train_loss:3.6713 train_time:2396940ms step_avg:2119.31ms +step:1142/3242 train_loss:3.7021 train_time:2399061ms step_avg:2119.31ms +step:1143/3242 train_loss:3.5604 train_time:2401302ms step_avg:2119.42ms +step:1144/3242 train_loss:3.8220 train_time:2403423ms step_avg:2119.42ms +step:1145/3242 train_loss:2.9723 train_time:2405541ms step_avg:2119.42ms +step:1146/3242 train_loss:3.7381 train_time:2407659ms step_avg:2119.42ms +step:1147/3242 train_loss:3.4243 train_time:2409776ms step_avg:2119.42ms +step:1148/3242 train_loss:3.9124 train_time:2411898ms step_avg:2119.42ms +step:1149/3242 train_loss:3.6988 train_time:2414013ms step_avg:2119.41ms +step:1150/3242 train_loss:3.7007 train_time:2416137ms step_avg:2119.42ms +step:1151/3242 train_loss:3.6303 train_time:2418255ms step_avg:2119.42ms +step:1152/3242 train_loss:3.5856 train_time:2420378ms step_avg:2119.42ms +step:1153/3242 train_loss:3.6139 train_time:2422494ms step_avg:2119.42ms +step:1154/3242 train_loss:4.6753 train_time:2424611ms step_avg:2119.42ms +step:1155/3242 train_loss:3.7559 train_time:2426731ms step_avg:2119.42ms +step:1156/3242 train_loss:3.1894 train_time:2428850ms step_avg:2119.42ms +step:1157/3242 train_loss:3.5840 train_time:2430967ms step_avg:2119.41ms +step:1158/3242 train_loss:3.5604 train_time:2433097ms step_avg:2119.42ms +step:1159/3242 train_loss:3.7171 train_time:2435213ms step_avg:2119.42ms +step:1160/3242 train_loss:3.4358 train_time:2437340ms step_avg:2119.43ms +step:1161/3242 train_loss:3.6057 train_time:2439459ms step_avg:2119.43ms +step:1162/3242 train_loss:3.6014 train_time:2441576ms step_avg:2119.42ms +step:1163/3242 train_loss:3.6059 train_time:2443700ms step_avg:2119.43ms +step:1164/3242 train_loss:3.5034 train_time:2445819ms step_avg:2119.43ms +step:1165/3242 train_loss:3.6211 train_time:2447931ms step_avg:2119.42ms +step:1166/3242 train_loss:3.7171 train_time:2450048ms step_avg:2119.42ms +step:1167/3242 train_loss:3.4919 train_time:2452178ms step_avg:2119.43ms +step:1168/3242 train_loss:3.4724 train_time:2454292ms step_avg:2119.42ms +step:1169/3242 train_loss:3.6938 train_time:2456419ms step_avg:2119.43ms +step:1170/3242 train_loss:3.5431 train_time:2458536ms step_avg:2119.43ms +step:1171/3242 train_loss:3.5529 train_time:2460650ms step_avg:2119.42ms +step:1172/3242 train_loss:3.6893 train_time:2462775ms step_avg:2119.43ms +step:1173/3242 train_loss:3.5547 train_time:2464894ms step_avg:2119.43ms +step:1174/3242 train_loss:3.4675 train_time:2467008ms step_avg:2119.42ms +step:1175/3242 train_loss:3.5378 train_time:2469130ms step_avg:2119.42ms +step:1176/3242 train_loss:3.5584 train_time:2471252ms step_avg:2119.43ms +step:1177/3242 train_loss:3.3665 train_time:2473368ms step_avg:2119.42ms +step:1178/3242 train_loss:4.0401 train_time:2475486ms step_avg:2119.42ms +step:1179/3242 train_loss:3.5149 train_time:2477606ms step_avg:2119.42ms +step:1180/3242 train_loss:3.4660 train_time:2479728ms step_avg:2119.43ms +step:1181/3242 train_loss:3.5419 train_time:2481847ms step_avg:2119.43ms +step:1182/3242 train_loss:3.8335 train_time:2483968ms step_avg:2119.43ms +step:1183/3242 train_loss:3.4009 train_time:2486087ms step_avg:2119.43ms +step:1184/3242 train_loss:3.6214 train_time:2488201ms step_avg:2119.42ms +step:1185/3242 train_loss:3.7447 train_time:2490327ms step_avg:2119.43ms +step:1186/3242 train_loss:3.8239 train_time:2492448ms step_avg:2119.43ms +step:1187/3242 train_loss:3.5110 train_time:2494563ms step_avg:2119.43ms +step:1188/3242 train_loss:3.4769 train_time:2496679ms step_avg:2119.42ms +step:1189/3242 train_loss:3.5478 train_time:2498803ms step_avg:2119.43ms +step:1190/3242 train_loss:3.3761 train_time:2500919ms step_avg:2119.42ms +step:1191/3242 train_loss:3.5257 train_time:2503036ms step_avg:2119.42ms +step:1192/3242 train_loss:3.6202 train_time:2505159ms step_avg:2119.42ms +step:1193/3242 train_loss:3.2422 train_time:2507283ms step_avg:2119.43ms +step:1194/3242 train_loss:3.7624 train_time:2509392ms step_avg:2119.42ms +step:1195/3242 train_loss:3.4611 train_time:2511513ms step_avg:2119.42ms +step:1196/3242 train_loss:3.5674 train_time:2513640ms step_avg:2119.43ms +step:1197/3242 train_loss:3.4500 train_time:2515759ms step_avg:2119.43ms +step:1198/3242 train_loss:3.7972 train_time:2517879ms step_avg:2119.43ms +step:1199/3242 train_loss:3.5678 train_time:2519998ms step_avg:2119.43ms +step:1200/3242 train_loss:3.6457 train_time:2522114ms step_avg:2119.42ms +step:1201/3242 train_loss:3.8743 train_time:2524230ms step_avg:2119.42ms +step:1202/3242 train_loss:3.3979 train_time:2526349ms step_avg:2119.42ms +step:1203/3242 train_loss:3.4073 train_time:2528463ms step_avg:2119.42ms +step:1204/3242 train_loss:3.6087 train_time:2530584ms step_avg:2119.42ms +step:1205/3242 train_loss:3.6465 train_time:2532708ms step_avg:2119.42ms +step:1206/3242 train_loss:3.3948 train_time:2534827ms step_avg:2119.42ms +step:1207/3242 train_loss:3.4472 train_time:2536953ms step_avg:2119.43ms +step:1208/3242 train_loss:3.6627 train_time:2539063ms step_avg:2119.42ms +step:1209/3242 train_loss:3.4746 train_time:2541192ms step_avg:2119.43ms +step:1210/3242 train_loss:3.4973 train_time:2543313ms step_avg:2119.43ms +step:1211/3242 train_loss:3.4847 train_time:2545431ms step_avg:2119.43ms +step:1212/3242 train_loss:3.9913 train_time:2547545ms step_avg:2119.42ms +step:1213/3242 train_loss:3.4397 train_time:2549668ms step_avg:2119.42ms +step:1214/3242 train_loss:3.7198 train_time:2551789ms step_avg:2119.43ms +step:1215/3242 train_loss:3.4647 train_time:2553906ms step_avg:2119.42ms +step:1216/3242 train_loss:3.6345 train_time:2556022ms step_avg:2119.42ms +step:1217/3242 train_loss:3.6817 train_time:2558139ms step_avg:2119.42ms +step:1218/3242 train_loss:3.5143 train_time:2560261ms step_avg:2119.42ms +step:1219/3242 train_loss:3.7548 train_time:2562383ms step_avg:2119.42ms +step:1220/3242 train_loss:3.6407 train_time:2564498ms step_avg:2119.42ms +step:1221/3242 train_loss:3.7630 train_time:2566618ms step_avg:2119.42ms +step:1222/3242 train_loss:3.5011 train_time:2568738ms step_avg:2119.42ms +step:1223/3242 train_loss:3.7654 train_time:2570855ms step_avg:2119.42ms +step:1224/3242 train_loss:3.4200 train_time:2572972ms step_avg:2119.42ms +step:1225/3242 train_loss:3.8084 train_time:2575098ms step_avg:2119.42ms +step:1226/3242 train_loss:3.5445 train_time:2577216ms step_avg:2119.42ms +step:1227/3242 train_loss:3.4068 train_time:2579334ms step_avg:2119.42ms +step:1228/3242 train_loss:3.4087 train_time:2581454ms step_avg:2119.42ms +step:1229/3242 train_loss:3.5784 train_time:2583578ms step_avg:2119.42ms +step:1230/3242 train_loss:3.5691 train_time:2585696ms step_avg:2119.42ms +step:1231/3242 train_loss:3.5769 train_time:2587813ms step_avg:2119.42ms +step:1232/3242 train_loss:3.6936 train_time:2589931ms step_avg:2119.42ms +step:1233/3242 train_loss:3.3627 train_time:2592049ms step_avg:2119.42ms +step:1234/3242 train_loss:3.3558 train_time:2594173ms step_avg:2119.42ms +step:1235/3242 train_loss:3.5975 train_time:2596289ms step_avg:2119.42ms +step:1236/3242 train_loss:3.6947 train_time:2598409ms step_avg:2119.42ms +step:1237/3242 train_loss:3.4807 train_time:2600534ms step_avg:2119.42ms +step:1238/3242 train_loss:3.9756 train_time:2602647ms step_avg:2119.42ms +step:1239/3242 train_loss:3.8368 train_time:2604772ms step_avg:2119.42ms +step:1240/3242 train_loss:3.4694 train_time:2606886ms step_avg:2119.42ms +step:1241/3242 train_loss:3.1258 train_time:2609008ms step_avg:2119.42ms +step:1242/3242 train_loss:3.7088 train_time:2611127ms step_avg:2119.42ms +step:1243/3242 train_loss:3.5178 train_time:2613247ms step_avg:2119.42ms +step:1244/3242 train_loss:3.4433 train_time:2615365ms step_avg:2119.42ms +step:1245/3242 train_loss:4.2600 train_time:2617486ms step_avg:2119.42ms +step:1246/3242 train_loss:3.4679 train_time:2619609ms step_avg:2119.42ms +step:1247/3242 train_loss:3.5349 train_time:2621730ms step_avg:2119.43ms +step:1248/3242 train_loss:3.5735 train_time:2623843ms step_avg:2119.42ms +step:1249/3242 train_loss:3.5275 train_time:2625963ms step_avg:2119.42ms +step:1250/3242 train_loss:3.3922 train_time:2628088ms step_avg:2119.43ms +step:1250/3242 val_loss:3.5432 train_time:2628502ms step_avg:2119.76ms +step:1251/3242 train_loss:3.4794 train_time:2630218ms step_avg:2119.43ms +step:1252/3242 train_loss:3.5771 train_time:2632344ms step_avg:2119.44ms +step:1253/3242 train_loss:3.4726 train_time:2634459ms step_avg:2119.44ms +step:1254/3242 train_loss:4.3853 train_time:2636582ms step_avg:2119.44ms +step:1255/3242 train_loss:3.5874 train_time:2638699ms step_avg:2119.44ms +step:1256/3242 train_loss:3.8632 train_time:2640818ms step_avg:2119.44ms +step:1257/3242 train_loss:3.4583 train_time:2642935ms step_avg:2119.43ms +step:1258/3242 train_loss:3.6661 train_time:2645055ms step_avg:2119.44ms +step:1259/3242 train_loss:3.6119 train_time:2647179ms step_avg:2119.44ms +step:1260/3242 train_loss:3.4316 train_time:2649294ms step_avg:2119.43ms +step:1261/3242 train_loss:3.5735 train_time:2651411ms step_avg:2119.43ms +step:1262/3242 train_loss:3.5316 train_time:2653534ms step_avg:2119.44ms +step:1263/3242 train_loss:3.4837 train_time:2655653ms step_avg:2119.44ms +step:1264/3242 train_loss:3.4914 train_time:2657778ms step_avg:2119.44ms +step:1265/3242 train_loss:3.4733 train_time:2659895ms step_avg:2119.44ms +step:1266/3242 train_loss:3.4918 train_time:2662015ms step_avg:2119.44ms +step:1267/3242 train_loss:3.5117 train_time:2664129ms step_avg:2119.43ms +step:1268/3242 train_loss:3.0533 train_time:2666250ms step_avg:2119.44ms +step:1269/3242 train_loss:3.7279 train_time:2668368ms step_avg:2119.43ms +step:1270/3242 train_loss:3.5275 train_time:2670493ms step_avg:2119.44ms +step:1271/3242 train_loss:3.5435 train_time:2672613ms step_avg:2119.44ms +step:1272/3242 train_loss:3.3769 train_time:2674727ms step_avg:2119.43ms +step:1273/3242 train_loss:3.7320 train_time:2676849ms step_avg:2119.44ms +step:1274/3242 train_loss:3.5659 train_time:2678966ms step_avg:2119.44ms +step:1275/3242 train_loss:3.7663 train_time:2681084ms step_avg:2119.43ms +step:1276/3242 train_loss:3.7146 train_time:2683201ms step_avg:2119.43ms +step:1277/3242 train_loss:3.5604 train_time:2685323ms step_avg:2119.43ms +step:1278/3242 train_loss:3.5247 train_time:2687448ms step_avg:2119.44ms +step:1279/3242 train_loss:3.3830 train_time:2689568ms step_avg:2119.44ms +step:1280/3242 train_loss:3.1821 train_time:2691685ms step_avg:2119.44ms +step:1281/3242 train_loss:3.3225 train_time:2693804ms step_avg:2119.44ms +step:1282/3242 train_loss:3.4769 train_time:2695917ms step_avg:2119.43ms +step:1283/3242 train_loss:3.9185 train_time:2698031ms step_avg:2119.43ms +step:1284/3242 train_loss:3.5667 train_time:2700158ms step_avg:2119.43ms +step:1285/3242 train_loss:3.3996 train_time:2702272ms step_avg:2119.43ms +step:1286/3242 train_loss:3.2859 train_time:2704390ms step_avg:2119.43ms +step:1287/3242 train_loss:3.5902 train_time:2706515ms step_avg:2119.43ms +step:1288/3242 train_loss:3.6008 train_time:2708628ms step_avg:2119.43ms +step:1289/3242 train_loss:3.7610 train_time:2710751ms step_avg:2119.43ms +step:1290/3242 train_loss:3.5434 train_time:2712868ms step_avg:2119.43ms +step:1291/3242 train_loss:3.6800 train_time:2714991ms step_avg:2119.43ms +step:1292/3242 train_loss:3.5810 train_time:2717110ms step_avg:2119.43ms +step:1293/3242 train_loss:3.4305 train_time:2719235ms step_avg:2119.44ms +step:1294/3242 train_loss:3.3521 train_time:2721352ms step_avg:2119.43ms +step:1295/3242 train_loss:3.5375 train_time:2723478ms step_avg:2119.44ms +step:1296/3242 train_loss:3.6263 train_time:2725614ms step_avg:2119.45ms +step:1297/3242 train_loss:3.7753 train_time:2727727ms step_avg:2119.45ms +step:1298/3242 train_loss:3.6501 train_time:2729848ms step_avg:2119.45ms +step:1299/3242 train_loss:3.5443 train_time:2731965ms step_avg:2119.45ms +step:1300/3242 train_loss:3.3997 train_time:2734086ms step_avg:2119.45ms +step:1301/3242 train_loss:3.9826 train_time:2736204ms step_avg:2119.45ms +step:1302/3242 train_loss:3.5735 train_time:2738319ms step_avg:2119.44ms +step:1303/3242 train_loss:3.7783 train_time:2740443ms step_avg:2119.45ms +step:1304/3242 train_loss:3.7301 train_time:2742564ms step_avg:2119.45ms +step:1305/3242 train_loss:3.4782 train_time:2744679ms step_avg:2119.44ms +step:1306/3242 train_loss:3.6159 train_time:2746797ms step_avg:2119.44ms +step:1307/3242 train_loss:3.5441 train_time:2748916ms step_avg:2119.44ms +step:1308/3242 train_loss:3.5312 train_time:2751034ms step_avg:2119.44ms +step:1309/3242 train_loss:3.4385 train_time:2753153ms step_avg:2119.44ms +step:1310/3242 train_loss:3.7376 train_time:2755283ms step_avg:2119.45ms +step:1311/3242 train_loss:3.5171 train_time:2757401ms step_avg:2119.45ms +step:1312/3242 train_loss:3.4898 train_time:2759521ms step_avg:2119.45ms +step:1313/3242 train_loss:3.6043 train_time:2761629ms step_avg:2119.44ms +step:1314/3242 train_loss:3.8331 train_time:2763758ms step_avg:2119.45ms +step:1315/3242 train_loss:3.6389 train_time:2765878ms step_avg:2119.45ms +step:1316/3242 train_loss:3.6119 train_time:2767991ms step_avg:2119.44ms +step:1317/3242 train_loss:3.5882 train_time:2770110ms step_avg:2119.44ms +step:1318/3242 train_loss:3.4280 train_time:2772233ms step_avg:2119.44ms +step:1319/3242 train_loss:3.6650 train_time:2774349ms step_avg:2119.44ms +step:1320/3242 train_loss:3.5010 train_time:2776465ms step_avg:2119.44ms +step:1321/3242 train_loss:3.9148 train_time:2778590ms step_avg:2119.44ms +step:1322/3242 train_loss:3.6498 train_time:2780712ms step_avg:2119.45ms +step:1323/3242 train_loss:3.5482 train_time:2782830ms step_avg:2119.44ms +step:1324/3242 train_loss:3.6031 train_time:2784954ms step_avg:2119.45ms +step:1325/3242 train_loss:3.7721 train_time:2787066ms step_avg:2119.44ms +step:1326/3242 train_loss:3.5586 train_time:2789186ms step_avg:2119.44ms +step:1327/3242 train_loss:3.6579 train_time:2791300ms step_avg:2119.44ms +step:1328/3242 train_loss:3.4685 train_time:2793419ms step_avg:2119.44ms +step:1329/3242 train_loss:3.4617 train_time:2795544ms step_avg:2119.44ms +step:1330/3242 train_loss:3.4622 train_time:2797665ms step_avg:2119.44ms +step:1331/3242 train_loss:2.3700 train_time:2799775ms step_avg:2119.44ms +step:1332/3242 train_loss:3.8176 train_time:2801896ms step_avg:2119.44ms +step:1333/3242 train_loss:3.6154 train_time:2804021ms step_avg:2119.44ms +step:1334/3242 train_loss:3.5826 train_time:2806274ms step_avg:2119.54ms +step:1335/3242 train_loss:3.4535 train_time:2808396ms step_avg:2119.54ms +step:1336/3242 train_loss:3.7235 train_time:2810516ms step_avg:2119.54ms +step:1337/3242 train_loss:3.6270 train_time:2812634ms step_avg:2119.54ms +step:1338/3242 train_loss:3.4176 train_time:2814753ms step_avg:2119.54ms +step:1339/3242 train_loss:3.6073 train_time:2816880ms step_avg:2119.55ms +step:1340/3242 train_loss:3.6850 train_time:2818993ms step_avg:2119.54ms +step:1341/3242 train_loss:3.7816 train_time:2821119ms step_avg:2119.55ms +step:1342/3242 train_loss:3.4146 train_time:2823234ms step_avg:2119.54ms +step:1343/3242 train_loss:3.5789 train_time:2825354ms step_avg:2119.55ms +step:1344/3242 train_loss:3.7870 train_time:2827479ms step_avg:2119.55ms +step:1345/3242 train_loss:3.6296 train_time:2829601ms step_avg:2119.55ms +step:1346/3242 train_loss:3.7825 train_time:2831718ms step_avg:2119.55ms +step:1347/3242 train_loss:3.6510 train_time:2833829ms step_avg:2119.54ms +step:1348/3242 train_loss:3.4786 train_time:2835952ms step_avg:2119.55ms +step:1349/3242 train_loss:3.4614 train_time:2838074ms step_avg:2119.55ms +step:1350/3242 train_loss:3.4493 train_time:2840187ms step_avg:2119.54ms +step:1351/3242 train_loss:3.4514 train_time:2842310ms step_avg:2119.55ms +step:1352/3242 train_loss:3.5321 train_time:2844428ms step_avg:2119.54ms +step:1353/3242 train_loss:3.6705 train_time:2846555ms step_avg:2119.55ms +step:1354/3242 train_loss:3.7107 train_time:2848666ms step_avg:2119.54ms +step:1355/3242 train_loss:3.5623 train_time:2850788ms step_avg:2119.55ms +step:1356/3242 train_loss:3.5268 train_time:2852906ms step_avg:2119.54ms +step:1357/3242 train_loss:3.3522 train_time:2855023ms step_avg:2119.54ms +step:1358/3242 train_loss:3.5544 train_time:2857150ms step_avg:2119.55ms +step:1359/3242 train_loss:3.5509 train_time:2859265ms step_avg:2119.54ms +step:1360/3242 train_loss:3.9544 train_time:2861387ms step_avg:2119.55ms +step:1361/3242 train_loss:3.2525 train_time:2863508ms step_avg:2119.55ms +step:1362/3242 train_loss:3.6194 train_time:2865619ms step_avg:2119.54ms +step:1363/3242 train_loss:3.6623 train_time:2867744ms step_avg:2119.55ms +step:1364/3242 train_loss:3.3791 train_time:2869860ms step_avg:2119.54ms +step:1365/3242 train_loss:3.5337 train_time:2871985ms step_avg:2119.55ms +step:1366/3242 train_loss:3.5298 train_time:2874097ms step_avg:2119.54ms +step:1367/3242 train_loss:3.6327 train_time:2876222ms step_avg:2119.54ms +step:1368/3242 train_loss:3.5308 train_time:2878339ms step_avg:2119.54ms +step:1369/3242 train_loss:3.7765 train_time:2880457ms step_avg:2119.54ms +step:1370/3242 train_loss:3.6394 train_time:2882577ms step_avg:2119.54ms +step:1371/3242 train_loss:3.6188 train_time:2884700ms step_avg:2119.54ms +step:1372/3242 train_loss:3.6601 train_time:2886820ms step_avg:2119.54ms +step:1373/3242 train_loss:3.7001 train_time:2888935ms step_avg:2119.54ms +step:1374/3242 train_loss:3.5430 train_time:2891055ms step_avg:2119.54ms +step:1375/3242 train_loss:3.2875 train_time:2893176ms step_avg:2119.54ms +step:1375/3242 val_loss:3.5215 train_time:2893588ms step_avg:2119.84ms +step:1376/3242 train_loss:3.5626 train_time:2895307ms step_avg:2119.55ms +step:1377/3242 train_loss:3.4475 train_time:2897421ms step_avg:2119.55ms +step:1378/3242 train_loss:3.9269 train_time:2899539ms step_avg:2119.55ms +step:1379/3242 train_loss:3.7291 train_time:2901668ms step_avg:2119.55ms +step:1380/3242 train_loss:3.6049 train_time:2903784ms step_avg:2119.55ms +step:1381/3242 train_loss:3.5020 train_time:2905912ms step_avg:2119.56ms +step:1382/3242 train_loss:3.4637 train_time:2908034ms step_avg:2119.56ms +step:1383/3242 train_loss:3.5742 train_time:2910147ms step_avg:2119.55ms +step:1384/3242 train_loss:3.6169 train_time:2912262ms step_avg:2119.55ms +step:1385/3242 train_loss:3.5125 train_time:2914391ms step_avg:2119.56ms +step:1386/3242 train_loss:3.3839 train_time:2916511ms step_avg:2119.56ms +step:1387/3242 train_loss:3.5596 train_time:2918625ms step_avg:2119.55ms +step:1388/3242 train_loss:3.5603 train_time:2920748ms step_avg:2119.56ms +step:1389/3242 train_loss:3.6231 train_time:2922864ms step_avg:2119.55ms +step:1390/3242 train_loss:3.5814 train_time:2924985ms step_avg:2119.55ms +step:1391/3242 train_loss:3.6344 train_time:2927106ms step_avg:2119.56ms +step:1392/3242 train_loss:3.3785 train_time:2929223ms step_avg:2119.55ms +step:1393/3242 train_loss:3.5254 train_time:2931343ms step_avg:2119.55ms +step:1394/3242 train_loss:3.5269 train_time:2933466ms step_avg:2119.56ms +step:1395/3242 train_loss:3.4253 train_time:2935584ms step_avg:2119.55ms +step:1396/3242 train_loss:3.6913 train_time:2937700ms step_avg:2119.55ms +step:1397/3242 train_loss:3.4611 train_time:2939822ms step_avg:2119.55ms +step:1398/3242 train_loss:3.5714 train_time:2941940ms step_avg:2119.55ms +step:1399/3242 train_loss:3.4527 train_time:2944049ms step_avg:2119.55ms +step:1400/3242 train_loss:3.3223 train_time:2946174ms step_avg:2119.55ms +step:1401/3242 train_loss:3.6172 train_time:2948298ms step_avg:2119.55ms +step:1402/3242 train_loss:3.5746 train_time:2950415ms step_avg:2119.55ms +step:1403/3242 train_loss:3.4979 train_time:2952530ms step_avg:2119.55ms +step:1404/3242 train_loss:3.5084 train_time:2954652ms step_avg:2119.55ms +step:1405/3242 train_loss:3.4444 train_time:2956776ms step_avg:2119.55ms +step:1406/3242 train_loss:3.5037 train_time:2958890ms step_avg:2119.55ms +step:1407/3242 train_loss:3.5753 train_time:2961016ms step_avg:2119.55ms +step:1408/3242 train_loss:3.6770 train_time:2963133ms step_avg:2119.55ms +step:1409/3242 train_loss:3.5274 train_time:2965251ms step_avg:2119.55ms +step:1410/3242 train_loss:3.5074 train_time:2967373ms step_avg:2119.55ms +step:1411/3242 train_loss:3.4684 train_time:2969492ms step_avg:2119.55ms +step:1412/3242 train_loss:3.5453 train_time:2971609ms step_avg:2119.55ms +step:1413/3242 train_loss:3.5224 train_time:2973728ms step_avg:2119.55ms +step:1414/3242 train_loss:3.5551 train_time:2975842ms step_avg:2119.55ms +step:1415/3242 train_loss:3.5780 train_time:2977962ms step_avg:2119.55ms +step:1416/3242 train_loss:3.3814 train_time:2980090ms step_avg:2119.55ms +step:1417/3242 train_loss:3.4879 train_time:2982202ms step_avg:2119.55ms +step:1418/3242 train_loss:3.3540 train_time:2984320ms step_avg:2119.55ms +step:1419/3242 train_loss:3.4014 train_time:2986433ms step_avg:2119.54ms +step:1420/3242 train_loss:3.2492 train_time:2988559ms step_avg:2119.55ms +step:1421/3242 train_loss:3.3666 train_time:2990685ms step_avg:2119.55ms +step:1422/3242 train_loss:3.6282 train_time:2992799ms step_avg:2119.55ms +step:1423/3242 train_loss:3.7805 train_time:2994914ms step_avg:2119.54ms +step:1424/3242 train_loss:3.4328 train_time:2997033ms step_avg:2119.54ms +step:1425/3242 train_loss:3.7665 train_time:2999160ms step_avg:2119.55ms +step:1426/3242 train_loss:3.5677 train_time:3001283ms step_avg:2119.55ms +step:1427/3242 train_loss:3.6492 train_time:3003399ms step_avg:2119.55ms +step:1428/3242 train_loss:2.9029 train_time:3005522ms step_avg:2119.55ms +step:1429/3242 train_loss:3.7073 train_time:3007637ms step_avg:2119.55ms +step:1430/3242 train_loss:3.4946 train_time:3009754ms step_avg:2119.54ms +step:1431/3242 train_loss:3.4393 train_time:3011878ms step_avg:2119.55ms +step:1432/3242 train_loss:3.4545 train_time:3013991ms step_avg:2119.54ms +step:1433/3242 train_loss:3.9522 train_time:3016110ms step_avg:2119.54ms +step:1434/3242 train_loss:3.5870 train_time:3018248ms step_avg:2119.56ms +step:1435/3242 train_loss:3.3660 train_time:3020368ms step_avg:2119.56ms +step:1436/3242 train_loss:3.4627 train_time:3022485ms step_avg:2119.55ms +step:1437/3242 train_loss:3.4296 train_time:3024603ms step_avg:2119.55ms +step:1438/3242 train_loss:3.4148 train_time:3026730ms step_avg:2119.56ms +step:1439/3242 train_loss:3.3887 train_time:3028849ms step_avg:2119.56ms +step:1440/3242 train_loss:3.5383 train_time:3030969ms step_avg:2119.56ms +step:1441/3242 train_loss:3.5501 train_time:3033081ms step_avg:2119.55ms +step:1442/3242 train_loss:3.9349 train_time:3035201ms step_avg:2119.55ms +step:1443/3242 train_loss:3.4695 train_time:3037321ms step_avg:2119.55ms +step:1444/3242 train_loss:3.4914 train_time:3039439ms step_avg:2119.55ms +step:1445/3242 train_loss:3.8510 train_time:3041559ms step_avg:2119.55ms +step:1446/3242 train_loss:3.5035 train_time:3043670ms step_avg:2119.55ms +step:1447/3242 train_loss:3.5781 train_time:3045789ms step_avg:2119.55ms +step:1448/3242 train_loss:3.5157 train_time:3047915ms step_avg:2119.55ms +step:1449/3242 train_loss:3.2671 train_time:3050033ms step_avg:2119.55ms +step:1450/3242 train_loss:3.4581 train_time:3052151ms step_avg:2119.55ms +step:1451/3242 train_loss:3.4814 train_time:3054272ms step_avg:2119.55ms +step:1452/3242 train_loss:3.6637 train_time:3056391ms step_avg:2119.55ms +step:1453/3242 train_loss:3.8111 train_time:3058506ms step_avg:2119.55ms +step:1454/3242 train_loss:3.3967 train_time:3060626ms step_avg:2119.55ms +step:1455/3242 train_loss:3.8892 train_time:3062743ms step_avg:2119.55ms +step:1456/3242 train_loss:3.4635 train_time:3064868ms step_avg:2119.55ms +step:1457/3242 train_loss:3.5628 train_time:3066997ms step_avg:2119.56ms +step:1458/3242 train_loss:3.6426 train_time:3069112ms step_avg:2119.55ms +step:1459/3242 train_loss:3.4334 train_time:3071237ms step_avg:2119.56ms +step:1460/3242 train_loss:3.4241 train_time:3073361ms step_avg:2119.56ms +step:1461/3242 train_loss:3.4886 train_time:3075478ms step_avg:2119.56ms +step:1462/3242 train_loss:3.4892 train_time:3077599ms step_avg:2119.56ms +step:1463/3242 train_loss:3.3471 train_time:3079714ms step_avg:2119.56ms +step:1464/3242 train_loss:3.4696 train_time:3081830ms step_avg:2119.55ms +step:1465/3242 train_loss:3.4813 train_time:3083954ms step_avg:2119.56ms +step:1466/3242 train_loss:3.8915 train_time:3086071ms step_avg:2119.55ms +step:1467/3242 train_loss:3.4931 train_time:3088186ms step_avg:2119.55ms +step:1468/3242 train_loss:3.3641 train_time:3090303ms step_avg:2119.55ms +step:1469/3242 train_loss:3.3561 train_time:3092429ms step_avg:2119.55ms +step:1470/3242 train_loss:3.7418 train_time:3094543ms step_avg:2119.55ms +step:1471/3242 train_loss:3.6811 train_time:3096660ms step_avg:2119.55ms +step:1472/3242 train_loss:3.5484 train_time:3098782ms step_avg:2119.55ms +step:1473/3242 train_loss:3.6453 train_time:3100907ms step_avg:2119.55ms +step:1474/3242 train_loss:3.5341 train_time:3103023ms step_avg:2119.55ms +step:1475/3242 train_loss:3.4900 train_time:3105136ms step_avg:2119.55ms +step:1476/3242 train_loss:3.6018 train_time:3107260ms step_avg:2119.55ms +step:1477/3242 train_loss:3.4960 train_time:3109371ms step_avg:2119.54ms +step:1478/3242 train_loss:3.2174 train_time:3111496ms step_avg:2119.55ms +step:1479/3242 train_loss:3.4033 train_time:3113609ms step_avg:2119.54ms +step:1480/3242 train_loss:3.3916 train_time:3115739ms step_avg:2119.55ms +step:1481/3242 train_loss:3.5389 train_time:3117853ms step_avg:2119.55ms +step:1482/3242 train_loss:3.5434 train_time:3119980ms step_avg:2119.55ms +step:1483/3242 train_loss:3.5967 train_time:3122088ms step_avg:2119.54ms +step:1484/3242 train_loss:3.4237 train_time:3124217ms step_avg:2119.55ms +step:1485/3242 train_loss:3.6893 train_time:3126332ms step_avg:2119.55ms +step:1486/3242 train_loss:3.5042 train_time:3128452ms step_avg:2119.55ms +step:1487/3242 train_loss:3.4496 train_time:3130575ms step_avg:2119.55ms +step:1488/3242 train_loss:3.4919 train_time:3132698ms step_avg:2119.55ms +step:1489/3242 train_loss:3.5386 train_time:3134819ms step_avg:2119.55ms +step:1490/3242 train_loss:3.4732 train_time:3136940ms step_avg:2119.55ms +step:1491/3242 train_loss:3.3485 train_time:3139060ms step_avg:2119.55ms +step:1492/3242 train_loss:3.6244 train_time:3141179ms step_avg:2119.55ms +step:1493/3242 train_loss:3.3401 train_time:3143294ms step_avg:2119.55ms +step:1494/3242 train_loss:3.3554 train_time:3145417ms step_avg:2119.55ms +step:1495/3242 train_loss:3.6425 train_time:3147531ms step_avg:2119.55ms +step:1496/3242 train_loss:4.0349 train_time:3149650ms step_avg:2119.55ms +step:1497/3242 train_loss:3.5199 train_time:3151766ms step_avg:2119.55ms +step:1498/3242 train_loss:3.4453 train_time:3153887ms step_avg:2119.55ms +step:1499/3242 train_loss:3.4443 train_time:3156002ms step_avg:2119.54ms +step:1500/3242 train_loss:3.4518 train_time:3158117ms step_avg:2119.54ms +step:1500/3242 val_loss:3.5013 train_time:3158528ms step_avg:2119.82ms +step:1501/3242 train_loss:3.6804 train_time:3160245ms step_avg:2119.55ms +step:1502/3242 train_loss:3.4867 train_time:3162364ms step_avg:2119.55ms +step:1503/3242 train_loss:3.3369 train_time:3164482ms step_avg:2119.55ms +step:1504/3242 train_loss:3.3022 train_time:3166594ms step_avg:2119.54ms +step:1505/3242 train_loss:3.5241 train_time:3168717ms step_avg:2119.54ms +step:1506/3242 train_loss:3.3320 train_time:3170830ms step_avg:2119.54ms +step:1507/3242 train_loss:3.4995 train_time:3172958ms step_avg:2119.54ms +step:1508/3242 train_loss:3.3463 train_time:3175079ms step_avg:2119.55ms +step:1509/3242 train_loss:3.6294 train_time:3177204ms step_avg:2119.55ms +step:1510/3242 train_loss:3.6358 train_time:3179323ms step_avg:2119.55ms +step:1511/3242 train_loss:4.5220 train_time:3181436ms step_avg:2119.54ms +step:1512/3242 train_loss:3.5114 train_time:3183562ms step_avg:2119.55ms +step:1513/3242 train_loss:3.5323 train_time:3185671ms step_avg:2119.54ms +step:1514/3242 train_loss:3.6637 train_time:3187787ms step_avg:2119.54ms +step:1515/3242 train_loss:4.0173 train_time:3189902ms step_avg:2119.54ms +step:1516/3242 train_loss:3.4895 train_time:3192025ms step_avg:2119.54ms +step:1517/3242 train_loss:3.3780 train_time:3194140ms step_avg:2119.54ms +step:1518/3242 train_loss:3.4790 train_time:3196263ms step_avg:2119.54ms +step:1519/3242 train_loss:3.4447 train_time:3198389ms step_avg:2119.54ms +step:1520/3242 train_loss:3.5168 train_time:3200507ms step_avg:2119.54ms +step:1521/3242 train_loss:3.4681 train_time:3202630ms step_avg:2119.54ms +step:1522/3242 train_loss:3.3726 train_time:3204743ms step_avg:2119.54ms +step:1523/3242 train_loss:3.4782 train_time:3206860ms step_avg:2119.54ms +step:1524/3242 train_loss:3.5544 train_time:3209109ms step_avg:2119.62ms +step:1525/3242 train_loss:3.6310 train_time:3211226ms step_avg:2119.62ms +step:1526/3242 train_loss:3.5149 train_time:3213349ms step_avg:2119.62ms +step:1527/3242 train_loss:3.1567 train_time:3215466ms step_avg:2119.62ms +step:1528/3242 train_loss:3.5078 train_time:3217584ms step_avg:2119.62ms +step:1529/3242 train_loss:3.4400 train_time:3219700ms step_avg:2119.62ms +step:1530/3242 train_loss:3.5291 train_time:3221814ms step_avg:2119.61ms +step:1531/3242 train_loss:3.5079 train_time:3223937ms step_avg:2119.62ms +step:1532/3242 train_loss:3.6411 train_time:3226052ms step_avg:2119.61ms +step:1533/3242 train_loss:3.4735 train_time:3228168ms step_avg:2119.61ms +step:1534/3242 train_loss:3.7479 train_time:3230291ms step_avg:2119.61ms +step:1535/3242 train_loss:3.4634 train_time:3232412ms step_avg:2119.61ms +step:1536/3242 train_loss:3.5069 train_time:3234524ms step_avg:2119.61ms +step:1537/3242 train_loss:3.6776 train_time:3236649ms step_avg:2119.61ms +step:1538/3242 train_loss:3.3031 train_time:3238770ms step_avg:2119.61ms +step:1539/3242 train_loss:3.2183 train_time:3240884ms step_avg:2119.61ms +step:1540/3242 train_loss:3.0784 train_time:3243003ms step_avg:2119.61ms +step:1541/3242 train_loss:3.3254 train_time:3245125ms step_avg:2119.61ms +step:1542/3242 train_loss:3.5357 train_time:3247240ms step_avg:2119.61ms +step:1543/3242 train_loss:3.4749 train_time:3249357ms step_avg:2119.61ms +step:1544/3242 train_loss:3.5548 train_time:3251474ms step_avg:2119.61ms +step:1545/3242 train_loss:3.0271 train_time:3253599ms step_avg:2119.61ms +step:1546/3242 train_loss:3.4027 train_time:3255717ms step_avg:2119.61ms +step:1547/3242 train_loss:3.4852 train_time:3257831ms step_avg:2119.60ms +step:1548/3242 train_loss:3.4501 train_time:3259943ms step_avg:2119.60ms +step:1549/3242 train_loss:3.6736 train_time:3262075ms step_avg:2119.61ms +step:1550/3242 train_loss:3.4855 train_time:3264190ms step_avg:2119.60ms +step:1551/3242 train_loss:3.4167 train_time:3266316ms step_avg:2119.61ms +step:1552/3242 train_loss:3.9644 train_time:3268434ms step_avg:2119.61ms +step:1553/3242 train_loss:3.3997 train_time:3270545ms step_avg:2119.60ms +step:1554/3242 train_loss:3.5425 train_time:3272664ms step_avg:2119.60ms +step:1555/3242 train_loss:3.8517 train_time:3274787ms step_avg:2119.60ms +step:1556/3242 train_loss:3.9501 train_time:3276910ms step_avg:2119.61ms +step:1557/3242 train_loss:3.5611 train_time:3279034ms step_avg:2119.61ms +step:1558/3242 train_loss:3.5700 train_time:3281145ms step_avg:2119.60ms +step:1559/3242 train_loss:3.6422 train_time:3283264ms step_avg:2119.60ms +step:1560/3242 train_loss:3.4031 train_time:3285386ms step_avg:2119.60ms +step:1561/3242 train_loss:3.4448 train_time:3287499ms step_avg:2119.60ms +step:1562/3242 train_loss:3.4091 train_time:3289618ms step_avg:2119.60ms +step:1563/3242 train_loss:3.4406 train_time:3291741ms step_avg:2119.60ms +step:1564/3242 train_loss:3.3437 train_time:3293861ms step_avg:2119.60ms +step:1565/3242 train_loss:3.6543 train_time:3295976ms step_avg:2119.60ms +step:1566/3242 train_loss:3.4045 train_time:3298100ms step_avg:2119.60ms +step:1567/3242 train_loss:3.6421 train_time:3300226ms step_avg:2119.61ms +step:1568/3242 train_loss:3.2928 train_time:3302340ms step_avg:2119.60ms +step:1569/3242 train_loss:3.4262 train_time:3304460ms step_avg:2119.60ms +step:1570/3242 train_loss:3.6252 train_time:3306577ms step_avg:2119.60ms +step:1571/3242 train_loss:3.2900 train_time:3308710ms step_avg:2119.61ms +step:1572/3242 train_loss:3.4790 train_time:3310843ms step_avg:2119.62ms +step:1573/3242 train_loss:3.5151 train_time:3312961ms step_avg:2119.62ms +step:1574/3242 train_loss:3.1992 train_time:3315073ms step_avg:2119.61ms +step:1575/3242 train_loss:3.3729 train_time:3317198ms step_avg:2119.62ms +step:1576/3242 train_loss:3.2821 train_time:3319321ms step_avg:2119.62ms +step:1577/3242 train_loss:3.5039 train_time:3321430ms step_avg:2119.61ms +step:1578/3242 train_loss:3.3437 train_time:3323554ms step_avg:2119.61ms +step:1579/3242 train_loss:3.3757 train_time:3325678ms step_avg:2119.62ms +step:1580/3242 train_loss:3.5316 train_time:3327794ms step_avg:2119.61ms +step:1581/3242 train_loss:3.6728 train_time:3329917ms step_avg:2119.62ms +step:1582/3242 train_loss:3.4517 train_time:3332031ms step_avg:2119.61ms +step:1583/3242 train_loss:3.1316 train_time:3334151ms step_avg:2119.61ms +step:1584/3242 train_loss:3.2460 train_time:3336274ms step_avg:2119.61ms +step:1585/3242 train_loss:3.5522 train_time:3338389ms step_avg:2119.61ms +step:1586/3242 train_loss:3.4598 train_time:3340500ms step_avg:2119.61ms +step:1587/3242 train_loss:3.4384 train_time:3342621ms step_avg:2119.61ms +step:1588/3242 train_loss:3.3952 train_time:3344737ms step_avg:2119.60ms +step:1589/3242 train_loss:3.4211 train_time:3346852ms step_avg:2119.60ms +step:1590/3242 train_loss:3.5748 train_time:3348970ms step_avg:2119.60ms +step:1591/3242 train_loss:3.4220 train_time:3351096ms step_avg:2119.61ms +step:1592/3242 train_loss:3.2561 train_time:3353208ms step_avg:2119.60ms +step:1593/3242 train_loss:3.3668 train_time:3355327ms step_avg:2119.60ms +step:1594/3242 train_loss:3.5184 train_time:3357448ms step_avg:2119.60ms +step:1595/3242 train_loss:3.3802 train_time:3359573ms step_avg:2119.60ms +step:1596/3242 train_loss:3.5062 train_time:3361687ms step_avg:2119.60ms +step:1597/3242 train_loss:3.4616 train_time:3363814ms step_avg:2119.61ms +step:1598/3242 train_loss:3.3295 train_time:3365929ms step_avg:2119.60ms +step:1599/3242 train_loss:3.1897 train_time:3368046ms step_avg:2119.60ms +step:1600/3242 train_loss:3.3657 train_time:3370167ms step_avg:2119.60ms +step:1601/3242 train_loss:3.3028 train_time:3372285ms step_avg:2119.60ms +step:1602/3242 train_loss:3.4883 train_time:3374408ms step_avg:2119.60ms +step:1603/3242 train_loss:3.3894 train_time:3376526ms step_avg:2119.60ms +step:1604/3242 train_loss:3.4608 train_time:3378638ms step_avg:2119.60ms +step:1605/3242 train_loss:3.5879 train_time:3380762ms step_avg:2119.60ms +step:1606/3242 train_loss:3.3109 train_time:3382880ms step_avg:2119.60ms +step:1607/3242 train_loss:3.4666 train_time:3384996ms step_avg:2119.60ms +step:1608/3242 train_loss:3.3024 train_time:3387115ms step_avg:2119.60ms +step:1609/3242 train_loss:3.5168 train_time:3389240ms step_avg:2119.60ms +step:1610/3242 train_loss:3.5448 train_time:3391359ms step_avg:2119.60ms +step:1611/3242 train_loss:3.4959 train_time:3393481ms step_avg:2119.60ms +step:1612/3242 train_loss:3.4961 train_time:3395595ms step_avg:2119.60ms +step:1613/3242 train_loss:3.2505 train_time:3397720ms step_avg:2119.60ms +step:1614/3242 train_loss:3.5770 train_time:3399838ms step_avg:2119.60ms +step:1615/3242 train_loss:3.6026 train_time:3401946ms step_avg:2119.59ms +step:1616/3242 train_loss:3.6241 train_time:3404067ms step_avg:2119.59ms +step:1617/3242 train_loss:2.9955 train_time:3406186ms step_avg:2119.59ms +step:1618/3242 train_loss:3.6883 train_time:3408304ms step_avg:2119.59ms +step:1619/3242 train_loss:3.5715 train_time:3410422ms step_avg:2119.59ms +step:1620/3242 train_loss:3.4652 train_time:3412545ms step_avg:2119.59ms +step:1621/3242 train_loss:3.6468 train_time:3414661ms step_avg:2119.59ms +step:1622/3242 train_loss:3.4937 train_time:3416775ms step_avg:2119.59ms +step:1623/3242 train_loss:3.4295 train_time:3418899ms step_avg:2119.59ms +step:1624/3242 train_loss:3.5470 train_time:3421025ms step_avg:2119.59ms +step:1625/3242 train_loss:3.1697 train_time:3423143ms step_avg:2119.59ms +step:1625/3242 val_loss:3.4852 train_time:3423558ms step_avg:2119.85ms +step:1626/3242 train_loss:3.3646 train_time:3425276ms step_avg:2119.60ms +step:1627/3242 train_loss:3.3040 train_time:3427387ms step_avg:2119.60ms +step:1628/3242 train_loss:3.5323 train_time:3429513ms step_avg:2119.60ms +step:1629/3242 train_loss:3.4612 train_time:3431622ms step_avg:2119.59ms +step:1630/3242 train_loss:3.4006 train_time:3433742ms step_avg:2119.59ms +step:1631/3242 train_loss:3.6162 train_time:3435866ms step_avg:2119.60ms +step:1632/3242 train_loss:3.3448 train_time:3437990ms step_avg:2119.60ms +step:1633/3242 train_loss:3.5994 train_time:3440102ms step_avg:2119.59ms +step:1634/3242 train_loss:3.4897 train_time:3442223ms step_avg:2119.60ms +step:1635/3242 train_loss:3.4569 train_time:3444348ms step_avg:2119.60ms +step:1636/3242 train_loss:3.4829 train_time:3446462ms step_avg:2119.60ms +step:1637/3242 train_loss:3.4521 train_time:3448582ms step_avg:2119.60ms +step:1638/3242 train_loss:3.4551 train_time:3450702ms step_avg:2119.60ms +step:1639/3242 train_loss:3.2560 train_time:3452818ms step_avg:2119.59ms +step:1640/3242 train_loss:3.2797 train_time:3454935ms step_avg:2119.59ms +step:1641/3242 train_loss:3.4110 train_time:3457058ms step_avg:2119.59ms +step:1642/3242 train_loss:3.3739 train_time:3459171ms step_avg:2119.59ms +step:1643/3242 train_loss:3.3711 train_time:3461294ms step_avg:2119.59ms +step:1644/3242 train_loss:3.4091 train_time:3463412ms step_avg:2119.59ms +step:1645/3242 train_loss:3.3830 train_time:3465537ms step_avg:2119.59ms +step:1646/3242 train_loss:4.1591 train_time:3467654ms step_avg:2119.59ms +step:1647/3242 train_loss:3.3163 train_time:3469777ms step_avg:2119.59ms +step:1648/3242 train_loss:3.6801 train_time:3471898ms step_avg:2119.60ms +step:1649/3242 train_loss:3.7059 train_time:3474017ms step_avg:2119.60ms +step:1650/3242 train_loss:3.5528 train_time:3476133ms step_avg:2119.59ms +step:1651/3242 train_loss:3.4085 train_time:3478254ms step_avg:2119.59ms +step:1652/3242 train_loss:3.7575 train_time:3480369ms step_avg:2119.59ms +step:1653/3242 train_loss:3.4883 train_time:3482487ms step_avg:2119.59ms +step:1654/3242 train_loss:3.3931 train_time:3484607ms step_avg:2119.59ms +step:1655/3242 train_loss:3.3726 train_time:3486730ms step_avg:2119.59ms +step:1656/3242 train_loss:3.3748 train_time:3488854ms step_avg:2119.60ms +step:1657/3242 train_loss:3.3866 train_time:3490973ms step_avg:2119.60ms +step:1658/3242 train_loss:3.6482 train_time:3493090ms step_avg:2119.59ms +step:1659/3242 train_loss:3.7062 train_time:3495214ms step_avg:2119.60ms +step:1660/3242 train_loss:3.4081 train_time:3497322ms step_avg:2119.59ms +step:1661/3242 train_loss:3.4343 train_time:3499445ms step_avg:2119.59ms +step:1662/3242 train_loss:3.4479 train_time:3501560ms step_avg:2119.59ms +step:1663/3242 train_loss:3.6517 train_time:3503672ms step_avg:2119.58ms +step:1664/3242 train_loss:3.6184 train_time:3505797ms step_avg:2119.59ms +step:1665/3242 train_loss:3.4049 train_time:3507910ms step_avg:2119.58ms +step:1666/3242 train_loss:3.6736 train_time:3510027ms step_avg:2119.58ms +step:1667/3242 train_loss:3.0325 train_time:3512141ms step_avg:2119.58ms +step:1668/3242 train_loss:3.4971 train_time:3514258ms step_avg:2119.58ms +step:1669/3242 train_loss:3.3817 train_time:3516380ms step_avg:2119.58ms +step:1670/3242 train_loss:3.3933 train_time:3518492ms step_avg:2119.57ms +step:1671/3242 train_loss:3.4005 train_time:3520613ms step_avg:2119.57ms +step:1672/3242 train_loss:3.3727 train_time:3522737ms step_avg:2119.58ms +step:1673/3242 train_loss:3.6101 train_time:3524852ms step_avg:2119.57ms +step:1674/3242 train_loss:3.7494 train_time:3526974ms step_avg:2119.58ms +step:1675/3242 train_loss:3.5212 train_time:3529098ms step_avg:2119.58ms +step:1676/3242 train_loss:3.3404 train_time:3531206ms step_avg:2119.57ms +step:1677/3242 train_loss:3.3758 train_time:3533332ms step_avg:2119.58ms +step:1678/3242 train_loss:3.4256 train_time:3535449ms step_avg:2119.57ms +step:1679/3242 train_loss:3.4601 train_time:3537568ms step_avg:2119.57ms +step:1680/3242 train_loss:3.2377 train_time:3539692ms step_avg:2119.58ms +step:1681/3242 train_loss:3.5289 train_time:3541804ms step_avg:2119.57ms +step:1682/3242 train_loss:3.5686 train_time:3543926ms step_avg:2119.57ms +step:1683/3242 train_loss:3.4754 train_time:3546052ms step_avg:2119.58ms +step:1684/3242 train_loss:3.4320 train_time:3548169ms step_avg:2119.58ms +step:1685/3242 train_loss:3.3995 train_time:3550293ms step_avg:2119.58ms +step:1686/3242 train_loss:3.5722 train_time:3552421ms step_avg:2119.58ms +step:1687/3242 train_loss:3.4110 train_time:3554541ms step_avg:2119.58ms +step:1688/3242 train_loss:3.3468 train_time:3556665ms step_avg:2119.59ms +step:1689/3242 train_loss:3.4182 train_time:3558784ms step_avg:2119.59ms +step:1690/3242 train_loss:3.4936 train_time:3560904ms step_avg:2119.59ms +step:1691/3242 train_loss:3.7457 train_time:3563021ms step_avg:2119.58ms +step:1692/3242 train_loss:3.4403 train_time:3565142ms step_avg:2119.58ms +step:1693/3242 train_loss:3.4894 train_time:3567256ms step_avg:2119.58ms +step:1694/3242 train_loss:3.5847 train_time:3569378ms step_avg:2119.58ms +step:1695/3242 train_loss:3.4475 train_time:3571498ms step_avg:2119.58ms +step:1696/3242 train_loss:3.6607 train_time:3573619ms step_avg:2119.58ms +step:1697/3242 train_loss:3.3804 train_time:3575732ms step_avg:2119.58ms +step:1698/3242 train_loss:3.3887 train_time:3577846ms step_avg:2119.58ms +step:1699/3242 train_loss:3.4558 train_time:3579963ms step_avg:2119.58ms +step:1700/3242 train_loss:3.3918 train_time:3582085ms step_avg:2119.58ms +step:1701/3242 train_loss:3.4457 train_time:3584204ms step_avg:2119.58ms +step:1702/3242 train_loss:3.3082 train_time:3586319ms step_avg:2119.57ms +step:1703/3242 train_loss:3.3784 train_time:3588440ms step_avg:2119.57ms +step:1704/3242 train_loss:3.5521 train_time:3590561ms step_avg:2119.58ms +step:1705/3242 train_loss:3.4438 train_time:3592678ms step_avg:2119.57ms +step:1706/3242 train_loss:3.4812 train_time:3594802ms step_avg:2119.58ms +step:1707/3242 train_loss:3.4974 train_time:3596919ms step_avg:2119.58ms +step:1708/3242 train_loss:3.5187 train_time:3599037ms step_avg:2119.57ms +step:1709/3242 train_loss:3.2631 train_time:3601168ms step_avg:2119.58ms +step:1710/3242 train_loss:3.3513 train_time:3603291ms step_avg:2119.58ms +step:1711/3242 train_loss:3.4724 train_time:3605415ms step_avg:2119.59ms +step:1712/3242 train_loss:3.4850 train_time:3607542ms step_avg:2119.59ms +step:1713/3242 train_loss:3.6087 train_time:3609663ms step_avg:2119.59ms +step:1714/3242 train_loss:3.5474 train_time:3611772ms step_avg:2119.58ms +step:1715/3242 train_loss:3.4615 train_time:3614023ms step_avg:2119.66ms +step:1716/3242 train_loss:3.5745 train_time:3616144ms step_avg:2119.66ms +step:1717/3242 train_loss:3.7903 train_time:3618256ms step_avg:2119.66ms +step:1718/3242 train_loss:3.4114 train_time:3620379ms step_avg:2119.66ms +step:1719/3242 train_loss:3.5813 train_time:3622493ms step_avg:2119.66ms +step:1720/3242 train_loss:3.2624 train_time:3624615ms step_avg:2119.66ms +step:1721/3242 train_loss:3.3338 train_time:3626722ms step_avg:2119.65ms +step:1722/3242 train_loss:3.6369 train_time:3628849ms step_avg:2119.65ms +step:1723/3242 train_loss:3.4300 train_time:3630961ms step_avg:2119.65ms +step:1724/3242 train_loss:3.4251 train_time:3633080ms step_avg:2119.65ms +step:1725/3242 train_loss:3.0775 train_time:3635203ms step_avg:2119.65ms +step:1726/3242 train_loss:3.5844 train_time:3637321ms step_avg:2119.65ms +step:1727/3242 train_loss:3.3202 train_time:3639437ms step_avg:2119.65ms +step:1728/3242 train_loss:3.6652 train_time:3641570ms step_avg:2119.66ms +step:1729/3242 train_loss:3.5332 train_time:3643687ms step_avg:2119.65ms +step:1730/3242 train_loss:3.5986 train_time:3645806ms step_avg:2119.65ms +step:1731/3242 train_loss:3.5532 train_time:3647922ms step_avg:2119.65ms +step:1732/3242 train_loss:3.3706 train_time:3650042ms step_avg:2119.65ms +step:1733/3242 train_loss:3.5594 train_time:3652167ms step_avg:2119.66ms +step:1734/3242 train_loss:3.4489 train_time:3654284ms step_avg:2119.65ms +step:1735/3242 train_loss:3.6722 train_time:3656407ms step_avg:2119.66ms +step:1736/3242 train_loss:3.7529 train_time:3658531ms step_avg:2119.66ms +step:1737/3242 train_loss:3.3057 train_time:3660644ms step_avg:2119.66ms +step:1738/3242 train_loss:3.5073 train_time:3662767ms step_avg:2119.66ms +step:1739/3242 train_loss:3.5432 train_time:3664876ms step_avg:2119.65ms +step:1740/3242 train_loss:3.3966 train_time:3666999ms step_avg:2119.65ms +step:1741/3242 train_loss:3.5794 train_time:3669121ms step_avg:2119.65ms +step:1742/3242 train_loss:3.5331 train_time:3671244ms step_avg:2119.66ms +step:1743/3242 train_loss:3.5089 train_time:3673360ms step_avg:2119.65ms +step:1744/3242 train_loss:3.7631 train_time:3675471ms step_avg:2119.65ms +step:1745/3242 train_loss:3.3410 train_time:3677594ms step_avg:2119.65ms +step:1746/3242 train_loss:3.1632 train_time:3679716ms step_avg:2119.65ms +step:1747/3242 train_loss:3.0471 train_time:3681838ms step_avg:2119.65ms +step:1748/3242 train_loss:3.5164 train_time:3683960ms step_avg:2119.65ms +step:1749/3242 train_loss:3.3675 train_time:3686072ms step_avg:2119.65ms +step:1750/3242 train_loss:3.4245 train_time:3688189ms step_avg:2119.65ms +step:1750/3242 val_loss:3.4656 train_time:3688603ms step_avg:2119.89ms +step:1751/3242 train_loss:3.2810 train_time:3690320ms step_avg:2119.66ms +step:1752/3242 train_loss:3.6116 train_time:3692433ms step_avg:2119.65ms +step:1753/3242 train_loss:3.5514 train_time:3694554ms step_avg:2119.65ms +step:1754/3242 train_loss:3.4771 train_time:3696674ms step_avg:2119.65ms +step:1755/3242 train_loss:3.4426 train_time:3698790ms step_avg:2119.65ms +step:1756/3242 train_loss:3.5058 train_time:3700911ms step_avg:2119.65ms +step:1757/3242 train_loss:2.9446 train_time:3703027ms step_avg:2119.65ms +step:1758/3242 train_loss:3.1874 train_time:3705143ms step_avg:2119.65ms +step:1759/3242 train_loss:3.1550 train_time:3707263ms step_avg:2119.65ms +step:1760/3242 train_loss:3.3705 train_time:3709384ms step_avg:2119.65ms +step:1761/3242 train_loss:3.4381 train_time:3711510ms step_avg:2119.65ms +step:1762/3242 train_loss:3.2731 train_time:3713624ms step_avg:2119.65ms +step:1763/3242 train_loss:6.1167 train_time:3715742ms step_avg:2119.65ms +step:1764/3242 train_loss:3.4816 train_time:3717860ms step_avg:2119.65ms +step:1765/3242 train_loss:3.5889 train_time:3719980ms step_avg:2119.65ms +step:1766/3242 train_loss:3.5619 train_time:3722106ms step_avg:2119.65ms +step:1767/3242 train_loss:3.4887 train_time:3724235ms step_avg:2119.66ms +step:1768/3242 train_loss:3.4001 train_time:3726353ms step_avg:2119.65ms +step:1769/3242 train_loss:3.4998 train_time:3728478ms step_avg:2119.66ms +step:1770/3242 train_loss:3.3556 train_time:3730600ms step_avg:2119.66ms +step:1771/3242 train_loss:3.6527 train_time:3732717ms step_avg:2119.66ms +step:1772/3242 train_loss:3.5040 train_time:3734842ms step_avg:2119.66ms +step:1773/3242 train_loss:3.5218 train_time:3736959ms step_avg:2119.66ms +step:1774/3242 train_loss:4.1731 train_time:3739083ms step_avg:2119.66ms +step:1775/3242 train_loss:3.2685 train_time:3741197ms step_avg:2119.66ms +step:1776/3242 train_loss:3.3545 train_time:3743316ms step_avg:2119.66ms +step:1777/3242 train_loss:3.9354 train_time:3745441ms step_avg:2119.66ms +step:1778/3242 train_loss:3.1222 train_time:3747552ms step_avg:2119.66ms +step:1779/3242 train_loss:3.5533 train_time:3749678ms step_avg:2119.66ms +step:1780/3242 train_loss:3.4605 train_time:3751795ms step_avg:2119.66ms +step:1781/3242 train_loss:3.7465 train_time:3753911ms step_avg:2119.66ms +step:1782/3242 train_loss:3.6964 train_time:3756032ms step_avg:2119.66ms +step:1783/3242 train_loss:3.4373 train_time:3758157ms step_avg:2119.66ms +step:1784/3242 train_loss:3.5168 train_time:3760272ms step_avg:2119.66ms +step:1785/3242 train_loss:3.6219 train_time:3762398ms step_avg:2119.66ms +step:1786/3242 train_loss:3.1661 train_time:3764510ms step_avg:2119.66ms +step:1787/3242 train_loss:3.6142 train_time:3766628ms step_avg:2119.66ms +step:1788/3242 train_loss:3.4093 train_time:3768745ms step_avg:2119.65ms +step:1789/3242 train_loss:3.3824 train_time:3770870ms step_avg:2119.66ms +step:1790/3242 train_loss:3.6932 train_time:3772994ms step_avg:2119.66ms +step:1791/3242 train_loss:3.3830 train_time:3775106ms step_avg:2119.66ms +step:1792/3242 train_loss:3.3513 train_time:3777229ms step_avg:2119.66ms +step:1793/3242 train_loss:3.4742 train_time:3779351ms step_avg:2119.66ms +step:1794/3242 train_loss:3.5952 train_time:3781465ms step_avg:2119.66ms +step:1795/3242 train_loss:3.4591 train_time:3783583ms step_avg:2119.65ms +step:1796/3242 train_loss:3.3916 train_time:3785707ms step_avg:2119.66ms +step:1797/3242 train_loss:3.4876 train_time:3787818ms step_avg:2119.65ms +step:1798/3242 train_loss:3.3321 train_time:3789938ms step_avg:2119.65ms +step:1799/3242 train_loss:3.4241 train_time:3792060ms step_avg:2119.65ms +step:1800/3242 train_loss:3.3826 train_time:3794178ms step_avg:2119.65ms +step:1801/3242 train_loss:3.4544 train_time:3796300ms step_avg:2119.65ms +step:1802/3242 train_loss:3.4997 train_time:3798420ms step_avg:2119.65ms +step:1803/3242 train_loss:3.4917 train_time:3800532ms step_avg:2119.65ms +step:1804/3242 train_loss:3.6285 train_time:3802655ms step_avg:2119.65ms +step:1805/3242 train_loss:3.5549 train_time:3804772ms step_avg:2119.65ms +step:1806/3242 train_loss:3.2920 train_time:3806894ms step_avg:2119.65ms +step:1807/3242 train_loss:3.4073 train_time:3809006ms step_avg:2119.65ms +step:1808/3242 train_loss:3.4932 train_time:3811132ms step_avg:2119.65ms +step:1809/3242 train_loss:3.3276 train_time:3813244ms step_avg:2119.65ms +step:1810/3242 train_loss:3.5066 train_time:3815364ms step_avg:2119.65ms +step:1811/3242 train_loss:3.6688 train_time:3817488ms step_avg:2119.65ms +step:1812/3242 train_loss:3.4523 train_time:3819607ms step_avg:2119.65ms +step:1813/3242 train_loss:3.5890 train_time:3821729ms step_avg:2119.65ms +step:1814/3242 train_loss:3.5800 train_time:3823850ms step_avg:2119.65ms +step:1815/3242 train_loss:3.2874 train_time:3825970ms step_avg:2119.65ms +step:1816/3242 train_loss:3.5164 train_time:3828091ms step_avg:2119.65ms +step:1817/3242 train_loss:3.4910 train_time:3830215ms step_avg:2119.65ms +step:1818/3242 train_loss:3.5074 train_time:3832332ms step_avg:2119.65ms +step:1819/3242 train_loss:3.3899 train_time:3834451ms step_avg:2119.65ms +step:1820/3242 train_loss:3.5173 train_time:3836570ms step_avg:2119.65ms +step:1821/3242 train_loss:3.7259 train_time:3838689ms step_avg:2119.65ms +step:1822/3242 train_loss:3.3673 train_time:3840810ms step_avg:2119.65ms +step:1823/3242 train_loss:3.6247 train_time:3842924ms step_avg:2119.65ms +step:1824/3242 train_loss:3.4389 train_time:3845047ms step_avg:2119.65ms +step:1825/3242 train_loss:2.9390 train_time:3847163ms step_avg:2119.65ms +step:1826/3242 train_loss:3.1528 train_time:3849287ms step_avg:2119.65ms +step:1827/3242 train_loss:3.4779 train_time:3851401ms step_avg:2119.65ms +step:1828/3242 train_loss:3.5335 train_time:3853526ms step_avg:2119.65ms +step:1829/3242 train_loss:3.5226 train_time:3855640ms step_avg:2119.65ms +step:1830/3242 train_loss:3.5291 train_time:3857761ms step_avg:2119.65ms +step:1831/3242 train_loss:3.2741 train_time:3859879ms step_avg:2119.65ms +step:1832/3242 train_loss:3.5029 train_time:3861999ms step_avg:2119.65ms +step:1833/3242 train_loss:3.5283 train_time:3864113ms step_avg:2119.65ms +step:1834/3242 train_loss:3.3687 train_time:3866229ms step_avg:2119.64ms +step:1835/3242 train_loss:3.4490 train_time:3868346ms step_avg:2119.64ms +step:1836/3242 train_loss:3.4008 train_time:3870463ms step_avg:2119.64ms +step:1837/3242 train_loss:4.0004 train_time:3872576ms step_avg:2119.64ms +step:1838/3242 train_loss:3.4579 train_time:3874695ms step_avg:2119.64ms +step:1839/3242 train_loss:3.6287 train_time:3876817ms step_avg:2119.64ms +step:1840/3242 train_loss:3.4779 train_time:3878938ms step_avg:2119.64ms +step:1841/3242 train_loss:3.3760 train_time:3881056ms step_avg:2119.64ms +step:1842/3242 train_loss:3.3791 train_time:3883170ms step_avg:2119.63ms +step:1843/3242 train_loss:3.4051 train_time:3885287ms step_avg:2119.63ms +step:1844/3242 train_loss:3.4097 train_time:3887409ms step_avg:2119.63ms +step:1845/3242 train_loss:3.4194 train_time:3889532ms step_avg:2119.64ms +step:1846/3242 train_loss:3.5037 train_time:3891653ms step_avg:2119.64ms +step:1847/3242 train_loss:3.3352 train_time:3893781ms step_avg:2119.64ms +step:1848/3242 train_loss:3.4128 train_time:3895900ms step_avg:2119.64ms +step:1849/3242 train_loss:3.3783 train_time:3898014ms step_avg:2119.64ms +step:1850/3242 train_loss:3.4517 train_time:3900134ms step_avg:2119.64ms +step:1851/3242 train_loss:3.3207 train_time:3902252ms step_avg:2119.64ms +step:1852/3242 train_loss:3.7128 train_time:3904368ms step_avg:2119.64ms +step:1853/3242 train_loss:3.2316 train_time:3906485ms step_avg:2119.63ms +step:1854/3242 train_loss:3.4707 train_time:3908604ms step_avg:2119.63ms +step:1855/3242 train_loss:3.1455 train_time:3910725ms step_avg:2119.63ms +step:1856/3242 train_loss:3.4959 train_time:3912853ms step_avg:2119.64ms +step:1857/3242 train_loss:3.6196 train_time:3914969ms step_avg:2119.64ms +step:1858/3242 train_loss:3.4065 train_time:3917092ms step_avg:2119.64ms +step:1859/3242 train_loss:3.2999 train_time:3919216ms step_avg:2119.64ms +step:1860/3242 train_loss:3.4049 train_time:3921336ms step_avg:2119.64ms +step:1861/3242 train_loss:3.7319 train_time:3923449ms step_avg:2119.64ms +step:1862/3242 train_loss:3.3247 train_time:3925570ms step_avg:2119.64ms +step:1863/3242 train_loss:3.4266 train_time:3927688ms step_avg:2119.64ms +step:1864/3242 train_loss:3.4233 train_time:3929803ms step_avg:2119.63ms +step:1865/3242 train_loss:3.2626 train_time:3931924ms step_avg:2119.64ms +step:1866/3242 train_loss:3.3469 train_time:3934036ms step_avg:2119.63ms +step:1867/3242 train_loss:3.4315 train_time:3936154ms step_avg:2119.63ms +step:1868/3242 train_loss:3.5161 train_time:3938282ms step_avg:2119.63ms +step:1869/3242 train_loss:3.1933 train_time:3940393ms step_avg:2119.63ms +step:1870/3242 train_loss:3.4713 train_time:3942518ms step_avg:2119.63ms +step:1871/3242 train_loss:3.3184 train_time:3944640ms step_avg:2119.63ms +step:1872/3242 train_loss:3.5531 train_time:3946761ms step_avg:2119.64ms +step:1873/3242 train_loss:3.3211 train_time:3948870ms step_avg:2119.63ms +step:1874/3242 train_loss:3.4192 train_time:3950990ms step_avg:2119.63ms +step:1875/3242 train_loss:3.5401 train_time:3953114ms step_avg:2119.63ms +step:1875/3242 val_loss:3.4521 train_time:3953525ms step_avg:2119.85ms +step:1876/3242 train_loss:3.4726 train_time:3955244ms step_avg:2119.64ms +step:1877/3242 train_loss:3.2466 train_time:3957359ms step_avg:2119.64ms +step:1878/3242 train_loss:3.4871 train_time:3959478ms step_avg:2119.64ms +step:1879/3242 train_loss:3.3823 train_time:3961597ms step_avg:2119.63ms +step:1880/3242 train_loss:3.5230 train_time:3963721ms step_avg:2119.64ms +step:1881/3242 train_loss:3.4974 train_time:3965838ms step_avg:2119.64ms +step:1882/3242 train_loss:3.6028 train_time:3967957ms step_avg:2119.64ms +step:1883/3242 train_loss:3.4604 train_time:3970073ms step_avg:2119.63ms +step:1884/3242 train_loss:3.5829 train_time:3972198ms step_avg:2119.64ms +step:1885/3242 train_loss:3.4131 train_time:3974314ms step_avg:2119.63ms +step:1886/3242 train_loss:3.6502 train_time:3976429ms step_avg:2119.63ms +step:1887/3242 train_loss:3.3920 train_time:3978555ms step_avg:2119.63ms +step:1888/3242 train_loss:2.3346 train_time:3980670ms step_avg:2119.63ms +step:1889/3242 train_loss:3.4762 train_time:3982795ms step_avg:2119.64ms +step:1890/3242 train_loss:3.3490 train_time:3984908ms step_avg:2119.63ms +step:1891/3242 train_loss:3.1349 train_time:3987028ms step_avg:2119.63ms +step:1892/3242 train_loss:3.6279 train_time:3989150ms step_avg:2119.63ms +step:1893/3242 train_loss:3.4479 train_time:3991269ms step_avg:2119.63ms +step:1894/3242 train_loss:3.1823 train_time:3993388ms step_avg:2119.63ms +step:1895/3242 train_loss:4.0310 train_time:3995504ms step_avg:2119.63ms +step:1896/3242 train_loss:3.4529 train_time:3997626ms step_avg:2119.63ms +step:1897/3242 train_loss:3.3223 train_time:3999744ms step_avg:2119.63ms +step:1898/3242 train_loss:3.5901 train_time:4001862ms step_avg:2119.63ms +step:1899/3242 train_loss:3.2482 train_time:4003981ms step_avg:2119.63ms +step:1900/3242 train_loss:3.4190 train_time:4006100ms step_avg:2119.63ms +step:1901/3242 train_loss:3.4738 train_time:4008221ms step_avg:2119.63ms +step:1902/3242 train_loss:3.3099 train_time:4010340ms step_avg:2119.63ms +step:1903/3242 train_loss:3.5461 train_time:4012463ms step_avg:2119.63ms +step:1904/3242 train_loss:3.5827 train_time:4014585ms step_avg:2119.63ms +step:1905/3242 train_loss:3.6248 train_time:4016825ms step_avg:2119.70ms +step:1906/3242 train_loss:3.2566 train_time:4018942ms step_avg:2119.70ms +step:1907/3242 train_loss:3.3412 train_time:4021060ms step_avg:2119.69ms +step:1908/3242 train_loss:3.2465 train_time:4023174ms step_avg:2119.69ms +step:1909/3242 train_loss:3.5117 train_time:4025296ms step_avg:2119.69ms +step:1910/3242 train_loss:3.3578 train_time:4027415ms step_avg:2119.69ms +step:1911/3242 train_loss:3.2808 train_time:4029531ms step_avg:2119.69ms +step:1912/3242 train_loss:3.3342 train_time:4031648ms step_avg:2119.69ms +step:1913/3242 train_loss:3.7130 train_time:4033770ms step_avg:2119.69ms +step:1914/3242 train_loss:3.8699 train_time:4035880ms step_avg:2119.68ms +step:1915/3242 train_loss:3.3731 train_time:4038004ms step_avg:2119.69ms +step:1916/3242 train_loss:3.6128 train_time:4040124ms step_avg:2119.69ms +step:1917/3242 train_loss:3.5619 train_time:4042241ms step_avg:2119.69ms +step:1918/3242 train_loss:3.5510 train_time:4044365ms step_avg:2119.69ms +step:1919/3242 train_loss:3.3644 train_time:4046473ms step_avg:2119.68ms +step:1920/3242 train_loss:3.5403 train_time:4048591ms step_avg:2119.68ms +step:1921/3242 train_loss:3.5582 train_time:4050707ms step_avg:2119.68ms +step:1922/3242 train_loss:3.4308 train_time:4052826ms step_avg:2119.68ms +step:1923/3242 train_loss:3.5327 train_time:4054949ms step_avg:2119.68ms +step:1924/3242 train_loss:3.5460 train_time:4057064ms step_avg:2119.68ms +step:1925/3242 train_loss:3.3144 train_time:4059184ms step_avg:2119.68ms +step:1926/3242 train_loss:3.2485 train_time:4061302ms step_avg:2119.68ms +step:1927/3242 train_loss:3.2885 train_time:4063423ms step_avg:2119.68ms +step:1928/3242 train_loss:3.4701 train_time:4065530ms step_avg:2119.67ms +step:1929/3242 train_loss:3.4073 train_time:4067653ms step_avg:2119.67ms +step:1930/3242 train_loss:3.5558 train_time:4069773ms step_avg:2119.67ms +step:1931/3242 train_loss:3.6663 train_time:4071899ms step_avg:2119.68ms +step:1932/3242 train_loss:3.3590 train_time:4074010ms step_avg:2119.67ms +step:1933/3242 train_loss:3.4467 train_time:4076127ms step_avg:2119.67ms +step:1934/3242 train_loss:3.4177 train_time:4078250ms step_avg:2119.67ms +step:1935/3242 train_loss:3.5014 train_time:4080371ms step_avg:2119.67ms +step:1936/3242 train_loss:3.2021 train_time:4082497ms step_avg:2119.68ms +step:1937/3242 train_loss:5.4144 train_time:4084613ms step_avg:2119.67ms +step:1938/3242 train_loss:3.3840 train_time:4086742ms step_avg:2119.68ms +step:1939/3242 train_loss:3.5023 train_time:4088862ms step_avg:2119.68ms +step:1940/3242 train_loss:3.5780 train_time:4090978ms step_avg:2119.68ms +step:1941/3242 train_loss:3.4517 train_time:4093096ms step_avg:2119.68ms +step:1942/3242 train_loss:3.4151 train_time:4095211ms step_avg:2119.67ms +step:1943/3242 train_loss:3.6752 train_time:4097330ms step_avg:2119.67ms +step:1944/3242 train_loss:3.7859 train_time:4099447ms step_avg:2119.67ms +step:1945/3242 train_loss:3.4586 train_time:4101565ms step_avg:2119.67ms +step:1946/3242 train_loss:3.4222 train_time:4103686ms step_avg:2119.67ms +step:1947/3242 train_loss:3.1182 train_time:4105801ms step_avg:2119.67ms +step:1948/3242 train_loss:3.6157 train_time:4107921ms step_avg:2119.67ms +step:1949/3242 train_loss:3.2268 train_time:4110041ms step_avg:2119.67ms +step:1950/3242 train_loss:3.4568 train_time:4112161ms step_avg:2119.67ms +step:1951/3242 train_loss:3.5036 train_time:4114285ms step_avg:2119.67ms +step:1952/3242 train_loss:3.3380 train_time:4116404ms step_avg:2119.67ms +step:1953/3242 train_loss:3.5792 train_time:4118527ms step_avg:2119.67ms +step:1954/3242 train_loss:3.5620 train_time:4120646ms step_avg:2119.67ms +step:1955/3242 train_loss:3.3760 train_time:4122763ms step_avg:2119.67ms +step:1956/3242 train_loss:3.3805 train_time:4124881ms step_avg:2119.67ms +step:1957/3242 train_loss:3.4551 train_time:4127005ms step_avg:2119.67ms +step:1958/3242 train_loss:3.3062 train_time:4129120ms step_avg:2119.67ms +step:1959/3242 train_loss:3.3807 train_time:4131246ms step_avg:2119.67ms +step:1960/3242 train_loss:3.7919 train_time:4133363ms step_avg:2119.67ms +step:1961/3242 train_loss:3.4892 train_time:4135484ms step_avg:2119.67ms +step:1962/3242 train_loss:3.2911 train_time:4137603ms step_avg:2119.67ms +step:1963/3242 train_loss:3.3460 train_time:4139723ms step_avg:2119.67ms +step:1964/3242 train_loss:3.6767 train_time:4141836ms step_avg:2119.67ms +step:1965/3242 train_loss:3.3250 train_time:4143956ms step_avg:2119.67ms +step:1966/3242 train_loss:3.3771 train_time:4146077ms step_avg:2119.67ms +step:1967/3242 train_loss:3.3807 train_time:4148201ms step_avg:2119.67ms +step:1968/3242 train_loss:3.4235 train_time:4150323ms step_avg:2119.67ms +step:1969/3242 train_loss:3.2297 train_time:4152440ms step_avg:2119.67ms +step:1970/3242 train_loss:3.5473 train_time:4154562ms step_avg:2119.67ms +step:1971/3242 train_loss:3.3849 train_time:4156675ms step_avg:2119.67ms +step:1972/3242 train_loss:3.3369 train_time:4158798ms step_avg:2119.67ms +step:1973/3242 train_loss:3.3563 train_time:4160911ms step_avg:2119.67ms +step:1974/3242 train_loss:3.6445 train_time:4163034ms step_avg:2119.67ms +step:1975/3242 train_loss:3.6377 train_time:4165150ms step_avg:2119.67ms +step:1976/3242 train_loss:3.6490 train_time:4167265ms step_avg:2119.67ms +step:1977/3242 train_loss:3.3283 train_time:4169380ms step_avg:2119.66ms +step:1978/3242 train_loss:3.4752 train_time:4171500ms step_avg:2119.66ms +step:1979/3242 train_loss:3.4693 train_time:4173631ms step_avg:2119.67ms +step:1980/3242 train_loss:3.4799 train_time:4175750ms step_avg:2119.67ms +step:1981/3242 train_loss:3.6608 train_time:4177867ms step_avg:2119.67ms +step:1982/3242 train_loss:3.5032 train_time:4179988ms step_avg:2119.67ms +step:1983/3242 train_loss:3.8318 train_time:4182111ms step_avg:2119.67ms +step:1984/3242 train_loss:3.3727 train_time:4184224ms step_avg:2119.67ms +step:1985/3242 train_loss:3.4123 train_time:4186353ms step_avg:2119.67ms +step:1986/3242 train_loss:3.5410 train_time:4188476ms step_avg:2119.67ms +step:1987/3242 train_loss:3.6410 train_time:4190592ms step_avg:2119.67ms +step:1988/3242 train_loss:3.3432 train_time:4192709ms step_avg:2119.67ms +step:1989/3242 train_loss:3.7295 train_time:4194830ms step_avg:2119.67ms +step:1990/3242 train_loss:3.4640 train_time:4196947ms step_avg:2119.67ms +step:1991/3242 train_loss:3.5865 train_time:4199060ms step_avg:2119.67ms +step:1992/3242 train_loss:3.3834 train_time:4201178ms step_avg:2119.67ms +step:1993/3242 train_loss:3.5201 train_time:4203305ms step_avg:2119.67ms +step:1994/3242 train_loss:3.3786 train_time:4205424ms step_avg:2119.67ms +step:1995/3242 train_loss:3.4570 train_time:4207544ms step_avg:2119.67ms +step:1996/3242 train_loss:3.5674 train_time:4209658ms step_avg:2119.67ms +step:1997/3242 train_loss:3.3275 train_time:4211779ms step_avg:2119.67ms +step:1998/3242 train_loss:3.4396 train_time:4213902ms step_avg:2119.67ms +step:1999/3242 train_loss:3.9967 train_time:4216021ms step_avg:2119.67ms +step:2000/3242 train_loss:3.4893 train_time:4218137ms step_avg:2119.67ms +step:2000/3242 val_loss:3.4385 train_time:4218549ms step_avg:2119.87ms +step:2001/3242 train_loss:3.1532 train_time:4220258ms step_avg:2119.67ms +step:2002/3242 train_loss:3.4324 train_time:4222381ms step_avg:2119.67ms +step:2003/3242 train_loss:3.4081 train_time:4224503ms step_avg:2119.67ms +step:2004/3242 train_loss:3.3052 train_time:4226628ms step_avg:2119.67ms +step:2005/3242 train_loss:3.5475 train_time:4228747ms step_avg:2119.67ms +step:2006/3242 train_loss:3.6199 train_time:4230866ms step_avg:2119.67ms +step:2007/3242 train_loss:3.2218 train_time:4232982ms step_avg:2119.67ms +step:2008/3242 train_loss:3.5205 train_time:4235104ms step_avg:2119.67ms +step:2009/3242 train_loss:3.5072 train_time:4237216ms step_avg:2119.67ms +step:2010/3242 train_loss:3.4848 train_time:4239336ms step_avg:2119.67ms +step:2011/3242 train_loss:3.5013 train_time:4241449ms step_avg:2119.66ms +step:2012/3242 train_loss:3.3200 train_time:4243570ms step_avg:2119.67ms +step:2013/3242 train_loss:3.4359 train_time:4245684ms step_avg:2119.66ms +step:2014/3242 train_loss:3.2646 train_time:4247804ms step_avg:2119.66ms +step:2015/3242 train_loss:3.2800 train_time:4249926ms step_avg:2119.66ms +step:2016/3242 train_loss:3.5833 train_time:4252048ms step_avg:2119.67ms +step:2017/3242 train_loss:3.5156 train_time:4254170ms step_avg:2119.67ms +step:2018/3242 train_loss:3.4493 train_time:4256284ms step_avg:2119.66ms +step:2019/3242 train_loss:3.3006 train_time:4258410ms step_avg:2119.67ms +step:2020/3242 train_loss:3.3482 train_time:4260524ms step_avg:2119.66ms +step:2021/3242 train_loss:3.4782 train_time:4262645ms step_avg:2119.66ms +step:2022/3242 train_loss:3.5441 train_time:4264775ms step_avg:2119.67ms +step:2023/3242 train_loss:3.2928 train_time:4266886ms step_avg:2119.67ms +step:2024/3242 train_loss:3.5609 train_time:4269008ms step_avg:2119.67ms +step:2025/3242 train_loss:3.2749 train_time:4271123ms step_avg:2119.66ms +step:2026/3242 train_loss:3.2121 train_time:4273246ms step_avg:2119.67ms +step:2027/3242 train_loss:3.4931 train_time:4275361ms step_avg:2119.66ms +step:2028/3242 train_loss:3.6786 train_time:4277476ms step_avg:2119.66ms +step:2029/3242 train_loss:3.5614 train_time:4279600ms step_avg:2119.66ms +step:2030/3242 train_loss:3.4052 train_time:4281717ms step_avg:2119.66ms +step:2031/3242 train_loss:3.4764 train_time:4283836ms step_avg:2119.66ms +step:2032/3242 train_loss:3.3435 train_time:4285961ms step_avg:2119.66ms +step:2033/3242 train_loss:3.3480 train_time:4288070ms step_avg:2119.66ms +step:2034/3242 train_loss:3.5706 train_time:4290193ms step_avg:2119.66ms +step:2035/3242 train_loss:3.5399 train_time:4292313ms step_avg:2119.66ms +step:2036/3242 train_loss:3.4833 train_time:4294435ms step_avg:2119.66ms +step:2037/3242 train_loss:3.4814 train_time:4296551ms step_avg:2119.66ms +step:2038/3242 train_loss:3.5270 train_time:4298671ms step_avg:2119.66ms +step:2039/3242 train_loss:3.5383 train_time:4300790ms step_avg:2119.66ms +step:2040/3242 train_loss:3.3856 train_time:4302909ms step_avg:2119.66ms +step:2041/3242 train_loss:3.5329 train_time:4305026ms step_avg:2119.66ms +step:2042/3242 train_loss:3.3195 train_time:4307138ms step_avg:2119.65ms +step:2043/3242 train_loss:3.2809 train_time:4309263ms step_avg:2119.66ms +step:2044/3242 train_loss:3.2600 train_time:4311379ms step_avg:2119.66ms +step:2045/3242 train_loss:3.4841 train_time:4313500ms step_avg:2119.66ms +step:2046/3242 train_loss:3.3659 train_time:4315627ms step_avg:2119.66ms +step:2047/3242 train_loss:3.1393 train_time:4317746ms step_avg:2119.66ms +step:2048/3242 train_loss:3.2316 train_time:4319862ms step_avg:2119.66ms +step:2049/3242 train_loss:3.2950 train_time:4321984ms step_avg:2119.66ms +step:2050/3242 train_loss:3.5870 train_time:4324103ms step_avg:2119.66ms +step:2051/3242 train_loss:3.3742 train_time:4326227ms step_avg:2119.66ms +step:2052/3242 train_loss:3.6409 train_time:4328340ms step_avg:2119.66ms +step:2053/3242 train_loss:3.3845 train_time:4330465ms step_avg:2119.66ms +step:2054/3242 train_loss:3.4850 train_time:4332577ms step_avg:2119.66ms +step:2055/3242 train_loss:3.4082 train_time:4334705ms step_avg:2119.66ms +step:2056/3242 train_loss:3.3537 train_time:4336818ms step_avg:2119.66ms +step:2057/3242 train_loss:3.4750 train_time:4338940ms step_avg:2119.66ms +step:2058/3242 train_loss:3.4154 train_time:4341054ms step_avg:2119.66ms +step:2059/3242 train_loss:3.2999 train_time:4343175ms step_avg:2119.66ms +step:2060/3242 train_loss:3.4826 train_time:4345300ms step_avg:2119.66ms +step:2061/3242 train_loss:3.2760 train_time:4347425ms step_avg:2119.66ms +step:2062/3242 train_loss:3.2562 train_time:4349538ms step_avg:2119.66ms +step:2063/3242 train_loss:3.9054 train_time:4351661ms step_avg:2119.66ms +step:2064/3242 train_loss:3.5779 train_time:4353779ms step_avg:2119.66ms +step:2065/3242 train_loss:3.6983 train_time:4355895ms step_avg:2119.66ms +step:2066/3242 train_loss:3.4297 train_time:4358011ms step_avg:2119.66ms +step:2067/3242 train_loss:3.6949 train_time:4360135ms step_avg:2119.66ms +step:2068/3242 train_loss:3.3882 train_time:4362249ms step_avg:2119.65ms +step:2069/3242 train_loss:3.3814 train_time:4364372ms step_avg:2119.66ms +step:2070/3242 train_loss:3.4240 train_time:4366495ms step_avg:2119.66ms +step:2071/3242 train_loss:3.4263 train_time:4368605ms step_avg:2119.65ms +step:2072/3242 train_loss:3.6681 train_time:4370728ms step_avg:2119.65ms +step:2073/3242 train_loss:3.7212 train_time:4372851ms step_avg:2119.66ms +step:2074/3242 train_loss:3.3360 train_time:4374971ms step_avg:2119.66ms +step:2075/3242 train_loss:3.4243 train_time:4377089ms step_avg:2119.66ms +step:2076/3242 train_loss:3.7035 train_time:4379205ms step_avg:2119.65ms +step:2077/3242 train_loss:3.4120 train_time:4381325ms step_avg:2119.65ms +step:2078/3242 train_loss:3.4541 train_time:4383441ms step_avg:2119.65ms +step:2079/3242 train_loss:3.6448 train_time:4385563ms step_avg:2119.65ms +step:2080/3242 train_loss:3.5007 train_time:4387681ms step_avg:2119.65ms +step:2081/3242 train_loss:3.6250 train_time:4389807ms step_avg:2119.66ms +step:2082/3242 train_loss:3.6782 train_time:4391922ms step_avg:2119.65ms +step:2083/3242 train_loss:3.3187 train_time:4394045ms step_avg:2119.65ms +step:2084/3242 train_loss:3.3041 train_time:4396161ms step_avg:2119.65ms +step:2085/3242 train_loss:3.5235 train_time:4398278ms step_avg:2119.65ms +step:2086/3242 train_loss:3.6531 train_time:4400403ms step_avg:2119.65ms +step:2087/3242 train_loss:3.3277 train_time:4402516ms step_avg:2119.65ms +step:2088/3242 train_loss:3.4240 train_time:4404637ms step_avg:2119.65ms +step:2089/3242 train_loss:3.1828 train_time:4406763ms step_avg:2119.66ms +step:2090/3242 train_loss:3.4369 train_time:4408879ms step_avg:2119.65ms +step:2091/3242 train_loss:3.5117 train_time:4411003ms step_avg:2119.66ms +step:2092/3242 train_loss:3.1497 train_time:4413122ms step_avg:2119.66ms +step:2093/3242 train_loss:3.3540 train_time:4415238ms step_avg:2119.65ms +step:2094/3242 train_loss:3.4361 train_time:4417353ms step_avg:2119.65ms +step:2095/3242 train_loss:3.4281 train_time:4419473ms step_avg:2119.65ms +step:2096/3242 train_loss:3.4113 train_time:4421720ms step_avg:2119.71ms +step:2097/3242 train_loss:3.4553 train_time:4423846ms step_avg:2119.72ms +step:2098/3242 train_loss:3.2516 train_time:4425965ms step_avg:2119.72ms +step:2099/3242 train_loss:2.9094 train_time:4428079ms step_avg:2119.71ms +step:2100/3242 train_loss:3.3896 train_time:4430199ms step_avg:2119.71ms +step:2101/3242 train_loss:3.5766 train_time:4432321ms step_avg:2119.71ms +step:2102/3242 train_loss:3.5331 train_time:4434443ms step_avg:2119.71ms +step:2103/3242 train_loss:3.3036 train_time:4436561ms step_avg:2119.71ms +step:2104/3242 train_loss:3.3789 train_time:4438683ms step_avg:2119.71ms +step:2105/3242 train_loss:3.3771 train_time:4440809ms step_avg:2119.72ms +step:2106/3242 train_loss:3.9191 train_time:4442932ms step_avg:2119.72ms +step:2107/3242 train_loss:3.2559 train_time:4445049ms step_avg:2119.72ms +step:2108/3242 train_loss:3.4521 train_time:4447169ms step_avg:2119.72ms +step:2109/3242 train_loss:3.5815 train_time:4449286ms step_avg:2119.72ms +step:2110/3242 train_loss:2.8610 train_time:4451407ms step_avg:2119.72ms +step:2111/3242 train_loss:3.4325 train_time:4453519ms step_avg:2119.71ms +step:2112/3242 train_loss:3.2786 train_time:4455648ms step_avg:2119.72ms +step:2113/3242 train_loss:3.5014 train_time:4457768ms step_avg:2119.72ms +step:2114/3242 train_loss:3.7126 train_time:4459882ms step_avg:2119.72ms +step:2115/3242 train_loss:3.1733 train_time:4462002ms step_avg:2119.72ms +step:2116/3242 train_loss:3.7538 train_time:4464126ms step_avg:2119.72ms +step:2117/3242 train_loss:3.3018 train_time:4466246ms step_avg:2119.72ms +step:2118/3242 train_loss:3.4998 train_time:4468360ms step_avg:2119.72ms +step:2119/3242 train_loss:3.3556 train_time:4470482ms step_avg:2119.72ms +step:2120/3242 train_loss:3.5152 train_time:4472600ms step_avg:2119.72ms +step:2121/3242 train_loss:3.3847 train_time:4474720ms step_avg:2119.72ms +step:2122/3242 train_loss:3.5651 train_time:4476839ms step_avg:2119.72ms +step:2123/3242 train_loss:3.3645 train_time:4478971ms step_avg:2119.72ms +step:2124/3242 train_loss:3.3067 train_time:4481088ms step_avg:2119.72ms +step:2125/3242 train_loss:3.3794 train_time:4483196ms step_avg:2119.71ms +step:2125/3242 val_loss:3.4285 train_time:4483609ms step_avg:2119.91ms +step:2126/3242 train_loss:3.3234 train_time:4485326ms step_avg:2119.72ms +step:2127/3242 train_loss:3.6166 train_time:4487442ms step_avg:2119.72ms +step:2128/3242 train_loss:3.2157 train_time:4489564ms step_avg:2119.72ms +step:2129/3242 train_loss:3.2274 train_time:4491690ms step_avg:2119.72ms +step:2130/3242 train_loss:3.4238 train_time:4493805ms step_avg:2119.72ms +step:2131/3242 train_loss:3.4892 train_time:4495924ms step_avg:2119.72ms +step:2132/3242 train_loss:3.3672 train_time:4498044ms step_avg:2119.72ms +step:2133/3242 train_loss:3.5538 train_time:4500161ms step_avg:2119.72ms +step:2134/3242 train_loss:3.3285 train_time:4502281ms step_avg:2119.72ms +step:2135/3242 train_loss:3.0223 train_time:4504405ms step_avg:2119.72ms +step:2136/3242 train_loss:3.4899 train_time:4506520ms step_avg:2119.72ms +step:2137/3242 train_loss:3.5242 train_time:4508648ms step_avg:2119.72ms +step:2138/3242 train_loss:3.3832 train_time:4510764ms step_avg:2119.72ms +step:2139/3242 train_loss:3.4710 train_time:4512884ms step_avg:2119.72ms +step:2140/3242 train_loss:3.2653 train_time:4514999ms step_avg:2119.72ms +step:2141/3242 train_loss:3.5166 train_time:4517120ms step_avg:2119.72ms +step:2142/3242 train_loss:3.4038 train_time:4519239ms step_avg:2119.72ms +step:2143/3242 train_loss:3.2545 train_time:4521359ms step_avg:2119.72ms +step:2144/3242 train_loss:3.5714 train_time:4523485ms step_avg:2119.72ms +step:2145/3242 train_loss:3.8195 train_time:4525599ms step_avg:2119.72ms +step:2146/3242 train_loss:3.8426 train_time:4527722ms step_avg:2119.72ms +step:2147/3242 train_loss:3.2341 train_time:4529842ms step_avg:2119.72ms +step:2148/3242 train_loss:3.1328 train_time:4531954ms step_avg:2119.72ms +step:2149/3242 train_loss:3.5412 train_time:4534080ms step_avg:2119.72ms +step:2150/3242 train_loss:3.7569 train_time:4536197ms step_avg:2119.72ms +step:2151/3242 train_loss:3.3502 train_time:4538322ms step_avg:2119.72ms +step:2152/3242 train_loss:3.4050 train_time:4540441ms step_avg:2119.72ms +step:2153/3242 train_loss:3.4048 train_time:4542556ms step_avg:2119.72ms +step:2154/3242 train_loss:3.2486 train_time:4544685ms step_avg:2119.72ms +step:2155/3242 train_loss:3.3939 train_time:4546800ms step_avg:2119.72ms +step:2156/3242 train_loss:3.5303 train_time:4548924ms step_avg:2119.72ms +step:2157/3242 train_loss:3.3694 train_time:4551044ms step_avg:2119.72ms +step:2158/3242 train_loss:3.2622 train_time:4553164ms step_avg:2119.72ms +step:2159/3242 train_loss:3.4747 train_time:4555276ms step_avg:2119.72ms +step:2160/3242 train_loss:3.3646 train_time:4557399ms step_avg:2119.72ms +step:2161/3242 train_loss:3.4212 train_time:4559513ms step_avg:2119.72ms +step:2162/3242 train_loss:3.4378 train_time:4561627ms step_avg:2119.72ms +step:2163/3242 train_loss:3.4036 train_time:4563754ms step_avg:2119.72ms +step:2164/3242 train_loss:3.5034 train_time:4565864ms step_avg:2119.71ms +step:2165/3242 train_loss:3.4332 train_time:4567987ms step_avg:2119.72ms +step:2166/3242 train_loss:3.4296 train_time:4570112ms step_avg:2119.72ms +step:2167/3242 train_loss:3.4135 train_time:4572228ms step_avg:2119.72ms +step:2168/3242 train_loss:3.4756 train_time:4574342ms step_avg:2119.71ms +step:2169/3242 train_loss:3.3480 train_time:4576465ms step_avg:2119.71ms +step:2170/3242 train_loss:3.3437 train_time:4578586ms step_avg:2119.72ms +step:2171/3242 train_loss:3.4508 train_time:4580711ms step_avg:2119.72ms +step:2172/3242 train_loss:3.3600 train_time:4582834ms step_avg:2119.72ms +step:2173/3242 train_loss:3.3111 train_time:4584955ms step_avg:2119.72ms +step:2174/3242 train_loss:3.3049 train_time:4587063ms step_avg:2119.71ms +step:2175/3242 train_loss:3.5337 train_time:4589183ms step_avg:2119.72ms +step:2176/3242 train_loss:3.4129 train_time:4591302ms step_avg:2119.71ms +step:2177/3242 train_loss:3.2675 train_time:4593418ms step_avg:2119.71ms +step:2178/3242 train_loss:3.5133 train_time:4595541ms step_avg:2119.71ms +step:2179/3242 train_loss:3.3463 train_time:4597652ms step_avg:2119.71ms +step:2180/3242 train_loss:3.5017 train_time:4599778ms step_avg:2119.71ms +step:2181/3242 train_loss:3.3326 train_time:4601888ms step_avg:2119.71ms +step:2182/3242 train_loss:3.5120 train_time:4604011ms step_avg:2119.71ms +step:2183/3242 train_loss:3.3561 train_time:4606135ms step_avg:2119.71ms +step:2184/3242 train_loss:3.3729 train_time:4608252ms step_avg:2119.71ms +step:2185/3242 train_loss:3.5301 train_time:4610373ms step_avg:2119.71ms +step:2186/3242 train_loss:3.3957 train_time:4612495ms step_avg:2119.71ms +step:2187/3242 train_loss:3.5435 train_time:4614611ms step_avg:2119.71ms +step:2188/3242 train_loss:3.4216 train_time:4616728ms step_avg:2119.71ms +step:2189/3242 train_loss:3.2584 train_time:4618845ms step_avg:2119.71ms +step:2190/3242 train_loss:3.3923 train_time:4620959ms step_avg:2119.71ms +step:2191/3242 train_loss:3.4915 train_time:4623085ms step_avg:2119.71ms +step:2192/3242 train_loss:3.1449 train_time:4625207ms step_avg:2119.71ms +step:2193/3242 train_loss:3.3801 train_time:4627319ms step_avg:2119.71ms +step:2194/3242 train_loss:3.3844 train_time:4629442ms step_avg:2119.71ms +step:2195/3242 train_loss:3.5402 train_time:4631561ms step_avg:2119.71ms +step:2196/3242 train_loss:3.4071 train_time:4633678ms step_avg:2119.71ms +step:2197/3242 train_loss:3.8490 train_time:4635802ms step_avg:2119.71ms +step:2198/3242 train_loss:3.5570 train_time:4637915ms step_avg:2119.71ms +step:2199/3242 train_loss:3.4441 train_time:4640039ms step_avg:2119.71ms +step:2200/3242 train_loss:3.3602 train_time:4642160ms step_avg:2119.71ms +step:2201/3242 train_loss:3.3219 train_time:4644275ms step_avg:2119.71ms +step:2202/3242 train_loss:3.5748 train_time:4646399ms step_avg:2119.71ms +step:2203/3242 train_loss:3.4034 train_time:4648519ms step_avg:2119.71ms +step:2204/3242 train_loss:3.3353 train_time:4650639ms step_avg:2119.71ms +step:2205/3242 train_loss:3.4257 train_time:4652758ms step_avg:2119.71ms +step:2206/3242 train_loss:3.4518 train_time:4654878ms step_avg:2119.71ms +step:2207/3242 train_loss:3.4327 train_time:4656993ms step_avg:2119.71ms +step:2208/3242 train_loss:4.0304 train_time:4659112ms step_avg:2119.71ms +step:2209/3242 train_loss:3.5702 train_time:4661226ms step_avg:2119.70ms +step:2210/3242 train_loss:3.2939 train_time:4663348ms step_avg:2119.70ms +step:2211/3242 train_loss:3.2453 train_time:4665476ms step_avg:2119.71ms +step:2212/3242 train_loss:3.5237 train_time:4667596ms step_avg:2119.71ms +step:2213/3242 train_loss:3.4567 train_time:4669720ms step_avg:2119.71ms +step:2214/3242 train_loss:3.4873 train_time:4671840ms step_avg:2119.71ms +step:2215/3242 train_loss:3.4123 train_time:4673953ms step_avg:2119.71ms +step:2216/3242 train_loss:3.4211 train_time:4676072ms step_avg:2119.71ms +step:2217/3242 train_loss:3.4025 train_time:4678196ms step_avg:2119.71ms +step:2218/3242 train_loss:3.3341 train_time:4680322ms step_avg:2119.71ms +step:2219/3242 train_loss:3.3977 train_time:4682432ms step_avg:2119.71ms +step:2220/3242 train_loss:3.7434 train_time:4684559ms step_avg:2119.71ms +step:2221/3242 train_loss:3.5533 train_time:4686671ms step_avg:2119.71ms +step:2222/3242 train_loss:3.0743 train_time:4688791ms step_avg:2119.71ms +step:2223/3242 train_loss:3.4977 train_time:4690911ms step_avg:2119.71ms +step:2224/3242 train_loss:3.2364 train_time:4693027ms step_avg:2119.70ms +step:2225/3242 train_loss:3.4362 train_time:4695142ms step_avg:2119.70ms +step:2226/3242 train_loss:3.5021 train_time:4697265ms step_avg:2119.70ms +step:2227/3242 train_loss:4.0086 train_time:4699387ms step_avg:2119.71ms +step:2228/3242 train_loss:3.6390 train_time:4701502ms step_avg:2119.70ms +step:2229/3242 train_loss:3.5134 train_time:4703629ms step_avg:2119.71ms +step:2230/3242 train_loss:3.3818 train_time:4705740ms step_avg:2119.70ms +step:2231/3242 train_loss:3.5419 train_time:4707860ms step_avg:2119.70ms +step:2232/3242 train_loss:3.5246 train_time:4709980ms step_avg:2119.70ms +step:2233/3242 train_loss:3.1693 train_time:4712102ms step_avg:2119.70ms +step:2234/3242 train_loss:3.3590 train_time:4714215ms step_avg:2119.70ms +step:2235/3242 train_loss:3.2049 train_time:4716338ms step_avg:2119.70ms +step:2236/3242 train_loss:3.2812 train_time:4718461ms step_avg:2119.70ms +step:2237/3242 train_loss:3.5421 train_time:4720575ms step_avg:2119.70ms +step:2238/3242 train_loss:3.7000 train_time:4722699ms step_avg:2119.70ms +step:2239/3242 train_loss:3.3911 train_time:4724814ms step_avg:2119.70ms +step:2240/3242 train_loss:3.3835 train_time:4726929ms step_avg:2119.70ms +step:2241/3242 train_loss:3.2739 train_time:4729049ms step_avg:2119.70ms +step:2242/3242 train_loss:3.5350 train_time:4731171ms step_avg:2119.70ms +step:2243/3242 train_loss:3.3360 train_time:4733291ms step_avg:2119.70ms +step:2244/3242 train_loss:3.1861 train_time:4735406ms step_avg:2119.70ms +step:2245/3242 train_loss:3.3425 train_time:4737529ms step_avg:2119.70ms +step:2246/3242 train_loss:3.3701 train_time:4739644ms step_avg:2119.70ms +step:2247/3242 train_loss:3.6667 train_time:4741763ms step_avg:2119.70ms +step:2248/3242 train_loss:3.2696 train_time:4743890ms step_avg:2119.70ms +step:2249/3242 train_loss:3.5655 train_time:4746008ms step_avg:2119.70ms +step:2250/3242 train_loss:3.2504 train_time:4748121ms step_avg:2119.70ms +step:2250/3242 val_loss:3.4172 train_time:4748536ms step_avg:2119.88ms +step:2251/3242 train_loss:3.0562 train_time:4750256ms step_avg:2119.70ms +step:2252/3242 train_loss:3.1408 train_time:4752384ms step_avg:2119.71ms +step:2253/3242 train_loss:3.6432 train_time:4754497ms step_avg:2119.70ms +step:2254/3242 train_loss:3.4838 train_time:4756626ms step_avg:2119.71ms +step:2255/3242 train_loss:3.2998 train_time:4758736ms step_avg:2119.70ms +step:2256/3242 train_loss:3.3492 train_time:4760867ms step_avg:2119.71ms +step:2257/3242 train_loss:3.4299 train_time:4762983ms step_avg:2119.71ms +step:2258/3242 train_loss:3.4226 train_time:4765111ms step_avg:2119.71ms +step:2259/3242 train_loss:3.4073 train_time:4767232ms step_avg:2119.71ms +step:2260/3242 train_loss:3.3815 train_time:4769359ms step_avg:2119.72ms +step:2261/3242 train_loss:3.5398 train_time:4771478ms step_avg:2119.71ms +step:2262/3242 train_loss:3.3874 train_time:4773593ms step_avg:2119.71ms +step:2263/3242 train_loss:4.5833 train_time:4775714ms step_avg:2119.71ms +step:2264/3242 train_loss:3.3061 train_time:4777833ms step_avg:2119.71ms +step:2265/3242 train_loss:3.4366 train_time:4779958ms step_avg:2119.72ms +step:2266/3242 train_loss:3.4769 train_time:4782075ms step_avg:2119.71ms +step:2267/3242 train_loss:3.4856 train_time:4784191ms step_avg:2119.71ms +step:2268/3242 train_loss:3.6302 train_time:4786312ms step_avg:2119.71ms +step:2269/3242 train_loss:3.2573 train_time:4788437ms step_avg:2119.72ms +step:2270/3242 train_loss:3.8455 train_time:4790556ms step_avg:2119.71ms +step:2271/3242 train_loss:3.0306 train_time:4792678ms step_avg:2119.72ms +step:2272/3242 train_loss:3.2548 train_time:4794797ms step_avg:2119.72ms +step:2273/3242 train_loss:3.3294 train_time:4796914ms step_avg:2119.71ms +step:2274/3242 train_loss:3.3092 train_time:4799040ms step_avg:2119.72ms +step:2275/3242 train_loss:4.5400 train_time:4801159ms step_avg:2119.72ms +step:2276/3242 train_loss:3.4416 train_time:4803287ms step_avg:2119.72ms +step:2277/3242 train_loss:3.5309 train_time:4805403ms step_avg:2119.72ms +step:2278/3242 train_loss:3.3476 train_time:4807520ms step_avg:2119.72ms +step:2279/3242 train_loss:3.4436 train_time:4809640ms step_avg:2119.72ms +step:2280/3242 train_loss:3.4166 train_time:4811757ms step_avg:2119.72ms +step:2281/3242 train_loss:3.2413 train_time:4813881ms step_avg:2119.72ms +step:2282/3242 train_loss:3.2097 train_time:4816000ms step_avg:2119.72ms +step:2283/3242 train_loss:3.8226 train_time:4818120ms step_avg:2119.72ms +step:2284/3242 train_loss:3.3043 train_time:4820244ms step_avg:2119.72ms +step:2285/3242 train_loss:3.4693 train_time:4822356ms step_avg:2119.72ms +step:2286/3242 train_loss:3.4783 train_time:4824599ms step_avg:2119.77ms +step:2287/3242 train_loss:3.6230 train_time:4826723ms step_avg:2119.77ms +step:2288/3242 train_loss:3.3569 train_time:4828840ms step_avg:2119.77ms +step:2289/3242 train_loss:3.5808 train_time:4830960ms step_avg:2119.77ms +step:2290/3242 train_loss:3.4517 train_time:4833080ms step_avg:2119.77ms +step:2291/3242 train_loss:3.5224 train_time:4835200ms step_avg:2119.77ms +step:2292/3242 train_loss:3.5014 train_time:4837313ms step_avg:2119.77ms +step:2293/3242 train_loss:3.4263 train_time:4839437ms step_avg:2119.77ms +step:2294/3242 train_loss:3.3293 train_time:4841549ms step_avg:2119.77ms +step:2295/3242 train_loss:3.3187 train_time:4843672ms step_avg:2119.77ms +step:2296/3242 train_loss:3.4064 train_time:4845796ms step_avg:2119.77ms +step:2297/3242 train_loss:3.5492 train_time:4847912ms step_avg:2119.77ms +step:2298/3242 train_loss:3.3327 train_time:4850028ms step_avg:2119.77ms +step:2299/3242 train_loss:3.2785 train_time:4852156ms step_avg:2119.77ms +step:2300/3242 train_loss:3.4503 train_time:4854272ms step_avg:2119.77ms +step:2301/3242 train_loss:3.3708 train_time:4856389ms step_avg:2119.77ms +step:2302/3242 train_loss:3.3954 train_time:4858512ms step_avg:2119.77ms +step:2303/3242 train_loss:3.3229 train_time:4860628ms step_avg:2119.77ms +step:2304/3242 train_loss:3.2663 train_time:4862747ms step_avg:2119.77ms +step:2305/3242 train_loss:3.4484 train_time:4864871ms step_avg:2119.77ms +step:2306/3242 train_loss:3.3610 train_time:4866990ms step_avg:2119.77ms +step:2307/3242 train_loss:3.3608 train_time:4869101ms step_avg:2119.77ms +step:2308/3242 train_loss:3.6405 train_time:4871225ms step_avg:2119.77ms +step:2309/3242 train_loss:3.4674 train_time:4873344ms step_avg:2119.77ms +step:2310/3242 train_loss:3.4371 train_time:4875463ms step_avg:2119.77ms +step:2311/3242 train_loss:3.4128 train_time:4877583ms step_avg:2119.77ms +step:2312/3242 train_loss:2.7873 train_time:4879699ms step_avg:2119.76ms +step:2313/3242 train_loss:3.3890 train_time:4881825ms step_avg:2119.77ms +step:2314/3242 train_loss:3.1472 train_time:4883940ms step_avg:2119.77ms +step:2315/3242 train_loss:3.4993 train_time:4886063ms step_avg:2119.77ms +step:2316/3242 train_loss:3.4546 train_time:4888175ms step_avg:2119.76ms +step:2317/3242 train_loss:3.2885 train_time:4890292ms step_avg:2119.76ms +step:2318/3242 train_loss:3.2459 train_time:4892417ms step_avg:2119.76ms +step:2319/3242 train_loss:3.2675 train_time:4894537ms step_avg:2119.76ms +step:2320/3242 train_loss:3.6169 train_time:4896653ms step_avg:2119.76ms +step:2321/3242 train_loss:3.3508 train_time:4898781ms step_avg:2119.77ms +step:2322/3242 train_loss:3.4323 train_time:4900888ms step_avg:2119.76ms +step:2323/3242 train_loss:3.1342 train_time:4903011ms step_avg:2119.76ms +step:2324/3242 train_loss:3.2667 train_time:4905132ms step_avg:2119.76ms +step:2325/3242 train_loss:3.5175 train_time:4907249ms step_avg:2119.76ms +step:2326/3242 train_loss:3.3318 train_time:4909372ms step_avg:2119.76ms +step:2327/3242 train_loss:3.3687 train_time:4911480ms step_avg:2119.76ms +step:2328/3242 train_loss:3.8659 train_time:4913603ms step_avg:2119.76ms +step:2329/3242 train_loss:3.2865 train_time:4915729ms step_avg:2119.76ms +step:2330/3242 train_loss:3.4339 train_time:4917845ms step_avg:2119.76ms +step:2331/3242 train_loss:3.2274 train_time:4919961ms step_avg:2119.76ms +step:2332/3242 train_loss:3.6946 train_time:4922086ms step_avg:2119.76ms +step:2333/3242 train_loss:3.4909 train_time:4924210ms step_avg:2119.76ms +step:2334/3242 train_loss:3.1472 train_time:4926323ms step_avg:2119.76ms +step:2335/3242 train_loss:3.6681 train_time:4928451ms step_avg:2119.76ms +step:2336/3242 train_loss:3.4437 train_time:4930568ms step_avg:2119.76ms +step:2337/3242 train_loss:3.4113 train_time:4932692ms step_avg:2119.76ms +step:2338/3242 train_loss:3.4476 train_time:4934804ms step_avg:2119.76ms +step:2339/3242 train_loss:3.3800 train_time:4936923ms step_avg:2119.76ms +step:2340/3242 train_loss:3.3514 train_time:4939037ms step_avg:2119.76ms +step:2341/3242 train_loss:3.5193 train_time:4941161ms step_avg:2119.76ms +step:2342/3242 train_loss:3.3160 train_time:4943287ms step_avg:2119.76ms +step:2343/3242 train_loss:3.5235 train_time:4945401ms step_avg:2119.76ms +step:2344/3242 train_loss:3.3869 train_time:4947525ms step_avg:2119.76ms +step:2345/3242 train_loss:4.1808 train_time:4949645ms step_avg:2119.76ms +step:2346/3242 train_loss:3.0793 train_time:4951757ms step_avg:2119.76ms +step:2347/3242 train_loss:3.1761 train_time:4953882ms step_avg:2119.76ms +step:2348/3242 train_loss:3.0446 train_time:4955998ms step_avg:2119.76ms +step:2349/3242 train_loss:3.6688 train_time:4958122ms step_avg:2119.76ms +step:2350/3242 train_loss:3.4541 train_time:4960238ms step_avg:2119.76ms +step:2351/3242 train_loss:3.3582 train_time:4962354ms step_avg:2119.76ms +step:2352/3242 train_loss:3.4661 train_time:4964471ms step_avg:2119.76ms +step:2353/3242 train_loss:4.2977 train_time:4966599ms step_avg:2119.76ms +step:2354/3242 train_loss:3.4545 train_time:4968715ms step_avg:2119.76ms +step:2355/3242 train_loss:3.3739 train_time:4970834ms step_avg:2119.76ms +step:2356/3242 train_loss:3.4991 train_time:4972949ms step_avg:2119.76ms +step:2357/3242 train_loss:3.4104 train_time:4975068ms step_avg:2119.76ms +step:2358/3242 train_loss:3.3739 train_time:4977188ms step_avg:2119.76ms +step:2359/3242 train_loss:3.3938 train_time:4979308ms step_avg:2119.76ms +step:2360/3242 train_loss:4.1056 train_time:4981435ms step_avg:2119.76ms +step:2361/3242 train_loss:3.0631 train_time:4983550ms step_avg:2119.76ms +step:2362/3242 train_loss:3.4146 train_time:4985668ms step_avg:2119.76ms +step:2363/3242 train_loss:3.7403 train_time:4987784ms step_avg:2119.76ms +step:2364/3242 train_loss:3.7548 train_time:4989902ms step_avg:2119.75ms +step:2365/3242 train_loss:3.4795 train_time:4992032ms step_avg:2119.76ms +step:2366/3242 train_loss:3.4785 train_time:4994151ms step_avg:2119.76ms +step:2367/3242 train_loss:3.3278 train_time:4996270ms step_avg:2119.76ms +step:2368/3242 train_loss:3.5621 train_time:4998384ms step_avg:2119.76ms +step:2369/3242 train_loss:3.3495 train_time:5000507ms step_avg:2119.76ms +step:2370/3242 train_loss:3.3570 train_time:5002629ms step_avg:2119.76ms +step:2371/3242 train_loss:3.5239 train_time:5004742ms step_avg:2119.76ms +step:2372/3242 train_loss:3.4504 train_time:5006863ms step_avg:2119.76ms +step:2373/3242 train_loss:3.4935 train_time:5008981ms step_avg:2119.76ms +step:2374/3242 train_loss:3.3320 train_time:5011102ms step_avg:2119.76ms +step:2375/3242 train_loss:3.4043 train_time:5013222ms step_avg:2119.76ms +step:2375/3242 val_loss:3.4045 train_time:5013635ms step_avg:2119.93ms +step:2376/3242 train_loss:3.1914 train_time:5015349ms step_avg:2119.76ms +step:2377/3242 train_loss:3.4865 train_time:5017470ms step_avg:2119.76ms +step:2378/3242 train_loss:3.4272 train_time:5019591ms step_avg:2119.76ms +step:2379/3242 train_loss:3.5912 train_time:5021705ms step_avg:2119.76ms +step:2380/3242 train_loss:3.2161 train_time:5023829ms step_avg:2119.76ms +step:2381/3242 train_loss:3.2650 train_time:5025949ms step_avg:2119.76ms +step:2382/3242 train_loss:3.3344 train_time:5028063ms step_avg:2119.76ms +step:2383/3242 train_loss:4.6873 train_time:5030171ms step_avg:2119.75ms +step:2384/3242 train_loss:3.5648 train_time:5032300ms step_avg:2119.76ms +step:2385/3242 train_loss:3.3573 train_time:5034416ms step_avg:2119.75ms +step:2386/3242 train_loss:3.4486 train_time:5036536ms step_avg:2119.75ms +step:2387/3242 train_loss:3.4071 train_time:5038663ms step_avg:2119.76ms +step:2388/3242 train_loss:3.3624 train_time:5040784ms step_avg:2119.76ms +step:2389/3242 train_loss:3.3818 train_time:5042898ms step_avg:2119.76ms +step:2390/3242 train_loss:3.2012 train_time:5045024ms step_avg:2119.76ms +step:2391/3242 train_loss:3.5133 train_time:5047140ms step_avg:2119.76ms +step:2392/3242 train_loss:3.2747 train_time:5049260ms step_avg:2119.76ms +step:2393/3242 train_loss:3.2574 train_time:5051380ms step_avg:2119.76ms +step:2394/3242 train_loss:3.7756 train_time:5053494ms step_avg:2119.75ms +step:2395/3242 train_loss:3.4903 train_time:5055628ms step_avg:2119.76ms +step:2396/3242 train_loss:3.2628 train_time:5057751ms step_avg:2119.76ms +step:2397/3242 train_loss:3.5143 train_time:5059865ms step_avg:2119.76ms +step:2398/3242 train_loss:3.5498 train_time:5061975ms step_avg:2119.76ms +step:2399/3242 train_loss:3.4697 train_time:5064094ms step_avg:2119.75ms +step:2400/3242 train_loss:3.6679 train_time:5066217ms step_avg:2119.76ms +step:2401/3242 train_loss:3.1620 train_time:5068341ms step_avg:2119.76ms +step:2402/3242 train_loss:3.3792 train_time:5070463ms step_avg:2119.76ms +step:2403/3242 train_loss:3.3545 train_time:5072574ms step_avg:2119.76ms +step:2404/3242 train_loss:3.3285 train_time:5074692ms step_avg:2119.75ms +step:2405/3242 train_loss:3.5478 train_time:5076819ms step_avg:2119.76ms +step:2406/3242 train_loss:3.4118 train_time:5078938ms step_avg:2119.76ms +step:2407/3242 train_loss:3.3921 train_time:5081054ms step_avg:2119.76ms +step:2408/3242 train_loss:3.4524 train_time:5083178ms step_avg:2119.76ms +step:2409/3242 train_loss:3.4277 train_time:5085297ms step_avg:2119.76ms +step:2410/3242 train_loss:3.3739 train_time:5087416ms step_avg:2119.76ms +step:2411/3242 train_loss:3.1950 train_time:5089529ms step_avg:2119.75ms +step:2412/3242 train_loss:3.6431 train_time:5091654ms step_avg:2119.76ms +step:2413/3242 train_loss:3.4375 train_time:5093772ms step_avg:2119.76ms +step:2414/3242 train_loss:3.2429 train_time:5095891ms step_avg:2119.76ms +step:2415/3242 train_loss:3.3914 train_time:5098012ms step_avg:2119.76ms +step:2416/3242 train_loss:3.2557 train_time:5100124ms step_avg:2119.75ms +step:2417/3242 train_loss:3.5868 train_time:5102245ms step_avg:2119.75ms +step:2418/3242 train_loss:3.3253 train_time:5104373ms step_avg:2119.76ms +step:2419/3242 train_loss:3.2734 train_time:5106493ms step_avg:2119.76ms +step:2420/3242 train_loss:3.4293 train_time:5108609ms step_avg:2119.75ms +step:2421/3242 train_loss:3.5362 train_time:5110733ms step_avg:2119.76ms +step:2422/3242 train_loss:3.6537 train_time:5112853ms step_avg:2119.76ms +step:2423/3242 train_loss:3.2931 train_time:5114967ms step_avg:2119.75ms +step:2424/3242 train_loss:3.3011 train_time:5117087ms step_avg:2119.75ms +step:2425/3242 train_loss:3.4993 train_time:5119202ms step_avg:2119.75ms +step:2426/3242 train_loss:3.3880 train_time:5121325ms step_avg:2119.75ms +step:2427/3242 train_loss:3.6215 train_time:5123450ms step_avg:2119.76ms +step:2428/3242 train_loss:3.2339 train_time:5125563ms step_avg:2119.75ms +step:2429/3242 train_loss:3.4268 train_time:5127679ms step_avg:2119.75ms +step:2430/3242 train_loss:3.5105 train_time:5129804ms step_avg:2119.75ms +step:2431/3242 train_loss:3.2374 train_time:5131922ms step_avg:2119.75ms +step:2432/3242 train_loss:3.4313 train_time:5134039ms step_avg:2119.75ms +step:2433/3242 train_loss:3.9435 train_time:5136152ms step_avg:2119.75ms +step:2434/3242 train_loss:3.3462 train_time:5138278ms step_avg:2119.75ms +step:2435/3242 train_loss:3.3818 train_time:5140391ms step_avg:2119.75ms +step:2436/3242 train_loss:3.4395 train_time:5142518ms step_avg:2119.75ms +step:2437/3242 train_loss:4.3872 train_time:5144637ms step_avg:2119.75ms +step:2438/3242 train_loss:3.1463 train_time:5146749ms step_avg:2119.75ms +step:2439/3242 train_loss:3.4765 train_time:5148869ms step_avg:2119.75ms +step:2440/3242 train_loss:3.5230 train_time:5150991ms step_avg:2119.75ms +step:2441/3242 train_loss:3.5364 train_time:5153112ms step_avg:2119.75ms +step:2442/3242 train_loss:3.4614 train_time:5155234ms step_avg:2119.75ms +step:2443/3242 train_loss:3.4988 train_time:5157345ms step_avg:2119.75ms +step:2444/3242 train_loss:3.3440 train_time:5159466ms step_avg:2119.75ms +step:2445/3242 train_loss:3.3746 train_time:5161586ms step_avg:2119.75ms +step:2446/3242 train_loss:3.4312 train_time:5163710ms step_avg:2119.75ms +step:2447/3242 train_loss:3.3496 train_time:5165831ms step_avg:2119.75ms +step:2448/3242 train_loss:3.9290 train_time:5167946ms step_avg:2119.75ms +step:2449/3242 train_loss:3.4523 train_time:5170068ms step_avg:2119.75ms +step:2450/3242 train_loss:3.5750 train_time:5172185ms step_avg:2119.75ms +step:2451/3242 train_loss:3.2596 train_time:5174303ms step_avg:2119.75ms +step:2452/3242 train_loss:3.2786 train_time:5176423ms step_avg:2119.75ms +step:2453/3242 train_loss:3.3594 train_time:5178546ms step_avg:2119.75ms +step:2454/3242 train_loss:3.4980 train_time:5180661ms step_avg:2119.75ms +step:2455/3242 train_loss:3.5108 train_time:5182786ms step_avg:2119.75ms +step:2456/3242 train_loss:3.6082 train_time:5184902ms step_avg:2119.75ms +step:2457/3242 train_loss:3.3536 train_time:5187022ms step_avg:2119.75ms +step:2458/3242 train_loss:3.2785 train_time:5189141ms step_avg:2119.75ms +step:2459/3242 train_loss:3.2985 train_time:5191258ms step_avg:2119.75ms +step:2460/3242 train_loss:3.0608 train_time:5193375ms step_avg:2119.74ms +step:2461/3242 train_loss:3.4479 train_time:5195499ms step_avg:2119.75ms +step:2462/3242 train_loss:3.1169 train_time:5197613ms step_avg:2119.74ms +step:2463/3242 train_loss:3.4219 train_time:5199736ms step_avg:2119.75ms +step:2464/3242 train_loss:3.8876 train_time:5201864ms step_avg:2119.75ms +step:2465/3242 train_loss:3.4156 train_time:5203979ms step_avg:2119.75ms +step:2466/3242 train_loss:3.2838 train_time:5206092ms step_avg:2119.74ms +step:2467/3242 train_loss:3.2596 train_time:5208218ms step_avg:2119.75ms +step:2468/3242 train_loss:3.4595 train_time:5210340ms step_avg:2119.75ms +step:2469/3242 train_loss:3.5478 train_time:5212456ms step_avg:2119.75ms +step:2470/3242 train_loss:3.3263 train_time:5214577ms step_avg:2119.75ms +step:2471/3242 train_loss:3.2568 train_time:5216690ms step_avg:2119.74ms +step:2472/3242 train_loss:3.3472 train_time:5218808ms step_avg:2119.74ms +step:2473/3242 train_loss:3.2494 train_time:5220934ms step_avg:2119.75ms +step:2474/3242 train_loss:3.5385 train_time:5223048ms step_avg:2119.74ms +step:2475/3242 train_loss:3.4835 train_time:5225169ms step_avg:2119.74ms +step:2476/3242 train_loss:3.5472 train_time:5227286ms step_avg:2119.74ms +step:2477/3242 train_loss:3.5031 train_time:5229542ms step_avg:2119.80ms +step:2478/3242 train_loss:3.5808 train_time:5231658ms step_avg:2119.80ms +step:2479/3242 train_loss:3.5202 train_time:5233774ms step_avg:2119.80ms +step:2480/3242 train_loss:3.9725 train_time:5235895ms step_avg:2119.80ms +step:2481/3242 train_loss:3.2274 train_time:5238005ms step_avg:2119.79ms +step:2482/3242 train_loss:3.5796 train_time:5240120ms step_avg:2119.79ms +step:2483/3242 train_loss:3.7744 train_time:5242245ms step_avg:2119.79ms +step:2484/3242 train_loss:3.3179 train_time:5244365ms step_avg:2119.79ms +step:2485/3242 train_loss:3.1474 train_time:5246480ms step_avg:2119.79ms +step:2486/3242 train_loss:3.6100 train_time:5248600ms step_avg:2119.79ms +step:2487/3242 train_loss:3.3611 train_time:5250723ms step_avg:2119.79ms +step:2488/3242 train_loss:3.3325 train_time:5252835ms step_avg:2119.79ms +step:2489/3242 train_loss:3.6969 train_time:5254955ms step_avg:2119.79ms +step:2490/3242 train_loss:3.3205 train_time:5257075ms step_avg:2119.79ms +step:2491/3242 train_loss:2.6899 train_time:5259200ms step_avg:2119.79ms +step:2492/3242 train_loss:3.4621 train_time:5261317ms step_avg:2119.79ms +step:2493/3242 train_loss:3.2518 train_time:5263442ms step_avg:2119.79ms +step:2494/3242 train_loss:3.2283 train_time:5265559ms step_avg:2119.79ms +step:2495/3242 train_loss:3.3699 train_time:5267680ms step_avg:2119.79ms +step:2496/3242 train_loss:3.3494 train_time:5269799ms step_avg:2119.79ms +step:2497/3242 train_loss:3.9065 train_time:5271918ms step_avg:2119.79ms +step:2498/3242 train_loss:3.4392 train_time:5274035ms step_avg:2119.79ms +step:2499/3242 train_loss:3.3141 train_time:5276154ms step_avg:2119.79ms +step:2500/3242 train_loss:3.4247 train_time:5278274ms step_avg:2119.79ms +step:2500/3242 val_loss:3.3818 train_time:5278687ms step_avg:2119.95ms +step:2501/3242 train_loss:3.4147 train_time:5280406ms step_avg:2119.79ms +step:2502/3242 train_loss:3.3794 train_time:5282532ms step_avg:2119.80ms +step:2503/3242 train_loss:3.4002 train_time:5284656ms step_avg:2119.80ms +step:2504/3242 train_loss:3.4697 train_time:5286773ms step_avg:2119.80ms +step:2505/3242 train_loss:3.6745 train_time:5288894ms step_avg:2119.80ms +step:2506/3242 train_loss:3.5886 train_time:5291013ms step_avg:2119.80ms +step:2507/3242 train_loss:3.4909 train_time:5293131ms step_avg:2119.80ms +step:2508/3242 train_loss:3.5124 train_time:5295247ms step_avg:2119.79ms +step:2509/3242 train_loss:3.2094 train_time:5297369ms step_avg:2119.80ms +step:2510/3242 train_loss:3.5182 train_time:5299484ms step_avg:2119.79ms +step:2511/3242 train_loss:3.3315 train_time:5301610ms step_avg:2119.80ms +step:2512/3242 train_loss:3.4609 train_time:5303726ms step_avg:2119.79ms +step:2513/3242 train_loss:3.1942 train_time:5305848ms step_avg:2119.80ms +step:2514/3242 train_loss:3.5250 train_time:5307968ms step_avg:2119.80ms +step:2515/3242 train_loss:3.4938 train_time:5310080ms step_avg:2119.79ms +step:2516/3242 train_loss:3.3152 train_time:5312205ms step_avg:2119.79ms +step:2517/3242 train_loss:3.6068 train_time:5314323ms step_avg:2119.79ms +step:2518/3242 train_loss:3.3901 train_time:5316441ms step_avg:2119.79ms +step:2519/3242 train_loss:3.3904 train_time:5318561ms step_avg:2119.79ms +step:2520/3242 train_loss:3.3817 train_time:5320677ms step_avg:2119.79ms +step:2521/3242 train_loss:3.5348 train_time:5322797ms step_avg:2119.79ms +step:2522/3242 train_loss:3.4953 train_time:5324913ms step_avg:2119.79ms +step:2523/3242 train_loss:3.2790 train_time:5327033ms step_avg:2119.79ms +step:2524/3242 train_loss:3.3976 train_time:5329158ms step_avg:2119.79ms +step:2525/3242 train_loss:3.4042 train_time:5331275ms step_avg:2119.79ms +step:2526/3242 train_loss:3.5462 train_time:5333395ms step_avg:2119.79ms +step:2527/3242 train_loss:3.3490 train_time:5335519ms step_avg:2119.79ms +step:2528/3242 train_loss:3.0359 train_time:5337629ms step_avg:2119.79ms +step:2529/3242 train_loss:3.4010 train_time:5339747ms step_avg:2119.79ms +step:2530/3242 train_loss:3.2735 train_time:5341870ms step_avg:2119.79ms +step:2531/3242 train_loss:3.2738 train_time:5343991ms step_avg:2119.79ms +step:2532/3242 train_loss:3.3387 train_time:5346112ms step_avg:2119.79ms +step:2533/3242 train_loss:3.4198 train_time:5348236ms step_avg:2119.79ms +step:2534/3242 train_loss:3.2238 train_time:5350364ms step_avg:2119.80ms +step:2535/3242 train_loss:3.4973 train_time:5352482ms step_avg:2119.79ms +step:2536/3242 train_loss:3.3140 train_time:5354600ms step_avg:2119.79ms +step:2537/3242 train_loss:3.2099 train_time:5356724ms step_avg:2119.80ms +step:2538/3242 train_loss:3.4965 train_time:5358847ms step_avg:2119.80ms +step:2539/3242 train_loss:3.3607 train_time:5360960ms step_avg:2119.79ms +step:2540/3242 train_loss:3.2250 train_time:5363084ms step_avg:2119.80ms +step:2541/3242 train_loss:3.4169 train_time:5365200ms step_avg:2119.79ms +step:2542/3242 train_loss:3.5862 train_time:5367323ms step_avg:2119.80ms +step:2543/3242 train_loss:3.3122 train_time:5369438ms step_avg:2119.79ms +step:2544/3242 train_loss:3.6447 train_time:5371562ms step_avg:2119.80ms +step:2545/3242 train_loss:3.3307 train_time:5373682ms step_avg:2119.80ms +step:2546/3242 train_loss:3.3599 train_time:5375801ms step_avg:2119.80ms +step:2547/3242 train_loss:3.7806 train_time:5377915ms step_avg:2119.79ms +step:2548/3242 train_loss:3.3342 train_time:5380039ms step_avg:2119.79ms +step:2549/3242 train_loss:3.4748 train_time:5382156ms step_avg:2119.79ms +step:2550/3242 train_loss:3.3697 train_time:5384276ms step_avg:2119.79ms +step:2551/3242 train_loss:3.3612 train_time:5386393ms step_avg:2119.79ms +step:2552/3242 train_loss:3.3336 train_time:5388511ms step_avg:2119.79ms +step:2553/3242 train_loss:3.2172 train_time:5390633ms step_avg:2119.79ms +step:2554/3242 train_loss:3.3136 train_time:5392750ms step_avg:2119.79ms +step:2555/3242 train_loss:3.3399 train_time:5394871ms step_avg:2119.79ms +step:2556/3242 train_loss:3.2590 train_time:5396990ms step_avg:2119.79ms +step:2557/3242 train_loss:3.4409 train_time:5399107ms step_avg:2119.79ms +step:2558/3242 train_loss:3.3979 train_time:5401235ms step_avg:2119.79ms +step:2559/3242 train_loss:3.2610 train_time:5403349ms step_avg:2119.79ms +step:2560/3242 train_loss:3.5770 train_time:5405465ms step_avg:2119.79ms +step:2561/3242 train_loss:3.4677 train_time:5407586ms step_avg:2119.79ms +step:2562/3242 train_loss:3.5558 train_time:5409705ms step_avg:2119.79ms +step:2563/3242 train_loss:3.4757 train_time:5411831ms step_avg:2119.79ms +step:2564/3242 train_loss:3.3952 train_time:5413947ms step_avg:2119.79ms +step:2565/3242 train_loss:3.4538 train_time:5416068ms step_avg:2119.79ms +step:2566/3242 train_loss:3.5066 train_time:5418184ms step_avg:2119.79ms +step:2567/3242 train_loss:3.3411 train_time:5420298ms step_avg:2119.79ms +step:2568/3242 train_loss:3.3673 train_time:5422424ms step_avg:2119.79ms +step:2569/3242 train_loss:3.5033 train_time:5424544ms step_avg:2119.79ms +step:2570/3242 train_loss:3.2874 train_time:5426661ms step_avg:2119.79ms +step:2571/3242 train_loss:3.3749 train_time:5428784ms step_avg:2119.79ms +step:2572/3242 train_loss:3.4139 train_time:5430899ms step_avg:2119.79ms +step:2573/3242 train_loss:3.0946 train_time:5433017ms step_avg:2119.79ms +step:2574/3242 train_loss:3.3143 train_time:5435144ms step_avg:2119.79ms +step:2575/3242 train_loss:3.5833 train_time:5437259ms step_avg:2119.79ms +step:2576/3242 train_loss:3.2066 train_time:5439375ms step_avg:2119.79ms +step:2577/3242 train_loss:3.1474 train_time:5441495ms step_avg:2119.79ms +step:2578/3242 train_loss:3.2921 train_time:5443617ms step_avg:2119.79ms +step:2579/3242 train_loss:3.2833 train_time:5445734ms step_avg:2119.79ms +step:2580/3242 train_loss:3.4379 train_time:5447857ms step_avg:2119.79ms +step:2581/3242 train_loss:2.7539 train_time:5449977ms step_avg:2119.79ms +step:2582/3242 train_loss:3.2734 train_time:5452094ms step_avg:2119.79ms +step:2583/3242 train_loss:3.3257 train_time:5454212ms step_avg:2119.79ms +step:2584/3242 train_loss:3.2431 train_time:5456333ms step_avg:2119.79ms +step:2585/3242 train_loss:3.4543 train_time:5458455ms step_avg:2119.79ms +step:2586/3242 train_loss:3.2934 train_time:5460574ms step_avg:2119.79ms +step:2587/3242 train_loss:3.6321 train_time:5462694ms step_avg:2119.79ms +step:2588/3242 train_loss:3.1659 train_time:5464815ms step_avg:2119.79ms +step:2589/3242 train_loss:3.5476 train_time:5466933ms step_avg:2119.79ms +step:2590/3242 train_loss:3.4523 train_time:5469047ms step_avg:2119.79ms +step:2591/3242 train_loss:3.5580 train_time:5471160ms step_avg:2119.78ms +step:2592/3242 train_loss:3.5967 train_time:5473276ms step_avg:2119.78ms +step:2593/3242 train_loss:3.1862 train_time:5475399ms step_avg:2119.78ms +step:2594/3242 train_loss:3.1917 train_time:5477511ms step_avg:2119.78ms +step:2595/3242 train_loss:3.4463 train_time:5479623ms step_avg:2119.78ms +step:2596/3242 train_loss:3.1924 train_time:5481749ms step_avg:2119.78ms +step:2597/3242 train_loss:3.7293 train_time:5483864ms step_avg:2119.78ms +step:2598/3242 train_loss:3.4043 train_time:5485978ms step_avg:2119.78ms +step:2599/3242 train_loss:3.3721 train_time:5488099ms step_avg:2119.78ms +step:2600/3242 train_loss:3.2271 train_time:5490218ms step_avg:2119.78ms +step:2601/3242 train_loss:3.3074 train_time:5492345ms step_avg:2119.78ms +step:2602/3242 train_loss:3.4662 train_time:5494467ms step_avg:2119.78ms +step:2603/3242 train_loss:3.3469 train_time:5496585ms step_avg:2119.78ms +step:2604/3242 train_loss:3.3064 train_time:5498702ms step_avg:2119.78ms +step:2605/3242 train_loss:3.5953 train_time:5500816ms step_avg:2119.77ms +step:2606/3242 train_loss:3.2642 train_time:5502939ms step_avg:2119.78ms +step:2607/3242 train_loss:2.7624 train_time:5505057ms step_avg:2119.78ms +step:2608/3242 train_loss:3.3234 train_time:5507177ms step_avg:2119.78ms +step:2609/3242 train_loss:3.3395 train_time:5509299ms step_avg:2119.78ms +step:2610/3242 train_loss:3.5379 train_time:5511419ms step_avg:2119.78ms +step:2611/3242 train_loss:3.3394 train_time:5513538ms step_avg:2119.78ms +step:2612/3242 train_loss:3.4429 train_time:5515650ms step_avg:2119.77ms +step:2613/3242 train_loss:3.3500 train_time:5517764ms step_avg:2119.77ms +step:2614/3242 train_loss:3.4103 train_time:5519889ms step_avg:2119.77ms +step:2615/3242 train_loss:3.4041 train_time:5522012ms step_avg:2119.77ms +step:2616/3242 train_loss:3.4657 train_time:5524127ms step_avg:2119.77ms +step:2617/3242 train_loss:3.1694 train_time:5526245ms step_avg:2119.77ms +step:2618/3242 train_loss:3.6111 train_time:5528367ms step_avg:2119.77ms +step:2619/3242 train_loss:3.5714 train_time:5530479ms step_avg:2119.77ms +step:2620/3242 train_loss:3.2513 train_time:5532599ms step_avg:2119.77ms +step:2621/3242 train_loss:3.5364 train_time:5534716ms step_avg:2119.77ms +step:2622/3242 train_loss:3.3666 train_time:5536831ms step_avg:2119.77ms +step:2623/3242 train_loss:3.3293 train_time:5538950ms step_avg:2119.77ms +step:2624/3242 train_loss:3.3322 train_time:5541064ms step_avg:2119.76ms +step:2625/3242 train_loss:3.5324 train_time:5543185ms step_avg:2119.76ms +step:2625/3242 val_loss:3.3591 train_time:5543599ms step_avg:2119.92ms +step:2626/3242 train_loss:3.5944 train_time:5545316ms step_avg:2119.77ms +step:2627/3242 train_loss:3.7381 train_time:5547433ms step_avg:2119.77ms +step:2628/3242 train_loss:3.6102 train_time:5549547ms step_avg:2119.77ms +step:2629/3242 train_loss:3.4330 train_time:5551668ms step_avg:2119.77ms +step:2630/3242 train_loss:3.4446 train_time:5553784ms step_avg:2119.76ms +step:2631/3242 train_loss:3.1253 train_time:5555910ms step_avg:2119.77ms +step:2632/3242 train_loss:3.3672 train_time:5558029ms step_avg:2119.77ms +step:2633/3242 train_loss:3.1377 train_time:5560149ms step_avg:2119.77ms +step:2634/3242 train_loss:3.5488 train_time:5562265ms step_avg:2119.77ms +step:2635/3242 train_loss:3.5465 train_time:5564389ms step_avg:2119.77ms +step:2636/3242 train_loss:3.3629 train_time:5566504ms step_avg:2119.77ms +step:2637/3242 train_loss:3.5346 train_time:5568628ms step_avg:2119.77ms +step:2638/3242 train_loss:3.5535 train_time:5570748ms step_avg:2119.77ms +step:2639/3242 train_loss:3.3850 train_time:5572866ms step_avg:2119.77ms +step:2640/3242 train_loss:3.2145 train_time:5574984ms step_avg:2119.77ms +step:2641/3242 train_loss:3.5568 train_time:5577107ms step_avg:2119.77ms +step:2642/3242 train_loss:3.0494 train_time:5579227ms step_avg:2119.77ms +step:2643/3242 train_loss:3.4571 train_time:5581348ms step_avg:2119.77ms +step:2644/3242 train_loss:3.2921 train_time:5583466ms step_avg:2119.77ms +step:2645/3242 train_loss:2.9145 train_time:5585582ms step_avg:2119.77ms +step:2646/3242 train_loss:3.3237 train_time:5587698ms step_avg:2119.76ms +step:2647/3242 train_loss:3.3514 train_time:5589819ms step_avg:2119.76ms +step:2648/3242 train_loss:3.1341 train_time:5591944ms step_avg:2119.77ms +step:2649/3242 train_loss:3.2728 train_time:5594058ms step_avg:2119.76ms +step:2650/3242 train_loss:3.5836 train_time:5596178ms step_avg:2119.76ms +step:2651/3242 train_loss:3.3644 train_time:5598301ms step_avg:2119.77ms +step:2652/3242 train_loss:3.5077 train_time:5600417ms step_avg:2119.76ms +step:2653/3242 train_loss:3.4404 train_time:5602538ms step_avg:2119.76ms +step:2654/3242 train_loss:3.4557 train_time:5604655ms step_avg:2119.76ms +step:2655/3242 train_loss:3.1686 train_time:5606774ms step_avg:2119.76ms +step:2656/3242 train_loss:3.2994 train_time:5608898ms step_avg:2119.77ms +step:2657/3242 train_loss:3.3139 train_time:5611017ms step_avg:2119.76ms +step:2658/3242 train_loss:3.3062 train_time:5613134ms step_avg:2119.76ms +step:2659/3242 train_loss:3.2552 train_time:5615250ms step_avg:2119.76ms +step:2660/3242 train_loss:3.3566 train_time:5617374ms step_avg:2119.76ms +step:2661/3242 train_loss:3.3625 train_time:5619494ms step_avg:2119.76ms +step:2662/3242 train_loss:3.2118 train_time:5621609ms step_avg:2119.76ms +step:2663/3242 train_loss:3.0232 train_time:5623732ms step_avg:2119.76ms +step:2664/3242 train_loss:3.3309 train_time:5625841ms step_avg:2119.76ms +step:2665/3242 train_loss:3.3498 train_time:5627969ms step_avg:2119.76ms +step:2666/3242 train_loss:3.3166 train_time:5630087ms step_avg:2119.76ms +step:2667/3242 train_loss:3.2098 train_time:5632336ms step_avg:2119.81ms +step:2668/3242 train_loss:3.4284 train_time:5634460ms step_avg:2119.81ms +step:2669/3242 train_loss:3.4173 train_time:5636580ms step_avg:2119.81ms +step:2670/3242 train_loss:3.3304 train_time:5638698ms step_avg:2119.81ms +step:2671/3242 train_loss:3.2465 train_time:5640834ms step_avg:2119.82ms +step:2672/3242 train_loss:3.5025 train_time:5642951ms step_avg:2119.82ms +step:2673/3242 train_loss:3.3231 train_time:5645064ms step_avg:2119.81ms +step:2674/3242 train_loss:3.1600 train_time:5647179ms step_avg:2119.81ms +step:2675/3242 train_loss:3.4602 train_time:5649301ms step_avg:2119.81ms +step:2676/3242 train_loss:3.2840 train_time:5651420ms step_avg:2119.81ms +step:2677/3242 train_loss:3.4460 train_time:5653540ms step_avg:2119.81ms +step:2678/3242 train_loss:3.2974 train_time:5655665ms step_avg:2119.81ms +step:2679/3242 train_loss:3.2504 train_time:5657790ms step_avg:2119.82ms +step:2680/3242 train_loss:3.3553 train_time:5659905ms step_avg:2119.81ms +step:2681/3242 train_loss:3.1711 train_time:5662014ms step_avg:2119.81ms +step:2682/3242 train_loss:3.6416 train_time:5664138ms step_avg:2119.81ms +step:2683/3242 train_loss:3.1858 train_time:5666260ms step_avg:2119.81ms +step:2684/3242 train_loss:3.2146 train_time:5668375ms step_avg:2119.81ms +step:2685/3242 train_loss:3.9691 train_time:5670498ms step_avg:2119.81ms +step:2686/3242 train_loss:3.5317 train_time:5672611ms step_avg:2119.81ms +step:2687/3242 train_loss:3.2326 train_time:5674731ms step_avg:2119.81ms +step:2688/3242 train_loss:3.3651 train_time:5676855ms step_avg:2119.81ms +step:2689/3242 train_loss:3.4422 train_time:5678969ms step_avg:2119.81ms +step:2690/3242 train_loss:3.4255 train_time:5681090ms step_avg:2119.81ms +step:2691/3242 train_loss:3.4213 train_time:5683212ms step_avg:2119.81ms +step:2692/3242 train_loss:3.5566 train_time:5685330ms step_avg:2119.81ms +step:2693/3242 train_loss:3.3279 train_time:5687451ms step_avg:2119.81ms +step:2694/3242 train_loss:3.1941 train_time:5689566ms step_avg:2119.81ms +step:2695/3242 train_loss:3.6920 train_time:5691689ms step_avg:2119.81ms +step:2696/3242 train_loss:3.2931 train_time:5693809ms step_avg:2119.81ms +step:2697/3242 train_loss:3.3598 train_time:5695929ms step_avg:2119.81ms +step:2698/3242 train_loss:3.3478 train_time:5698047ms step_avg:2119.81ms +step:2699/3242 train_loss:3.3328 train_time:5700161ms step_avg:2119.81ms +step:2700/3242 train_loss:3.1623 train_time:5702284ms step_avg:2119.81ms +step:2701/3242 train_loss:3.2491 train_time:5704403ms step_avg:2119.81ms +step:2702/3242 train_loss:3.2573 train_time:5706522ms step_avg:2119.81ms +step:2703/3242 train_loss:3.3734 train_time:5708643ms step_avg:2119.81ms +step:2704/3242 train_loss:3.3853 train_time:5710766ms step_avg:2119.81ms +step:2705/3242 train_loss:3.3590 train_time:5712880ms step_avg:2119.81ms +step:2706/3242 train_loss:3.7085 train_time:5714997ms step_avg:2119.81ms +step:2707/3242 train_loss:3.3417 train_time:5717111ms step_avg:2119.80ms +step:2708/3242 train_loss:3.4201 train_time:5719225ms step_avg:2119.80ms +step:2709/3242 train_loss:3.2609 train_time:5721345ms step_avg:2119.80ms +step:2710/3242 train_loss:3.2755 train_time:5723464ms step_avg:2119.80ms +step:2711/3242 train_loss:3.3978 train_time:5725576ms step_avg:2119.80ms +step:2712/3242 train_loss:3.2871 train_time:5727694ms step_avg:2119.80ms +step:2713/3242 train_loss:3.5066 train_time:5729820ms step_avg:2119.80ms +step:2714/3242 train_loss:3.3273 train_time:5731936ms step_avg:2119.80ms +step:2715/3242 train_loss:3.1108 train_time:5734060ms step_avg:2119.80ms +step:2716/3242 train_loss:3.4645 train_time:5736179ms step_avg:2119.80ms +step:2717/3242 train_loss:3.5783 train_time:5738296ms step_avg:2119.80ms +step:2718/3242 train_loss:3.5315 train_time:5740409ms step_avg:2119.80ms +step:2719/3242 train_loss:3.5559 train_time:5742532ms step_avg:2119.80ms +step:2720/3242 train_loss:3.4227 train_time:5744649ms step_avg:2119.80ms +step:2721/3242 train_loss:3.3299 train_time:5746770ms step_avg:2119.80ms +step:2722/3242 train_loss:3.2123 train_time:5748896ms step_avg:2119.80ms +step:2723/3242 train_loss:3.1227 train_time:5751012ms step_avg:2119.80ms +step:2724/3242 train_loss:3.2683 train_time:5753130ms step_avg:2119.80ms +step:2725/3242 train_loss:4.0355 train_time:5755248ms step_avg:2119.80ms +step:2726/3242 train_loss:4.2679 train_time:5757366ms step_avg:2119.80ms +step:2727/3242 train_loss:3.2432 train_time:5759490ms step_avg:2119.80ms +step:2728/3242 train_loss:3.3761 train_time:5761610ms step_avg:2119.80ms +step:2729/3242 train_loss:3.3419 train_time:5763720ms step_avg:2119.79ms +step:2730/3242 train_loss:3.3791 train_time:5765838ms step_avg:2119.79ms +step:2731/3242 train_loss:3.4337 train_time:5767960ms step_avg:2119.79ms +step:2732/3242 train_loss:3.2469 train_time:5770081ms step_avg:2119.79ms +step:2733/3242 train_loss:3.3649 train_time:5772197ms step_avg:2119.79ms +step:2734/3242 train_loss:3.3464 train_time:5774316ms step_avg:2119.79ms +step:2735/3242 train_loss:3.2338 train_time:5776436ms step_avg:2119.79ms +step:2736/3242 train_loss:3.3051 train_time:5778558ms step_avg:2119.79ms +step:2737/3242 train_loss:3.2963 train_time:5780679ms step_avg:2119.79ms +step:2738/3242 train_loss:3.2521 train_time:5782794ms step_avg:2119.79ms +step:2739/3242 train_loss:3.3969 train_time:5784913ms step_avg:2119.79ms +step:2740/3242 train_loss:3.7157 train_time:5787037ms step_avg:2119.79ms +step:2741/3242 train_loss:3.2414 train_time:5789152ms step_avg:2119.79ms +step:2742/3242 train_loss:3.3609 train_time:5791267ms step_avg:2119.79ms +step:2743/3242 train_loss:3.1502 train_time:5793385ms step_avg:2119.79ms +step:2744/3242 train_loss:3.6301 train_time:5795507ms step_avg:2119.79ms +step:2745/3242 train_loss:3.2407 train_time:5797629ms step_avg:2119.79ms +step:2746/3242 train_loss:3.3231 train_time:5799740ms step_avg:2119.79ms +step:2747/3242 train_loss:3.3985 train_time:5801857ms step_avg:2119.79ms +step:2748/3242 train_loss:3.3098 train_time:5803971ms step_avg:2119.78ms +step:2749/3242 train_loss:3.2774 train_time:5806093ms step_avg:2119.79ms +step:2750/3242 train_loss:3.4929 train_time:5808210ms step_avg:2119.78ms +step:2750/3242 val_loss:3.3382 train_time:5808622ms step_avg:2119.94ms +step:2751/3242 train_loss:3.3563 train_time:5810336ms step_avg:2119.79ms +step:2752/3242 train_loss:3.1683 train_time:5812450ms step_avg:2119.78ms +step:2753/3242 train_loss:3.3440 train_time:5814569ms step_avg:2119.78ms +step:2754/3242 train_loss:3.0866 train_time:5816686ms step_avg:2119.78ms +step:2755/3242 train_loss:3.5071 train_time:5818812ms step_avg:2119.79ms +step:2756/3242 train_loss:3.5356 train_time:5820926ms step_avg:2119.78ms +step:2757/3242 train_loss:3.3653 train_time:5823049ms step_avg:2119.78ms +step:2758/3242 train_loss:3.1052 train_time:5825165ms step_avg:2119.78ms +step:2759/3242 train_loss:3.2979 train_time:5827275ms step_avg:2119.78ms +step:2760/3242 train_loss:3.4185 train_time:5829392ms step_avg:2119.78ms +step:2761/3242 train_loss:3.3043 train_time:5831510ms step_avg:2119.78ms +step:2762/3242 train_loss:3.1873 train_time:5833624ms step_avg:2119.78ms +step:2763/3242 train_loss:3.2711 train_time:5835744ms step_avg:2119.78ms +step:2764/3242 train_loss:3.4488 train_time:5837863ms step_avg:2119.78ms +step:2765/3242 train_loss:3.1292 train_time:5839985ms step_avg:2119.78ms +step:2766/3242 train_loss:3.6651 train_time:5842103ms step_avg:2119.78ms +step:2767/3242 train_loss:3.1282 train_time:5844222ms step_avg:2119.78ms +step:2768/3242 train_loss:3.3620 train_time:5846342ms step_avg:2119.78ms +step:2769/3242 train_loss:3.3332 train_time:5848457ms step_avg:2119.77ms +step:2770/3242 train_loss:3.3339 train_time:5850581ms step_avg:2119.78ms +step:2771/3242 train_loss:3.1505 train_time:5852695ms step_avg:2119.77ms +step:2772/3242 train_loss:3.4535 train_time:5854816ms step_avg:2119.77ms +step:2773/3242 train_loss:3.2274 train_time:5856940ms step_avg:2119.78ms +step:2774/3242 train_loss:3.2750 train_time:5859053ms step_avg:2119.77ms +step:2775/3242 train_loss:3.2748 train_time:5861180ms step_avg:2119.78ms +step:2776/3242 train_loss:3.3396 train_time:5863302ms step_avg:2119.78ms +step:2777/3242 train_loss:3.2455 train_time:5865423ms step_avg:2119.78ms +step:2778/3242 train_loss:3.4131 train_time:5867532ms step_avg:2119.77ms +step:2779/3242 train_loss:3.4299 train_time:5869655ms step_avg:2119.77ms +step:2780/3242 train_loss:3.3615 train_time:5871773ms step_avg:2119.77ms +step:2781/3242 train_loss:3.3939 train_time:5873887ms step_avg:2119.77ms +step:2782/3242 train_loss:3.4685 train_time:5876012ms step_avg:2119.77ms +step:2783/3242 train_loss:3.3451 train_time:5878125ms step_avg:2119.77ms +step:2784/3242 train_loss:3.6689 train_time:5880250ms step_avg:2119.77ms +step:2785/3242 train_loss:2.9960 train_time:5882369ms step_avg:2119.77ms +step:2786/3242 train_loss:3.2932 train_time:5884491ms step_avg:2119.77ms +step:2787/3242 train_loss:3.3841 train_time:5886611ms step_avg:2119.77ms +step:2788/3242 train_loss:3.2215 train_time:5888724ms step_avg:2119.77ms +step:2789/3242 train_loss:3.2694 train_time:5890845ms step_avg:2119.77ms +step:2790/3242 train_loss:3.2918 train_time:5892961ms step_avg:2119.77ms +step:2791/3242 train_loss:3.3107 train_time:5895087ms step_avg:2119.77ms +step:2792/3242 train_loss:3.4870 train_time:5897208ms step_avg:2119.77ms +step:2793/3242 train_loss:3.2279 train_time:5899321ms step_avg:2119.77ms +step:2794/3242 train_loss:3.3718 train_time:5901441ms step_avg:2119.77ms +step:2795/3242 train_loss:3.3924 train_time:5903556ms step_avg:2119.77ms +step:2796/3242 train_loss:3.7596 train_time:5905685ms step_avg:2119.77ms +step:2797/3242 train_loss:3.3330 train_time:5907805ms step_avg:2119.77ms +step:2798/3242 train_loss:3.2704 train_time:5909919ms step_avg:2119.77ms +step:2799/3242 train_loss:3.2872 train_time:5912036ms step_avg:2119.77ms +step:2800/3242 train_loss:3.4041 train_time:5914159ms step_avg:2119.77ms +step:2801/3242 train_loss:3.2599 train_time:5916277ms step_avg:2119.77ms +step:2802/3242 train_loss:3.0748 train_time:5918397ms step_avg:2119.77ms +step:2803/3242 train_loss:3.4697 train_time:5920519ms step_avg:2119.77ms +step:2804/3242 train_loss:3.3954 train_time:5922635ms step_avg:2119.77ms +step:2805/3242 train_loss:3.3316 train_time:5924752ms step_avg:2119.77ms +step:2806/3242 train_loss:3.2538 train_time:5926873ms step_avg:2119.77ms +step:2807/3242 train_loss:3.5196 train_time:5928995ms step_avg:2119.77ms +step:2808/3242 train_loss:3.2324 train_time:5931116ms step_avg:2119.77ms +step:2809/3242 train_loss:3.4950 train_time:5933245ms step_avg:2119.77ms +step:2810/3242 train_loss:3.3470 train_time:5935360ms step_avg:2119.77ms +step:2811/3242 train_loss:3.5355 train_time:5937480ms step_avg:2119.77ms +step:2812/3242 train_loss:3.0544 train_time:5939604ms step_avg:2119.77ms +step:2813/3242 train_loss:3.3896 train_time:5941719ms step_avg:2119.77ms +step:2814/3242 train_loss:3.6650 train_time:5943841ms step_avg:2119.77ms +step:2815/3242 train_loss:3.6735 train_time:5945955ms step_avg:2119.77ms +step:2816/3242 train_loss:3.2790 train_time:5948075ms step_avg:2119.77ms +step:2817/3242 train_loss:3.2713 train_time:5950193ms step_avg:2119.77ms +step:2818/3242 train_loss:3.5066 train_time:5952315ms step_avg:2119.77ms +step:2819/3242 train_loss:3.3986 train_time:5954429ms step_avg:2119.77ms +step:2820/3242 train_loss:3.5911 train_time:5956547ms step_avg:2119.77ms +step:2821/3242 train_loss:3.4309 train_time:5958664ms step_avg:2119.77ms +step:2822/3242 train_loss:3.3984 train_time:5960775ms step_avg:2119.76ms +step:2823/3242 train_loss:3.3446 train_time:5962891ms step_avg:2119.76ms +step:2824/3242 train_loss:3.3862 train_time:5965018ms step_avg:2119.76ms +step:2825/3242 train_loss:3.5183 train_time:5967136ms step_avg:2119.76ms +step:2826/3242 train_loss:3.2345 train_time:5969257ms step_avg:2119.76ms +step:2827/3242 train_loss:3.1691 train_time:5971373ms step_avg:2119.76ms +step:2828/3242 train_loss:3.3255 train_time:5973489ms step_avg:2119.76ms +step:2829/3242 train_loss:3.2482 train_time:5975611ms step_avg:2119.76ms +step:2830/3242 train_loss:3.3628 train_time:5977729ms step_avg:2119.76ms +step:2831/3242 train_loss:3.2587 train_time:5979851ms step_avg:2119.76ms +step:2832/3242 train_loss:3.3023 train_time:5981974ms step_avg:2119.76ms +step:2833/3242 train_loss:3.4771 train_time:5984085ms step_avg:2119.76ms +step:2834/3242 train_loss:3.2517 train_time:5986207ms step_avg:2119.76ms +step:2835/3242 train_loss:3.1804 train_time:5988323ms step_avg:2119.76ms +step:2836/3242 train_loss:3.7603 train_time:5990442ms step_avg:2119.76ms +step:2837/3242 train_loss:3.3705 train_time:5992565ms step_avg:2119.76ms +step:2838/3242 train_loss:3.7688 train_time:5994680ms step_avg:2119.76ms +step:2839/3242 train_loss:3.2815 train_time:5996798ms step_avg:2119.76ms +step:2840/3242 train_loss:3.0609 train_time:5998913ms step_avg:2119.76ms +step:2841/3242 train_loss:3.2857 train_time:6001031ms step_avg:2119.76ms +step:2842/3242 train_loss:3.0669 train_time:6003145ms step_avg:2119.75ms +step:2843/3242 train_loss:2.9614 train_time:6005258ms step_avg:2119.75ms +step:2844/3242 train_loss:3.3177 train_time:6007378ms step_avg:2119.75ms +step:2845/3242 train_loss:3.4875 train_time:6009497ms step_avg:2119.75ms +step:2846/3242 train_loss:3.0735 train_time:6011618ms step_avg:2119.75ms +step:2847/3242 train_loss:3.2884 train_time:6013733ms step_avg:2119.75ms +step:2848/3242 train_loss:3.4004 train_time:6015859ms step_avg:2119.75ms +step:2849/3242 train_loss:3.2786 train_time:6017974ms step_avg:2119.75ms +step:2850/3242 train_loss:3.6624 train_time:6020099ms step_avg:2119.75ms +step:2851/3242 train_loss:3.3932 train_time:6022221ms step_avg:2119.75ms +step:2852/3242 train_loss:3.2952 train_time:6024337ms step_avg:2119.75ms +step:2853/3242 train_loss:3.1740 train_time:6026462ms step_avg:2119.75ms +step:2854/3242 train_loss:3.4881 train_time:6028578ms step_avg:2119.75ms +step:2855/3242 train_loss:3.5567 train_time:6030693ms step_avg:2119.75ms +step:2856/3242 train_loss:3.2968 train_time:6032810ms step_avg:2119.75ms +step:2857/3242 train_loss:3.2442 train_time:6034934ms step_avg:2119.75ms +step:2858/3242 train_loss:3.2125 train_time:6037187ms step_avg:2119.80ms +step:2859/3242 train_loss:3.2651 train_time:6039294ms step_avg:2119.79ms +step:2860/3242 train_loss:3.3710 train_time:6041417ms step_avg:2119.80ms +step:2861/3242 train_loss:3.2825 train_time:6043537ms step_avg:2119.80ms +step:2862/3242 train_loss:3.3492 train_time:6045652ms step_avg:2119.79ms +step:2863/3242 train_loss:3.2442 train_time:6047766ms step_avg:2119.79ms +step:2864/3242 train_loss:3.4141 train_time:6049887ms step_avg:2119.79ms +step:2865/3242 train_loss:2.9505 train_time:6052008ms step_avg:2119.79ms +step:2866/3242 train_loss:3.4178 train_time:6054128ms step_avg:2119.79ms +step:2867/3242 train_loss:3.2724 train_time:6056244ms step_avg:2119.79ms +step:2868/3242 train_loss:3.5048 train_time:6058364ms step_avg:2119.79ms +step:2869/3242 train_loss:3.2459 train_time:6060485ms step_avg:2119.79ms +step:2870/3242 train_loss:3.5320 train_time:6062602ms step_avg:2119.79ms +step:2871/3242 train_loss:3.4724 train_time:6064727ms step_avg:2119.79ms +step:2872/3242 train_loss:3.4740 train_time:6066850ms step_avg:2119.79ms +step:2873/3242 train_loss:3.4703 train_time:6068965ms step_avg:2119.79ms +step:2874/3242 train_loss:3.4484 train_time:6071083ms step_avg:2119.79ms +step:2875/3242 train_loss:3.3000 train_time:6073198ms step_avg:2119.79ms +step:2875/3242 val_loss:3.3181 train_time:6073611ms step_avg:2119.93ms +step:2876/3242 train_loss:3.2756 train_time:6075324ms step_avg:2119.79ms +step:2877/3242 train_loss:3.4287 train_time:6077446ms step_avg:2119.79ms +step:2878/3242 train_loss:3.3138 train_time:6079554ms step_avg:2119.79ms +step:2879/3242 train_loss:3.3499 train_time:6081668ms step_avg:2119.79ms +step:2880/3242 train_loss:3.0601 train_time:6083794ms step_avg:2119.79ms +step:2881/3242 train_loss:3.1318 train_time:6085910ms step_avg:2119.79ms +step:2882/3242 train_loss:3.1334 train_time:6088028ms step_avg:2119.79ms +step:2883/3242 train_loss:3.2360 train_time:6090144ms step_avg:2119.79ms +step:2884/3242 train_loss:3.1943 train_time:6092272ms step_avg:2119.79ms +step:2885/3242 train_loss:3.4012 train_time:6094393ms step_avg:2119.79ms +step:2886/3242 train_loss:3.3008 train_time:6096502ms step_avg:2119.79ms +step:2887/3242 train_loss:3.3891 train_time:6098618ms step_avg:2119.78ms +step:2888/3242 train_loss:3.3827 train_time:6100738ms step_avg:2119.78ms +step:2889/3242 train_loss:3.2798 train_time:6102854ms step_avg:2119.78ms +step:2890/3242 train_loss:3.4059 train_time:6104968ms step_avg:2119.78ms +step:2891/3242 train_loss:3.2695 train_time:6107077ms step_avg:2119.78ms +step:2892/3242 train_loss:3.3054 train_time:6109198ms step_avg:2119.78ms +step:2893/3242 train_loss:3.2896 train_time:6111317ms step_avg:2119.78ms +step:2894/3242 train_loss:3.2487 train_time:6113436ms step_avg:2119.78ms +step:2895/3242 train_loss:3.4751 train_time:6115556ms step_avg:2119.78ms +step:2896/3242 train_loss:3.6082 train_time:6117679ms step_avg:2119.78ms +step:2897/3242 train_loss:3.1609 train_time:6119799ms step_avg:2119.78ms +step:2898/3242 train_loss:3.3134 train_time:6121924ms step_avg:2119.78ms +step:2899/3242 train_loss:3.3128 train_time:6124045ms step_avg:2119.78ms +step:2900/3242 train_loss:3.2186 train_time:6126154ms step_avg:2119.78ms +step:2901/3242 train_loss:3.3696 train_time:6128275ms step_avg:2119.78ms +step:2902/3242 train_loss:3.3050 train_time:6130395ms step_avg:2119.78ms +step:2903/3242 train_loss:3.5851 train_time:6132522ms step_avg:2119.78ms +step:2904/3242 train_loss:3.1864 train_time:6134641ms step_avg:2119.78ms +step:2905/3242 train_loss:3.4421 train_time:6136763ms step_avg:2119.78ms +step:2906/3242 train_loss:3.0832 train_time:6138870ms step_avg:2119.78ms +step:2907/3242 train_loss:3.4780 train_time:6140991ms step_avg:2119.78ms +step:2908/3242 train_loss:3.4978 train_time:6143105ms step_avg:2119.77ms +step:2909/3242 train_loss:3.1690 train_time:6145224ms step_avg:2119.77ms +step:2910/3242 train_loss:3.3113 train_time:6147345ms step_avg:2119.77ms +step:2911/3242 train_loss:3.2978 train_time:6149468ms step_avg:2119.78ms +step:2912/3242 train_loss:3.4161 train_time:6151593ms step_avg:2119.78ms +step:2913/3242 train_loss:3.1410 train_time:6153706ms step_avg:2119.77ms +step:2914/3242 train_loss:3.3610 train_time:6155823ms step_avg:2119.77ms +step:2915/3242 train_loss:3.1954 train_time:6157946ms step_avg:2119.77ms +step:2916/3242 train_loss:3.4185 train_time:6160068ms step_avg:2119.78ms +step:2917/3242 train_loss:2.9906 train_time:6162192ms step_avg:2119.78ms +step:2918/3242 train_loss:3.4247 train_time:6164299ms step_avg:2119.77ms +step:2919/3242 train_loss:3.3535 train_time:6166421ms step_avg:2119.77ms +step:2920/3242 train_loss:3.1851 train_time:6168539ms step_avg:2119.77ms +step:2921/3242 train_loss:3.3948 train_time:6170657ms step_avg:2119.77ms +step:2922/3242 train_loss:3.2337 train_time:6172778ms step_avg:2119.77ms +step:2923/3242 train_loss:3.3096 train_time:6174894ms step_avg:2119.77ms +step:2924/3242 train_loss:3.2365 train_time:6177013ms step_avg:2119.77ms +step:2925/3242 train_loss:3.3470 train_time:6179132ms step_avg:2119.77ms +step:2926/3242 train_loss:3.3529 train_time:6181250ms step_avg:2119.77ms +step:2927/3242 train_loss:3.2265 train_time:6183374ms step_avg:2119.77ms +step:2928/3242 train_loss:3.2512 train_time:6185487ms step_avg:2119.77ms +step:2929/3242 train_loss:3.3691 train_time:6187603ms step_avg:2119.77ms +step:2930/3242 train_loss:3.2340 train_time:6189719ms step_avg:2119.77ms +step:2931/3242 train_loss:3.3486 train_time:6191837ms step_avg:2119.77ms +step:2932/3242 train_loss:3.4340 train_time:6193961ms step_avg:2119.77ms +step:2933/3242 train_loss:3.3363 train_time:6196076ms step_avg:2119.77ms +step:2934/3242 train_loss:3.5248 train_time:6198197ms step_avg:2119.77ms +step:2935/3242 train_loss:3.3262 train_time:6200312ms step_avg:2119.76ms +step:2936/3242 train_loss:3.2616 train_time:6202431ms step_avg:2119.76ms +step:2937/3242 train_loss:3.4063 train_time:6204550ms step_avg:2119.76ms +step:2938/3242 train_loss:3.2609 train_time:6206665ms step_avg:2119.76ms +step:2939/3242 train_loss:3.8417 train_time:6208783ms step_avg:2119.76ms +step:2940/3242 train_loss:3.4407 train_time:6210905ms step_avg:2119.76ms +step:2941/3242 train_loss:3.3522 train_time:6213026ms step_avg:2119.76ms +step:2942/3242 train_loss:3.3714 train_time:6215142ms step_avg:2119.76ms +step:2943/3242 train_loss:3.1978 train_time:6217268ms step_avg:2119.76ms +step:2944/3242 train_loss:3.2651 train_time:6219387ms step_avg:2119.76ms +step:2945/3242 train_loss:3.3982 train_time:6221508ms step_avg:2119.76ms +step:2946/3242 train_loss:3.2032 train_time:6223625ms step_avg:2119.76ms +step:2947/3242 train_loss:3.2238 train_time:6225762ms step_avg:2119.77ms +step:2948/3242 train_loss:3.4100 train_time:6227881ms step_avg:2119.77ms +step:2949/3242 train_loss:3.2938 train_time:6229991ms step_avg:2119.77ms +step:2950/3242 train_loss:3.0262 train_time:6232114ms step_avg:2119.77ms +step:2951/3242 train_loss:3.2638 train_time:6234240ms step_avg:2119.77ms +step:2952/3242 train_loss:3.3472 train_time:6236356ms step_avg:2119.77ms +step:2953/3242 train_loss:3.3733 train_time:6238475ms step_avg:2119.77ms +step:2954/3242 train_loss:3.3392 train_time:6240593ms step_avg:2119.77ms +step:2955/3242 train_loss:3.3497 train_time:6242709ms step_avg:2119.77ms +step:2956/3242 train_loss:3.3606 train_time:6244827ms step_avg:2119.76ms +step:2957/3242 train_loss:3.3615 train_time:6246942ms step_avg:2119.76ms +step:2958/3242 train_loss:3.2681 train_time:6249067ms step_avg:2119.76ms +step:2959/3242 train_loss:3.2512 train_time:6251184ms step_avg:2119.76ms +step:2960/3242 train_loss:3.0898 train_time:6253304ms step_avg:2119.76ms +step:2961/3242 train_loss:3.3322 train_time:6255422ms step_avg:2119.76ms +step:2962/3242 train_loss:3.2159 train_time:6257542ms step_avg:2119.76ms +step:2963/3242 train_loss:3.3699 train_time:6259662ms step_avg:2119.76ms +step:2964/3242 train_loss:3.4231 train_time:6261785ms step_avg:2119.76ms +step:2965/3242 train_loss:3.2943 train_time:6263899ms step_avg:2119.76ms +step:2966/3242 train_loss:3.5520 train_time:6266021ms step_avg:2119.76ms +step:2967/3242 train_loss:3.2582 train_time:6268141ms step_avg:2119.76ms +step:2968/3242 train_loss:3.4697 train_time:6270261ms step_avg:2119.76ms +step:2969/3242 train_loss:3.2475 train_time:6272383ms step_avg:2119.76ms +step:2970/3242 train_loss:3.2196 train_time:6274509ms step_avg:2119.77ms +step:2971/3242 train_loss:3.1198 train_time:6276619ms step_avg:2119.76ms +step:2972/3242 train_loss:3.4165 train_time:6278745ms step_avg:2119.77ms +step:2973/3242 train_loss:3.1148 train_time:6280860ms step_avg:2119.76ms +step:2974/3242 train_loss:3.9623 train_time:6282979ms step_avg:2119.76ms +step:2975/3242 train_loss:3.2887 train_time:6285098ms step_avg:2119.76ms +step:2976/3242 train_loss:3.1509 train_time:6287220ms step_avg:2119.76ms +step:2977/3242 train_loss:3.4488 train_time:6289335ms step_avg:2119.76ms +step:2978/3242 train_loss:3.4397 train_time:6291467ms step_avg:2119.77ms +step:2979/3242 train_loss:3.0679 train_time:6293583ms step_avg:2119.77ms +step:2980/3242 train_loss:3.1608 train_time:6295698ms step_avg:2119.76ms +step:2981/3242 train_loss:3.2256 train_time:6297826ms step_avg:2119.77ms +step:2982/3242 train_loss:3.2498 train_time:6299936ms step_avg:2119.76ms +step:2983/3242 train_loss:2.9359 train_time:6302060ms step_avg:2119.76ms +step:2984/3242 train_loss:3.5974 train_time:6304172ms step_avg:2119.76ms +step:2985/3242 train_loss:3.2125 train_time:6306294ms step_avg:2119.76ms +step:2986/3242 train_loss:3.5671 train_time:6308422ms step_avg:2119.77ms +step:2987/3242 train_loss:3.3421 train_time:6310539ms step_avg:2119.76ms +step:2988/3242 train_loss:3.6025 train_time:6312652ms step_avg:2119.76ms +step:2989/3242 train_loss:3.4006 train_time:6314771ms step_avg:2119.76ms +step:2990/3242 train_loss:3.3642 train_time:6316895ms step_avg:2119.76ms +step:2991/3242 train_loss:3.0409 train_time:6319007ms step_avg:2119.76ms +step:2992/3242 train_loss:3.5225 train_time:6321124ms step_avg:2119.76ms +step:2993/3242 train_loss:3.2448 train_time:6323244ms step_avg:2119.76ms +step:2994/3242 train_loss:3.5730 train_time:6325354ms step_avg:2119.76ms +step:2995/3242 train_loss:3.1875 train_time:6327470ms step_avg:2119.76ms +step:2996/3242 train_loss:3.4517 train_time:6329590ms step_avg:2119.76ms +step:2997/3242 train_loss:3.3433 train_time:6331706ms step_avg:2119.75ms +step:2998/3242 train_loss:3.2675 train_time:6333824ms step_avg:2119.75ms +step:2999/3242 train_loss:3.1222 train_time:6335940ms step_avg:2119.75ms +step:3000/3242 train_loss:3.8397 train_time:6338064ms step_avg:2119.75ms +step:3000/3242 val_loss:3.3002 train_time:6338477ms step_avg:2119.89ms +step:3001/3242 train_loss:3.0700 train_time:6340192ms step_avg:2119.76ms +step:3002/3242 train_loss:3.2138 train_time:6342301ms step_avg:2119.75ms +step:3003/3242 train_loss:2.9899 train_time:6344417ms step_avg:2119.75ms +step:3004/3242 train_loss:3.2510 train_time:6346541ms step_avg:2119.75ms +step:3005/3242 train_loss:3.3430 train_time:6348656ms step_avg:2119.75ms +step:3006/3242 train_loss:3.1821 train_time:6350780ms step_avg:2119.75ms +step:3007/3242 train_loss:3.4596 train_time:6352901ms step_avg:2119.75ms +step:3008/3242 train_loss:3.1350 train_time:6355012ms step_avg:2119.75ms +step:3009/3242 train_loss:3.2444 train_time:6357136ms step_avg:2119.75ms +step:3010/3242 train_loss:3.1102 train_time:6359257ms step_avg:2119.75ms +step:3011/3242 train_loss:3.3098 train_time:6361378ms step_avg:2119.75ms +step:3012/3242 train_loss:3.3694 train_time:6363493ms step_avg:2119.75ms +step:3013/3242 train_loss:3.2723 train_time:6365610ms step_avg:2119.75ms +step:3014/3242 train_loss:2.9779 train_time:6367733ms step_avg:2119.75ms +step:3015/3242 train_loss:3.4942 train_time:6369855ms step_avg:2119.75ms +step:3016/3242 train_loss:3.4860 train_time:6371975ms step_avg:2119.75ms +step:3017/3242 train_loss:3.3694 train_time:6374085ms step_avg:2119.75ms +step:3018/3242 train_loss:3.1293 train_time:6376212ms step_avg:2119.75ms +step:3019/3242 train_loss:3.0690 train_time:6378327ms step_avg:2119.75ms +step:3020/3242 train_loss:3.1977 train_time:6380445ms step_avg:2119.75ms +step:3021/3242 train_loss:3.3916 train_time:6382571ms step_avg:2119.75ms +step:3022/3242 train_loss:3.1974 train_time:6384696ms step_avg:2119.75ms +step:3023/3242 train_loss:3.1839 train_time:6386812ms step_avg:2119.75ms +step:3024/3242 train_loss:3.4033 train_time:6388923ms step_avg:2119.75ms +step:3025/3242 train_loss:3.5668 train_time:6391041ms step_avg:2119.75ms +step:3026/3242 train_loss:3.2817 train_time:6393163ms step_avg:2119.75ms +step:3027/3242 train_loss:3.7684 train_time:6395288ms step_avg:2119.75ms +step:3028/3242 train_loss:3.2211 train_time:6397398ms step_avg:2119.75ms +step:3029/3242 train_loss:3.0785 train_time:6399521ms step_avg:2119.75ms +step:3030/3242 train_loss:3.1225 train_time:6401642ms step_avg:2119.75ms +step:3031/3242 train_loss:3.6003 train_time:6403764ms step_avg:2119.75ms +step:3032/3242 train_loss:3.2711 train_time:6405880ms step_avg:2119.75ms +step:3033/3242 train_loss:3.5171 train_time:6408003ms step_avg:2119.75ms +step:3034/3242 train_loss:3.1818 train_time:6410115ms step_avg:2119.75ms +step:3035/3242 train_loss:2.9355 train_time:6412232ms step_avg:2119.75ms +step:3036/3242 train_loss:3.3554 train_time:6414355ms step_avg:2119.75ms +step:3037/3242 train_loss:3.0546 train_time:6416474ms step_avg:2119.75ms +step:3038/3242 train_loss:2.9749 train_time:6418591ms step_avg:2119.75ms +step:3039/3242 train_loss:3.5151 train_time:6420713ms step_avg:2119.75ms +step:3040/3242 train_loss:3.7323 train_time:6422838ms step_avg:2119.75ms +step:3041/3242 train_loss:3.0899 train_time:6424955ms step_avg:2119.75ms +step:3042/3242 train_loss:3.1911 train_time:6427070ms step_avg:2119.75ms +step:3043/3242 train_loss:3.3358 train_time:6429193ms step_avg:2119.75ms +step:3044/3242 train_loss:3.0740 train_time:6431305ms step_avg:2119.74ms +step:3045/3242 train_loss:3.3018 train_time:6433427ms step_avg:2119.75ms +step:3046/3242 train_loss:3.2639 train_time:6435547ms step_avg:2119.75ms +step:3047/3242 train_loss:3.4696 train_time:6437663ms step_avg:2119.74ms +step:3048/3242 train_loss:3.6329 train_time:6439917ms step_avg:2119.79ms +step:3049/3242 train_loss:3.2149 train_time:6442043ms step_avg:2119.79ms +step:3050/3242 train_loss:3.1968 train_time:6444170ms step_avg:2119.79ms +step:3051/3242 train_loss:3.3361 train_time:6446285ms step_avg:2119.79ms +step:3052/3242 train_loss:3.3689 train_time:6448402ms step_avg:2119.79ms +step:3053/3242 train_loss:3.3234 train_time:6450532ms step_avg:2119.79ms +step:3054/3242 train_loss:3.1707 train_time:6452643ms step_avg:2119.79ms +step:3055/3242 train_loss:3.4597 train_time:6454762ms step_avg:2119.79ms +step:3056/3242 train_loss:3.4737 train_time:6456879ms step_avg:2119.79ms +step:3057/3242 train_loss:3.1278 train_time:6458999ms step_avg:2119.79ms +step:3058/3242 train_loss:3.2033 train_time:6461123ms step_avg:2119.79ms +step:3059/3242 train_loss:3.2685 train_time:6463235ms step_avg:2119.79ms +step:3060/3242 train_loss:3.2419 train_time:6465355ms step_avg:2119.79ms +step:3061/3242 train_loss:3.2403 train_time:6467472ms step_avg:2119.79ms +step:3062/3242 train_loss:3.5228 train_time:6469595ms step_avg:2119.79ms +step:3063/3242 train_loss:3.1432 train_time:6471713ms step_avg:2119.79ms +step:3064/3242 train_loss:3.3952 train_time:6473838ms step_avg:2119.79ms +step:3065/3242 train_loss:3.2318 train_time:6475956ms step_avg:2119.79ms +step:3066/3242 train_loss:3.1761 train_time:6478073ms step_avg:2119.79ms +step:3067/3242 train_loss:3.2692 train_time:6480191ms step_avg:2119.79ms +step:3068/3242 train_loss:3.3111 train_time:6482313ms step_avg:2119.79ms +step:3069/3242 train_loss:3.2347 train_time:6484440ms step_avg:2119.79ms +step:3070/3242 train_loss:3.3321 train_time:6486557ms step_avg:2119.79ms +step:3071/3242 train_loss:3.4997 train_time:6488673ms step_avg:2119.79ms +step:3072/3242 train_loss:3.0657 train_time:6490791ms step_avg:2119.79ms +step:3073/3242 train_loss:3.3840 train_time:6492912ms step_avg:2119.79ms +step:3074/3242 train_loss:3.2784 train_time:6495034ms step_avg:2119.79ms +step:3075/3242 train_loss:3.4845 train_time:6497154ms step_avg:2119.79ms +step:3076/3242 train_loss:3.6453 train_time:6499268ms step_avg:2119.79ms +step:3077/3242 train_loss:3.5815 train_time:6501394ms step_avg:2119.79ms +step:3078/3242 train_loss:3.2928 train_time:6503508ms step_avg:2119.79ms +step:3079/3242 train_loss:4.0029 train_time:6505637ms step_avg:2119.79ms +step:3080/3242 train_loss:3.2216 train_time:6507748ms step_avg:2119.79ms +step:3081/3242 train_loss:3.4141 train_time:6509869ms step_avg:2119.79ms +step:3082/3242 train_loss:3.7200 train_time:6511983ms step_avg:2119.79ms +step:3083/3242 train_loss:3.2334 train_time:6514103ms step_avg:2119.79ms +step:3084/3242 train_loss:3.1798 train_time:6516225ms step_avg:2119.79ms +step:3085/3242 train_loss:3.4376 train_time:6518349ms step_avg:2119.79ms +step:3086/3242 train_loss:3.4128 train_time:6520474ms step_avg:2119.79ms +step:3087/3242 train_loss:3.2221 train_time:6522600ms step_avg:2119.79ms +step:3088/3242 train_loss:3.1839 train_time:6524719ms step_avg:2119.79ms +step:3089/3242 train_loss:3.0786 train_time:6526838ms step_avg:2119.79ms +step:3090/3242 train_loss:3.2073 train_time:6528958ms step_avg:2119.79ms +step:3091/3242 train_loss:3.8854 train_time:6531077ms step_avg:2119.79ms +step:3092/3242 train_loss:3.2542 train_time:6533201ms step_avg:2119.79ms +step:3093/3242 train_loss:3.7270 train_time:6535317ms step_avg:2119.79ms +step:3094/3242 train_loss:3.2849 train_time:6537440ms step_avg:2119.79ms +step:3095/3242 train_loss:3.3776 train_time:6539553ms step_avg:2119.79ms +step:3096/3242 train_loss:3.2335 train_time:6541680ms step_avg:2119.79ms +step:3097/3242 train_loss:3.5039 train_time:6543793ms step_avg:2119.79ms +step:3098/3242 train_loss:3.2758 train_time:6545912ms step_avg:2119.79ms +step:3099/3242 train_loss:3.3194 train_time:6548032ms step_avg:2119.79ms +step:3100/3242 train_loss:3.5136 train_time:6550149ms step_avg:2119.79ms +step:3101/3242 train_loss:3.2739 train_time:6552265ms step_avg:2119.79ms +step:3102/3242 train_loss:3.3083 train_time:6554386ms step_avg:2119.79ms +step:3103/3242 train_loss:3.0499 train_time:6556508ms step_avg:2119.79ms +step:3104/3242 train_loss:3.3409 train_time:6558624ms step_avg:2119.79ms +step:3105/3242 train_loss:3.0013 train_time:6560748ms step_avg:2119.79ms +step:3106/3242 train_loss:3.2089 train_time:6562864ms step_avg:2119.79ms +step:3107/3242 train_loss:3.2557 train_time:6564991ms step_avg:2119.79ms +step:3108/3242 train_loss:3.1074 train_time:6567111ms step_avg:2119.79ms +step:3109/3242 train_loss:2.9143 train_time:6569225ms step_avg:2119.79ms +step:3110/3242 train_loss:3.2096 train_time:6571346ms step_avg:2119.79ms +step:3111/3242 train_loss:3.2674 train_time:6573465ms step_avg:2119.79ms +step:3112/3242 train_loss:3.1906 train_time:6575591ms step_avg:2119.79ms +step:3113/3242 train_loss:3.4406 train_time:6577700ms step_avg:2119.79ms +step:3114/3242 train_loss:3.1066 train_time:6579822ms step_avg:2119.79ms +step:3115/3242 train_loss:3.2811 train_time:6581945ms step_avg:2119.79ms +step:3116/3242 train_loss:3.1731 train_time:6584064ms step_avg:2119.79ms +step:3117/3242 train_loss:3.1582 train_time:6586184ms step_avg:2119.79ms +step:3118/3242 train_loss:3.1371 train_time:6588304ms step_avg:2119.79ms +step:3119/3242 train_loss:3.1551 train_time:6590412ms step_avg:2119.79ms +step:3120/3242 train_loss:3.4394 train_time:6592536ms step_avg:2119.79ms +step:3121/3242 train_loss:3.1249 train_time:6594657ms step_avg:2119.79ms +step:3122/3242 train_loss:3.3468 train_time:6596777ms step_avg:2119.79ms +step:3123/3242 train_loss:3.2312 train_time:6598899ms step_avg:2119.79ms +step:3124/3242 train_loss:3.4576 train_time:6601015ms step_avg:2119.79ms +step:3125/3242 train_loss:3.1686 train_time:6603126ms step_avg:2119.78ms +step:3125/3242 val_loss:3.2847 train_time:6603541ms step_avg:2119.92ms +step:3126/3242 train_loss:3.4074 train_time:6605250ms step_avg:2119.79ms +step:3127/3242 train_loss:3.3307 train_time:6607376ms step_avg:2119.79ms +step:3128/3242 train_loss:3.0285 train_time:6609494ms step_avg:2119.79ms +step:3129/3242 train_loss:3.3921 train_time:6611613ms step_avg:2119.79ms +step:3130/3242 train_loss:3.3346 train_time:6613723ms step_avg:2119.78ms +step:3131/3242 train_loss:3.2547 train_time:6615846ms step_avg:2119.78ms +step:3132/3242 train_loss:2.9634 train_time:6617965ms step_avg:2119.78ms +step:3133/3242 train_loss:2.9934 train_time:6620085ms step_avg:2119.78ms +step:3134/3242 train_loss:3.4797 train_time:6622208ms step_avg:2119.78ms +step:3135/3242 train_loss:3.3686 train_time:6624327ms step_avg:2119.78ms +step:3136/3242 train_loss:3.4404 train_time:6626446ms step_avg:2119.78ms +step:3137/3242 train_loss:3.5070 train_time:6628561ms step_avg:2119.78ms +step:3138/3242 train_loss:3.3019 train_time:6630682ms step_avg:2119.78ms +step:3139/3242 train_loss:3.5055 train_time:6632803ms step_avg:2119.78ms +step:3140/3242 train_loss:3.4036 train_time:6634925ms step_avg:2119.78ms +step:3141/3242 train_loss:3.0638 train_time:6637043ms step_avg:2119.78ms +step:3142/3242 train_loss:3.3287 train_time:6639159ms step_avg:2119.78ms +step:3143/3242 train_loss:3.4826 train_time:6641282ms step_avg:2119.78ms +step:3144/3242 train_loss:3.3395 train_time:6643399ms step_avg:2119.78ms +step:3145/3242 train_loss:3.7002 train_time:6645516ms step_avg:2119.78ms +step:3146/3242 train_loss:3.0817 train_time:6647638ms step_avg:2119.78ms +step:3147/3242 train_loss:3.1414 train_time:6649759ms step_avg:2119.78ms +step:3148/3242 train_loss:3.0790 train_time:6651876ms step_avg:2119.78ms +step:3149/3242 train_loss:3.3224 train_time:6653998ms step_avg:2119.78ms +step:3150/3242 train_loss:3.1780 train_time:6656119ms step_avg:2119.78ms +step:3151/3242 train_loss:3.3368 train_time:6658231ms step_avg:2119.78ms +step:3152/3242 train_loss:3.2745 train_time:6660358ms step_avg:2119.78ms +step:3153/3242 train_loss:3.5317 train_time:6662476ms step_avg:2119.78ms +step:3154/3242 train_loss:3.7946 train_time:6664587ms step_avg:2119.78ms +step:3155/3242 train_loss:3.3289 train_time:6666705ms step_avg:2119.78ms +step:3156/3242 train_loss:3.5808 train_time:6668828ms step_avg:2119.78ms +step:3157/3242 train_loss:3.9576 train_time:6670952ms step_avg:2119.78ms +step:3158/3242 train_loss:3.3442 train_time:6673067ms step_avg:2119.78ms +step:3159/3242 train_loss:2.7204 train_time:6675187ms step_avg:2119.78ms +step:3160/3242 train_loss:3.3368 train_time:6677309ms step_avg:2119.78ms +step:3161/3242 train_loss:3.0769 train_time:6679431ms step_avg:2119.78ms +step:3162/3242 train_loss:3.3087 train_time:6681547ms step_avg:2119.78ms +step:3163/3242 train_loss:3.0946 train_time:6683670ms step_avg:2119.78ms +step:3164/3242 train_loss:2.9390 train_time:6685792ms step_avg:2119.78ms +step:3165/3242 train_loss:3.6156 train_time:6687904ms step_avg:2119.78ms +step:3166/3242 train_loss:3.4484 train_time:6690027ms step_avg:2119.78ms +step:3167/3242 train_loss:3.2907 train_time:6692149ms step_avg:2119.78ms +step:3168/3242 train_loss:3.2721 train_time:6694262ms step_avg:2119.78ms +step:3169/3242 train_loss:3.4923 train_time:6696389ms step_avg:2119.78ms +step:3170/3242 train_loss:3.2608 train_time:6698502ms step_avg:2119.78ms +step:3171/3242 train_loss:3.3152 train_time:6700619ms step_avg:2119.78ms +step:3172/3242 train_loss:3.1673 train_time:6702744ms step_avg:2119.78ms +step:3173/3242 train_loss:3.0675 train_time:6704864ms step_avg:2119.78ms +step:3174/3242 train_loss:3.1459 train_time:6706983ms step_avg:2119.78ms +step:3175/3242 train_loss:3.3545 train_time:6709095ms step_avg:2119.78ms +step:3176/3242 train_loss:3.1568 train_time:6711217ms step_avg:2119.78ms +step:3177/3242 train_loss:3.0997 train_time:6713341ms step_avg:2119.78ms +step:3178/3242 train_loss:3.3731 train_time:6715453ms step_avg:2119.78ms +step:3179/3242 train_loss:3.4747 train_time:6717573ms step_avg:2119.78ms +step:3180/3242 train_loss:3.5700 train_time:6719691ms step_avg:2119.78ms +step:3181/3242 train_loss:3.3467 train_time:6721814ms step_avg:2119.78ms +step:3182/3242 train_loss:3.2036 train_time:6723930ms step_avg:2119.78ms +step:3183/3242 train_loss:3.2228 train_time:6726056ms step_avg:2119.78ms +step:3184/3242 train_loss:3.4308 train_time:6728172ms step_avg:2119.78ms +step:3185/3242 train_loss:3.3464 train_time:6730289ms step_avg:2119.78ms +step:3186/3242 train_loss:3.4618 train_time:6732400ms step_avg:2119.77ms +step:3187/3242 train_loss:3.5543 train_time:6734526ms step_avg:2119.78ms +step:3188/3242 train_loss:3.2765 train_time:6736648ms step_avg:2119.78ms +step:3189/3242 train_loss:3.0990 train_time:6738766ms step_avg:2119.78ms +step:3190/3242 train_loss:3.2562 train_time:6740881ms step_avg:2119.77ms +step:3191/3242 train_loss:3.3627 train_time:6743001ms step_avg:2119.77ms +step:3192/3242 train_loss:3.3496 train_time:6745130ms step_avg:2119.78ms +step:3193/3242 train_loss:3.3941 train_time:6747246ms step_avg:2119.78ms +step:3194/3242 train_loss:3.3200 train_time:6749364ms step_avg:2119.78ms +step:3195/3242 train_loss:3.1111 train_time:6751486ms step_avg:2119.78ms +step:3196/3242 train_loss:3.3054 train_time:6753606ms step_avg:2119.78ms +step:3197/3242 train_loss:3.2637 train_time:6755727ms step_avg:2119.78ms +step:3198/3242 train_loss:3.1240 train_time:6757845ms step_avg:2119.78ms +step:3199/3242 train_loss:3.4378 train_time:6759966ms step_avg:2119.78ms +step:3200/3242 train_loss:2.9798 train_time:6762085ms step_avg:2119.78ms +step:3201/3242 train_loss:3.2845 train_time:6764198ms step_avg:2119.77ms +step:3202/3242 train_loss:3.3709 train_time:6766317ms step_avg:2119.77ms +step:3203/3242 train_loss:3.2168 train_time:6768427ms step_avg:2119.77ms +step:3204/3242 train_loss:3.4285 train_time:6770556ms step_avg:2119.77ms +step:3205/3242 train_loss:2.9487 train_time:6772674ms step_avg:2119.77ms +step:3206/3242 train_loss:3.2000 train_time:6774795ms step_avg:2119.77ms +step:3207/3242 train_loss:3.4044 train_time:6776905ms step_avg:2119.77ms +step:3208/3242 train_loss:3.3836 train_time:6779034ms step_avg:2119.77ms +step:3209/3242 train_loss:3.0552 train_time:6781150ms step_avg:2119.77ms +step:3210/3242 train_loss:3.1545 train_time:6783276ms step_avg:2119.77ms +step:3211/3242 train_loss:3.3328 train_time:6785396ms step_avg:2119.77ms +step:3212/3242 train_loss:3.0186 train_time:6787507ms step_avg:2119.77ms +step:3213/3242 train_loss:3.4077 train_time:6789620ms step_avg:2119.77ms +step:3214/3242 train_loss:3.2135 train_time:6791744ms step_avg:2119.77ms +step:3215/3242 train_loss:3.2829 train_time:6793866ms step_avg:2119.77ms +step:3216/3242 train_loss:3.1332 train_time:6795995ms step_avg:2119.77ms +step:3217/3242 train_loss:3.2351 train_time:6798117ms step_avg:2119.77ms +step:3218/3242 train_loss:3.2247 train_time:6800232ms step_avg:2119.77ms +step:3219/3242 train_loss:3.5992 train_time:6802352ms step_avg:2119.77ms +step:3220/3242 train_loss:3.3933 train_time:6804470ms step_avg:2119.77ms +step:3221/3242 train_loss:3.0974 train_time:6806585ms step_avg:2119.77ms +step:3222/3242 train_loss:3.3090 train_time:6808706ms step_avg:2119.77ms +step:3223/3242 train_loss:3.4560 train_time:6810829ms step_avg:2119.77ms +step:3224/3242 train_loss:3.2066 train_time:6812946ms step_avg:2119.77ms +step:3225/3242 train_loss:3.1243 train_time:6815061ms step_avg:2119.77ms +step:3226/3242 train_loss:3.3359 train_time:6817184ms step_avg:2119.77ms +step:3227/3242 train_loss:3.3807 train_time:6819300ms step_avg:2119.77ms +step:3228/3242 train_loss:3.4734 train_time:6821416ms step_avg:2119.77ms +step:3229/3242 train_loss:3.2083 train_time:6823537ms step_avg:2119.77ms +step:3230/3242 train_loss:3.3496 train_time:6825659ms step_avg:2119.77ms +step:3231/3242 train_loss:3.1277 train_time:6827781ms step_avg:2119.77ms +step:3232/3242 train_loss:2.8974 train_time:6829894ms step_avg:2119.77ms +step:3233/3242 train_loss:3.5845 train_time:6832020ms step_avg:2119.77ms +step:3234/3242 train_loss:3.3806 train_time:6834138ms step_avg:2119.77ms +step:3235/3242 train_loss:3.2423 train_time:6836261ms step_avg:2119.77ms +step:3236/3242 train_loss:3.1889 train_time:6838377ms step_avg:2119.77ms +step:3237/3242 train_loss:3.1047 train_time:6840489ms step_avg:2119.77ms +step:3238/3242 train_loss:3.2250 train_time:6842614ms step_avg:2119.77ms +step:3239/3242 train_loss:3.3745 train_time:6844865ms step_avg:2119.81ms +step:3240/3242 train_loss:3.3412 train_time:6846989ms step_avg:2119.81ms +step:3241/3242 train_loss:3.9135 train_time:6849104ms step_avg:2119.81ms +step:3242/3242 train_loss:3.2765 train_time:6851224ms step_avg:2119.81ms +step:3242/3242 val_loss:3.2766 train_time:6851639ms step_avg:2119.94ms