# Simon Duerr
# add fast af
# 85bd48b
# raw
# history blame
# 21.9 kB
import jax
import jax.numpy as jnp
import tensorflow as tf
tf.config.set_visible_devices([], 'GPU')
import numpy as np
from alphafold.common import protein
from alphafold.common import residue_constants
from alphafold.model import model
from alphafold.model import folding
from alphafold.model import all_atom
from alphafold.model.tf import shape_placeholders
#######################
# reshape inputs
#######################
def make_fixed_size(feat, model_runner, length, batch_axis=True):
  '''Pad every feature in `feat` to a fixed size with `tf.pad`.

  Placeholder dimensions in the eval feature schema are resolved to `length`
  (NUM_RES) or to the MSA / extra-MSA / template limits read from
  `model_runner.config`; all other dimensions keep their current size.

  Args:
    feat: dict of feature name -> array-like exposing `.shape`. Updated in
      place with the padded tensors.
    model_runner: object exposing `.config` (data.eval / data.common sections).
    length: target size for the NUM_RES placeholder dimension.
    batch_axis: if True, the schema is prefixed with one extra (unpadded)
      leading dimension.

  Returns:
    New dict with every entry of `feat` converted through `np.asarray`.
  '''
  config = model_runner.config
  eval_schema = dict(config.data.eval.feat)
  if batch_axis:
    # the extra leading axis has no placeholder, so it is never padded
    shape_schema = {name: [None] + dims for name, dims in eval_schema.items()}
  else:
    shape_schema = dict(eval_schema)

  pad_size_map = {
      shape_placeholders.NUM_RES: length,
      shape_placeholders.NUM_MSA_SEQ: config.data.eval.max_msa_clusters,
      shape_placeholders.NUM_EXTRA_SEQ: config.data.common.max_extra_msa,
      shape_placeholders.NUM_TEMPLATES: config.data.eval.max_templates,
  }

  for k, tensor in feat.items():
    # Don't transfer this to the accelerator.
    if k == 'extra_cluster_assignment':
      continue
    shape = list(tensor.shape)
    schema = shape_schema[k]
    assert len(shape) == len(schema), (
        f'Rank mismatch between shape and shape schema for {k}: '
        f'{shape} vs {schema}')
    # target size per axis: the mapped placeholder size if there is one
    # (and it is truthy), otherwise the axis keeps its current size
    pad_size = []
    for current, placeholder in zip(shape, schema):
      target = pad_size_map.get(placeholder, None)
      pad_size.append(target or current)
    # pad only at the end of each axis
    padding = [(0, target - tf.shape(tensor)[axis])
               for axis, target in enumerate(pad_size)]
    if padding:
      feat[k] = tf.pad(tensor, padding, name=f'pad_to_fixed_{k}')
      feat[k].set_shape(pad_size)

  return {name: np.asarray(value) for name, value in feat.items()}
#########################
# rmsd
#########################
def jnp_rmsdist(true, pred):
  '''RMSD between the pairwise-distance matrices of `true` and `pred`.

  Thin wrapper around `_np_rmsdist` using its default (JAX) backend.
  '''
  dist_rmsd = _np_rmsdist(true, pred)
  return dist_rmsd
def jnp_rmsd(true, pred, add_dist=False):
  '''Coordinate RMSD after alignment, optionally averaged with distance RMSD.

  Args:
    true, pred: coordinate arrays passed straight to `_np_rmsd`.
    add_dist: if True, return the mean of the aligned-coordinate RMSD and the
      distance-matrix RMSD (`_np_rmsdist`).
  '''
  coord_rmsd = _np_rmsd(true, pred)
  if not add_dist:
    return coord_rmsd
  return (coord_rmsd + _np_rmsdist(true, pred)) / 2
def jnp_kabsch_w(a, b, weights):
  '''Alignment matrix from `_np_kabsch` with rows of `a` scaled by `weights`.

  `weights` is indexed as `weights[:, None]`, i.e. one weight per row of `a`.
  '''
  weighted_a = a * weights[:, None]
  return _np_kabsch(weighted_a, b)
def jnp_rmsd_w(true, pred, weights):
  '''Weighted RMSD between `true` and `pred` after weighted superposition.

  Both coordinate sets are shifted by their weighted centroid, `true` is
  rotated onto `pred` with a weighted `_np_kabsch` fit, and the per-row squared
  error is averaged with `weights`. 1e-8 is added before the square root.
  '''
  w_col = weights[:, None]
  w_total = weights.sum()
  # remove the weighted centroid from each coordinate set
  p = true - (true * w_col).sum(0, keepdims=True) / w_total
  q = pred - (pred * w_col).sum(0, keepdims=True) / w_total
  # rotate the reference onto the prediction
  p = p @ _np_kabsch(p * w_col, q)
  sq_err = jnp.square(p - q).sum(-1)
  return jnp.sqrt((weights * sq_err).sum() / w_total + 1e-8)
def get_rmsd_loss_w(batch, outputs, copies=1):
  '''RMSD loss on atom index 1, with optional handling of repeated copies.

  Reads atom index 1 along the atom axis of `all_atom_positions`,
  `all_atom_mask` and the predicted `final_atom_positions` (presumably CA;
  confirm against the atom ordering in use).

  Args:
    batch: dict with "all_atom_mask" and "all_atom_positions".
    outputs: model outputs with ["structure_module"]["final_atom_positions"].
    copies: number of equal-length segments the first axis is split into.
      With 1, the mask-weighted RMSD (`jnp_rmsd_w`) is returned. Otherwise the
      structures are aligned on the first segment only, and each remaining
      true segment is scored against its best-matching predicted segment
      (the mask is ignored in this branch).
  '''
  weights = batch["all_atom_mask"][:, 1]
  true = batch["all_atom_positions"][:, 1, :]
  pred = outputs["structure_module"]["final_atom_positions"][:, 1, :]
  if copies == 1:
    return jnp_rmsd_w(true, pred, weights)

  # TODO add support for weights
  seg_len = true.shape[0] // copies
  n_extra = copies - 1
  # center both on the first segment and align using that segment only
  true_c = true - true[:seg_len].mean(0)
  pred_c = pred - pred[:seg_len].mean(0)
  true_c = true_c @ _np_kabsch(true_c[:seg_len], pred_c[:seg_len])
  msd = jnp.square(true_c[:seg_len] - pred_c[:seg_len]).sum(-1).mean()
  # all-vs-all comparison of the remaining segments: (n_extra, n_extra, seg_len, 3)
  true_rest = true_c[seg_len:].reshape(n_extra, 1, seg_len, -1)
  pred_rest = pred_c[seg_len:].reshape(1, n_extra, seg_len, -1)
  pair_msd = jnp.square(true_rest - pred_rest).sum(-1).mean(-1)
  # for each true segment keep the closest predicted segment
  msd += pair_msd.min(-1).sum()
  return jnp.sqrt(msd / copies)
####################
# confidence metrics
####################
def get_plddt(outputs):
  '''Expected bin-center value of the predicted-lDDT distribution.

  Softmaxes `outputs["predicted_lddt"]["logits"]` over the last axis and
  returns the probability-weighted mean of evenly spaced bin centers in
  (0, 1). The result is therefore on a 0-1 scale.
  '''
  lddt_logits = outputs["predicted_lddt"]["logits"]
  n_bins = lddt_logits.shape[-1]
  width = 1.0 / n_bins
  centers = jnp.arange(start=0.5 * width, stop=1.0, step=width)
  bin_probs = jax.nn.softmax(lddt_logits, axis=-1)
  return (bin_probs * centers[None, :]).sum(axis=-1)
def get_pae(outputs):
  '''Expected predicted aligned error per entry.

  Bin centers are built from `breaks` by adding half a bin width (taken from
  the first two breaks) and appending one extra center past the last break,
  so there is one more center than there are breaks. The result is the
  softmax-probability-weighted sum of those centers over the last axis.
  '''
  pae_head = outputs["predicted_aligned_error"]
  probs = jax.nn.softmax(pae_head["logits"], -1)
  edges = pae_head["breaks"]
  width = edges[1] - edges[0]
  centers = edges + width / 2
  # the last bin is open-ended; give it a center one width further out
  centers = jnp.append(centers, centers[-1] + width)
  return (probs * centers).sum(-1)
####################
# loss functions
####################
def get_rmsd_loss(batch, outputs):
  '''Unweighted RMSD (via `_np_rmsd`) on atom index 1 of true vs predicted.

  Atom index 1 is presumably CA; confirm against the atom ordering in use.
  '''
  true_xyz = batch["all_atom_positions"][:, 1, :]
  pred_xyz = outputs["structure_module"]["final_atom_positions"][:, 1, :]
  return _np_rmsd(true_xyz, pred_xyz)
def _distogram_log_loss(logits, bin_edges, batch, num_bins, copies=1):
"""Log loss of a distogram."""
pos,mask = batch['pseudo_beta'],batch['pseudo_beta_mask']
sq_breaks = jnp.square(bin_edges)
dist2 = jnp.square(pos[:,None] - pos[None,:]).sum(-1,keepdims=True)
true_bins = jnp.sum(dist2 > sq_breaks, axis=-1)
true = jax.nn.one_hot(true_bins, num_bins)
if copies == 1:
errors = -(true * jax.nn.log_softmax(logits)).sum(-1)
sq_mask = mask[:,None] * mask[None,:]
avg_error = (errors * sq_mask).sum()/(1e-6 + sq_mask.sum())
return avg_error
else:
# TODO add support for masks
L = pos.shape[0] // copies
I = copies - 1
true_, pred_ = true[:L,:L], logits[:L,:L]
errors = -(true_ * jax.nn.log_softmax(pred_)).sum(-1)
avg_error = errors.mean()
true_, pred_ = true[:L,L:], logits[:L,L:]
true_, pred_ = true_.reshape(L,I,1,L,-1), pred_.reshape(L,1,I,L,-1)
errors = -(true_ * jax.nn.log_softmax(pred_)).sum(-1)
avg_error += errors.mean((0,-1)).min(-1).sum()
return avg_error / copies
def get_dgram_loss(batch, outputs, model_config, logits=None, copies=1):
  '''Distogram log loss of the prediction against the true structure.

  Args:
    batch: dict with "aatype", "all_atom_positions" and "all_atom_mask".
    outputs: model outputs; ["distogram"]["bin_edges"] is always read, and
      ["distogram"]["logits"] is used when `logits` is None.
    model_config: config providing `model.heads.distogram.num_bins`.
    logits: optional replacement logits.
    copies: forwarded to `_distogram_log_loss`.
  '''
  # get cb features (ca in case of glycine)
  pseudo_beta, pseudo_beta_mask = model.modules.pseudo_beta_fn(
      batch["aatype"],
      batch["all_atom_positions"],
      batch["all_atom_mask"])
  distogram = outputs["distogram"]
  if logits is None:
    logits = distogram["logits"]
  target = {"pseudo_beta": pseudo_beta, "pseudo_beta_mask": pseudo_beta_mask}
  return _distogram_log_loss(
      logits,
      distogram["bin_edges"],
      batch=target,
      num_bins=model_config.model.heads.distogram.num_bins,
      copies=copies)
def get_fape_loss(batch, outputs, model_config, use_clamped_fape=False):
  '''Backbone FAPE loss computed by `folding.backbone_loss`.

  Args:
    batch: ground-truth feature pytree; it is rebuilt (not mutated) before the
      "use_clamped_fape" entry is added.
    outputs: model outputs; `outputs["structure_module"]` is passed on.
    model_config: config providing `model.heads.structure_module`.
    use_clamped_fape: value stored under "use_clamped_fape" in the batch copy.

  Returns:
    The "loss" entry that `folding.backbone_loss` writes into the loss dict.
  '''
  # Rebuild the pytree containers so the caller's batch is not modified.
  # jax.tree_util.tree_map is used instead of the top-level jax.tree_map
  # alias, which is deprecated and absent from newer JAX releases.
  sub_batch = jax.tree_util.tree_map(lambda x: x, batch)
  sub_batch["use_clamped_fape"] = use_clamped_fape
  loss = {"loss": 0.0}
  # backbone_loss accumulates its result into `loss` in place
  folding.backbone_loss(loss, sub_batch, outputs["structure_module"],
                        model_config.model.heads.structure_module)
  return loss["loss"]
####################
# loss functions (restricted to idx and/or sidechains)
####################
def get_dgram_loss_idx(batch, outputs, idx, model_config):
  '''Distogram loss restricted to a subset of positions.

  Args:
    batch: dict with "idx" (positions to take from the reference), "aatype",
      "all_atom_positions" and "all_atom_mask".
    outputs: model outputs with ["distogram"]["logits"] and ["bin_edges"].
    idx: positions to take from the predicted logits (both pair axes).
    model_config: config providing `model.heads.distogram.num_bins`.

  Returns:
    The "loss" entry of the dict returned by
    `model.modules._distogram_log_loss`.
  '''
  ref_idx = batch["idx"]
  pseudo_beta, pseudo_beta_mask = model.modules.pseudo_beta_fn(
      batch["aatype"][ref_idx],
      batch["all_atom_positions"][ref_idx],
      batch["all_atom_mask"][ref_idx])
  distogram = outputs["distogram"]
  # select columns first, then rows, giving the idx-by-idx sub-block
  sub_logits = distogram["logits"][:, idx][idx, :]
  result = model.modules._distogram_log_loss(
      sub_logits,
      distogram["bin_edges"],
      batch={"pseudo_beta": pseudo_beta, "pseudo_beta_mask": pseudo_beta_mask},
      num_bins=model_config.model.heads.distogram.num_bins)
  return result["loss"]
def get_fape_loss_idx(batch, outputs, idx, model_config, backbone=False, sidechain=True, use_clamped_fape=False):
  '''FAPE loss restricted to a subset of positions (sidechain and/or backbone).

  Args:
    batch: ground-truth features plus an "idx" entry giving the reference
      positions; every other entry is indexed with it along the first axis.
    outputs: model outputs; `outputs["structure_module"]` is rebuilt (not
      mutated at the top level) and sliced with `idx`.
    idx: positions to take from the prediction.
    model_config: config providing `model.heads.structure_module`.
    backbone: if True, add `folding.backbone_loss` on the sliced trajectory.
    sidechain: if True, add `folding.sidechain_loss` on the sliced frames and
      atom positions.
    use_clamped_fape: value stored under "use_clamped_fape" in the sub-batch.

  Returns:
    The final "loss" entry of the accumulated loss dict.
  '''
  idx_ref = batch["idx"]
  sub_batch = batch.copy()
  sub_batch.pop("idx")
  # jax.tree_util.tree_map is used instead of the top-level jax.tree_map
  # alias, which is deprecated and absent from newer JAX releases.
  sub_batch = jax.tree_util.tree_map(lambda x: x[idx_ref, ...], sub_batch)
  sub_batch["use_clamped_fape"] = use_clamped_fape
  # rebuild the containers so the updates below do not touch `outputs`
  value = jax.tree_util.tree_map(lambda x: x, outputs["structure_module"])
  structure_cfg = model_config.model.heads.structure_module
  loss = {"loss": 0.0}
  if sidechain:
    value.update(folding.compute_renamed_ground_truth(
        sub_batch, value['final_atom14_positions'][idx, ...]))
    # the position axis of these pytrees is axis 1 (per the [:, idx, :] slice)
    value['sidechains']['frames'] = jax.tree_util.tree_map(
        lambda x: x[:, idx, :], value["sidechains"]["frames"])
    value['sidechains']['atom_pos'] = jax.tree_util.tree_map(
        lambda x: x[:, idx, :], value["sidechains"]["atom_pos"])
    loss.update(folding.sidechain_loss(sub_batch, value, structure_cfg))
  if backbone:
    value["traj"] = value["traj"][..., idx, :]
    # backbone_loss accumulates into `loss` in place
    folding.backbone_loss(loss, sub_batch, value, structure_cfg)
  return loss["loss"]
def get_sc_rmsd(true_pos, pred_pos, aa_ident, atoms_to_exclude=None):
  '''RMSD over selected atoms, tolerant of swappable atom names.

  Atoms are addressed as `residue * 14 + slot`, so both position arrays are
  expected to flatten to (n_residues * 14, 3). The structures are superposed
  using only atoms without a renaming partner; each atom's error is then the
  smaller of its distance to the predicted atom and to the predicted swap
  partner.

  Args:
    true_pos: reference positions, reshaped to (-1, 3).
    pred_pos: predicted positions, reshaped to (-1, 3).
    aa_ident: iterable of residue-type indices (keys of `idx_to_resname`).
    atoms_to_exclude: atom names to leave out; defaults to ["N", "C", "O"].

  Returns:
    sqrt(mean squared deviation + 1e-8).
  '''
  if atoms_to_exclude is None:
    atoms_to_exclude = ["N", "C", "O"]

  # collect atom indices
  idx, idx_alt = [], []
  for n, a in enumerate(aa_ident):
    aa = idx_to_resname[a]
    atoms = set(residue_constants.residue_atoms[aa])
    atoms14 = residue_constants.restype_name_to_atom14_names[aa]
    # Build the symmetric swap table on a private copy: updating the dict
    # returned by .get() would modify the shared residue_constants table.
    forward = residue_constants.residue_atom_renaming_swaps.get(aa, {})
    swaps = dict(forward)
    swaps.update({v: k for k, v in forward.items()})
    for atom in atoms.difference(atoms_to_exclude):
      idx.append(n * 14 + atoms14.index(atom))
      if atom in swaps:
        idx_alt.append(n * 14 + atoms14.index(swaps[atom]))
      else:
        idx_alt.append(idx[-1])
  idx, idx_alt = np.asarray(idx), np.asarray(idx_alt)

  # select atoms
  T, P = true_pos.reshape(-1, 3)[idx], pred_pos.reshape(-1, 3)[idx]

  # select non-ambiguous atoms
  non_amb = idx == idx_alt
  t, p = T[non_amb], P[non_amb]

  # align non-ambiguous atoms
  aln = _np_kabsch(t - t.mean(0), p - p.mean(0))
  T, P = (T - t.mean(0)) @ aln, P - p.mean(0)
  P_alt = pred_pos.reshape(-1, 3)[idx_alt] - p.mean(0)

  # compute rmsd, taking the better of the two namings per atom
  msd = jnp.minimum(jnp.square(T - P).sum(-1),
                    jnp.square(T - P_alt).sum(-1)).mean()
  return jnp.sqrt(msd + 1e-8)
def get_sidechain_rmsd_idx(batch, outputs, idx, model_config, include_ca=True):
  '''Sidechain RMSD (`get_sc_rmsd`) between selected true and predicted residues.

  Args:
    batch: dict with "idx" (reference positions), "aatype" and
      "all_atom_positions"; also passed whole to `all_atom.atom37_to_atom14`.
    outputs: model outputs with ["structure_module"]["final_atom14_positions"].
    idx: positions to take from the prediction.
    model_config: not used by this function.
    include_ca: if True only N, C, O are excluded; otherwise CA is excluded too.
  '''
  ref_idx = batch["idx"]
  excluded = ["N", "C", "O"] if include_ca else ["N", "CA", "C", "O"]
  true_atom14 = all_atom.atom37_to_atom14(batch["all_atom_positions"], batch)
  pred_atom14 = outputs["structure_module"]["final_atom14_positions"]
  return get_sc_rmsd(true_atom14[ref_idx, :, :],
                     pred_atom14[idx, :, :],
                     batch["aatype"][ref_idx],
                     excluded)
#################################################################################
#################################################################################
#################################################################################
def _np_len_pw(x, use_jax=True):
'''compute pairwise distance'''
_np = jnp if use_jax else np
x_norm = _np.square(x).sum(-1)
xx = _np.einsum("...ia,...ja->...ij",x,x)
sq_dist = x_norm[...,:,None] + x_norm[...,None,:] - 2 * xx
# due to precision errors the values can sometimes be negative
if use_jax: sq_dist = jax.nn.relu(sq_dist)
else: sq_dist[sq_dist < 0] = 0
# return euclidean pairwise distance matrix
return _np.sqrt(sq_dist + 1e-8)
def _np_rmsdist(true, pred, use_jax=True):
  '''RMSD between the pairwise-distance matrices of `true` and `pred`.

  The mean is taken over all entries; 1e-8 is added before the square root.
  '''
  xp = jnp if use_jax else np
  true_dist = _np_len_pw(true, use_jax=use_jax)
  pred_dist = _np_len_pw(pred, use_jax=use_jax)
  mean_sq = xp.square(true_dist - pred_dist).mean()
  return xp.sqrt(mean_sq + 1e-8)
def _np_kabsch(a, b, return_v=False, use_jax=True):
'''get alignment matrix for two sets of coodinates'''
_np = jnp if use_jax else np
ab = a.swapaxes(-1,-2) @ b
u, s, vh = _np.linalg.svd(ab, full_matrices=False)
flip = _np.linalg.det(u @ vh) < 0
u_ = _np.where(flip, -u[...,-1].T, u[...,-1].T).T
if use_jax: u = u.at[...,-1].set(u_)
else: u[...,-1] = u_
return u if return_v else (u @ vh)
def _np_rmsd(true, pred, use_jax=True):
  '''RMSD of coordinates after centering and `_np_kabsch` alignment.

  Centroids are taken over the second-to-last axis; 1e-8 is added before the
  square root.
  '''
  xp = jnp if use_jax else np
  true_c = true - true.mean(-2, keepdims=True)
  pred_c = pred - pred.mean(-2, keepdims=True)
  rotation = _np_kabsch(true_c, pred_c, use_jax=use_jax)
  true_c = true_c @ rotation
  mean_sq = xp.square(true_c - pred_c).sum(-1).mean(-1)
  return xp.sqrt(mean_sq + 1e-8)
def _np_norm(x, axis=-1, keepdims=True, eps=1e-8, use_jax=True):
'''compute norm of vector'''
_np = jnp if use_jax else np
return _np.sqrt(_np.square(x).sum(axis,keepdims=keepdims) + 1e-8)
def _np_len(a, b, use_jax=True):
  '''Distance between coordinates `a` and `b` (norm of a - b via `_np_norm`).'''
  offset = a - b
  return _np_norm(offset, use_jax=use_jax)
def _np_ang(a, b, c, use_acos=False, use_jax=True):
  '''Angle at `b` for coordinates a-b-c.

  Returns cos(angle) by default, or the angle itself when `use_acos` is True.
  The last axis is kept with size 1.
  '''
  xp = jnp if use_jax else np
  ba = b - a
  bc = b - c
  dot = (ba * bc).sum(-1, keepdims=True)
  lengths = _np_norm(ba, use_jax=use_jax) * _np_norm(bc, use_jax=use_jax)
  cos_ang = dot / lengths
  # note the derivative at acos(-1 or 1) is inf, to avoid nans we use cos(ang)
  if use_acos:
    return xp.arccos(cos_ang)
  return cos_ang
def _np_dih(a, b, c, d, use_atan2=False, standardize=False, use_jax=True):
  '''Dihedral for coordinates a-b-c-d.

  Returns:
    With `use_atan2`, the angle from arctan2(sin, cos) (last axis of size 1).
    Otherwise the pair [sin, cos] concatenated on the last axis, rescaled to
    unit length when `standardize` is True.
  '''
  xp = jnp if use_jax else np

  def unit(v):
    return v / _np_norm(v, use_jax=use_jax)

  ab = unit(a - b)
  bc = unit(b - c)
  cd = unit(c - d)
  n1 = xp.cross(ab, bc)
  n2 = xp.cross(bc, cd)
  sin_ang = (xp.cross(n1, bc) * n2).sum(-1, keepdims=True)
  cos_ang = (n1 * n2).sum(-1, keepdims=True)

  if use_atan2:
    return xp.arctan2(sin_ang, cos_ang)
  sin_cos = xp.concatenate([sin_ang, cos_ang], -1)
  return unit(sin_cos) if standardize else sin_cos
def _np_extend(a, b, c, L, A, D, use_jax=True):
  '''Place a 4th coordinate d from three existing ones.

  Given coordinates a-b-c, the c-d (L)ength, the b-c-d (A)ngle and the
  a-b-c-d (D)ihedral, return d. `A` and `D` are passed to cos/sin directly.
  '''
  xp = jnp if use_jax else np

  def unit(v):
    return v / _np_norm(v, use_jax=use_jax)

  bc = unit(b - c)
  n = unit(xp.cross(b - a, bc))
  # three components of the c->d offset: along bc, along cross(n, bc), along -n
  term_bc = L * xp.cos(A) * bc
  term_cross = L * xp.sin(A) * xp.cos(D) * xp.cross(n, bc)
  term_n = L * xp.sin(A) * xp.sin(D) * -n
  return c + sum([term_bc, term_cross, term_n])
def _np_get_cb(N, CA, C, use_jax=True):
  '''Compute CB placement from N, CA, C.

  Calls `_np_extend(C, N, CA, ...)` with fixed length 1.522, angle 1.927 and
  dihedral -2.143 (angle/dihedral are fed to cos/sin, so radians; the length
  is presumably in Angstrom).
  '''
  cb_length, cb_angle, cb_dihedral = 1.522, 1.927, -2.143
  return _np_extend(C, N, CA, cb_length, cb_angle, cb_dihedral, use_jax=use_jax)
def _np_get_6D(all_atom_positions, all_atom_mask=None, use_jax=True):
  '''Get 6D features (see TrRosetta paper).

  Args:
    all_atom_positions: coordinates indexed as [..., atom, xyz], with atoms
      addressed through `residue_constants.atom_order`.
    all_atom_mask: optional mask indexed as [..., atom].
    use_jax: choose jax.numpy (True) or numpy (False).

  Returns:
    dict with per-residue "N", "CA", "C", "CB" coordinates, "CB_mask" (only
    when a mask is given: product of the N/CA/C masks) and the pairwise
    features "dist", "phi", "omega", "theta".
  '''
  # get CB coordinate
  backbone_idx = {name: residue_constants.atom_order[name]
                  for name in ["N", "CA", "C"]}
  out = {name: all_atom_positions[..., i, :]
         for name, i in backbone_idx.items()}
  out["CB"] = _np_get_cb(out["N"], out["CA"], out["C"], use_jax=use_jax)

  if all_atom_mask is not None:
    # CB is only valid where all three backbone atoms are present
    atom_cols = np.fromiter(backbone_idx.values(), int)
    out["CB_mask"] = all_atom_mask[..., atom_cols].prod(-1)

  # get pairwise features; *_i broadcasts over rows, *_j over columns
  N, A, B = out["N"], out["CA"], out["CB"]
  N_i = N[..., :, None, :]
  A_i, A_j = A[..., :, None, :], A[..., None, :, :]
  B_i, B_j = B[..., :, None, :], B[..., None, :, :]
  out.update({
      "dist": _np_len_pw(B, use_jax=use_jax),
      "phi": _np_ang(A_i, B_i, B_j, use_jax=use_jax),
      "omega": _np_dih(A_i, B_i, B_j, A_j, use_jax=use_jax),
      "theta": _np_dih(N_i, A_i, B_i, B_j, use_jax=use_jax),
  })
  return out
####################
# 6D loss (see TrRosetta paper)
####################
def _np_get_6D_loss(true, pred, mask=None, use_theta=True, use_dist=False, use_jax=True):
  '''Masked root-mean-square difference between 6D features of true and pred.

  Args:
    true, pred: all-atom coordinates accepted by `_np_get_6D`.
    mask: optional all-atom mask for `true`; when given, the derived
      "CB_mask" weights the residue pairs, otherwise all pairs count equally.
    use_theta: include the "theta" feature in addition to "omega" and "phi".
    use_dist: include the "dist" feature (divided by 10.0 first).
    use_jax: choose jax.numpy (True) or numpy (False).
  '''
  xp = jnp if use_jax else np
  true_feats = _np_get_6D(true, mask, use_jax=use_jax)
  pred_feats = _np_get_6D(pred, use_jax=use_jax)
  # bring distances onto a scale comparable with the angular features
  for feats in (true_feats, pred_feats):
    feats["dist"] /= 10.0

  keys = ["omega", "phi"]
  if use_theta:
    keys.append("theta")
  if use_dist:
    keys.append("dist")

  sq_diff = sum([xp.square(true_feats[k] - pred_feats[k]).sum(-1) for k in keys])

  if mask is None:
    mask = xp.ones(true.shape[0])
  else:
    mask = true_feats["CB_mask"]
  pair_mask = mask[:, None] * mask[None, :]
  loss = (sq_diff * pair_mask).sum((-1, -2)) / pair_mask.sum((-1, -2))
  return xp.sqrt(loss + 1e-8).mean()
def get_6D_loss(batch, outputs, **kwargs):
  '''6D-feature loss between the batch structure and the predicted structure.

  Extra keyword arguments are forwarded to `_np_get_6D_loss`.
  '''
  predicted = outputs["structure_module"]["final_atom_positions"]
  return _np_get_6D_loss(batch["all_atom_positions"],
                         predicted,
                         batch["all_atom_mask"],
                         **kwargs)
#################################################################################
#################################################################################
#################################################################################
####################
# update sequence
####################
def soft_seq(seq_logits, temp=1.0, hard=True):
  '''Convert sequence logits to a (optionally hard) probability encoding.

  Args:
    seq_logits: logits whose last axis indexes the classes.
    temp: softmax temperature; logits are divided by it.
    hard: if True return the one-hot argmax in the forward pass while
      gradients flow through the soft probabilities (straight-through trick).

  Returns:
    Array with the same shape as `seq_logits`.
  '''
  seq_soft = jax.nn.softmax(seq_logits / temp)
  if not hard:
    return seq_soft
  # Take the class count from the input instead of hard-coding 20, so any
  # alphabet size works; 20-class inputs behave exactly as before.
  num_classes = seq_soft.shape[-1]
  seq_hard = jax.nn.one_hot(seq_soft.argmax(-1), num_classes)
  # forward value is seq_hard; the gradient comes from seq_soft only
  return jax.lax.stop_gradient(seq_hard - seq_soft) + seq_soft
def update_seq(seq, inputs, seq_1hot=None, seq_pssm=None, msa_input=None):
  '''Update the sequence features of `inputs` in place.

  "msa_feat" is replaced by zeros with channels 0:20 set to `seq_1hot` and
  channels 25:45 set to `seq_pssm`; "target_feat" is replaced by zeros with
  channels 1:21 set to `seq` (its first entry when `seq` is 3-dimensional).

  Args:
    seq: sequence encoding with 20 channels on the last axis.
    inputs: feature dict holding "msa_feat" and "target_feat"; mutated.
    seq_1hot: optional replacement for the 0:20 MSA channels (default `seq`).
    seq_pssm: optional replacement for the 25:45 MSA channels (default `seq`).
    msa_input: not used by this function.

  Returns:
    None.
  '''
  if seq_1hot is None:
    seq_1hot = seq
  if seq_pssm is None:
    seq_pssm = seq

  msa_feat = jnp.zeros_like(inputs["msa_feat"])
  msa_feat = msa_feat.at[..., 0:20].set(seq_1hot)
  msa_feat = msa_feat.at[..., 25:45].set(seq_pssm)

  # target features describe a single sequence: use the first one if stacked
  target_seq = seq[0] if seq.ndim == 3 else seq
  target_feat = jnp.zeros_like(inputs["target_feat"]).at[..., 1:21].set(target_seq)

  inputs.update({"target_feat": target_feat, "msa_feat": msa_feat})
def update_aatype(aatype, inputs):
  '''Store `aatype` and the atom14/atom37 lookup features derived from it.

  Integer `aatype` indexes the `residue_constants` tables directly. Any other
  dtype is treated as per-residue weights over residue types (last axis) and
  the tables are mixed with einsum instead; the index tables are one-hot
  encoded first. `inputs` is updated in place and nothing is returned.
  '''
  rc = residue_constants
  if jnp.issubdtype(aatype.dtype, jnp.integer):
    derived = {
        "atom14_atom_exists": rc.restype_atom14_mask[aatype],
        "atom37_atom_exists": rc.restype_atom37_mask[aatype],
        "residx_atom14_to_atom37": rc.restype_atom14_to_atom37[aatype],
        "residx_atom37_to_atom14": rc.restype_atom37_to_atom14[aatype],
    }
  else:
    # index tables become one-hot so they can be blended by soft weights
    atom14_to_atom37_1hot = jax.nn.one_hot(rc.restype_atom14_to_atom37, 37)
    atom37_to_atom14_1hot = jax.nn.one_hot(rc.restype_atom37_to_atom14, 14)
    derived = {
        "atom14_atom_exists": jnp.einsum("...a,am->...m", aatype, rc.restype_atom14_mask),
        "atom37_atom_exists": jnp.einsum("...a,am->...m", aatype, rc.restype_atom37_mask),
        "residx_atom14_to_atom37": jnp.einsum("...a,abc->...bc", aatype, atom14_to_atom37_1hot),
        "residx_atom37_to_atom14": jnp.einsum("...a,abc->...bc", aatype, atom37_to_atom14_1hot),
    }
  inputs.update({"aatype": aatype, **derived})
####################
# utils
####################
def pdb_to_string(pdb_file):
  '''Read a PDB file and return its ATOM records as one string.

  HETATM records whose residue name (columns 18-20) is MSE are rewritten as
  ATOM records with residue name MET and kept; all other non-ATOM lines are
  dropped.

  Args:
    pdb_file: path of the PDB file to read.

  Returns:
    The kept lines concatenated (each retains its own line ending).
  '''
  lines = []
  # the context manager closes the handle, which the bare open() never did
  with open(pdb_file, "r") as handle:
    for line in handle:
      if line[:6] == "HETATM" and line[17:20] == "MSE":
        # the record name is a 6-column field: pad "ATOM" to six characters
        # so the serial, atom-name and coordinate columns stay in place
        line = "ATOM  " + line[6:17] + "MET" + line[20:]
      if line[:4] == "ATOM":
        lines.append(line)
  return "".join(lines)
def save_pdb(outs, filename="tmp.pdb"):
  '''Write the predicted structure in `outs` to a PDB file.

  Args:
    outs: dict with "seq" (argmax over the last axis gives residue types;
      leading axes are dropped by repeatedly taking entry 0) and "outputs"
      holding "final_atom_positions" and "final_atom_mask".
    filename: destination path.

  B-factors are written as zeros and residues are numbered from 1.
  '''
  seq = outs["seq"].argmax(-1)
  while seq.ndim > 1:
    seq = seq[0]

  model_out = outs["outputs"]
  atom_mask = model_out['final_atom_mask']
  structure = protein.Protein(
      aatype=seq,
      atom_positions=model_out["final_atom_positions"],
      atom_mask=atom_mask,
      residue_index=jnp.arange(len(seq)) + 1,
      b_factors=np.zeros_like(atom_mask))

  # build the text first so a failure does not leave an empty file behind
  pdb_text = protein.to_pdb(structure)
  with open(filename, 'w') as handle:
    handle.write(pdb_text)
# Inverse of residue_constants.restype_order (value -> key).
order_restype = {
    index: restype for restype, index in residue_constants.restype_order.items()
}
# Inverse of residue_constants.resname_to_idx (index -> residue name).
idx_to_resname = {
    index: resname for resname, index in residue_constants.resname_to_idx.items()
}
# 20x20 0/1 matrix: column i is one-hot at the HHBLITS id of residue type i.
template_aa_map = np.eye(20)[
    [residue_constants.HHBLITS_AA_TO_ID[order_restype[i]] for i in range(20)]
].T
###########################
# MISC
###########################
# Hex colour palettes keyed by colour-scheme name.
# "Clustal", "Zappo" and "Taylor" hold 20 colours each; the remaining schemes
# hold 23. Entries are positional - presumably one colour per residue type in
# the residue ordering used elsewhere in this file (TODO confirm, and confirm
# what the three extra entries of the 23-colour schemes stand for).
jalview_color_list = {"Clustal": ["#80a0f0","#f01505","#00ff00","#c048c0","#f08080","#00ff00","#c048c0","#f09048","#15a4a4","#80a0f0","#80a0f0","#f01505","#80a0f0","#80a0f0","#ffff00","#00ff00","#00ff00","#80a0f0","#15a4a4","#80a0f0"],
"Zappo": ["#ffafaf","#6464ff","#00ff00","#ff0000","#ffff00","#00ff00","#ff0000","#ff00ff","#6464ff","#ffafaf","#ffafaf","#6464ff","#ffafaf","#ffc800","#ff00ff","#00ff00","#00ff00","#ffc800","#ffc800","#ffafaf"],
"Taylor": ["#ccff00","#0000ff","#cc00ff","#ff0000","#ffff00","#ff00cc","#ff0066","#ff9900","#0066ff","#66ff00","#33ff00","#6600ff","#00ff00","#00ff66","#ffcc00","#ff3300","#ff6600","#00ccff","#00ffcc","#99ff00"],
"Hydrophobicity": ["#ad0052","#0000ff","#0c00f3","#0c00f3","#c2003d","#0c00f3","#0c00f3","#6a0095","#1500ea","#ff0000","#ea0015","#0000ff","#b0004f","#cb0034","#4600b9","#5e00a1","#61009e","#5b00a4","#4f00b0","#f60009","#0c00f3","#680097","#0c00f3"],
"Helix Propensity": ["#e718e7","#6f906f","#1be41b","#778877","#23dc23","#926d92","#ff00ff","#00ff00","#758a75","#8a758a","#ae51ae","#a05fa0","#ef10ef","#986798","#00ff00","#36c936","#47b847","#8a758a","#21de21","#857a85","#49b649","#758a75","#c936c9"],
"Strand Propensity": ["#5858a7","#6b6b94","#64649b","#2121de","#9d9d62","#8c8c73","#0000ff","#4949b6","#60609f","#ecec13","#b2b24d","#4747b8","#82827d","#c2c23d","#2323dc","#4949b6","#9d9d62","#c0c03f","#d3d32c","#ffff00","#4343bc","#797986","#4747b8"],
"Turn Propensity": ["#2cd3d3","#708f8f","#ff0000","#e81717","#a85757","#3fc0c0","#778888","#ff0000","#708f8f","#00ffff","#1ce3e3","#7e8181","#1ee1e1","#1ee1e1","#f60909","#e11e1e","#738c8c","#738c8c","#9d6262","#07f8f8","#f30c0c","#7c8383","#5ba4a4"],
"Buried Index": ["#00a35c","#00fc03","#00eb14","#00eb14","#0000ff","#00f10e","#00f10e","#009d62","#00d52a","#0054ab","#007b84","#00ff00","#009768","#008778","#00e01f","#00d52a","#00db24","#00a857","#00e619","#005fa0","#00eb14","#00b649","#00f10e"]}
###########################
# to be deprecated functions
###########################
def set_dropout(model_config, dropout=0.0):
  '''Set every dropout rate listed below on `model_config` to `dropout`.

  Covers the evoformer MSA row attention, the four triangle
  attention/multiplication modules of both the evoformer and the template
  pair stack, and the structure-module head. The config is modified in place
  and also returned.
  '''
  trunk = model_config.model.embeddings_and_evoformer
  evoformer = trunk.evoformer
  template_pair_stack = trunk.template.template_pair_stack
  triangle_modules = (
      "triangle_attention_ending_node",
      "triangle_attention_starting_node",
      "triangle_multiplication_incoming",
      "triangle_multiplication_outgoing",
  )

  evoformer.msa_row_attention_with_pair_bias.dropout_rate = dropout
  for parent in (evoformer, template_pair_stack):
    for module_name in triangle_modules:
      getattr(parent, module_name).dropout_rate = dropout
  # the structure module names its rate `dropout`, not `dropout_rate`
  model_config.model.heads.structure_module.dropout = dropout
  return model_config