{
  "_name_or_path": "distributed/optimized-gpt2-1b",
  "activation_function": "gelu_new",
  "all_reduce_scores": {
    "0": "NON_PARTICIPATING",
    "1": "NON_PARTICIPATING",
    "10": "NON_PARTICIPATING",
    "100": "NON_PARTICIPATING",
    "101": "SUCCESS",
    "102": "NON_PARTICIPATING",
    "103": "NON_PARTICIPATING",
    "104": "NON_PARTICIPATING",
    "105": "NON_PARTICIPATING",
    "106": "NON_PARTICIPATING",
    "107": "SUCCESS",
    "108": "NON_PARTICIPATING",
    "109": "NON_PARTICIPATING",
    "11": "NON_PARTICIPATING",
    "110": "NON_PARTICIPATING",
    "111": "SUCCESS",
    "112": "NON_PARTICIPATING",
    "113": "NON_PARTICIPATING",
    "114": "NON_PARTICIPATING",
    "115": "NON_PARTICIPATING",
    "116": "SUCCESS",
    "117": "NON_PARTICIPATING",
    "118": "NON_PARTICIPATING",
    "119": "NON_PARTICIPATING",
    "12": "SUCCESS",
    "120": "NON_PARTICIPATING",
    "121": "NON_PARTICIPATING",
    "122": "NON_PARTICIPATING",
    "123": "NON_PARTICIPATING",
    "124": "NON_PARTICIPATING",
    "125": "NON_PARTICIPATING",
    "126": "NON_PARTICIPATING",
    "127": "NON_PARTICIPATING",
    "128": "NON_PARTICIPATING",
    "129": "NON_PARTICIPATING",
    "13": "NON_PARTICIPATING",
    "130": "NON_PARTICIPATING",
    "131": "NON_PARTICIPATING",
    "132": "NON_PARTICIPATING",
    "133": "NON_PARTICIPATING",
    "134": "NON_PARTICIPATING",
    "135": "NON_PARTICIPATING",
    "136": "NON_PARTICIPATING",
    "137": "SUCCESS",
    "138": "NON_PARTICIPATING",
    "139": "NON_PARTICIPATING",
    "14": "NON_PARTICIPATING",
    "140": "NON_PARTICIPATING",
    "141": "NON_PARTICIPATING",
    "142": "NON_PARTICIPATING",
    "143": "NON_PARTICIPATING",
    "144": "NON_PARTICIPATING",
    "145": "NON_PARTICIPATING",
    "146": "NON_PARTICIPATING",
    "147": "NON_PARTICIPATING",
    "148": "NON_PARTICIPATING",
    "149": "NON_PARTICIPATING",
    "15": "NON_PARTICIPATING",
    "150": "SUCCESS",
    "151": "NON_PARTICIPATING",
    "152": "NON_PARTICIPATING",
    "153": "NON_PARTICIPATING",
    "154": "NON_PARTICIPATING",
    "155": "NON_PARTICIPATING",
    "156": "NON_PARTICIPATING",
    "157": "NON_PARTICIPATING",
    "158": "NON_PARTICIPATING",
    "159": "NON_PARTICIPATING",
    "16": "NON_PARTICIPATING",
    "160": "NON_PARTICIPATING",
    "161": "NON_PARTICIPATING",
    "162": "NON_PARTICIPATING",
    "163": "SUCCESS",
    "164": "NON_PARTICIPATING",
    "165": "NON_PARTICIPATING",
    "166": "NON_PARTICIPATING",
    "167": "SUCCESS",
    "168": "NON_PARTICIPATING",
    "169": "NON_PARTICIPATING",
    "17": "NON_PARTICIPATING",
    "170": "NON_PARTICIPATING",
    "171": "NON_PARTICIPATING",
    "172": "NON_PARTICIPATING",
    "173": "NON_PARTICIPATING",
    "174": "SUCCESS",
    "175": "SUCCESS",
    "176": "SUCCESS",
    "177": "NON_PARTICIPATING",
    "178": "NON_PARTICIPATING",
    "179": "NON_PARTICIPATING",
    "18": "NON_PARTICIPATING",
    "180": "NON_PARTICIPATING",
    "181": "NON_PARTICIPATING",
    "182": "NON_PARTICIPATING",
    "183": "NON_PARTICIPATING",
    "184": "SUCCESS",
    "185": "NON_PARTICIPATING",
    "186": "SUCCESS",
    "187": "NON_PARTICIPATING",
    "188": "NON_PARTICIPATING",
    "189": "NON_PARTICIPATING",
    "19": "NON_PARTICIPATING",
    "190": "NON_PARTICIPATING",
    "191": "NON_PARTICIPATING",
    "192": "NON_PARTICIPATING",
    "193": "SUCCESS",
    "194": "NON_PARTICIPATING",
    "195": "NON_PARTICIPATING",
    "196": "SUCCESS",
    "197": "NON_PARTICIPATING",
    "198": "NON_PARTICIPATING",
    "199": "NON_PARTICIPATING",
    "2": "NON_PARTICIPATING",
    "20": "NON_PARTICIPATING",
    "200": "NON_PARTICIPATING",
    "201": "NON_PARTICIPATING",
    "202": "NON_PARTICIPATING",
    "203": "NON_PARTICIPATING",
    "204": "NON_PARTICIPATING",
    "205": "NON_PARTICIPATING",
    "206": "NON_PARTICIPATING",
    "207": "NON_PARTICIPATING",
    "208": "NON_PARTICIPATING",
    "209": "NON_PARTICIPATING",
    "21": "NON_PARTICIPATING",
    "210": "NON_PARTICIPATING",
    "211": "NON_PARTICIPATING",
    "212": "NON_PARTICIPATING",
    "213": "NON_PARTICIPATING",
    "214": "NON_PARTICIPATING",
    "215": "NON_PARTICIPATING",
    "216": "NON_PARTICIPATING",
    "217": "NON_PARTICIPATING",
    "218": "NON_PARTICIPATING",
    "219": "NON_PARTICIPATING",
    "22": "NON_PARTICIPATING",
    "220": "NON_PARTICIPATING",
    "221": "SUCCESS",
    "222": "NON_PARTICIPATING",
    "223": "NON_PARTICIPATING",
    "224": "NON_PARTICIPATING",
    "225": "NON_PARTICIPATING",
    "226": "SUCCESS",
    "227": "NON_PARTICIPATING",
    "228": "NON_PARTICIPATING",
    "229": "NON_PARTICIPATING",
    "23": "SUCCESS",
    "230": "SUCCESS",
    "231": "NON_PARTICIPATING",
    "232": "NON_PARTICIPATING",
    "233": "NON_PARTICIPATING",
    "234": "NON_PARTICIPATING",
    "235": "NON_PARTICIPATING",
    "236": "SUCCESS",
    "237": "NON_PARTICIPATING",
    "238": "SUCCESS",
    "239": "NON_PARTICIPATING",
    "24": "NON_PARTICIPATING",
    "240": "NON_PARTICIPATING",
    "241": "NON_PARTICIPATING",
    "242": "SUCCESS",
    "243": "NON_PARTICIPATING",
    "244": "SUCCESS",
    "245": "NON_PARTICIPATING",
    "246": "NON_PARTICIPATING",
    "247": "NON_PARTICIPATING",
    "248": "NON_PARTICIPATING",
    "249": "NON_PARTICIPATING",
    "25": "SUCCESS",
    "250": "NON_PARTICIPATING",
    "251": "NON_PARTICIPATING",
    "252": "NON_PARTICIPATING",
    "253": "NON_PARTICIPATING",
    "254": "NON_PARTICIPATING",
    "255": "NON_PARTICIPATING",
    "26": "NON_PARTICIPATING",
    "27": "NON_PARTICIPATING",
    "28": "NON_PARTICIPATING",
    "29": "NON_PARTICIPATING",
    "3": "NON_PARTICIPATING",
    "30": "NON_PARTICIPATING",
    "31": "SUCCESS",
    "32": "SUCCESS",
    "33": "NON_PARTICIPATING",
    "34": "SUCCESS",
    "35": "NON_PARTICIPATING",
    "36": "NON_PARTICIPATING",
    "37": "NON_PARTICIPATING",
    "38": "NON_PARTICIPATING",
    "39": "SUCCESS",
    "4": "NON_PARTICIPATING",
    "40": "NON_PARTICIPATING",
    "41": "NON_PARTICIPATING",
    "42": "NON_PARTICIPATING",
    "43": "NON_PARTICIPATING",
    "44": "SUCCESS",
    "45": "NON_PARTICIPATING",
    "46": "NON_PARTICIPATING",
    "47": "NON_PARTICIPATING",
    "48": "NON_PARTICIPATING",
    "49": "NON_PARTICIPATING",
    "5": "NON_PARTICIPATING",
    "50": "NON_PARTICIPATING",
    "51": "NON_PARTICIPATING",
    "52": "NON_PARTICIPATING",
    "53": "NON_PARTICIPATING",
    "54": "NON_PARTICIPATING",
    "55": "SUCCESS",
    "56": "NON_PARTICIPATING",
    "57": "NON_PARTICIPATING",
    "58": "NON_PARTICIPATING",
    "59": "NON_PARTICIPATING",
    "6": "NON_PARTICIPATING",
    "60": "NON_PARTICIPATING",
    "61": "NON_PARTICIPATING",
    "62": "NON_PARTICIPATING",
    "63": "NON_PARTICIPATING",
    "64": "NON_PARTICIPATING",
    "65": "NON_PARTICIPATING",
    "66": "NON_PARTICIPATING",
    "67": "NON_PARTICIPATING",
    "68": "NON_PARTICIPATING",
    "69": "NON_PARTICIPATING",
    "7": "NON_PARTICIPATING",
    "70": "NON_PARTICIPATING",
    "71": "NON_PARTICIPATING",
    "72": "NON_PARTICIPATING",
    "73": "SUCCESS",
    "74": "NON_PARTICIPATING",
    "75": "NON_PARTICIPATING",
    "76": "NON_PARTICIPATING",
    "77": "NON_PARTICIPATING",
    "78": "NON_PARTICIPATING",
    "79": "SUCCESS",
    "8": "NON_PARTICIPATING",
    "80": "NON_PARTICIPATING",
    "81": "NON_PARTICIPATING",
    "82": "NON_PARTICIPATING",
    "83": "SUCCESS",
    "84": "NON_PARTICIPATING",
    "85": "NON_PARTICIPATING",
    "86": "NON_PARTICIPATING",
    "87": "NON_PARTICIPATING",
    "88": "NON_PARTICIPATING",
    "89": "NON_PARTICIPATING",
    "9": "NON_PARTICIPATING",
    "90": "SUCCESS",
    "91": "NON_PARTICIPATING",
    "92": "NON_PARTICIPATING",
    "93": "SUCCESS",
    "94": "NON_PARTICIPATING",
    "95": "NON_PARTICIPATING",
    "96": "NON_PARTICIPATING",
    "97": "NON_PARTICIPATING",
    "98": "NON_PARTICIPATING",
    "99": "NON_PARTICIPATING"
  },
  "architectures": [
    "GPTOptim"
  ],
  "attn_pdrop": 0.1,
  "auto_map": {
    "AutoConfig": "distributed/optimized-gpt2-500m--configuration_gpt_optimized.GPTOptimConfig",
    "AutoModelForCausalLM": "distributed/optimized-gpt2-500m--modeling_gpt_optimized.GPTOptim"
  },
  "block_size": 1024,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt_optimized",
  "n_embd": 1280,
  "n_head": 32,
  "n_inner": null,
  "n_layer": 48,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.39.3",
  "use_cache": true,
  "vocab_size": 50257
}