orionweller
/

test-flex-gpt

Model card Files Files and versions Community

oweller2 committed 9 days ago

Commit

4753b37

•

1 Parent(s): c87aa93

loss

Files changed (2) hide show

loss.py +30 -0
modeling_flexbert.py +1 -1

loss.py ADDED Viewed

	@@ -0,0 +1,30 @@

+# Copyright 2024 **AUTHORS_TODO**
+# License: Apache-2.0
+import inspect
+import torch.nn as nn
+from .configuration_bert import FlexBertConfig
+try:
+    from flash_attn.losses.cross_entropy import CrossEntropyLoss
+except ImportError:
+    CrossEntropyLoss = None
+LOSS2CLS = {
+    "cross_entropy": nn.CrossEntropyLoss,
+    "binary_cross_entropy": nn.BCEWithLogitsLoss,
+    "mean_squared_error": nn.MSELoss,
+}
+if CrossEntropyLoss is not None:
+    LOSS2CLS["fa_cross_entropy"] = CrossEntropyLoss
+def get_loss_fn(config: FlexBertConfig) -> nn.Module:
+    try:
+        loss_class = LOSS2CLS[config.loss_function]
+        signature = inspect.signature(loss_class)
+        loss_kwargs = {k: v for k, v in config.loss_kwargs.items() if k in signature.parameters}
+        return loss_class(**loss_kwargs)
+    except KeyError:
+        raise ValueError(f"Invalid loss function type: {config.loss_function}, must be one of {LOSS2CLS.keys()}.")

modeling_flexbert.py CHANGED Viewed

@@ -116,7 +116,7 @@ from .layers import (
 from .mlp import FlexBertGLU, FlexBertMLP, FlexBertParallelGLU
 from .normalization import get_norm_layer
 from .padding import pad_input, unpad_input
-from .bert_layers.loss import get_loss_fn
 # TODO: This is not used here, but this is so these files are copied when saving the model in ST/PyLate
 from .utils import StrEnum

 from .mlp import FlexBertGLU, FlexBertMLP, FlexBertParallelGLU
 from .normalization import get_norm_layer
 from .padding import pad_input, unpad_input
+from .loss import get_loss_fn
 # TODO: This is not used here, but this is so these files are copied when saving the model in ST/PyLate
 from .utils import StrEnum