Update modeling_deepseek.py

modeling_deepseek.py (+0 −7)
@@ -48,8 +48,6 @@ from transformers.pytorch_utils import (
 from transformers.utils import (
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
-    is_flash_attn_2_available,
-    is_flash_attn_greater_or_equal_2_10,
     logging,
     replace_return_docstrings,
 )
@@ -58,11 +56,6 @@ from .configuration_deepseek import DeepseekV2Config
 import torch.distributed as dist
 import numpy as np
 
-if is_flash_attn_2_available():
-    from flash_attn import flash_attn_func, flash_attn_varlen_func
-    from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input  # noqa
-
-
 # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 # It means that the function will not be traced through and simply appear as a node in the graph.
 if is_torch_fx_available():
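For context, the two hunks above drop the optional-dependency guard around flash-attn. Below is a minimal sketch of the pattern being removed, so the diff is readable on its own; it restates what the deleted lines did (is_flash_attn_2_available is a real transformers.utils helper, and flash_attn is an optional CUDA-only package) and makes no claim about why the commit drops it:

# Sketch of the guarded optional import removed by this commit.
from transformers.utils import is_flash_attn_2_available

if is_flash_attn_2_available():
    # flash_attn is an optional, CUDA-only dependency: importing it inside
    # the guard lets the model file load on machines without the wheel.
    from flash_attn import flash_attn_func, flash_attn_varlen_func
    from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input  # noqa

With the guard gone, nothing in this file references the flash_attn symbols at import time, which is consistent with the +0 −7 summary (no lines added, only the conditional import block removed).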