Re. No operator found for `memory_efficient_attention_forward` with inputs

by Andyrasika - opened

Thanks for sharing this amazing model. Here is an issue I am getting when running the model:

NotImplementedError Traceback (most recent call last)
Cell In[4], line 17
12 device = torch.device('cuda')
14 # For Multi-modal Image Generation, must set load_tokenizer=True to load the tokenizer to tokenize input image.
15 # If you have already install xformers, set use_xformers=True to save the GPU memory (Xformers is not supported on V100 GPU)
16 # If you have already download the checkpoint, set `local_files_only=True`` to avoid auto-downloading from remote
---> 17 model = build_model(model_path=model_path, model_dtype=model_dtype, check_safety=False,
18 device_id=device_id, use_xformers=True, understanding=False, load_tokenizer=True)
19 model = model.to(device)
20 print("Building Model Finsished")

File /workspace/LaVIT/models/, in build_model(model_path, model_dtype, device_id, image_size, use_xformers, understanding, load_tokenizer, pixel_decoding, check_safety, local_files_only)
44 lavit = LaVITforUnderstanding(model_path=model_path, model_dtype=model_dtype,
45 device_id=device_id, use_xformers=use_xformers)
46 else:
---> 47 lavit = LaVITforGeneration(model_path=model_path, model_dtype=model_dtype, device_id=device_id,
48 use_xformers=use_xformers, check_safety=check_safety, load_tokenizer=load_tokenizer, pixel_decoding=pixel_decoding)
50 # Convert the model parameters to the defined precision
51 if model_dtype == 'bf16':

File /workspace/LaVIT/models/, in LaVITforGeneration.init(self, model_path, model_dtype, device_id, use_xformers, check_safety, load_tokenizer, pixel_decoding, **kwargs)
117 if xformers_version == version.parse("0.0.16"):
118 print(
119 "xFormers 0.0.16 cannot be used for training in some GPUs. If you observe problems during training, please update xFormers to at least 0.0.17. See https://huggingface.co/docs/diffusers/main/en/optimization/xformers for more details."
120 )
--> 121 self.unet.enable_xformers_memory_efficient_attention()
122 else:
123 raise ValueError("xformers is not available. Make sure it is installed correctly or set use_xformers=False")

File /usr/local/lib/python3.10/dist-packages/diffusers/models/, in ModelMixin.enable_xformers_memory_efficient_attention(self, attention_op)
253 def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
254 r"""
255 Enable memory efficient attention from xFormers.
285 ```
286 """
--> 287 self.set_use_memory_efficient_attention_xformers(True, attention_op)

File /usr/local/lib/python3.10/dist-packages/diffusers/models/, in ModelMixin.set_use_memory_efficient_attention_xformers(self, valid, attention_op)
249 for module in self.children():
250 if isinstance(module, torch.nn.Module):
--> 251 fn_recursive_set_mem_eff(module)

File /usr/local/lib/python3.10/dist-packages/diffusers/models/, in ModelMixin.set_use_memory_efficient_attention_xformers..fn_recursive_set_mem_eff(module)
244 module.set_use_memory_efficient_attention_xformers(valid, attention_op)
246 for child in module.children():
--> 247 fn_recursive_set_mem_eff(child)

File /usr/local/lib/python3.10/dist-packages/diffusers/models/, in ModelMixin.set_use_memory_efficient_attention_xformers..fn_recursive_set_mem_eff(module)
244 module.set_use_memory_efficient_attention_xformers(valid, attention_op)
246 for child in module.children():
--> 247 fn_recursive_set_mem_eff(child)

File /usr/local/lib/python3.10/dist-packages/diffusers/models/, in ModelMixin.set_use_memory_efficient_attention_xformers..fn_recursive_set_mem_eff(module)
244 module.set_use_memory_efficient_attention_xformers(valid, attention_op)
246 for child in module.children():
--> 247 fn_recursive_set_mem_eff(child)

File /usr/local/lib/python3.10/dist-packages/diffusers/models/, in ModelMixin.set_use_memory_efficient_attention_xformers..fn_recursive_set_mem_eff(module)
242 def fn_recursive_set_mem_eff(module: torch.nn.Module):
243 if hasattr(module, "set_use_memory_efficient_attention_xformers"):
--> 244 module.set_use_memory_efficient_attention_xformers(valid, attention_op)
246 for child in module.children():
247 fn_recursive_set_mem_eff(child)

File /usr/local/lib/python3.10/dist-packages/diffusers/models/, in ModelMixin.set_use_memory_efficient_attention_xformers(self, valid, attention_op)
249 for module in self.children():
250 if isinstance(module, torch.nn.Module):
--> 251 fn_recursive_set_mem_eff(module)

File /usr/local/lib/python3.10/dist-packages/diffusers/models/, in ModelMixin.set_use_memory_efficient_attention_xformers..fn_recursive_set_mem_eff(module)
244 module.set_use_memory_efficient_attention_xformers(valid, attention_op)
246 for child in module.children():
--> 247 fn_recursive_set_mem_eff(child)

File /usr/local/lib/python3.10/dist-packages/diffusers/models/, in ModelMixin.set_use_memory_efficient_attention_xformers..fn_recursive_set_mem_eff(module)
244 module.set_use_memory_efficient_attention_xformers(valid, attention_op)
246 for child in module.children():
--> 247 fn_recursive_set_mem_eff(child)

File /usr/local/lib/python3.10/dist-packages/diffusers/models/, in ModelMixin.set_use_memory_efficient_attention_xformers..fn_recursive_set_mem_eff(module)
242 def fn_recursive_set_mem_eff(module: torch.nn.Module):
243 if hasattr(module, "set_use_memory_efficient_attention_xformers"):
--> 244 module.set_use_memory_efficient_attention_xformers(valid, attention_op)
246 for child in module.children():
247 fn_recursive_set_mem_eff(child)

File /usr/local/lib/python3.10/dist-packages/diffusers/models/, in Attention.set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers, attention_op)
210 _ = xformers.ops.memory_efficient_attention(
211 torch.randn((1, 2, 40), device="cuda"),
212 torch.randn((1, 2, 40), device="cuda"),
213 torch.randn((1, 2, 40), device="cuda"),
214 )
215 except Exception as e:
--> 216 raise e
218 if is_lora:
219 # TODO (sayakpaul): should we throw a warning if someone wants to use the xformers
220 # variant when using PT 2.0 now that we have LoRAAttnProcessor2_0?
221 processor = LoRAXFormersAttnProcessor(
222 hidden_size=self.processor.hidden_size,
223 cross_attention_dim=self.processor.cross_attention_dim,
224 rank=self.processor.rank,
225 attention_op=attention_op,
226 )

File /usr/local/lib/python3.10/dist-packages/diffusers/models/, in Attention.set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers, attention_op)
207 else:
208 try:
209 # Make sure we can run the memory efficient attention
--> 210 _ = xformers.ops.memory_efficient_attention(
211 torch.randn((1, 2, 40), device="cuda"),
212 torch.randn((1, 2, 40), device="cuda"),
213 torch.randn((1, 2, 40), device="cuda"),
214 )
215 except Exception as e:
216 raise e

File /usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/, in memory_efficient_attention(query, key, value, attn_bias, p, scale, op)
116 def memory_efficient_attention(
117 query: torch.Tensor,
118 key: torch.Tensor,
124 op: Optional[AttentionOp] = None,
125 ) -> torch.Tensor:
126 """Implements the memory-efficient attention mechanism following
127 `"Self-Attention Does Not Need O(n^2) Memory" <http://arxiv.org/abs/2112.05682>`_.
221 :return: multi-head attention Tensor with shape [B, Mq, H, Kv]
222 """
--> 223 return _memory_efficient_attention(
224 Inputs(
225 query=query, key=key, value=value, p=p, attn_bias=attn_bias, scale=scale
226 ),
227 op=op,
228 )

File /usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/, in _memory_efficient_attention(inp, op)
316 def _memory_efficient_attention(
317 inp: Inputs, op: Optional[AttentionOp] = None
318 ) -> torch.Tensor:
319 # fast-path that doesn't require computing the logsumexp for backward computation
320 if all(x.requires_grad is False for x in [inp.query, inp.key, inp.value]):
--> 321 return _memory_efficient_attention_forward(
322 inp, op=op[0] if op is not None else None
323 )
325 output_shape = inp.normalize_bmhk()
326 return _fMHA.apply(
327 op, inp.query, inp.key, inp.value, inp.attn_bias, inp.p, inp.scale
328 ).reshape(output_shape)

File /usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/, in _memory_efficient_attention_forward(inp, op)
335 output_shape = inp.normalize_bmhk()
336 if op is None:
--> 337 op = _dispatch_fw(inp, False)
338 else:
339 _ensure_op_supports_or_raise(ValueError, "memory_efficient_attention", op, inp)

File /usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/, in _dispatch_fw(inp, needs_gradient)
111 def _dispatch_fw(inp: Inputs, needs_gradient: bool) -> Type[AttentionFwOpBase]:
112 """Computes the best operator for forward
114 Raises:
118 AttentionOp: The best operator for the configuration
119 """
--> 120 return _run_priority_list(
121 "memory_efficient_attention_forward",
122 _dispatch_fw_priority_list(inp, needs_gradient),
123 inp,
124 )

File /usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/, in _run_priority_list(name, priority_list, inp)
61 for op, not_supported in zip(priority_list, not_supported_reasons):
62 msg += "\n" + _format_not_supported_reasons(op, not_supported)
---> 63 raise NotImplementedError(msg)

NotImplementedError: No operator found for memory_efficient_attention_forward with inputs:
query : shape=(1, 2, 1, 40) (torch.float32)
key : shape=(1, 2, 1, 40) (torch.float32)
value : shape=(1, 2, 1, 40) (torch.float32)
attn_bias : <class 'NoneType'>
p : 0.0
decoderF is not supported because:
xFormers wasn't build with CUDA support
attn_bias type is <class 'NoneType'>
operator wasn't built - see `python -m xformers.info` for more info
flshattF@[version] is not supported because:
xFormers wasn't build with CUDA support
dtype=torch.float32 (supported: {torch.bfloat16, torch.float16})
operator wasn't built - see `python -m xformers.info` for more info
tritonflashattF is not supported because:
xFormers wasn't build with CUDA support
dtype=torch.float32 (supported: {torch.bfloat16, torch.float16})
operator wasn't built - see `python -m xformers.info` for more info
triton is not available
Only work on pre-MLIR triton for now
cutlassF is not supported because:
xFormers wasn't build with CUDA support
operator wasn't built - see `python -m xformers.info` for more info
smallkF is not supported because:
max(query.shape[-1] != value.shape[-1]) > 32
xFormers wasn't build with CUDA support
operator wasn't built - see `python -m xformers.info` for more info
unsupported embed per head: 40

Any help to resolve it would be appreciated.

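You can set `use_xformers=False`. This error is raised because xformers is not correctly installed in your environment: as the traceback shows, your xformers build has no CUDA support. Alternatively, reinstall an xformers build that matches your PyTorch and CUDA versions.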

Sign up or log in to comment