Spaces:

li-qing
/

SG3D-Demo

Runtime error

zfzhang-thu committed on Aug 7

Commit

587ae20

•

1 Parent(s): 0cfc205

using bf16

Files changed (1) hide show

leo/model.py CHANGED Viewed

@@ -11,7 +11,7 @@ from leo.grounding_head import SequentialGroundHead
 from leo.utils import get_mlp_head
-def maybe_autocast(model, dtype='float32', enabled=True): ### not-half mode
     # if on cpu, don't use autocast
     # if on gpu, use autocast with dtype if provided, otherwise use torch.float16
     enable_autocast = model.device != torch.device('cpu')
@@ -75,7 +75,7 @@ class SequentialGrounder(torch.nn.Module):
         if 'vicuna' in llm_name.lower():
             self.llm_tokenizer = LlamaTokenizer.from_pretrained(llm_cfg_path, truncation_side=llm_truncation_side)
             self.llm_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
-            self.llm_model = LlamaForCausalLM.from_pretrained(llm_cfg_path, torch_dtype=torch.float32) # not-half mode torch_dtype=torch.float16
             self.llm_model.resize_token_embeddings(len(self.llm_tokenizer))
         else:
             self.llm_tokenizer = AutoTokenizer.from_pretrained(llm_cfg_path, truncation_side=llm_truncation_side)
@@ -320,7 +320,7 @@ class SequentialGrounder(torch.nn.Module):
         with maybe_autocast(self):
             outputs = self.llm_model(
-                inputs_embeds=inputs_embeds.float(), # not-half mode
                 attention_mask=attention_mask,
                 return_dict=True,
                 output_hidden_states=True,

 from leo.utils import get_mlp_head
+def maybe_autocast(model, dtype='bf16', enabled=True):
     # if on cpu, don't use autocast
     # if on gpu, use autocast with dtype if provided, otherwise use torch.float16
     enable_autocast = model.device != torch.device('cpu')
         if 'vicuna' in llm_name.lower():
             self.llm_tokenizer = LlamaTokenizer.from_pretrained(llm_cfg_path, truncation_side=llm_truncation_side)
             self.llm_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
+            self.llm_model = LlamaForCausalLM.from_pretrained(llm_cfg_path, torch_dtype=torch.float16)
             self.llm_model.resize_token_embeddings(len(self.llm_tokenizer))
         else:
             self.llm_tokenizer = AutoTokenizer.from_pretrained(llm_cfg_path, truncation_side=llm_truncation_side)
         with maybe_autocast(self):
             outputs = self.llm_model(
+                inputs_embeds=inputs_embeds,
                 attention_mask=attention_mask,
                 return_dict=True,
                 output_hidden_states=True,