benchang1110 committed on
Commit
842776d
1 Parent(s): d85f539

Upload processor

Browse files
Files changed (1) hide show
  1. processing_taivisionlm.py +1 -29
processing_taivisionlm.py CHANGED
@@ -285,32 +285,4 @@ class TaiVisionProcessor(ProcessorMixin):
285
  def model_input_names(self):
286
  tokenizer_input_names = self.tokenizer.model_input_names
287
  image_processor_input_names = self.image_processor.model_input_names
288
- return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
289
-
290
-
291
-
292
- # if __name__ == '__main__':
293
- # from configuration_taivisionlm import TaiVisionLMConfig
294
- # import transformers
295
- # import torch
296
- # config = TaiVisionLMConfig.from_pretrained("./")
297
- # preprocessor = transformers.SiglipImageProcessor.from_pretrained("google/siglip-base-patch16-224")
298
- # preprocessor.image_seq_length = config.num_image_tokens
299
- # tokenizer = transformers.AutoTokenizer.from_pretrained("benchang1110/Taiwan-tinyllama-v1.0-chat")
300
- # processor = TaiVisionProcessor(tokenizer=tokenizer, image_processor=preprocessor)
301
- # processor.save_pretrained("./")
302
-
303
- # from PIL import Image
304
- # import requests
305
- # processor = TaiVisionProcessor.from_pretrained("./")
306
- # url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg"
307
- # image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
308
- # text = "Hello< what is your name?"
309
- # suffix = "I am fine, thank you."
310
- # inputs = processor(text=text,suffix=suffix,images=image, return_tensors="pt",padding="max_length",max_length=512)
311
- # print(inputs['attention_mask'].shape)
312
- # print(inputs['input_ids'].shape)
313
- # print(inputs['token_type_ids'].shape)
314
- # # print number of 0 in token_type_ids
315
- # print(torch.sum(inputs['token_type_ids']==0))
316
- # print(inputs)
 
285
  def model_input_names(self):
286
  tokenizer_input_names = self.tokenizer.model_input_names
287
  image_processor_input_names = self.image_processor.model_input_names
288
+ return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))