benchang1110
committed on
Commit
•
842776d
1
Parent(s):
d85f539
Upload processor
Browse files- processing_taivisionlm.py +1 -29
processing_taivisionlm.py
CHANGED
@@ -285,32 +285,4 @@ class TaiVisionProcessor(ProcessorMixin):
|
|
285 |
def model_input_names(self):
|
286 |
tokenizer_input_names = self.tokenizer.model_input_names
|
287 |
image_processor_input_names = self.image_processor.model_input_names
|
288 |
-
return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
# if __name__ == '__main__':
|
293 |
-
# from configuration_taivisionlm import TaiVisionLMConfig
|
294 |
-
# import transformers
|
295 |
-
# import torch
|
296 |
-
# config = TaiVisionLMConfig.from_pretrained("./")
|
297 |
-
# preprocessor = transformers.SiglipImageProcessor.from_pretrained("google/siglip-base-patch16-224")
|
298 |
-
# preprocessor.image_seq_length = config.num_image_tokens
|
299 |
-
# tokenizer = transformers.AutoTokenizer.from_pretrained("benchang1110/Taiwan-tinyllama-v1.0-chat")
|
300 |
-
# processor = TaiVisionProcessor(tokenizer=tokenizer, image_processor=preprocessor)
|
301 |
-
# processor.save_pretrained("./")
|
302 |
-
|
303 |
-
# from PIL import Image
|
304 |
-
# import requests
|
305 |
-
# processor = TaiVisionProcessor.from_pretrained("./")
|
306 |
-
# url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg"
|
307 |
-
# image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
|
308 |
-
# text = "Hello< what is your name?"
|
309 |
-
# suffix = "I am fine, thank you."
|
310 |
-
# inputs = processor(text=text,suffix=suffix,images=image, return_tensors="pt",padding="max_length",max_length=512)
|
311 |
-
# print(inputs['attention_mask'].shape)
|
312 |
-
# print(inputs['input_ids'].shape)
|
313 |
-
# print(inputs['token_type_ids'].shape)
|
314 |
-
# # print number of 0 in token_type_ids
|
315 |
-
# print(torch.sum(inputs['token_type_ids']==0))
|
316 |
-
# print(inputs)
|
|
|
def model_input_names(self):
    """Return the combined model input names from the tokenizer and image processor.

    Names contributed by the tokenizer come first, followed by any names the
    image processor adds; duplicates are removed while preserving first-seen
    order.

    NOTE(review): upstream ``ProcessorMixin`` typically exposes this as a
    ``@property`` — confirm callers invoke it as a method here.
    """
    combined = self.tokenizer.model_input_names + self.image_processor.model_input_names
    # Manual order-preserving de-duplication (equivalent to dict.fromkeys).
    unique_names = []
    for name in combined:
        if name not in unique_names:
            unique_names.append(name)
    return unique_names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|