andrewqian123
committed on
Commit
•
a0e05d2
1
Parent(s):
ad7a067
Update processing_minicpmv.py
Browse files- processing_minicpmv.py +8 -0
processing_minicpmv.py
CHANGED
@@ -164,6 +164,14 @@ class MiniCPMVProcessor(ProcessorMixin):
|
|
164 |
return input_ids.unsqueeze(0), image_bounds
|
165 |
|
166 |
def _convert_images_texts_to_inputs(self, images, texts, do_pad=False, truncation=None, max_length=None, return_tensors=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
if not len(images):
|
168 |
model_inputs = self.tokenizer(texts, return_tensors=return_tensors, padding=do_pad, truncation=truncation, max_length=max_length)
|
169 |
return MiniCPMVBatchFeature(data={**model_inputs})
|
|
|
164 |
return input_ids.unsqueeze(0), image_bounds
|
165 |
|
166 |
def _convert_images_texts_to_inputs(self, images, texts, do_pad=False, truncation=None, max_length=None, return_tensors=None):
|
167 |
+
assert len(images) == len(texts)
|
168 |
+
batch = []
|
169 |
+
for ind in range(len(images)):
|
170 |
+
result = _convert_images_texts_to_inputs2(self, images[ind], texts[ind], do_pad, truncation, max_length, return_tensors)
|
171 |
+
batch.append(result)
|
172 |
+
return batch
|
173 |
+
|
174 |
+
def _convert_images_texts_to_inputs2(self, images, texts, do_pad=False, truncation=None, max_length=None, return_tensors=None):
|
175 |
if not len(images):
|
176 |
model_inputs = self.tokenizer(texts, return_tensors=return_tensors, padding=do_pad, truncation=truncation, max_length=max_length)
|
177 |
return MiniCPMVBatchFeature(data={**model_inputs})
|