Commit
•
b161793
1
Parent(s):
866d169
Fix number of image tokens (#51)
Browse files
- Fix number of image tokens (4eedefefd840f457373d13c6043ef9aed8153c78)
Co-authored-by: Moshe Berchansky <[email protected]>
image_processing_phi3_v.py
CHANGED
@@ -250,7 +250,7 @@ class Phi3VImageProcessor(BaseImageProcessor):
|
|
250 |
|
251 |
# [(3, h, w)], where h, w is multiple of 336
|
252 |
shapes = [[im.size(1), im.size(2)] for im in hd_images]
|
253 |
-
num_img_tokens = [int((h//336*w//336+1)*144 + 1 + (h//336+1)*12) for h, w in shapes]
|
254 |
# reshape to channel dimension -> (num_images, num_crops, 3, 336, 336)
|
255 |
# (1, 3, h//336, 336, w//336, 336) -> (1, h//336, w//336, 3, 336, 336) -> (h//336*w//336, 3, 336, 336)
|
256 |
hd_images_reshape = [im.reshape(1, 3, h//336, 336, w//336, 336).permute(0,2,4,1,3,5).reshape(-1, 3, 336, 336).contiguous() for im, (h, w) in zip(hd_images, shapes)]
|
|
|
250 |
|
251 |
# [(3, h, w)], where h, w is multiple of 336
|
252 |
shapes = [[im.size(1), im.size(2)] for im in hd_images]
|
253 |
+
num_img_tokens = [int(((h//336)*(w//336)+1)*144 + 1 + (h//336+1)*12) for h, w in shapes]
|
254 |
# reshape to channel dimension -> (num_images, num_crops, 3, 336, 336)
|
255 |
# (1, 3, h//336, 336, w//336, 336) -> (1, h//336, w//336, 3, 336, 336) -> (h//336*w//336, 3, 336, 336)
|
256 |
hd_images_reshape = [im.reshape(1, 3, h//336, 336, w//336, 336).permute(0,2,4,1,3,5).reshape(-1, 3, 336, 336).contiguous() for im, (h, w) in zip(hd_images, shapes)]
|