czczup committed on
Commit
253108a
1 Parent(s): 56dcc06

Update README.md

Files changed (1)
  1. README.md +55 -0
README.md CHANGED
@@ -1,3 +1,58 @@
---
license: mit
---

```python
import torch
from PIL import Image
from transformers import AutoModel, CLIPImageProcessor
from transformers import AutoTokenizer


model = AutoModel.from_pretrained(
    'OpenGVLab/InternVL-14B-224px',
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True).cuda().eval()

image_processor = CLIPImageProcessor.from_pretrained('OpenGVLab/InternVL-14B-224px')

tokenizer = AutoTokenizer.from_pretrained(
    'OpenGVLab/InternVL-14B-224px', use_fast=False, add_eos_token=True)
tokenizer.pad_token_id = 0  # set pad_token_id to 0

images = [
    Image.open('./examples/image1.jpg').convert('RGB'),
    Image.open('./examples/image2.jpg').convert('RGB'),
    Image.open('./examples/image3.jpg').convert('RGB')
]
prefix = 'summarize:'
texts = [
    prefix + 'a photo of a red panda',  # English
    prefix + '一张熊猫的照片',  # Chinese: "a photo of a panda"
    prefix + '二匹の猫の写真'  # Japanese: "a photo of two cats"
]

pixel_values = image_processor(images=images, return_tensors='pt').pixel_values
pixel_values = pixel_values.to(torch.bfloat16).cuda()
input_ids = tokenizer(texts, return_tensors='pt', max_length=80,
                      truncation=True, padding='max_length').input_ids.cuda()

# InternVL-C
logits_per_image, logits_per_text = model(
    image=pixel_values, text=input_ids, mode='InternVL-C')
probs = logits_per_image.softmax(dim=-1)
# tensor([[9.9609e-01, 5.2185e-03, 6.0070e-08],
#         [2.2949e-02, 9.7656e-01, 5.9903e-06],
#         [3.2932e-06, 7.4863e-05, 1.0000e+00]], device='cuda:0',
#        dtype=torch.bfloat16, grad_fn=<SoftmaxBackward0>)

# InternVL-G
logits_per_image, logits_per_text = model(
    image=pixel_values, text=input_ids, mode='InternVL-G')
probs = logits_per_image.softmax(dim=-1)
# tensor([[9.9609e-01, 3.1738e-03, 3.6322e-08],
#         [8.6060e-03, 9.9219e-01, 2.8759e-06],
#         [1.7583e-06, 3.1233e-05, 1.0000e+00]], device='cuda:0',
#        dtype=torch.bfloat16, grad_fn=<SoftmaxBackward0>)
```
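Each row of `probs` is a softmax over the candidate texts for one image, so the near-diagonal tensors shown in the comments mean every image is matched to its own caption. As a small follow-up (not part of the original README; a minimal sketch that only reuses `probs` and `texts` from the snippet above with standard PyTorch ops), the top-scoring caption per image can be read off like this:

```python
# Sketch: report the best-matching caption for each image.
# Reuses `probs` and `texts` from the example above; nothing model-specific here.
best = probs.argmax(dim=-1)  # index of the highest-probability text per image
for i, j in enumerate(best.tolist()):
    print(f'image {i}: {texts[j]!r} (p={probs[i, j].item():.4f})')
```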