stepfun-ai
/

GOT-OCR2_0

Image-Text-to-Text

feature-extraction

vision-language

Model card | Files and versions | Community

ucaslcl committed on Sep 13

Commit

cd358bb

•

1 Parent(s): e5f851c

Update README.md

Files changed (1) hide show

README.md +24 -28

README.md CHANGED Viewed

@@ -15,39 +15,35 @@ megfile==3.1.2
 ```python
-# test.py
-import torch
-from PIL import Image
 from transformers import AutoModel, AutoTokenizer
-model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True,
-    attn_implementation='sdpa', torch_dtype=torch.bfloat16) # sdpa or flash_attention_2, no eager
 model = model.eval().cuda()
-tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True)
-image = Image.open('xx.jpg').convert('RGB')
-question = 'What is in the image?'
-msgs = [{'role': 'user', 'content': [image, question]}]
-res = model.chat(
-    image=None,
-    msgs=msgs,
-    tokenizer=tokenizer
-)
 print(res)
-## if you want to use streaming, please make sure sampling=True and stream=True
-## the model.chat will return a generator
-res = model.chat(
-    image=None,
-    msgs=msgs,
-    tokenizer=tokenizer,
-    sampling=True,
-    stream=True
-)
-generated_text = ""
-for new_text in res:
-    generated_text += new_text
-    print(new_text, flush=True, end='')
 ```

 ```python
 from transformers import AutoModel, AutoTokenizer
+tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
 model = model.eval().cuda()
+# input your test image
+image_file = 'xxx.jpg'
+# plain texts OCR
+model.chat(tokenizer, image_file, ocr_type='ocr')
+# format texts OCR:
+model.chat(tokenizer, image_file, ocr_type='format')
+# fine-grained OCR:
+model.chat(tokenizer, image_file, ocr_type='ocr', ocr_box='')
+model.chat(tokenizer, image_file, ocr_type='format', ocr_box='')
+model.chat(tokenizer, image_file, ocr_type='ocr', ocr_color='')
+model.chat(tokenizer, image_file, ocr_type='format', ocr_color='')
+# multi-crop OCR:
+res = model.chat_crop(tokenizer, image_file = image_file)
+# render the formatted OCR results:
+model.chat(tokenizer, image_file, ocr_type='format', ocr_box='', ocr_color='', render=True, save_render_file = '/data/code/a2hf/chat_plus.html')
 print(res)
 ```