|
from transformers import ( |
|
TrOCRConfig, |
|
TrOCRProcessor, |
|
TrOCRForCausalLM, |
|
ViTConfig, |
|
ViTModel, |
|
VisionEncoderDecoderModel, |
|
) |
|
import gradio as gr |
|
|
|
# Single checkpoint identifier used for both objects below, so the processor
# and the model are always loaded from the same pretrained weights.
_CHECKPOINT = "microsoft/trocr-base-handwritten"

# Loaded once at import time and reused by every call to ocr().
# `processor` prepares the image input and decodes generated ids to text;
# `model` produces the generated ids.
processor = TrOCRProcessor.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
|
def ocr(image):
    """Transcribe the text found in a single image.

    Relies on the module-level ``processor`` and ``model`` objects.

    Args:
        image: The image to transcribe, in any form the module-level
            ``processor`` accepts, or ``None`` when no image was supplied
            (e.g. the UI form was submitted empty).

    Returns:
        The decoded text for the image with special tokens stripped, or an
        empty string when ``image`` is ``None``.
    """
    # Nothing to transcribe: return an empty result instead of letting the
    # processor fail on a missing input.
    if image is None:
        return ""

    # Convert the image into the tensor input expected by the model.
    pixel_values = processor(image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)

    # One image in means one sequence out; take the first decoded string.
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]
|
|
|
|
|
# Web UI: one image input wired to ocr(), one text output showing its result.
demo = gr.Interface(
    fn=ocr,
    inputs="image",
    outputs=["text"],
)

# Start the Gradio server for the interface defined above.
demo.launch()