# Hugging Face Space: BLIP image captioning with English -> Indonesian translation.
# (Space status at time of extraction: "Runtime error".)
import subprocess
import sys

# Install SentencePiece at runtime — required by MarianTokenizer for the
# English->Indonesian translation model loaded below.
# Using sys.executable guarantees the package is installed into the same
# interpreter environment that runs this script, and check=True makes a
# failed install fail loudly here instead of as a later ImportError.
subprocess.run([sys.executable, "-m", "pip", "install", "sentencepiece"], check=True)
from PIL import Image # library untuk image
import gradio as gr # library untuk tampilan interface di huggingface
from transformers import BlipProcessor, BlipForConditionalGeneration,MarianTokenizer, MarianMTModel #library blip (image captioning) dan marian untuk translate
import torch
# Select the compute device once (the original recomputed this three times).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Translation model: English -> Indonesian (Helsinki-NLP MarianMT).
# Left on CPU, matching how it is used in generate_caption (its input
# tensors are created on CPU and never moved).
translation_model_id = "Helsinki-NLP/opus-mt-en-id"
translation_model = MarianMTModel.from_pretrained(translation_model_id)
translation_tokenizer = MarianTokenizer.from_pretrained(translation_model_id)

# Captioning model: BLIP (large) produces the English caption; this model
# is moved to `device` so it can use the GPU when one is available.
caption_model_id = "Salesforce/blip-image-captioning-large"
caption_processor = BlipProcessor.from_pretrained(caption_model_id)
caption_model = BlipForConditionalGeneration.from_pretrained(caption_model_id).to(device)
def generate_caption(input_image):
    """Caption an image in English with BLIP, then translate to Indonesian.

    Parameters
    ----------
    input_image : PIL.Image.Image
        Image supplied by the Gradio interface (``type="pil"``).

    Returns
    -------
    tuple[str, str]
        ``(english_caption, indonesian_caption)``.
    """
    # BLIP expects 3-channel RGB; Gradio may deliver RGBA or grayscale.
    image = input_image.convert('RGB')

    # BUG FIX: caption_model lives on `device` (GPU when available) but the
    # processor returns CPU tensors; without .to(device) generation raises a
    # device-mismatch RuntimeError on CUDA machines.
    inputs = caption_processor(images=image, return_tensors="pt").to(device)

    # Sampling-based decoding (top-k + nucleus) for more varied captions.
    # Passed as explicit kwargs rather than smuggled through the tensor dict.
    caption_output = caption_model.generate(
        **inputs,
        max_length=20,
        num_beams=1,
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )
    english_caption = caption_processor.decode(caption_output[0], skip_special_tokens=True)

    # Translate the English caption to Indonesian (MarianMT, runs on CPU —
    # both the model and these tensors stay on CPU, so no transfer needed).
    translation_inputs = translation_tokenizer.encode(
        english_caption, return_tensors="pt", max_length=512, truncation=True
    )
    translation_output = translation_model.generate(translation_inputs)
    indonesian_caption = translation_tokenizer.decode(translation_output[0], skip_special_tokens=True)

    return english_caption, indonesian_caption
# Gradio UI. The gr.inputs / gr.outputs namespaces were removed in Gradio 3+,
# so the original `gr.inputs.Image` / `gr.outputs.Textbox` calls raise
# AttributeError at import time (the Space's "Runtime error"); use the
# top-level component classes instead.
iface = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Textbox(label="English caption"),
        gr.Textbox(label="Indonesian caption"),
    ],
    live=True,  # re-run captioning automatically whenever the image changes
)
iface.launch()
|