# Hugging Face Space application script.
# Status banner captured with the page: "Runtime error".
import importlib
import importlib.util
import subprocess
import sys

# SentencePiece is needed for the English -> Indonesian translation step.
# Install it only when it is missing, using the pip that belongs to the
# running interpreter, and fail loudly if the install does not succeed.
# NOTE(review): this is kept ahead of the `transformers` import, as in the
# original ordering — presumably transformers probes for optional backends
# at import time; confirm before moving it.
if importlib.util.find_spec("sentencepiece") is None:
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "sentencepiece"],
        check=True,
    )
    # Make the freshly installed package visible to the import system.
    importlib.invalidate_caches()

from PIL import Image  # image library
import gradio as gr  # web interface shown on Hugging Face
from transformers import (  # BLIP (image captioning) and Marian (translation)
    BlipForConditionalGeneration,
    BlipProcessor,
    MarianMTModel,
    MarianTokenizer,
)
import torch

# Single source of truth for where the models live.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model that translates the English caption into Indonesian.
translation_model_id = "Helsinki-NLP/opus-mt-en-id"
translation_model = MarianMTModel.from_pretrained(translation_model_id).to(device)
translation_tokenizer = MarianTokenizer.from_pretrained(translation_model_id)

# Model that produces the English caption.
# "Salesforce/blip-image-captioning-base" was previously tried here as an
# alternative checkpoint.
caption_model_id = "Salesforce/blip-image-captioning-large"
caption_processor = BlipProcessor.from_pretrained(caption_model_id)
caption_model = BlipForConditionalGeneration.from_pretrained(caption_model_id).to(device)
def generate_caption(input_image):
    """Caption an image in English and translate the caption to Indonesian.

    Args:
        input_image: A PIL image (anything exposing ``convert``), or ``None``
            when no image is available, e.g. after the input was cleared.

    Returns:
        A ``(english_caption, indonesian_caption)`` tuple of strings. Both
        strings are empty when ``input_image`` is ``None``.
    """
    # Nothing to caption: return empty outputs instead of crashing on .convert.
    if input_image is None:
        return "", ""

    # Convert to RGB so other modes (e.g. RGBA or grayscale) are handled
    # uniformly by the processor.
    image = input_image.convert('RGB')

    # Tensors must live on the same device as the model that consumes them.
    caption_inputs = caption_processor(images=image, return_tensors="pt").to(
        caption_model.device
    )

    # Inference only, so gradient tracking is unnecessary.
    with torch.no_grad():
        # Generate the English caption. Sampling settings are passed as
        # explicit generation arguments rather than stored in the inputs.
        caption_output = caption_model.generate(
            **caption_inputs,
            max_length=20,
            num_beams=1,
            do_sample=True,
            top_k=50,
            top_p=0.95,
        )
        english_caption = caption_processor.decode(
            caption_output[0], skip_special_tokens=True
        )

        # Translate the caption into Indonesian. Calling the tokenizer (rather
        # than ``encode``) also yields the attention mask for ``generate``.
        translation_inputs = translation_tokenizer(
            english_caption,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        ).to(translation_model.device)
        translation_output = translation_model.generate(**translation_inputs)

    indonesian_caption = translation_tokenizer.decode(
        translation_output[0], skip_special_tokens=True
    )
    return english_caption, indonesian_caption
# Build the web UI: one image input, two text outputs (English caption and
# its Indonesian translation). The top-level component classes are used
# because the legacy ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated
# and absent from newer Gradio releases.
iface = gr.Interface(
    generate_caption,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Textbox(label="English caption"),
        gr.Textbox(label="Indonesian caption"),
    ],
    live=True,  # re-run the function whenever the input changes
)
iface.launch()