File size: 1,028 Bytes
db30e0b c09e971 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
import gradio as gr
#from transformers import AutoModelForCausalLM, AutoProcessor
# Load the model and processor
model_id = "microsoft/Phi-3-vision-128k-instruct"
#model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto", _attn_implementation='flash_attention_2')
#processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
# Define the function to generate text
def generate_text(image, prompt):
    """Return placeholder text for an image and a prompt.

    The model and processor loading in this file is commented out, so no
    generation takes place here; the function only echoes its inputs as a
    string suitable for a text output.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
            The interface in this file passes a PIL image
            (``gr.Image(type="pil")``). A plain string is also accepted and
            is concatenated with the prompt.
        prompt: The prompt text; ``None`` is treated as an empty string.

    Returns:
        A string: the prompt alone when there is no image, ``image + prompt``
        when the image is a string, otherwise the prompt preceded by a short
        bracketed description of the image.
    """
    # TODO: once the model is loaded, generate with max_new_tokens=500 and
    # greedy decoding (temperature=0.0, do_sample=False) instead of echoing.
    text = "" if prompt is None else str(prompt)
    if image is None:
        return text
    if isinstance(image, str):
        return image + text

    # An image object cannot be added to a string (the previous
    # ``image + prompt`` raised TypeError), so describe it instead.
    details = []
    mode = getattr(image, "mode", None)
    if mode:
        details.append(str(mode))
    try:
        width, height = getattr(image, "size", None)
    except (TypeError, ValueError):
        pass  # no usable size information on this object
    else:
        details.append(f"{width}x{height}")
    label = " ".join(["image", *details])
    return f"[{label}] {text}"
# Create the Gradio application
# Build the web UI: an image and a prompt go in, one textbox of text comes out.
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(label="Prompt"),
    ],
    outputs=gr.Textbox(),
    title="Phi-3-Vision Model",
    description="Generate text based on an image and prompt using the Phi-3-Vision model.",
)

# Start the server at import time, as before. Per the Gradio docs, share=True
# requests a public share link and show_error=True displays handler errors in
# the UI.
demo.launch(share=True, show_error=True)