Flux-Florence-2 / app.py
gokaygokay's picture
Update app.py
169a2a8 verified
raw
history blame contribute delete
No virus
2.88 kB
import gradio as gr
from transformers import AutoProcessor, AutoModelForCausalLM
import spaces
from PIL import Image
import subprocess
import os
import sys

# Best-effort install of flash-attn at startup, before the models are loaded.
# FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE is set for the pip process only.
#
# The variable is merged into a copy of the current environment: passing a
# bare dict as ``env`` replaces the child's whole environment, which drops
# PATH, HOME, proxy settings, etc. Invoking pip through ``sys.executable``
# with an argument list (no shell) guarantees the package lands in the
# interpreter that is running this app.
_flash_attn_install = subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,  # keep startup best-effort: a failed install must not crash the app
)
if _flash_attn_install.returncode != 0:
    # Surface the failure in the logs instead of ignoring it silently.
    print(
        f"WARNING: flash-attn install exited with code {_flash_attn_install.returncode}",
        file=sys.stderr,
    )
# Hub repo IDs of the checkpoints offered in the UI. The order here is the
# key order of ``models`` and ``processors``.
_MODEL_IDS = (
    'gokaygokay/Florence-2-Flux-Large',
    'gokaygokay/Florence-2-Flux',
)

# Every checkpoint is loaded once at import time and switched to eval mode,
# so requests only perform a dict lookup.
# NOTE: trust_remote_code=True is passed to both loaders; per the
# transformers documentation this permits custom code from the Hub repo to run.
models = {
    repo_id: AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True).eval()
    for repo_id in _MODEL_IDS
}

# Matching processors, keyed by the same repo IDs as ``models``.
processors = {
    repo_id: AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
    for repo_id in _MODEL_IDS
}
# HTML banner rendered at the top of the page: a centered heading followed by
# links to the two model repositories on the Hugging Face Hub.
title = (
    '<h1 align="center">Florence-2 Captioner for Flux Prompts</h1>\n'
    '<p><center>\n'
    '<a href="https://huggingface.co/gokaygokay/Florence-2-Flux-Large" target="_blank">[Florence-2 Flux Large]</a>\n'
    '<a href="https://huggingface.co/gokaygokay/Florence-2-Flux" target="_blank">[Florence-2 Flux Base]</a>\n'
    '</center></p>\n'
)
@spaces.GPU
def run_example(image, model_name='gokaygokay/Florence-2-Flux-Large'):
    """Generate a detailed caption for ``image`` with the selected model.

    Args:
        image: Array accepted by ``Image.fromarray`` (presumably the NumPy
            array delivered by the ``gr.Image`` input — confirm).
        model_name: Key into the module-level ``models`` and ``processors``
            dicts. Defaults to the Large checkpoint.

    Returns:
        The ``"<DESCRIPTION>"`` entry of the processor's post-processed
        generation output.

    Raises:
        gr.Error: If ``image`` is ``None``.
        KeyError: If ``model_name`` is not one of the loaded models.
    """
    # Guard against a missing image (e.g. Submit pressed with nothing
    # uploaded): Image.fromarray(None) would otherwise fail with an opaque
    # AttributeError instead of a readable message.
    if image is None:
        raise gr.Error("Please provide an image before submitting.")

    image = Image.fromarray(image)
    if image.mode != "RGB":
        image = image.convert("RGB")

    # The task token is prepended to the instruction and reused below to
    # post-process the generated text.
    task_prompt = "<DESCRIPTION>"
    prompt = task_prompt + "Describe this image in great detail."

    model = models[model_name]
    processor = processors[model_name]

    # NOTE(review): nothing here moves the model or the inputs to a GPU even
    # though the function is decorated with @spaces.GPU — confirm whether
    # CPU inference is intended.
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
        repetition_penalty=1.10,
    )

    # Special tokens are kept in the decoded text that is handed to
    # post_process_generation.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height),
    )
    return parsed_answer["<DESCRIPTION>"]
# Example rows for gr.Examples. Each row supplies only the image column;
# no value is given for the model dropdown.
# NOTE(review): presumably run_example then falls back to its default
# model_name when the examples are cached — confirm against Gradio's
# Examples handling.
example_rows = [
    ["image1.jpg"],
    ["image2.jpg"],
    ["image3.png"],
    ["image5.jpg"],
]

# Page layout: banner, then a two-column row (inputs left, caption right),
# then the example gallery. Components are created in the same order as before.
with gr.Blocks(theme='bethecloud/storj_theme') as demo:
    gr.HTML(title)

    with gr.Row():
        with gr.Column():
            input_img = gr.Image(label="Input Picture")
            model_selector = gr.Dropdown(
                choices=list(models),
                label="Model",
                value='gokaygokay/Florence-2-Flux-Large',
            )
            submit_btn = gr.Button("Submit")
        with gr.Column():
            output_text = gr.Textbox(label="Output Text")

    gr.Examples(
        examples=example_rows,
        inputs=[input_img, model_selector],
        outputs=[output_text],
        fn=run_example,
        label='Try captioning on below examples',
        cache_examples=True,
    )

    # Clicking Submit captions the current image with the selected model.
    submit_btn.click(
        fn=run_example,
        inputs=[input_img, model_selector],
        outputs=[output_text],
    )

demo.launch(debug=True)