Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,106 Bytes
4e70ef0 704a53e 4e70ef0 704a53e 48fc5f0 f64ce70 f263b93 4e70ef0 f263b93 4e70ef0 1c8fb6e 4e70ef0 b5c347a 4e70ef0 b5c347a 4e70ef0 cb6599c 4e70ef0 cb6599c 4e70ef0 b5c347a 4e70ef0 c569a26 4e70ef0 79ce0f7 65853cc 4e70ef0 65853cc 4e70ef0 7946cb5 4e70ef0 ed4007b d1e3414 ed4007b 902ec6b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import gradio as gr
from transformers import AutoProcessor, AutoModelForCausalLM
import spaces
import re
from PIL import Image
import torch
import subprocess
#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
# Runtime dependency install: the model's remote code needs einops, which is
# presumably not preinstalled in the Space image.
# NOTE(review): the command string is fixed, so shell=True is not exploitable
# here, but subprocess.run(['pip', 'install', 'einops']) is the safer form.
subprocess.run('pip install einops', shell=True)
#device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the Florence-2 captioning model from the Hub. trust_remote_code pulls
# the model's custom implementation; the model is pinned to GPU 0 (no CPU
# fallback — the commented-out device selection above was abandoned) and put
# in eval mode for inference.
model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-Flux-Large', trust_remote_code=True).to("cuda:0").eval()
processor = AutoProcessor.from_pretrained('gokaygokay/Florence-2-Flux-Large', trust_remote_code=True)
# UI strings for the Gradio layout (DESCRIPTION is currently empty/unused).
TITLE = "# Florence-2-SD3-Captioner"
DESCRIPTION = ""
def modify_caption(caption: str) -> str:
    """
    Remove a "captured from " / "captured at " phrase from a caption.

    The first case-insensitive occurrence of either phrase anywhere in the
    caption is removed (the pattern is not anchored to the start of the
    string). If neither phrase is present, the caption is returned unchanged.

    Args:
        caption (str): A string containing a caption.

    Returns:
        str: The caption with the phrase removed if it was present,
        or the original caption.
    """
    # Phrases to strip, each mapped to its replacement (both empty for now;
    # kept as pairs so a non-empty replacement can be added later).
    prefix_substrings = [
        ('captured from ', ''),
        ('captured at ', ''),
    ]
    # Alternation pattern matching any of the phrases.
    pattern = '|'.join(re.escape(opening) for opening, _ in prefix_substrings)
    replacers = {opening.lower(): replacer for opening, replacer in prefix_substrings}

    def replace_fn(match: "re.Match") -> str:
        # Look up the replacement for whichever phrase actually matched,
        # normalising case since the search is case-insensitive.
        return replacers[match.group(0).lower()]

    # Replace only the first occurrence. re.sub already returns the original
    # string untouched when nothing matches, so the original's trailing
    # "modified if changed else original" conditional was dead code and has
    # been removed — behavior is identical.
    return re.sub(pattern, replace_fn, caption, count=1, flags=re.IGNORECASE)
@spaces.GPU
def run_example(image):
    """
    Generate a detailed caption for an input image with Florence-2.

    Args:
        image: Input picture as a numpy array (gradio's default Image
            component format) — converted to a PIL image internally.

    Returns:
        str: The generated detailed caption, cleaned up by modify_caption.
    """
    image = Image.fromarray(image)
    # Florence-2 task token selecting detailed-description mode; it is both
    # prepended to the prompt and used as the key of the parsed result.
    task_prompt = "<DESCRIPTION>"
    prompt = task_prompt + "Describe this image in great detail."
    # The processor/model expect 3-channel RGB input.
    if image.mode != "RGB":
        image = image.convert("RGB")
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    # Move all input tensors to the same device the model was pinned to.
    inputs = {key: value.to("cuda:0") for key, value in inputs.items()}
    # Inference only: disable autograd so beam search does not accumulate
    # gradient bookkeeping (the original ran generate with grads enabled,
    # wasting GPU memory on a 1024-token, 3-beam generation).
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=1024,
            num_beams=3,
        )
    # Decode on CPU. Special tokens are deliberately kept: Florence-2's
    # post_process_generation parses the task token out of the raw text.
    generated_ids = generated_ids.to("cpu")
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text, task=task_prompt, image_size=(image.width, image.height)
    )
    return modify_caption(parsed_answer["<DESCRIPTION>"])
# Build the Gradio UI: a markdown title, an image input, a submit button,
# and a text output for the generated caption.
with gr.Blocks() as demo:
    gr.Markdown(TITLE)
    submit_btn = gr.Button(value="Submit")
    output_text = gr.Textbox(label="Output Text")
    input_img = gr.Image(label="Input Picture")
    # Wire the button: run_example(input image) -> caption text box.
    submit_btn.click(run_example, [input_img], [output_text])
# debug=True surfaces server errors/tracebacks in the Space logs.
demo.launch(debug=True)
|