Spaces:

gokaygokay
/

Flux-Florence-2

Running on Zero

App Files Files Community

Flux-Florence-2 / app.py

gokaygokay

Update app.py

169a2a8 verified 24 days ago

raw

history blame contribute delete

No virus

2.88 kB

	import gradio as gr
	from transformers import AutoProcessor, AutoModelForCausalLM
	import spaces
	from PIL import Image

	import os
	import subprocess

	# Install flash-attn at startup. The flag is merged into a copy of the current
	# environment: passing a bare dict as `env` would replace the child's whole
	# environment (PATH, HOME, virtualenv/proxy settings), so pip could resolve to
	# a different interpreter or fail to locate its cache.
	# The return code is deliberately not checked, keeping the install best-effort.
	subprocess.run(
	    'pip install flash-attn --no-build-isolation',
	    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
	    shell=True,
	)

	# Checkpoint id -> model loaded in eval mode. Both checkpoints are loaded
	# eagerly at import time so a request only has to do a dict lookup.
	models = {
	    checkpoint: AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True).eval()
	    for checkpoint in (
	        'gokaygokay/Florence-2-Flux-Large',
	        'gokaygokay/Florence-2-Flux',
	    )
	}

	# Checkpoint id -> matching processor, keyed identically to `models`.
	processors = {
	    checkpoint: AutoProcessor.from_pretrained(checkpoint, trust_remote_code=True)
	    for checkpoint in (
	        'gokaygokay/Florence-2-Flux-Large',
	        'gokaygokay/Florence-2-Flux',
	    )
	}

	# HTML header shown at the top of the page (passed to gr.HTML below):
	# a centered heading followed by links to the two model repositories.
	title = """<h1 align="center">Florence-2 Captioner for Flux Prompts</h1>
	<p><center>
	<a href="https://huggingface.co/gokaygokay/Florence-2-Flux-Large" target="_blank">[Florence-2 Flux Large]</a>
	<a href="https://huggingface.co/gokaygokay/Florence-2-Flux" target="_blank">[Florence-2 Flux Base]</a>
	</center></p>
	"""

	@spaces.GPU
	def run_example(image, model_name='gokaygokay/Florence-2-Flux-Large'):
	    """Generate a detailed caption for an image with the selected model.

	    Args:
	        image: Pixel array accepted by ``PIL.Image.fromarray`` (the value
	            supplied by the ``gr.Image`` input). May be ``None``, e.g. when
	            Submit is pressed with no picture loaded.
	        model_name: Key into the module-level ``models`` and ``processors``
	            dicts.

	    Returns:
	        The ``"<DESCRIPTION>"`` entry of the processor's post-processed
	        generation output.

	    Raises:
	        gr.Error: If ``image`` is ``None``.
	        KeyError: If ``model_name`` is not a key of ``models``.
	    """
	    # Without this guard a missing image reaches Image.fromarray and fails
	    # with an opaque error; gr.Error surfaces a readable message in the UI.
	    if image is None:
	        raise gr.Error("Please upload an image first.")

	    image = Image.fromarray(image)
	    if image.mode != "RGB":
	        image = image.convert("RGB")

	    task_prompt = "<DESCRIPTION>"
	    prompt = task_prompt + "Describe this image in great detail."

	    model = models[model_name]
	    processor = processors[model_name]

	    # NOTE(review): neither the model nor the inputs are moved to a device
	    # here, so generation runs wherever the model was loaded -- confirm that
	    # this is intended for a @spaces.GPU function.
	    inputs = processor(text=prompt, images=image, return_tensors="pt")
	    generated_ids = model.generate(
	        input_ids=inputs["input_ids"],
	        pixel_values=inputs["pixel_values"],
	        max_new_tokens=1024,
	        num_beams=3,
	        repetition_penalty=1.10,
	    )

	    # Special tokens are kept in the decoded text that is handed to
	    # post_process_generation.
	    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
	    parsed_answer = processor.post_process_generation(
	        generated_text,
	        task=task_prompt,
	        image_size=(image.width, image.height),
	    )
	    return parsed_answer[task_prompt]

	# Page layout: header, a two-column row (inputs on the left, caption on the
	# right), a set of example images, and the Submit wiring.
	with gr.Blocks(theme='bethecloud/storj_theme') as demo:
	    gr.HTML(value=title)

	    with gr.Row():
	        with gr.Column():
	            input_img = gr.Image(label="Input Picture")
	            model_selector = gr.Dropdown(
	                label="Model",
	                choices=list(models),
	                value='gokaygokay/Florence-2-Flux-Large',
	            )
	            submit_btn = gr.Button("Submit")
	        with gr.Column():
	            output_text = gr.Textbox(label="Output Text")

	    # Each example row supplies only the image; no model value is given, so
	    # run_example's default model_name applies for these rows.
	    gr.Examples(
	        examples=[
	            [path]
	            for path in ("image1.jpg", "image2.jpg", "image3.png", "image5.jpg")
	        ],
	        inputs=[input_img, model_selector],
	        outputs=[output_text],
	        fn=run_example,
	        label='Try captioning on below examples',
	        cache_examples=True,
	    )

	    submit_btn.click(
	        fn=run_example,
	        inputs=[input_img, model_selector],
	        outputs=[output_text],
	    )

	demo.launch(debug=True)