logo-in-context

Paused

App Files Files Community

logo-in-context / app.py

multimodalart HF staff

Update app.py

daa5f41 verified 11 days ago

raw

history blame

6.95 kB

	import gradio as gr
	import torch
	import spaces
	from diffusers import FluxInpaintPipeline
	from PIL import Image, ImageFile

	# Uncomment to let Pillow load truncated image files instead of raising an error.
	#ImageFile.LOAD_TRUNCATED_IMAGES = True

	# Build the FLUX.1-dev inpainting pipeline in bfloat16 and place it on the
	# CUDA device, then attach the In-Context LoRA "visual identity design" weights.
	pipe = FluxInpaintPipeline.from_pretrained(
	    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
	).to("cuda")
	pipe.load_lora_weights(
	    "ali-vilab/In-Context-LoRA", weight_name="visual-identity-design.safetensors"
	)

	def square_center_crop(img, target_size=768):
	    """Center-crop ``img`` to a square and resize it to ``target_size`` per side.

	    Images whose mode is ``RGBA`` or ``P`` are converted to ``RGB`` first;
	    images in any other mode are left in that mode.

	    Args:
	        img: Image exposing ``mode``, ``size``, ``convert``, ``crop`` and
	            ``resize`` (a PIL image in this app).
	        target_size: Edge length, in pixels, of the returned square image.

	    Returns:
	        The centered square crop, resized to ``(target_size, target_size)``
	        with LANCZOS resampling.
	    """
	    if img.mode in ('RGBA', 'P'):
	        img = img.convert('RGB')

	    width, height = img.size
	    side = min(width, height)

	    # Offsets that center the square inside the (possibly non-square) image.
	    x0 = (width - side) // 2
	    y0 = (height - side) // 2

	    square = img.crop((x0, y0, x0 + side, y0 + side))
	    return square.resize((target_size, target_size), Image.Resampling.LANCZOS)

	def duplicate_horizontally(img):
	    """Return a new RGB image with two copies of a square ``img`` side by side.

	    Args:
	        img: Square image; its ``size`` must have equal width and height.

	    Returns:
	        A new ``RGB`` image twice as wide as ``img`` with ``img`` pasted into
	        both the left and the right half.

	    Raises:
	        ValueError: If ``img`` is not square.
	    """
	    side_w, side_h = img.size
	    if side_w != side_h:
	        raise ValueError(f"Input image must be square, got {side_w}x{side_h}")

	    canvas = Image.new('RGB', (side_w * 2, side_h))
	    # Left panel first, then the right panel.
	    for x_offset in (0, side_w):
	        canvas.paste(img, (x_offset, 0))
	    return canvas

	# Load the inpainting mask. Image.open() is lazy (it only identifies the
	# file), so decode it right away with load(): a corrupt mask then fails at
	# startup rather than inside the first GPU request, and Pillow can release
	# the file handle it opened instead of keeping it around.
	mask = Image.open("mask_square.png")
	mask.load()

	@spaces.GPU
	def generate(image, prompt_description, prompt_user, progress=gr.Progress(track_tqdm=True)):
	    """Render the uploaded logo applied to an object described by the user.

	    The logo is center-cropped to a square and duplicated into a two-panel
	    image; the pipeline is then run with the module-level ``mask`` and a
	    two-panel prompt built from the two text inputs.

	    Args:
	        image: Logo image from the UI, or ``None`` when nothing was uploaded.
	        prompt_description: Short description of the logo (left panel).
	        prompt_user: Description of what the logo is applied to (right panel).
	        progress: Gradio progress tracker created with ``track_tqdm=True``;
	            not used directly in the body.

	    Returns:
	        A tuple ``(right_panel, full_output)``: the right half of the
	        generated image and the complete two-panel image.

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    # Without this guard a missing upload crashes with an opaque
	    # AttributeError inside square_center_crop.
	    if image is None:
	        raise gr.Error("Please upload a logo image first.")

	    # Treat a missing text value as an empty string so concatenation cannot fail.
	    description = prompt_description or ""
	    target = prompt_user or ""
	    prompt = (
	        "The two-panel image showcases the logo on the left and the application on the right, "
	        f"[LEFT] the left panel is showing {description} [RIGHT] this logo is applied to {target}"
	    )

	    panel_size = 768  # edge length of each square panel
	    cropped_image = square_center_crop(image, target_size=panel_size)
	    logo_dupli = duplicate_horizontally(cropped_image)

	    out = pipe(
	        prompt=prompt,
	        image=logo_dupli,
	        mask_image=mask,
	        guidance_scale=6,
	        height=panel_size,
	        width=panel_size * 2,  # two panels side by side
	        num_inference_steps=28,
	        max_sequence_length=256,
	        strength=1
	    ).images[0]

	    # The right half of the output is the generated application of the logo.
	    width, height = out.size
	    right_panel = out.crop((width // 2, 0, width, height))
	    return right_panel, out

	# Gradio UI: a "Demo" tab with the inputs, outputs, examples and usage
	# instructions, plus a "diffusers" tab showing a standalone code sample.
	with gr.Blocks() as demo:
	    gr.Markdown("# Logo in Context")
	    gr.Markdown("### In-Context LoRA + Image-to-Image + Inpainting, apply your logo to anything. diffusers implementation based on the [workflow by WizardWhitebeard/klinter](https://civitai.com/articles/8779)")

	    with gr.Tab("Demo"):
	        with gr.Row():
	            # Left column: user inputs.
	            with gr.Column():
	                input_image = gr.Image(
	                    label="Upload Logo Image",
	                    type="pil",
	                    height=384
	                )
	                prompt_description = gr.Textbox(
	                    label="Describe your logo",
	                    placeholder="A Hugging Face emoji logo",
	                )
	                prompt_input = gr.Textbox(
	                    label="Where should the logo be applied?",
	                    placeholder="e.g., a coffee cup on a wooden table"
	                )
	                generate_btn = gr.Button("Generate Application", variant="primary")

	            # Right column: generated results.
	            with gr.Column():
	                output_image = gr.Image(label="Generated Application")
	                output_side = gr.Image(label="Side by side")

	        gr.Examples(
	            examples=[
	                ["huggingface.png", "A Hugging Face emoji logo", "An embroidered hat"],
	                ["awesome.png", "An awesome face logo", "A tattoo on a leg"],
	                ["dvd_logo.png", "A DVD logo", "a flower pot"]
	            ],
	            inputs=[input_image, prompt_description, prompt_input],
	            outputs=[output_image, output_side],
	            fn=generate,
	            cache_examples="lazy"
	        )

	        # The steps below list both text inputs, since both feed the prompt.
	        with gr.Row():
	            gr.Markdown("""
	            ### Instructions:
	            1. Upload a logo image (preferably square)
	            2. Describe your logo
	            3. Describe where you'd like to see the logo applied
	            4. Click 'Generate Application' and wait for the result

	            Note: The generation process might take a few moments.
	            """)

	    with gr.Tab("🧨 diffusers"):
	        gr.Markdown("The way this works is combining the [IC LoRA](https://github.com/ali-vilab/In-Context-LoRA) with image-to-image + inpainting. Where the image on the left (the logo) is uploaded by the user, and the image on the right is masked and applied on the product by the LoRA. Based on the [ComfyUI workflow by WizardWhitebeard/klinter](https://civitai.com/articles/8779). Below is a diffusers implementation of the idea")
	        # The sample imports load_image explicitly; without that import the
	        # snippet fails with a NameError when copied and run.
	        gr.Code(language="python", value="""
	import torch
	from diffusers import FluxInpaintPipeline
	from diffusers.utils import load_image

	pipe = FluxInpaintPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
	pipe.to("cuda")
	pipe.load_lora_weights("ali-vilab/In-Context-LoRA", weight_name="visual-identity-design.safetensors")

	from PIL import Image

	def square_center_crop(img, target_size=768):
	    if img.mode in ('RGBA', 'P'):
	        img = img.convert('RGB')

	    width, height = img.size
	    crop_size = min(width, height)

	    left = (width - crop_size) // 2
	    top = (height - crop_size) // 2
	    right = left + crop_size
	    bottom = top + crop_size

	    img_cropped = img.crop((left, top, right, bottom))
	    return img_cropped.resize((target_size, target_size), Image.Resampling.LANCZOS)

	def duplicate_horizontally(img):
	    width, height = img.size
	    if width != height:
	        raise ValueError(f"Input image must be square, got {width}x{height}")

	    new_image = Image.new('RGB', (width * 2, height))
	    new_image.paste(img, (0, 0))
	    new_image.paste(img, (width, 0))
	    return new_image

	mask = load_image("mask_square.png")
	image = load_image("the_logo.png")
	cropped_image = square_center_crop(image)
	logo_dupli = duplicate_horizontally(cropped_image)

	prompt_structure = "The two-panel image showcases the logo of a brand, [LEFT] the left panel is showing the logo [RIGHT] the right panel has this logo applied to "
	prompt = prompt_structure + "an coconut, engraved logo on a green coconut"
	out = pipe(
	    prompt=prompt,
	    image=logo_dupli,
	    mask_image=mask,
	    guidance_scale=6,
	    height=768,
	    width=1536,
	    num_inference_steps=28,
	    max_sequence_length=256,
	    strength=1
	).images[0]
	out
	"""
	        )

	    # Set up the click event: run generate() on the three inputs and show
	    # the right-panel crop and the full side-by-side image.
	    generate_btn.click(
	        fn=generate,
	        inputs=[input_image, prompt_description, prompt_input],
	        outputs=[output_image, output_side]
	    )

	demo.launch()