karim.ai

Running

App Files Files Community

karim.ai / gradio_app.py

fffiloni

Update gradio_app.py

071420e verified 10 days ago

raw

history blame

9.79 kB

	import torch
	import imageio
	import os
	import gradio as gr
	from diffusers.schedulers import EulerAncestralDiscreteScheduler
	from transformers import T5EncoderModel, T5Tokenizer
	from allegro.pipelines.pipeline_allegro import AllegroPipeline
	from allegro.models.vae.vae_allegro import AllegroAutoencoderKL3D
	from allegro.models.transformers.transformer_3d_allegro import AllegroTransformer3DModel

	from huggingface_hub import snapshot_download

	# Local directory that receives the Allegro checkpoint files.
	weights_dir = './allegro_weights'
	os.makedirs(weights_dir, exist_ok=True)

	# True when this process runs inside the public demo Space. SPACE_ID is read
	# with a default so that starting the script where the variable is not set
	# (e.g. a local run) no longer dies with KeyError before the UI is built.
	is_shared_ui = "fffiloni/allegro-t2v" in os.environ.get('SPACE_ID', '')
	is_gpu_associated = torch.cuda.is_available()

	# The shared demo never runs inference (it only asks the visitor to duplicate
	# the Space), so the weight download is skipped there.
	if not is_shared_ui:
	    snapshot_download(
	        repo_id='rhymes-ai/Allegro',
	        allow_patterns=[
	            'scheduler/**',
	            'text_encoder/**',
	            'tokenizer/**',
	            'transformer/**',
	            'vae/**',
	        ],
	        local_dir=weights_dir,
	    )


	def single_inference(user_prompt, save_path, guidance_scale, num_sampling_steps, seed, enable_cpu_offload):
	    """Generate one video from a text prompt and write it to ``save_path``.

	    The VAE, text encoder, tokenizer and transformer are loaded from
	    ``./allegro_weights`` on every call, assembled into an ``AllegroPipeline``
	    on ``cuda:0`` and run for 88 frames at 1280x720. The result is encoded
	    with ``imageio`` at 15 fps.

	    Args:
	        user_prompt: Free-text description; it is lower-cased, stripped and
	            inserted into a fixed "quality" prompt template.
	        save_path: Destination file for the encoded video. Its parent
	            directory is created when the path has one.
	        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
	        num_sampling_steps: Forwarded as ``num_inference_steps`` (cast to int).
	        seed: Seed for the CUDA ``torch.Generator`` (cast to int).
	        enable_cpu_offload: When truthy, calls
	            ``enable_sequential_cpu_offload()`` on the pipeline before running.

	    Returns:
	        ``save_path``, unchanged, once the video has been written.
	    """
	    dtype = torch.bfloat16

	    # Load models. The VAE is kept in float32 while the text encoder and the
	    # transformer use bfloat16.
	    vae = AllegroAutoencoderKL3D.from_pretrained(
	        "./allegro_weights/vae/",
	        torch_dtype=torch.float32
	    ).cuda()
	    vae.eval()

	    text_encoder = T5EncoderModel.from_pretrained("./allegro_weights/text_encoder/", torch_dtype=dtype)
	    text_encoder.eval()

	    tokenizer = T5Tokenizer.from_pretrained("./allegro_weights/tokenizer/")

	    scheduler = EulerAncestralDiscreteScheduler()

	    transformer = AllegroTransformer3DModel.from_pretrained("./allegro_weights/transformer/", torch_dtype=dtype).cuda()
	    transformer.eval()

	    allegro_pipeline = AllegroPipeline(
	        vae=vae,
	        text_encoder=text_encoder,
	        tokenizer=tokenizer,
	        scheduler=scheduler,
	        transformer=transformer
	    ).to("cuda:0")

	    positive_prompt = """
	(masterpiece), (best quality), (ultra-detailed), (unwatermarked),
	{}
	emotional, harmonious, vignette, 4k epic detailed, shot on kodak, 35mm photo,
	sharp focus, high budget, cinemascope, moody, epic, gorgeous
	"""

	    negative_prompt = """
	nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality,
	low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry.
	"""

	    # Process user prompt: normalise it and drop it into the template slot.
	    user_prompt = positive_prompt.format(user_prompt.lower().strip())

	    # NOTE(review): offload is enabled after the whole pipeline was already
	    # moved to cuda:0 above - confirm this ordering is what the pipeline expects.
	    if enable_cpu_offload:
	        allegro_pipeline.enable_sequential_cpu_offload()

	    # The step count and the seed are cast to int because callers such as UI
	    # sliders may hand over floats, which manual_seed() does not accept.
	    out_video = allegro_pipeline(
	        user_prompt,
	        negative_prompt=negative_prompt,
	        num_frames=88,
	        height=720,
	        width=1280,
	        num_inference_steps=int(num_sampling_steps),
	        guidance_scale=guidance_scale,
	        max_sequence_length=512,
	        generator=torch.Generator(device="cuda:0").manual_seed(int(seed))
	    ).video[0]

	    # Save video. A bare file name has an empty dirname, and os.makedirs("")
	    # raises FileNotFoundError, so the directory is only created when present.
	    output_dir = os.path.dirname(save_path)
	    if output_dir:
	        os.makedirs(output_dir, exist_ok=True)
	    imageio.mimwrite(save_path, out_video, fps=15, quality=8)

	    return save_path


	# Gradio interface function
	def run_inference(user_prompt, guidance_scale, num_sampling_steps, seed, enable_cpu_offload, progress=gr.Progress(track_tqdm=True)):
	    """Run ``single_inference`` with a fixed output location and return the file path.

	    The video is always written to ``./output_videos/generated_video.mp4``;
	    the remaining arguments are forwarded unchanged. ``progress`` is not used
	    in the body; it is declared with ``gr.Progress(track_tqdm=True)``, which
	    per the Gradio documentation lets the UI display tqdm progress.
	    """
	    return single_inference(
	        user_prompt,
	        "./output_videos/generated_video.mp4",
	        guidance_scale,
	        num_sampling_steps,
	        seed,
	        enable_cpu_offload,
	    )

	# Custom stylesheet handed to gr.Blocks(css=css). It styles the coloured
	# notice panels selected by element id: #warning-duplicate and #warning-setgpu
	# are attached to gr.HTML components in this file, while #warning-ready and
	# #upl-dataset-group are not referenced by any component visible here.
	# NOTE(review): `background-color: none` is not a valid CSS value
	# (`transparent` was probably intended) - confirm before changing.
	css="""
	#upl-dataset-group {background-color: none!important;}
	div#warning-ready {
	background-color: #ecfdf5;
	padding: 0 16px 16px;
	margin: 20px 0;
	}
	div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
	color: #057857!important;
	}
	div#warning-duplicate {
	background-color: #ebf5ff;
	padding: 0 16px 16px;
	margin: 20px 0;
	}
	div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
	color: #0f4592!important;
	}
	div#warning-duplicate strong {
	color: #0f4592;
	}
	p.actions {
	display: flex;
	align-items: center;
	margin: 20px 0;
	}
	div#warning-duplicate .actions a {
	display: inline-block;
	margin-right: 10px;
	}
	div#warning-setgpu {
	background-color: #fff4eb;
	padding: 0 16px 16px;
	margin: 20px 0;
	}
	div#warning-setgpu > .gr-prose > h2, div#warning-setgpu > .gr-prose > p {
	color: #92220f!important;
	}
	div#warning-setgpu a, div#warning-setgpu b {
	color: #91230f;
	}
	div#warning-setgpu p.actions > a {
	display: inline-block;
	background: #1f1f23;
	border-radius: 40px;
	padding: 6px 24px;
	color: antiquewhite;
	text-decoration: none;
	font-weight: 600;
	font-size: 1.2em;
	}
	"""

	# Create Gradio interface
	#
	# Three situations are handled:
	#   * shared demo Space    -> show the "duplicate this Space" notice
	#   * duplicated, no GPU   -> show the "attribute a GPU" notice
	#   * duplicated, with GPU -> show the "Generate Video" button
	# The button is now created in every case (hidden when generation is not
	# possible). Previously it only existed in the GPU branch, so wiring
	# `submit_btn.click` raised NameError in the other two, and in the GPU branch
	# it was created with visible=False, leaving no way to start a generation.

	# Read once with a default so building the notices cannot raise KeyError.
	space_id = os.environ.get('SPACE_ID', '')
	# Inference needs a private (non-shared) Space with a CUDA device.
	can_generate = (not is_shared_ui) and is_gpu_associated

	with gr.Blocks(css=css) as demo:
	    with gr.Column():
	        gr.Markdown("# Allegro Video Generation")
	        gr.Markdown("Generate a video based on a text prompt using the Allegro pipeline.")
	        with gr.Row():
	            with gr.Column():
	                user_prompt = gr.Textbox(label="User Prompt")
	                with gr.Row():
	                    guidance_scale = gr.Slider(minimum=0, maximum=20, step=0.1, label="Guidance Scale", value=7.5)
	                    num_sampling_steps = gr.Slider(minimum=10, maximum=100, step=1, label="Number of Sampling Steps", value=20)
	                with gr.Row():
	                    seed = gr.Slider(minimum=0, maximum=10000, step=1, label="Random Seed", value=42)
	                    enable_cpu_offload = gr.Checkbox(label="Enable CPU Offload", value=False, scale=1)
	                if is_shared_ui:
	                    top_description = gr.HTML(f'''
	<div class="gr-prose">
	<h2><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;"fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
	Attention: this Space need to be duplicated to work</h2>
	<p class="main-message">
	To make it work, <strong>duplicate the Space</strong> and run it on your own profile using a <strong>private</strong> GPU.<br />
	You'll be able to offload the model into CPU for less GPU memory cost (about 9.3G, compared to 27.5G if CPU offload is not enabled), but the inference time will increase significantly.
	</p>
	<p class="actions">
	<a href="https://huggingface.co/spaces/{space_id}?duplicate=true">
	<img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg-dark.svg" alt="Duplicate this Space" />
	</a>
	</p>
	</div>
	''', elem_id="warning-duplicate")
	                elif not is_gpu_associated:
	                    # The opening <b> was missing in the original markup,
	                    # leaving a stray </b> inside the link.
	                    top_description = gr.HTML(f'''
	<div class="gr-prose">
	<h2><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;"fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
	You have successfully duplicated the Allegro Video Generation Space 🎉</h2>
	<p>There's only one step left before you can generate a video: <a href="https://huggingface.co/spaces/{space_id}/settings" style="text-decoration: underline" target="_blank"><b>attribute a GPU</b> to it (via the Settings tab)</a>.
	You will be billed by the minute from when you activate the GPU until when it is turned off.</p>
	<p class="actions">
	<a href="https://huggingface.co/spaces/{space_id}/settings">🔥   Set recommended GPU</a>
	</p>
	</div>
	''', elem_id="warning-setgpu")

	                # Always defined so the click wiring below is valid; shown
	                # only when inference can actually run.
	                submit_btn = gr.Button("Generate Video", visible=can_generate)

	            with gr.Column():
	                video_output = gr.Video(label="Generated Video")

	    submit_btn.click(
	        fn=run_inference,
	        inputs=[user_prompt, guidance_scale, num_sampling_steps, seed, enable_cpu_offload],
	        outputs=video_output
	    )

	# Launch the interface
	demo.launch(show_error=True, show_api=False)