import torch import imageio import os import gradio as gr import subprocess from subprocess import getoutput from diffusers.schedulers import EulerAncestralDiscreteScheduler from transformers import T5EncoderModel, T5Tokenizer from allegro.pipelines.pipeline_allegro import AllegroPipeline from allegro.models.vae.vae_allegro import AllegroAutoencoderKL3D from allegro.models.transformers.transformer_3d_allegro import AllegroTransformer3DModel from huggingface_hub import snapshot_download weights_dir = './allegro_weights' os.makedirs(weights_dir, exist_ok=True) is_shared_ui = True if "fffiloni/allegro-t2v" in os.environ['SPACE_ID'] else False is_gpu_associated = torch.cuda.is_available() if not is_shared_ui: snapshot_download( repo_id='rhymes-ai/Allegro', allow_patterns=[ 'scheduler/**', 'text_encoder/**', 'tokenizer/**', 'transformer/**', 'vae/**', ], local_dir=weights_dir, ) if is_gpu_associated: gpu_info = getoutput('nvidia-smi') def single_inference(user_prompt, save_path, guidance_scale, num_sampling_steps, seed, enable_cpu_offload): dtype = torch.bfloat16 # Load models vae = AllegroAutoencoderKL3D.from_pretrained( "./allegro_weights/vae/", torch_dtype=torch.float32 ).cuda() vae.eval() text_encoder = T5EncoderModel.from_pretrained("./allegro_weights/text_encoder/", torch_dtype=dtype) text_encoder.eval() tokenizer = T5Tokenizer.from_pretrained("./allegro_weights/tokenizer/") scheduler = EulerAncestralDiscreteScheduler() transformer = AllegroTransformer3DModel.from_pretrained("./allegro_weights/transformer/", torch_dtype=dtype).cuda() transformer.eval() allegro_pipeline = AllegroPipeline( vae=vae, text_encoder=text_encoder, tokenizer=tokenizer, scheduler=scheduler, transformer=transformer ).to("cuda:0") positive_prompt = """ (masterpiece), (best quality), (ultra-detailed), (unwatermarked), {} emotional, harmonious, vignette, 4k epic detailed, shot on kodak, 35mm photo, sharp focus, high budget, cinemascope, moody, epic, gorgeous """ negative_prompt = """ nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry. """ # Process user prompt user_prompt = positive_prompt.format(user_prompt.lower().strip()) if enable_cpu_offload: allegro_pipeline.enable_sequential_cpu_offload() out_video = allegro_pipeline( user_prompt, negative_prompt=negative_prompt, num_frames=88, height=720, width=1280, num_inference_steps=num_sampling_steps, guidance_scale=guidance_scale, max_sequence_length=512, generator=torch.Generator(device="cuda:0").manual_seed(seed) ).video[0] # Save video os.makedirs(os.path.dirname(save_path), exist_ok=True) imageio.mimwrite(save_path, out_video, fps=15, quality=8) return save_path # Gradio interface function def run_inference(user_prompt, guidance_scale, num_sampling_steps, seed, enable_cpu_offload, progress=gr.Progress(track_tqdm=True)): save_path = "./output_videos/generated_video.mp4" result_path = single_inference(user_prompt, save_path, guidance_scale, num_sampling_steps, seed, enable_cpu_offload) return result_path css=""" div#col-container{ margin: 0 auto; max-width: 800px; } div#warning-ready { background-color: #ecfdf5; padding: 0 16px 16px; margin: 20px 0; color: #030303!important; } div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p { color: #057857!important; } div#warning-duplicate { background-color: #ebf5ff; padding: 0 16px 16px; margin: 20px 0; color: #030303!important; } div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p { color: #0f4592!important; } div#warning-duplicate strong { color: #0f4592; } p.actions { display: flex; align-items: center; margin: 20px 0; } div#warning-duplicate .actions a { display: inline-block; margin-right: 10px; } div#warning-setgpu { background-color: #fff4eb; padding: 0 16px 16px; margin: 20px 0; color: #030303!important; } div#warning-setgpu > .gr-prose > h2, div#warning-setgpu > .gr-prose > p { color: #92220f!important; } div#warning-setgpu a, div#warning-setgpu b { color: #91230f; } div#warning-setgpu p.actions > a { display: inline-block; background: #1f1f23; border-radius: 40px; padding: 6px 24px; color: antiquewhite; text-decoration: none; font-weight: 600; font-size: 1.2em; } div#warning-setsleeptime { background-color: #fff4eb; padding: 10px 10px; margin: 0!important; color: #030303!important; } .custom-color { color: #030303 !important; } """ # Create Gradio interface with gr.Blocks(css=css) as demo: with gr.Column(elem_id="col-container"): gr.Markdown("# Allegro Video Generation") gr.Markdown("Generate a video based on a text prompt using the Allegro pipeline.") gr.HTML("""
""") user_prompt=gr.Textbox(label="User Prompt") with gr.Row(): guidance_scale=gr.Slider(minimum=0, maximum=20, step=0.1, label="Guidance Scale", value=7.5) num_sampling_steps=gr.Slider(minimum=10, maximum=100, step=1, label="Number of Sampling Steps", value=20) with gr.Row(): seed=gr.Slider(minimum=0, maximum=10000, step=1, label="Random Seed", value=42) enable_cpu_offload=gr.Checkbox(label="Enable CPU Offload", value=False, scale=1) if is_shared_ui: top_description = gr.HTML(f''' ''', elem_id="warning-duplicate") submit_btn = gr.Button("Generate Video", visible=False) else: if(is_gpu_associated): submit_btn = gr.Button("Generate Video", visible=True) top_description = gr.HTML(f'''You can now generate a video! You will be billed by the minute from when you activated the GPU until when it is turned off. You can offload the model into CPU for less GPU memory cost (about 9.3G, compared to 27.5G if CPU offload is not enabled), but the inference time will increase significantly.
There's only one step left before you can generate a video: we recommend to attribute a L40S GPU to it (via the Settings tab). You will be billed by the minute from when you activate the GPU until when it is turned off.