import torch import imageio import os import gradio as gr import subprocess from subprocess import getoutput from diffusers.schedulers import EulerAncestralDiscreteScheduler from transformers import T5EncoderModel, T5Tokenizer from allegro.pipelines.pipeline_allegro import AllegroPipeline from allegro.models.vae.vae_allegro import AllegroAutoencoderKL3D from allegro.models.transformers.transformer_3d_allegro import AllegroTransformer3DModel from huggingface_hub import snapshot_download weights_dir = './allegro_weights' os.makedirs(weights_dir, exist_ok=True) is_shared_ui = True if "fffiloni/allegro-t2v" in os.environ['SPACE_ID'] else False is_gpu_associated = torch.cuda.is_available() if not is_shared_ui: snapshot_download( repo_id='rhymes-ai/Allegro', allow_patterns=[ 'scheduler/**', 'text_encoder/**', 'tokenizer/**', 'transformer/**', 'vae/**', ], local_dir=weights_dir, ) if is_gpu_associated: gpu_info = getoutput('nvidia-smi') def single_inference(user_prompt, save_path, guidance_scale, num_sampling_steps, seed, enable_cpu_offload): dtype = torch.bfloat16 # Load models vae = AllegroAutoencoderKL3D.from_pretrained( "./allegro_weights/vae/", torch_dtype=torch.float32 ).cuda() vae.eval() text_encoder = T5EncoderModel.from_pretrained("./allegro_weights/text_encoder/", torch_dtype=dtype) text_encoder.eval() tokenizer = T5Tokenizer.from_pretrained("./allegro_weights/tokenizer/") scheduler = EulerAncestralDiscreteScheduler() transformer = AllegroTransformer3DModel.from_pretrained("./allegro_weights/transformer/", torch_dtype=dtype).cuda() transformer.eval() allegro_pipeline = AllegroPipeline( vae=vae, text_encoder=text_encoder, tokenizer=tokenizer, scheduler=scheduler, transformer=transformer ).to("cuda:0") positive_prompt = """ (masterpiece), (best quality), (ultra-detailed), (unwatermarked), {} emotional, harmonious, vignette, 4k epic detailed, shot on kodak, 35mm photo, sharp focus, high budget, cinemascope, moody, epic, gorgeous """ negative_prompt = """ nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry. """ # Process user prompt user_prompt = positive_prompt.format(user_prompt.lower().strip()) if enable_cpu_offload: allegro_pipeline.enable_sequential_cpu_offload() out_video = allegro_pipeline( user_prompt, negative_prompt=negative_prompt, num_frames=88, height=720, width=1280, num_inference_steps=num_sampling_steps, guidance_scale=guidance_scale, max_sequence_length=512, generator=torch.Generator(device="cuda:0").manual_seed(seed) ).video[0] # Save video os.makedirs(os.path.dirname(save_path), exist_ok=True) imageio.mimwrite(save_path, out_video, fps=15, quality=8) return save_path # Gradio interface function def run_inference(user_prompt, guidance_scale, num_sampling_steps, seed, enable_cpu_offload, progress=gr.Progress(track_tqdm=True)): save_path = "./output_videos/generated_video.mp4" result_path = single_inference(user_prompt, save_path, guidance_scale, num_sampling_steps, seed, enable_cpu_offload) return result_path css=""" div#col-container{ margin: 0 auto; max-width: 800px; } div#warning-ready { background-color: #ecfdf5; padding: 0 16px 16px; margin: 20px 0; color: #030303!important; } div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p { color: #057857!important; } div#warning-duplicate { background-color: #ebf5ff; padding: 0 16px 16px; margin: 20px 0; color: #030303!important; } div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p { color: #0f4592!important; } div#warning-duplicate strong { color: #0f4592; } p.actions { display: flex; align-items: center; margin: 20px 0; } div#warning-duplicate .actions a { display: inline-block; margin-right: 10px; } div#warning-setgpu { background-color: #fff4eb; padding: 0 16px 16px; margin: 20px 0; color: #030303!important; } div#warning-setgpu > .gr-prose > h2, div#warning-setgpu > .gr-prose > p { color: #92220f!important; } div#warning-setgpu a, div#warning-setgpu b { color: #91230f; } div#warning-setgpu p.actions > a { display: inline-block; background: #1f1f23; border-radius: 40px; padding: 6px 24px; color: antiquewhite; text-decoration: none; font-weight: 600; font-size: 1.2em; } div#warning-setsleeptime { background-color: #fff4eb; padding: 10px 10px; margin: 0!important; color: #030303!important; } .custom-color { color: #030303 !important; } """ # Create Gradio interface with gr.Blocks(css=css) as demo: with gr.Column(elem_id="col-container"): gr.Markdown("# Allegro Video Generation") gr.Markdown("Generate a video based on a text prompt using the Allegro pipeline.") user_prompt=gr.Textbox(label="User Prompt") with gr.Row(): guidance_scale=gr.Slider(minimum=0, maximum=20, step=0.1, label="Guidance Scale", value=7.5) num_sampling_steps=gr.Slider(minimum=10, maximum=100, step=1, label="Number of Sampling Steps", value=20) with gr.Row(): seed=gr.Slider(minimum=0, maximum=10000, step=1, label="Random Seed", value=42) enable_cpu_offload=gr.Checkbox(label="Enable CPU Offload", value=False, scale=1) if is_shared_ui: top_description = gr.HTML(f'''
''', elem_id="warning-duplicate") submit_btn = gr.Button("Generate Video", visible=False) else: if(is_gpu_associated): submit_btn = gr.Button("Generate Video", visible=True) top_description = gr.HTML(f'''You can now generate a video! You will be billed by the minute from when you activated the GPU until when it is turned off.
There's only one step left before you can generate a video: we recommend to attribute a L40S GPU to it (via the Settings tab). You will be billed by the minute from when you activate the GPU until when it is turned off.