|
import torch |
|
import imageio |
|
import os |
|
import gradio as gr |
|
import subprocess |
|
from subprocess import getoutput |
|
from diffusers.schedulers import EulerAncestralDiscreteScheduler |
|
from transformers import T5EncoderModel, T5Tokenizer |
|
from allegro.pipelines.pipeline_allegro import AllegroPipeline |
|
from allegro.models.vae.vae_allegro import AllegroAutoencoderKL3D |
|
from allegro.models.transformers.transformer_3d_allegro import AllegroTransformer3DModel |
|
|
|
from huggingface_hub import snapshot_download |
|
|
|
# Local directory that receives the Allegro checkpoint folders.
weights_dir = './allegro_weights'
os.makedirs(weights_dir, exist_ok=True)

# SPACE_ID is read with .get() so a missing variable (e.g. when the script is
# run outside a Hugging Face Space) is treated as "not the shared demo"
# instead of aborting start-up with a KeyError.
is_shared_ui = "fffiloni/allegro-t2v" in os.environ.get('SPACE_ID', '')
is_gpu_associated = torch.cuda.is_available()

# The shared demo hides the "Generate Video" button, so it skips the download.
if not is_shared_ui:
    snapshot_download(
        repo_id='rhymes-ai/Allegro',
        allow_patterns=[
            'scheduler/**',
            'text_encoder/**',
            'tokenizer/**',
            'transformer/**',
            'vae/**',
        ],
        local_dir=weights_dir,
    )

if is_gpu_associated:
    # Raw `nvidia-smi` output; captured here but not read again in this file.
    gpu_info = getoutput('nvidia-smi')
|
|
|
def single_inference(user_prompt, save_path, guidance_scale, num_sampling_steps, seed, enable_cpu_offload):
    """Generate one video from a text prompt and write it to ``save_path``.

    Every call loads the VAE, text encoder, tokenizer and transformer from
    ``./allegro_weights`` and assembles a fresh ``AllegroPipeline`` on
    ``cuda:0``, so a CUDA device is required.

    Args:
        user_prompt: Free-text description of the video. It is lower-cased,
            stripped and inserted into a fixed quality-tag template.
        save_path: Destination file handed to ``imageio.mimwrite``. Its
            parent directory is created when it has one.
        guidance_scale: Forwarded unchanged to the pipeline.
        num_sampling_steps: Forwarded as ``num_inference_steps`` after an
            ``int`` cast.
        seed: Seed for the CUDA ``torch.Generator`` after an ``int`` cast.
        enable_cpu_offload: When truthy, calls
            ``enable_sequential_cpu_offload()`` on the pipeline before
            sampling.

    Returns:
        ``save_path``, unchanged.
    """
    dtype = torch.bfloat16

    # The VAE is loaded in float32 while the other models use bfloat16.
    vae = AllegroAutoencoderKL3D.from_pretrained(
        "./allegro_weights/vae/",
        torch_dtype=torch.float32
    ).cuda()
    vae.eval()

    text_encoder = T5EncoderModel.from_pretrained("./allegro_weights/text_encoder/", torch_dtype=dtype)
    text_encoder.eval()

    tokenizer = T5Tokenizer.from_pretrained("./allegro_weights/tokenizer/")

    scheduler = EulerAncestralDiscreteScheduler()

    transformer = AllegroTransformer3DModel.from_pretrained("./allegro_weights/transformer/", torch_dtype=dtype).cuda()
    transformer.eval()

    allegro_pipeline = AllegroPipeline(
        vae=vae,
        text_encoder=text_encoder,
        tokenizer=tokenizer,
        scheduler=scheduler,
        transformer=transformer
    ).to("cuda:0")

    # Template that wraps the user's text; "{}" marks where it is inserted.
    positive_prompt = """
(masterpiece), (best quality), (ultra-detailed), (unwatermarked),
{}
emotional, harmonious, vignette, 4k epic detailed, shot on kodak, 35mm photo,
sharp focus, high budget, cinemascope, moody, epic, gorgeous
"""

    negative_prompt = """
nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality,
low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry.
"""

    user_prompt = positive_prompt.format(user_prompt.lower().strip())

    if enable_cpu_offload:
        allegro_pipeline.enable_sequential_cpu_offload()

    # UI widgets may hand back floats (e.g. 20.0); the step count and the
    # seed are cast so they always reach the pipeline as integers.
    out_video = allegro_pipeline(
        user_prompt,
        negative_prompt=negative_prompt,
        num_frames=88,
        height=720,
        width=1280,
        num_inference_steps=int(num_sampling_steps),
        guidance_scale=guidance_scale,
        max_sequence_length=512,
        generator=torch.Generator(device="cuda:0").manual_seed(int(seed))
    ).video[0]

    # os.makedirs("") raises, so only create a directory when the path has one.
    out_dir = os.path.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    imageio.mimwrite(save_path, out_video, fps=15, quality=8)

    return save_path
|
|
|
|
|
|
|
def run_inference(user_prompt, guidance_scale, num_sampling_steps, seed, enable_cpu_offload, progress=gr.Progress(track_tqdm=True)):
    """Run ``single_inference`` with a fixed output path and return that path.

    The arguments arrive in the order of the UI inputs wired to the button
    and are forwarded unchanged. ``progress`` is never referenced in the
    body; presumably its presence is what lets Gradio track tqdm progress
    (confirm against the Gradio documentation).
    """
    output_file = "./output_videos/generated_video.mp4"
    return single_inference(
        user_prompt,
        output_file,
        guidance_scale,
        num_sampling_steps,
        seed,
        enable_cpu_offload,
    )
|
|
|
# Stylesheet handed to gr.Blocks: centres the main column and colours the
# three status banners (warning-ready / warning-duplicate / warning-setgpu).
css = """
div#col-container{
    margin: 0 auto;
    max-width: 800px;
}
div#warning-ready {
    background-color: #ecfdf5;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
    color: #057857!important;
}
div#warning-duplicate {
    background-color: #ebf5ff;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
    color: #0f4592!important;
}
div#warning-duplicate strong {
    color: #0f4592;
}
p.actions {
    display: flex;
    align-items: center;
    margin: 20px 0;
}
div#warning-duplicate .actions a {
    display: inline-block;
    margin-right: 10px;
}
div#warning-setgpu {
    background-color: #fff4eb;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-setgpu > .gr-prose > h2, div#warning-setgpu > .gr-prose > p {
    color: #92220f!important;
}
div#warning-setgpu a, div#warning-setgpu b {
    color: #91230f;
}
div#warning-setgpu p.actions > a {
    display: inline-block;
    background: #1f1f23;
    border-radius: 40px;
    padding: 6px 24px;
    color: antiquewhite;
    text-decoration: none;
    font-weight: 600;
    font-size: 1.2em;
}
div#warning-setsleeptime {
    background-color: #fff4eb;
    padding: 10px 10px;
    margin: 0!important;
    color: #030303!important;
}
.custom-color {
    color: #030303 !important;
}
"""
|
|
|
|
|
# Info icon shown in front of every status-banner heading. Kept in one place
# so the three banners cannot drift apart.
_INFO_ICON_SVG = '<svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;"fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>'

with gr.Blocks(css=css) as demo:
    # Read once with .get() so building the banner links cannot raise a
    # KeyError when SPACE_ID is not defined.
    space_id = os.environ.get('SPACE_ID', '')

    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Allegro Video Generation")
        gr.Markdown("Generate a video based on a text prompt using the Allegro pipeline.")
        gr.HTML("""
        <div style="display:flex;column-gap:4px;">
            <a href='https://huggingface.co/rhymes-ai/Allegro'>
                <img src='https://img.shields.io/badge/HuggingFace-Model-orange'>
            </a>
            <a href='https://github.com/rhymes-ai/Allegro/tree/main'>
                <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
            </a>
            <a href='https://arxiv.org/abs/2410.15458'>
                <img src='https://img.shields.io/badge/ArXivPaper-red'>
            </a>
        </div>
        """)

        # Generation controls; their values are passed to run_inference.
        user_prompt = gr.Textbox(label="User Prompt")
        with gr.Row():
            guidance_scale = gr.Slider(minimum=0, maximum=20, step=0.1, label="Guidance Scale", value=7.5)
            num_sampling_steps = gr.Slider(minimum=10, maximum=100, step=1, label="Number of Sampling Steps", value=20)
        with gr.Row():
            seed = gr.Slider(minimum=0, maximum=10000, step=1, label="Random Seed", value=42)
            enable_cpu_offload = gr.Checkbox(label="Enable CPU Offload", value=False, scale=1)

        # Exactly one status banner is shown; the generate button is only
        # visible when this is a private copy with a GPU attached.
        if is_shared_ui:
            top_description = gr.HTML(f'''
            <div class="gr-prose">
                <h2 class="custom-color">{_INFO_ICON_SVG}
                Attention: this Space need to be duplicated to work</h2>
                <p class="main-message custom-color">
                    To make it work, <strong>duplicate the Space</strong> and run it on your own profile using a <strong>private</strong> GPU.<br />
                    You'll be able to offload the model into CPU for less GPU memory cost (about 9.3G, compared to 27.5G if CPU offload is not enabled), but the inference time will increase significantly.
                </p>
                <p class="actions custom-color">
                    <a href="https://huggingface.co/spaces/{space_id}?duplicate=true">
                        <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg-dark.svg" alt="Duplicate this Space" />
                    </a>
                </p>
            </div>
            ''', elem_id="warning-duplicate")
            submit_btn = gr.Button("Generate Video", visible=False)
        elif is_gpu_associated:
            # Here the button is created before the banner, so it renders
            # above it.
            submit_btn = gr.Button("Generate Video", visible=True)
            top_description = gr.HTML(f'''
            <div class="gr-prose">
                <h2 class="custom-color">{_INFO_ICON_SVG}
                You have successfully associated a GPU to this Space 🎉</h2>
                <p class="custom-color">
                    You can now generate a video! You will be billed by the minute from when you activated the GPU until when it is turned off.
                    You can offload the model into CPU for less GPU memory cost (about 9.3G, compared to 27.5G if CPU offload is not enabled), but the inference time will increase significantly.
                </p>
            </div>
            ''', elem_id="warning-ready")
        else:
            # The opening <b> was missing, leaving a stray </b> in the markup.
            top_description = gr.HTML(f'''
            <div class="gr-prose">
                <h2 class="custom-color">{_INFO_ICON_SVG}
                You have successfully duplicated the Allegro Video Generation Space 🎉</h2>
                <p class="custom-color">There's only one step left before you can generate a video: we recommend to <a href="https://huggingface.co/spaces/{space_id}/settings" style="text-decoration: underline" target="_blank"><b>attribute a L40S GPU</b> to it (via the Settings tab)</a>.
                You will be billed by the minute from when you activate the GPU until when it is turned off.</p>
                <p class="actions custom-color">
                    <a href="https://huggingface.co/spaces/{space_id}/settings">🔥 Set recommended GPU</a>
                </p>
            </div>
            ''', elem_id="warning-setgpu")
            submit_btn = gr.Button("Generate Video", visible=False)

        video_output = gr.Video(label="Generated Video")

        def load_allegro_examples(prompt):
            """Return the hosted sample video URL for an example prompt.

            Prompts that are not one of the three examples yield ``None``.
            """
            example_videos = {
                "A Monkey is playing bass guitar.":
                    "https://rhymes.ai/allegroVideos/30_demo_w_watermark_prompt_1018/11.mp4",
                "An astronaut riding a horse.":
                    "https://rhymes.ai/allegroVideos/30_demo_w_watermark_prompt_1018/15.mp4",
                "A tiny finch on a branch with spring flowers on background.":
                    "https://rhymes.ai/allegroVideos/30_demo_w_watermark_prompt_1018/22.mp4",
            }
            return example_videos.get(prompt)

        # Selecting an example calls load_allegro_examples, which returns a
        # hosted video URL rather than running the model.
        gr.Examples(
            examples=[
                ["A Monkey is playing bass guitar."],
                ["An astronaut riding a horse."],
                ["A tiny finch on a branch with spring flowers on background."]
            ],
            fn=load_allegro_examples,
            inputs=[user_prompt],
            outputs=video_output,
            run_on_click=True,
        )

    submit_btn.click(
        fn=run_inference,
        inputs=[user_prompt, guidance_scale, num_sampling_steps, seed, enable_cpu_offload],
        outputs=video_output
    )
|
|
|
|
|
# Start the Gradio app built above, with error display enabled and the
# API page disabled.
demo.launch(
    show_error=True,
    show_api=False,
)
|
|