import os
from pathlib import Path

import gradio as gr
from moviepy.editor import AudioFileClip, ImageClip

# Work inside a temporary output directory so all generated files stay together.
output_dir = Path("temp/")
output_dir.mkdir(exist_ok=True, parents=True)
os.chdir(output_dir)


def process_inputs(prompt, audio):
    image = get_stable_diffusion_image(prompt)
    video = add_static_image_to_audio(image, audio)
    return video


def add_static_image_to_audio(image, audio):
    """Combine the static image at path `image` with the audio file at path
    `audio`, and return the path of the resulting video file."""
    # create the audio clip object
    audio_clip = AudioFileClip(audio)
    # create the image clip object
    image_clip = ImageClip(image)
    # use the set_audio method of the image clip to combine the audio with the image
    video_clip = image_clip.set_audio(audio_clip)
    # make the new clip exactly as long as the audio clip
    video_clip.duration = audio_clip.duration
    # a static image only needs 1 frame per second
    video_clip.fps = 1
    # write the resulting video clip
    path = "out.mp4"
    video_clip.write_videofile(path)
    return path


def get_stable_diffusion_image(prompt):
    # query the hosted Stable Diffusion Space rather than running the model locally
    stable_diffusion = gr.Blocks.load(name="spaces/stabilityai/stable-diffusion")
    # fn_index=2 targets the Space's text-to-image endpoint, which returns
    # a directory containing the generated images
    gallery_dir = stable_diffusion(prompt, fn_index=2)
    # return the first generated image
    return [os.path.join(gallery_dir, img) for img in os.listdir(gallery_dir)][0]


iface = gr.Interface(
    fn=process_inputs,
    inputs=["text", gr.Audio(type="filepath")],
    outputs="video",
)
iface.launch()
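
# A quick way to try this locally, assuming Gradio 3.x and moviepy 1.x
# (the versions whose APIs the script above uses: gr.Blocks.load and
# moviepy.editor were removed in later major releases). The filename
# `app.py` and the version pins are illustrative, not from the original:
#
#   pip install "gradio>=3,<4" "moviepy<2"
#   python app.py
#
# Gradio prints a local URL; open it, enter a text prompt, upload an
# audio file, and the app responds with a video of the generated image
# playing over the audio track.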