Spaces · Runtime error

jhshao committed · Commit 78c99d1
1 Parent(s): 8df4a7a

minor bug fixed

Browse files
- .lh/app.py.json  +18 -0
- app.py  +1 -1
.lh/app.py.json
ADDED
@@ -0,0 +1,18 @@
+{
+    "sourceFile": "app.py",
+    "activeCommit": 0,
+    "commits": [
+        {
+            "activePatchIndex": 0,
+            "patches": [
+                {
+                    "date": 1718785420663,
+                    "content": "Index: \n===================================================================\n--- \n+++ \n"
+                }
+            ],
+            "date": 1718785420663,
+            "name": "Commit-0",
"content": "# MIT License\n\n# Copyright (c) 2024 Jiahao Shao\n\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\nimport functools\nimport os\nimport zipfile\nimport tempfile\nfrom io import BytesIO\n\nimport spaces\nimport gradio as gr\nimport numpy as np\nimport torch as torch\nfrom PIL import Image\nfrom tqdm import tqdm\nimport mediapy as media\n\nfrom huggingface_hub import login\n\nfrom chronodepth_pipeline import ChronoDepthPipeline\nfrom gradio_patches.examples import Examples\n\ndefault_seed = 2024\n\ndefault_num_inference_steps = 5\ndefault_num_frames = 10\ndefault_window_size = 9\ndefault_video_processing_resolution = 768\ndefault_video_out_max_frames = 90\ndefault_decode_chunk_size = 10\n\ndef process_video(\n pipe,\n path_input,\n num_inference_steps=default_num_inference_steps,\n num_frames=default_num_frames,\n window_size=default_window_size,\n out_max_frames=default_video_out_max_frames,\n progress=gr.Progress(),\n):\n if path_input is None:\n raise gr.Error(\n \"Missing video in the first pane: upload a file or use one from the gallery below.\"\n )\n\n name_base, name_ext = os.path.splitext(os.path.basename(path_input))\n print(f\"Processing video {name_base}{name_ext}\")\n\n path_output_dir = tempfile.mkdtemp()\n path_out_vis = os.path.join(path_output_dir, f\"{name_base}_depth_colored.mp4\")\n path_out_16bit = os.path.join(path_output_dir, f\"{name_base}_depth_16bit.zip\")\n\n generator = torch.Generator(device=pipe.device).manual_seed(default_seed)\n\n import time\n start_time = time.time()\n zipf = None\n try:\n if window_size is None or window_size == num_frames:\n inpaint_inference = False\n else:\n inpaint_inference = True\n data_ls = []\n video_data = media.read_video(path_input)\n video_length = len(video_data)\n fps = video_data.metadata.fps\n\n duration_sec = video_length / fps\n\n out_duration_sec = out_max_frames / fps\n if duration_sec > out_duration_sec:\n gr.Warning(\n f\"Only the first ~{int(out_duration_sec)} seconds will be processed; \"\n f\"use alternative setups such as ChronoDepth on github for full processing\"\n )\n video_length = out_max_frames\n\n for i in tqdm(range(video_length-num_frames+1)):\n is_first_clip = i == 0\n is_last_clip = i == video_length - num_frames\n is_new_clip = (\n (inpaint_inference and i % window_size == 0)\n or (inpaint_inference == False and i % num_frames == 0)\n )\n if is_first_clip or is_last_clip or is_new_clip:\n data_ls.append(np.array(video_data[i: i+num_frames])) # [t, H, W, 3]\n\n zipf = 
zipfile.ZipFile(path_out_16bit, \"w\", zipfile.ZIP_DEFLATED)\n\n depth_colored_pred = []\n depth_pred = []\n # -------------------- Inference and saving --------------------\n with torch.no_grad():\n for iter, batch in enumerate(tqdm(data_ls)):\n rgb_int = batch\n input_images = [Image.fromarray(rgb_int[i]) for i in range(num_frames)]\n\n # Predict depth\n if iter == 0: # First clip\n pipe_out = pipe(\n input_images,\n num_frames=len(input_images),\n num_inference_steps=num_inference_steps,\n decode_chunk_size=default_decode_chunk_size,\n motion_bucket_id=127,\n fps=7,\n noise_aug_strength=0.0,\n generator=generator,\n )\n elif inpaint_inference and (iter == len(data_ls) - 1): # temporal inpaint inference for last clip\n last_window_size = window_size if video_length%window_size == 0 else video_length%window_size\n pipe_out = pipe(\n input_images,\n num_frames=num_frames,\n num_inference_steps=num_inference_steps,\n decode_chunk_size=default_decode_chunk_size,\n motion_bucket_id=127,\n fps=7,\n noise_aug_strength=0.0,\n generator=generator,\n depth_pred_last=depth_frames_pred_ts[last_window_size:],\n )\n elif inpaint_inference and iter > 0: # temporal inpaint inference\n pipe_out = pipe(\n input_images,\n num_frames=num_frames,\n num_inference_steps=num_inference_steps,\n decode_chunk_size=default_decode_chunk_size,\n motion_bucket_id=127,\n fps=7,\n noise_aug_strength=0.0,\n generator=generator,\n depth_pred_last=depth_frames_pred_ts[window_size:],\n )\n else: # separate inference\n pipe_out = pipe(\n input_images,\n num_frames=num_frames,\n num_inference_steps=num_inference_steps,\n decode_chunk_size=default_decode_chunk_size,\n motion_bucket_id=127,\n fps=7,\n noise_aug_strength=0.0,\n generator=generator,\n )\n\n depth_frames_pred = [pipe_out.depth_np[i] for i in range(num_frames)]\n\n depth_frames_colored_pred = []\n for i in range(num_frames):\n depth_frame_colored_pred = np.array(pipe_out.depth_colored[i])\n depth_frames_colored_pred.append(depth_frame_colored_pred)\n depth_frames_colored_pred = np.stack(depth_frames_colored_pred, axis=0)\n\n depth_frames_pred = np.stack(depth_frames_pred, axis=0)\n depth_frames_pred_ts = torch.from_numpy(depth_frames_pred).to(pipe.device)\n depth_frames_pred_ts = depth_frames_pred_ts * 2 - 1\n\n if inpaint_inference == False:\n if iter == len(data_ls) - 1:\n last_window_size = num_frames if video_length%num_frames == 0 else video_length%num_frames\n depth_colored_pred.append(depth_frames_colored_pred[-last_window_size:])\n depth_pred.append(depth_frames_pred[-last_window_size:])\n else:\n depth_colored_pred.append(depth_frames_colored_pred)\n depth_pred.append(depth_frames_pred)\n else:\n if iter == 0:\n depth_colored_pred.append(depth_frames_colored_pred)\n depth_pred.append(depth_frames_pred)\n elif iter == len(data_ls) - 1:\n depth_colored_pred.append(depth_frames_colored_pred[-last_window_size:])\n depth_pred.append(depth_frames_pred[-last_window_size:])\n else:\n depth_colored_pred.append(depth_frames_colored_pred[-window_size:])\n depth_pred.append(depth_frames_pred[-window_size:])\n\n depth_colored_pred = np.concatenate(depth_colored_pred, axis=0)\n depth_pred = np.concatenate(depth_pred, axis=0)\n\n # -------------------- Save results --------------------\n # Save images\n for i in tqdm(range(len(depth_pred))):\n archive_path = os.path.join(\n f\"{name_base}_depth_16bit\", f\"{i:05d}.png\"\n )\n img_byte_arr = BytesIO()\n depth_16bit = Image.fromarray((depth_pred[i] * 65535.0).astype(np.uint16))\n depth_16bit.save(img_byte_arr, 
format=\"png\")\n img_byte_arr.seek(0)\n zipf.writestr(archive_path, img_byte_arr.read())\n\n # Export to video\n media.write_video(path_out_vis, depth_colored_pred, fps=fps)\n finally:\n if zipf is not None:\n zipf.close()\n\n end_time = time.time()\n print(f\"Processing time: {end_time - start_time} seconds\")\n return (\n path_out_vis,\n [path_out_vis, path_out_16bit],\n )\n\n\ndef run_demo_server(pipe):\n process_pipe_video = spaces.GPU(\n functools.partial(process_video, pipe), duration=220\n )\n os.environ[\"GRADIO_ALLOW_FLAGGING\"] = \"never\"\n\n with gr.Blocks(\n analytics_enabled=False,\n title=\"ChronoDepth Video Depth Estimation\",\n css=\"\"\"\n #download {\n height: 118px;\n }\n .slider .inner {\n width: 5px;\n background: #FFF;\n }\n .viewport {\n aspect-ratio: 4/3;\n }\n h1 {\n text-align: center;\n display: block;\n }\n h2 {\n text-align: center;\n display: block;\n }\n h3 {\n text-align: center;\n display: block;\n }\n \"\"\",\n ) as demo:\n gr.Markdown(\n \"\"\"\n # ChronoDepth Video Depth Estimation\n\n <p align=\"center\">\n <a title=\"Website\" href=\"https://jhaoshao.github.io/ChronoDepth/\" target=\"_blank\" rel=\"noopener noreferrer\" style=\"display: inline-block;\">\n <img src=\"https://img.shields.io/website?url=https%3A%2F%2Fjhaoshao.github.io%2FChronoDepth%2F&up_message=ChronoDepth&up_color=blue&style=flat&logo=timescale&logoColor=%23FFDC0F\">\n </a>\n <a title=\"arXiv\" href=\"https://arxiv.org/abs/2312.02145\" target=\"_blank\" rel=\"noopener noreferrer\" style=\"display: inline-block;\">\n <img src=\"https://img.shields.io/badge/arXiv-PDF-b31b1b\">\n </a>\n <a title=\"Github\" href=\"https://github.com/jhaoshao/ChronoDepth\" target=\"_blank\" rel=\"noopener noreferrer\" style=\"display: inline-block;\">\n <img src=\"https://img.shields.io/github/stars/jhaoshao/ChronoDepth?label=GitHub%20%E2%98%85&logo=github&color=C8C\" alt=\"badge-github-stars\">\n </a>\n </p>\n\n ChronoDepth is the state-of-the-art video depth estimator for videos in the wild. 
\n Upload your video and have a try!<br>\n We set denoising steps to 5, number of frames for each video clip to 10, and overlap between clips to 1.\n\n \"\"\"\n )\n\n with gr.Row():\n with gr.Column():\n video_input = gr.Video(\n label=\"Input Video\",\n sources=[\"upload\"],\n )\n with gr.Row():\n video_submit_btn = gr.Button(\n value=\"Compute Depth\", variant=\"primary\"\n )\n video_reset_btn = gr.Button(value=\"Reset\")\n with gr.Column():\n video_output_video = gr.Video(\n label=\"Output video depth (red-near, blue-far)\",\n interactive=False,\n )\n video_output_files = gr.Files(\n label=\"Depth outputs\",\n elem_id=\"download\",\n interactive=False,\n )\n Examples(\n fn=process_pipe_video,\n examples=[\n os.path.join(\"files\", name)\n for name in [\n \"sora_e2.mp4\",\n \"sora_1758192960116785459.mp4\",\n ]\n ],\n inputs=[video_input],\n outputs=[video_output_video, video_output_files],\n cache_examples=True,\n directory_name=\"examples_video\",\n )\n\n video_submit_btn.click(\n fn=process_pipe_video,\n inputs=[video_input],\n outputs=[video_output_video, video_output_files],\n concurrency_limit=1,\n )\n\n video_reset_btn.click(\n fn=lambda: (None, None, None),\n inputs=[],\n outputs=[video_input, video_output_video],\n concurrency_limit=1,\n )\n\n demo.queue(\n api_open=False,\n ).launch(\n server_name=\"0.0.0.0\",\n server_port=7860,\n )\n\n\ndef main():\n CHECKPOINT = \"jhshao/ChronoDepth\"\n\n if \"HF_TOKEN_LOGIN\" in os.environ:\n login(token=os.environ[\"HF_TOKEN_LOGIN\"])\n\n device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n print(f\"Running on device: {device}\")\n pipe = ChronoDepthPipeline.from_pretrained(CHECKPOINT)\n try:\n import xformers\n\n pipe.enable_xformers_memory_efficient_attention()\n except:\n pass # run without xformers\n\n pipe = pipe.to(device)\n run_demo_server(pipe)\n\n\nif __name__ == \"__main__\":\n main()\n"
+        }
+    ]
+}
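
The added file appears to be editor local-history metadata rather than application code (an assumption based on the .lh/ path and the sourceFile/commits/patches schema shown above). As a sketch of how that structure can be read back, assuming the file sits at .lh/app.py.json and using only keys visible in the diff (the latest_snapshot helper name is illustrative):

import json
from pathlib import Path


def latest_snapshot(path: str = ".lh/app.py.json") -> dict:
    # Return the snapshot entry selected by the "activeCommit" index.
    data = json.loads(Path(path).read_text())
    commits = data["commits"]               # list of recorded snapshots
    return commits[data["activeCommit"]]    # here: the single "Commit-0" entry


if __name__ == "__main__":
    entry = latest_snapshot()
    print(entry["name"], entry["date"])     # -> Commit-0 1718785420663
    # entry["content"] holds the full app.py source captured at commit time
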
app.py
CHANGED
@@ -225,7 +225,7 @@ def process_video(
 
 def run_demo_server(pipe):
     process_pipe_video = spaces.GPU(
-        functools.partial(process_video, pipe), duration=
+        functools.partial(process_video, pipe), duration=220
     )
     os.environ["GRADIO_ALLOW_FLAGGING"] = "never"
 
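
For context, the only code change is the duration argument passed to spaces.GPU; on Hugging Face ZeroGPU Spaces this caps how many seconds of GPU time a single call may hold. Below is a minimal sketch of the same wrapping pattern: the make_gpu_worker helper and the stub body of process_video are illustrative, while the spaces.GPU(...) call mirrors app.py verbatim.

import functools

import spaces


def process_video(pipe, path_input):
    ...  # placeholder for the real sliding-window depth inference


def make_gpu_worker(pipe):
    # Bind the pipeline first, then ask ZeroGPU for up to 220 seconds of GPU
    # time per call, the value this commit sets in app.py.
    return spaces.GPU(functools.partial(process_video, pipe), duration=220)

The 220-second budget has to cover the whole request, since the real process_video runs sliding-window inference over up to default_video_out_max_frames = 90 frames before writing the colored depth video and the 16-bit PNG archive.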