unknown committed on
Commit
be5b973
1 Parent(s): 12d8e68
Files changed (1) hide show
  1. app.py +70 -75
app.py CHANGED
@@ -96,7 +96,6 @@ class FoleyController:
96
  time_detector_ckpt = osp.join(osp.join(self.model_dir, 'timestamp_detector.pth.tar'))
97
  time_detector = VideoOnsetNet(False)
98
  self.time_detector, _ = torch_utils.load_model(time_detector_ckpt, time_detector, strict=True)
99
- self.time_detector = self.time_detector
100
 
101
  self.pipeline = build_foleycrafter()
102
  ckpt = torch.load(temporal_ckpt_path)
@@ -204,81 +203,77 @@ class FoleyController:
204
  save_sample_path = os.path.join(self.savedir_sample, f"{name}.mp4")
205
 
206
  return save_sample_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
- def ui():
209
- with gr.Blocks(css=css) as demo:
210
- gr.HTML(
211
- '<h1 style="height: 136px; display: flex; align-items: center; justify-content: space-around;"><span style="height: 100%; width:136px;"><img src="file/foleycrafter.png" alt="logo" style="height: 100%; width:auto; object-fit: contain; margin: 0px 0px; padding: 0px 0px;"></span><strong style="font-size: 40px;">FoleyCrafter: Bring Silent Videos to Life with Lifelike and Synchronized Sounds</strong></h1>'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  )
213
- with gr.Row():
214
- gr.Markdown(
215
- "<div align='center'><font size='5'><a href='https://foleycrafter.github.io/'>Project Page</a> &ensp;" # noqa
216
- "<a href='https://arxiv.org/abs/xxxx.xxxxx/'>Paper</a> &ensp;"
217
- "<a href='https://github.com/open-mmlab/foleycrafter'>Code</a> &ensp;"
218
- "<a href='https://huggingface.co/spaces/ymzhang319/FoleyCrafter'>Demo</a> </font></div>"
219
- )
220
-
221
- with gr.Column(variant="panel"):
222
- with gr.Row(equal_height=False):
223
- with gr.Column():
224
- with gr.Row():
225
- init_img = gr.Video(label="Input Video")
226
- with gr.Row():
227
- prompt_textbox = gr.Textbox(value='', label="Prompt", lines=1)
228
- with gr.Row():
229
- negative_prompt_textbox = gr.Textbox(value=N_PROMPT, label="Negative prompt", lines=1)
230
-
231
- with gr.Row():
232
- sampler_dropdown = gr.Dropdown(
233
- label="Sampling method",
234
- choices=list(scheduler_dict.keys()),
235
- value=list(scheduler_dict.keys())[0],
236
- )
237
- sample_step_slider = gr.Slider(
238
- label="Sampling steps", value=25, minimum=10, maximum=100, step=1
239
- )
240
-
241
- cfg_scale_slider = gr.Slider(label="CFG Scale", value=7.5, minimum=0, maximum=20)
242
- ip_adapter_scale = gr.Slider(label="Visual Content Scale", value=1.0, minimum=0, maximum=1)
243
- temporal_scale = gr.Slider(label="Temporal Align Scale", value=0., minimum=0., maximum=1.0)
244
-
245
- with gr.Row():
246
- seed_textbox = gr.Textbox(label="Seed", value=42)
247
- seed_button = gr.Button(value="\U0001f3b2", elem_classes="toolbutton")
248
- seed_button.click(fn=lambda x: random.randint(1, 1e8), outputs=[seed_textbox], queue=False)
249
-
250
- generate_button = gr.Button(value="Generate", variant="primary")
251
-
252
- result_video = gr.Video(label="Generated Audio", interactive=False)
253
-
254
- generate_button.click(
255
- fn=controller.foley,
256
- inputs=[
257
- init_img,
258
- prompt_textbox,
259
- negative_prompt_textbox,
260
- ip_adapter_scale,
261
- temporal_scale,
262
- sampler_dropdown,
263
- sample_step_slider,
264
- cfg_scale_slider,
265
- seed_textbox,
266
- ],
267
- outputs=[result_video],
268
- )
269
-
270
- return demo
271
-
272
- if __name__ == "__main__":
273
- controller = FoleyController()
274
- device = "cuda" if torch.cuda.is_available() else "cpu"
275
-
276
- # move to gpu
277
- controller.time_detector = controller.time_detector.to(device)
278
- controller.pipeline = controller.pipeline.to(device)
279
- controller.vocoder = controller.vocoder.to(device)
280
- controller.image_encoder = controller.image_encoder.to(device)
281
-
282
- demo = ui()
283
  demo.queue(10)
284
  demo.launch(server_name=args.server_name, server_port=args.port, share=args.share, allowed_paths=["./foleycrafter.png"])
 
96
  time_detector_ckpt = osp.join(osp.join(self.model_dir, 'timestamp_detector.pth.tar'))
97
  time_detector = VideoOnsetNet(False)
98
  self.time_detector, _ = torch_utils.load_model(time_detector_ckpt, time_detector, strict=True)
 
99
 
100
  self.pipeline = build_foleycrafter()
101
  ckpt = torch.load(temporal_ckpt_path)
 
203
  save_sample_path = os.path.join(self.savedir_sample, f"{name}.mp4")
204
 
205
  return save_sample_path
206
+
207
+
208
+ controller = FoleyController()
209
+ device = "cuda" if torch.cuda.is_available() else "cpu"
210
+
211
+ # move to gpu
212
+ controller.time_detector = controller.time_detector.to(device)
213
+ controller.pipeline = controller.pipeline.to(device)
214
+ controller.vocoder = controller.vocoder.to(device)
215
+ controller.image_encoder = controller.image_encoder.to(device)
216
+
217
+ with gr.Blocks(css=css) as demo:
218
+ gr.HTML(
219
+ '<h1 style="height: 136px; display: flex; align-items: center; justify-content: space-around;"><span style="height: 100%; width:136px;"><img src="file/foleycrafter.png" alt="logo" style="height: 100%; width:auto; object-fit: contain; margin: 0px 0px; padding: 0px 0px;"></span><strong style="font-size: 40px;">FoleyCrafter: Bring Silent Videos to Life with Lifelike and Synchronized Sounds</strong></h1>'
220
+ )
221
+ with gr.Row():
222
+ gr.Markdown(
223
+ "<div align='center'><font size='5'><a href='https://foleycrafter.github.io/'>Project Page</a> &ensp;" # noqa
224
+ "<a href='https://arxiv.org/abs/xxxx.xxxxx/'>Paper</a> &ensp;"
225
+ "<a href='https://github.com/open-mmlab/foleycrafter'>Code</a> &ensp;"
226
+ "<a href='https://huggingface.co/spaces/ymzhang319/FoleyCrafter'>Demo</a> </font></div>"
227
+ )
228
 
229
+ with gr.Column(variant="panel"):
230
+ with gr.Row(equal_height=False):
231
+ with gr.Column():
232
+ with gr.Row():
233
+ init_img = gr.Video(label="Input Video")
234
+ with gr.Row():
235
+ prompt_textbox = gr.Textbox(value='', label="Prompt", lines=1)
236
+ with gr.Row():
237
+ negative_prompt_textbox = gr.Textbox(value=N_PROMPT, label="Negative prompt", lines=1)
238
+
239
+ with gr.Row():
240
+ sampler_dropdown = gr.Dropdown(
241
+ label="Sampling method",
242
+ choices=list(scheduler_dict.keys()),
243
+ value=list(scheduler_dict.keys())[0],
244
+ )
245
+ sample_step_slider = gr.Slider(
246
+ label="Sampling steps", value=25, minimum=10, maximum=100, step=1
247
+ )
248
+
249
+ cfg_scale_slider = gr.Slider(label="CFG Scale", value=7.5, minimum=0, maximum=20)
250
+ ip_adapter_scale = gr.Slider(label="Visual Content Scale", value=1.0, minimum=0, maximum=1)
251
+ temporal_scale = gr.Slider(label="Temporal Align Scale", value=0., minimum=0., maximum=1.0)
252
+
253
+ with gr.Row():
254
+ seed_textbox = gr.Textbox(label="Seed", value=42)
255
+ seed_button = gr.Button(value="\U0001f3b2", elem_classes="toolbutton")
256
+ seed_button.click(fn=lambda x: random.randint(1, 1e8), outputs=[seed_textbox], queue=False)
257
+
258
+ generate_button = gr.Button(value="Generate", variant="primary")
259
+
260
+ result_video = gr.Video(label="Generated Audio", interactive=False)
261
+
262
+ generate_button.click(
263
+ fn=controller.foley,
264
+ inputs=[
265
+ init_img,
266
+ prompt_textbox,
267
+ negative_prompt_textbox,
268
+ ip_adapter_scale,
269
+ temporal_scale,
270
+ sampler_dropdown,
271
+ sample_step_slider,
272
+ cfg_scale_slider,
273
+ seed_textbox,
274
+ ],
275
+ outputs=[result_video],
276
  )
277
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  demo.queue(10)
279
  demo.launch(server_name=args.server_name, server_port=args.port, share=args.share, allowed_paths=["./foleycrafter.png"])