waveydaveygravy committed
Commit a5b2a9d
1 Parent(s): aa5be73

Upload 3 files

Files changed (3)
  1. Multicontrolnet.ipynb +659 -0
  2. boxermultiframes.zip +3 -0
  3. multi1.py +62 -0
Multicontrolnet.ipynb ADDED
@@ -0,0 +1,659 @@
+ {
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "gpuType": "T4"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "GjF0Vw1G3CsS"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Install requirements\n",
+ "import os\n",
+ "#!git clone https://github.com/huggingface/diffusers\n",
+ "!git clone https://huggingface.co/waveydaveygravy/controlnetexpts\n",
+ "!pip install controlnet-aux==0.0.7\n",
+ "#!pip install -U openmim\n",
+ "#!pip install cog\n",
+ "#!pip install mediapipe\n",
+ "#!mim install mmengine\n",
+ "#!mim install \"mmcv>=2.0.1\"\n",
+ "#!mim install \"mmdet>=3.1.0\"\n",
+ "#!mim install \"mmpose>=1.1.0\"\n",
+ "!pip install diffusers\n",
+ "!pip install moviepy\n",
+ "#!pip install argparse  # argparse is part of the Python standard library\n",
+ "!pip install transformers\n",
+ "!pip install pillow\n",
+ "!pip install accelerate\n",
+ "!pip install xformers\n",
+ "#!pip install https://github.com/karaokenerds/python-audio-separator/releases/download/v0.12.1/onnxruntime_gpu-1.17.0-cp310-cp310-linux_x86_64.whl\n",
+ "#!git clone https://github.com/danielgatis/rembg.git\n",
+ "#!git clone https://huggingface.co/spaces/LiheYoung/Depth-Anything\n",
+ "\n",
+ "# Create the working directories\n",
+ "os.makedirs(\"/content/test\", exist_ok=True)\n",
+ "os.makedirs(\"/content/frames\", exist_ok=True)\n",
+ "os.makedirs(\"/content/op\", exist_ok=True)\n",
+ "os.makedirs(\"/content/dp\", exist_ok=True)\n",
+ "os.makedirs(\"/content/checkpoints\", exist_ok=True)\n",
+ "os.makedirs(\"/content/checkpoints/openpose\", exist_ok=True)\n",
+ "os.makedirs(\"/content/checkpoints/depth\", exist_ok=True)\n",
+ "os.makedirs(\"/content/checkpoints/realisticvision\", exist_ok=True)\n",
+ "#INPUT_DIR = \"/content/frames\" # replace with your input directory\n",
+ "#OUTPUT_DIR = \"/content/test\" # replace with your output directory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title upload\n",
+ "from google.colab import files\n",
+ "uploaded = files.upload()"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "uLaX0p173O-A"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# @title break video down into frames\n",
+ "import cv2\n",
+ "import os\n",
+ "\n",
+ "# Create the working directories (frames are written to /content/testIP)\n",
+ "os.makedirs(\"/content/test\", exist_ok=True)\n",
+ "os.makedirs(\"/content/frames\", exist_ok=True)\n",
+ "os.makedirs(\"/content/testIP\", exist_ok=True)\n",
+ "\n",
+ "#INPUT_DIR = \"/content/frames\" # replace with your input directory\n",
+ "#OUTPUT_DIR = \"/content/testIP\" # replace with your output directory\n",
+ "\n",
+ "# Open the video file\n",
+ "cap = cv2.VideoCapture('/content/trumpoverlay_1.mp4')\n",
+ "\n",
+ "i = 0\n",
+ "while cap.isOpened():\n",
+ "    ret, frame = cap.read()\n",
+ "    if not ret:\n",
+ "        break\n",
+ "\n",
+ "    # Save each frame of the video\n",
+ "    cv2.imwrite('/content/testIP/frame_' + str(i) + '.jpg', frame)\n",
+ "    i += 1\n",
+ "\n",
+ "cap.release()\n",
+ "cv2.destroyAllWindows()"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "LqdApe0Y3VtH"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# @title COMMENT OUT PROCESSORS YOU DON'T WANT TO USE; ALSO COMMENT OUT ONES WITH LARGE MODELS IF YOU WANT TO SAVE SPACE\n",
+ "### based on https://github.com/patrickvonplaten/controlnet_aux\n",
+ "### which is derived from https://github.com/lllyasviel/ControlNet/tree/main/annotator and connected to the 🤗 Hub.\n",
+ "# All credit & copyright goes to https://github.com/lllyasviel .\n",
+ "# Some of the models are large; comment them out to save space if not needed.\n",
+ "\n",
+ "import torch\n",
+ "import os\n",
+ "import shutil\n",
+ "import logging\n",
+ "import math\n",
+ "import numpy as np\n",
+ "from PIL import Image\n",
+ "from tqdm.auto import tqdm\n",
+ "import matplotlib.pyplot as plt\n",
+ "import matplotlib.image as mpimg\n",
+ "from transformers import AutoModel\n",
+ "from diffusers import DiffusionPipeline\n",
+ "#from depth_anything.dpt import DepthAnything\n",
+ "#from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet\n",
+ "from controlnet_aux import (CannyDetector, ContentShuffleDetector, HEDdetector,\n",
+ "                            LeresDetector, LineartAnimeDetector,\n",
+ "                            LineartDetector, MediapipeFaceDetector,\n",
+ "                            MidasDetector, MLSDdetector, NormalBaeDetector,\n",
+ "                            OpenposeDetector, PidiNetDetector, SamDetector,\n",
+ "                            ZoeDetector, DWposeDetector)\n",
+ "\n",
+ "# Create the working directories\n",
+ "os.makedirs(\"/content/test\", exist_ok=True)\n",
+ "os.makedirs(\"/content/frames\", exist_ok=True)\n",
+ "\n",
+ "INPUT_DIR = \"/content/frames\" # replace with your input directory\n",
+ "OUTPUT_DIR = \"/content/test\" # replace with your output directory\n",
+ "\n",
+ "#controlnet_model_path = \"/content/checkpoints\"\n",
+ "#controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch.float16)\n",
+ "\n",
+ "# Check if CUDA is available and set the device accordingly\n",
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ "\n",
+ "\n",
+ "def output(filename, img):\n",
+ "    img.save(os.path.join(OUTPUT_DIR, filename))\n",
+ "\n",
+ "def process_image(processor, img):\n",
+ "    return processor(img)\n",
+ "\n",
+ "def load_images():\n",
+ "    # NOTE: this wipes OUTPUT_DIR on every call, so run one processor at a time\n",
+ "    # and move its results (e.g. to /content/op/test or /content/dp/test) before the next run.\n",
+ "    if os.path.exists(OUTPUT_DIR):\n",
+ "        shutil.rmtree(OUTPUT_DIR)\n",
+ "    os.mkdir(OUTPUT_DIR)\n",
+ "    images = []\n",
+ "    filenames = []\n",
+ "    for filename in os.listdir(INPUT_DIR):\n",
+ "        if filename.endswith(\".png\") or filename.endswith(\".jpg\"):\n",
+ "            img_path = os.path.join(INPUT_DIR, filename)\n",
+ "            img = Image.open(img_path).convert(\"RGB\").resize((512, 512))\n",
+ "            images.append(img)\n",
+ "            filenames.append(filename)\n",
+ "    return images, filenames\n",
+ "\n",
+ "def process_images(processor):\n",
+ "    images, filenames = load_images()\n",
+ "    for img, filename in tqdm(zip(images, filenames), total=len(images), desc=\"Processing images\"):\n",
+ "        output_img = process_image(processor, img)\n",
+ "        output(filename, output_img)\n",
+ "\n",
+ "# Initialize the detectors\n",
+ "\n",
+ "#canny = CannyDetector()\n",
+ "#hed = HEDdetector.from_pretrained(\"lllyasviel/Annotators\")\n",
+ "#shuffle = ContentShuffleDetector()\n",
+ "leres = LeresDetector.from_pretrained(\"lllyasviel/Annotators\")\n",
+ "#lineart_anime = LineartAnimeDetector.from_pretrained(\"lllyasviel/Annotators\")\n",
+ "#lineart = LineartDetector.from_pretrained(\"lllyasviel/Annotators\")\n",
+ "#mediapipe_face = MediapipeFaceDetector()\n",
+ "#midas = MidasDetector.from_pretrained(\"lllyasviel/Annotators\").to('cuda')\n",
+ "#mlsd = MLSDdetector.from_pretrained(\"lllyasviel/Annotators\")\n",
+ "#normal_bae = NormalBaeDetector.from_pretrained(\"lllyasviel/Annotators\")\n",
+ "openpose = OpenposeDetector.from_pretrained(\"lllyasviel/Annotators\")\n",
+ "#pidi_net = PidiNetDetector.from_pretrained(\"lllyasviel/Annotators\")\n",
+ "#sam = SamDetector.from_pretrained(\"ybelkada/segment-anything\", subfolder=\"checkpoints\")\n",
+ "#zoe = ZoeDetector\n",
+ "#depth_anything = AutoModel.from_pretrained(\"waveydaveygravy/depth-anything_pruned\")\n",
+ "\n",
+ "# Run the image processing\n",
+ "# Uncomment the line for the detector you want to use\n",
+ "#process_images(canny)\n",
+ "#process_images(hed)\n",
+ "process_images(openpose)\n",
+ "#process_images(midas)\n",
+ "process_images(leres)"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "ExeWABuE3vjo"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title zip frames\n",
+ "# replace the archive name and folder with your own paths\n",
+ "!zip -r frames.zip /content/test"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "hJzsEihl4BVx"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title download models\n",
+ "#%cd /content/checkpoints\n",
+ "#!wget https://huggingface.co/lllyasviel/control_v11p_sd15_openpose/resolve/main/diffusion_pytorch_model.fp16.bin?download=true\n",
+ "#!wget https://huggingface.co/lllyasviel/control_v11f1p_sd15_depth/resolve/main/diffusion_pytorch_model.fp16.bin?download=true\n",
+ "%cd /content/checkpoints/realisticvision\n",
+ "!wget https://huggingface.co/SG161222/Realistic_Vision_V4.0_noVAE/resolve/main/Realistic_Vision_V4.0_fp16-no-ema-inpainting.ckpt?download=true\n",
+ "%cd /content/"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "evRfuqs74HqW"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title config file for openpose\n",
+ "{\n",
+ "  \"_class_name\": \"ControlNetModel\",\n",
+ "  \"_diffusers_version\": \"0.16.0.dev0\",\n",
+ "  \"_name_or_path\": \"/home/patrick/controlnet_v1_1/control_v11p_sd15_openpose\",\n",
+ "  \"act_fn\": \"silu\",\n",
+ "  \"attention_head_dim\": 8,\n",
+ "  \"block_out_channels\": [\n",
+ "    320,\n",
+ "    640,\n",
+ "    1280,\n",
+ "    1280\n",
+ "  ],\n",
+ "  \"class_embed_type\": null,\n",
+ "  \"conditioning_embedding_out_channels\": [\n",
+ "    16,\n",
+ "    32,\n",
+ "    96,\n",
+ "    256\n",
+ "  ],\n",
+ "  \"controlnet_conditioning_channel_order\": \"rgb\",\n",
+ "  \"cross_attention_dim\": 768,\n",
+ "  \"down_block_types\": [\n",
+ "    \"CrossAttnDownBlock2D\",\n",
+ "    \"CrossAttnDownBlock2D\",\n",
+ "    \"CrossAttnDownBlock2D\",\n",
+ "    \"DownBlock2D\"\n",
+ "  ],\n",
+ "  \"downsample_padding\": 1,\n",
+ "  \"flip_sin_to_cos\": true,\n",
+ "  \"freq_shift\": 0,\n",
+ "  \"in_channels\": 4,\n",
+ "  \"layers_per_block\": 2,\n",
+ "  \"mid_block_scale_factor\": 1,\n",
+ "  \"norm_eps\": 1e-05,\n",
+ "  \"norm_num_groups\": 32,\n",
+ "  \"num_class_embeds\": null,\n",
+ "  \"only_cross_attention\": false,\n",
+ "  \"projection_class_embeddings_input_dim\": null,\n",
+ "  \"resnet_time_scale_shift\": \"default\",\n",
+ "  \"upcast_attention\": false,\n",
+ "  \"use_linear_projection\": false\n",
+ "}"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "ZpkAQLlO4RVf"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title WORKING MULTICONTROLNET SCRIPT DO NOT CHANGE! save as multi1.py and use as shown in next cell\n",
+ "\n",
+ "from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler\n",
+ "from diffusers.utils import load_image\n",
+ "import torch\n",
+ "import os\n",
+ "import argparse\n",
+ "from PIL import Image\n",
+ "import cv2\n",
+ "import numpy as np\n",
+ "import shutil\n",
+ "from tqdm import tqdm\n",
+ "\n",
+ "device = \"cuda\"\n",
+ "\n",
+ "# Initialize the argument parser\n",
+ "parser = argparse.ArgumentParser(description='Choose a processor to run.')\n",
+ "parser.add_argument('--op_image', type=str, help='path to pose image')\n",
+ "parser.add_argument('--dp_image', type=str, help='path to depth image')\n",
+ "parser.add_argument('--output_dir', type=str, default='/content/multi', help='The directory to save the output.')\n",
+ "# Parse the arguments\n",
+ "args = parser.parse_args()\n",
+ "\n",
+ "op_image = load_image(args.op_image)\n",
+ "dp_image = load_image(args.dp_image)\n",
+ "\n",
+ "controlnet = [\n",
+ "    ControlNetModel.from_pretrained(\"/content/checkpoints/openpose\", torch_dtype=torch.float16).to('cuda'),\n",
+ "    ControlNetModel.from_pretrained(\"/content/checkpoints/depth\", torch_dtype=torch.float16).to('cuda'),\n",
+ "]\n",
+ "\n",
+ "pipe = StableDiffusionControlNetPipeline.from_pretrained(\n",
+ "    \"SG161222/Realistic_Vision_V4.0_noVAE\", controlnet=controlnet, torch_dtype=torch.float16\n",
+ ").to('cuda')\n",
+ "\n",
+ "pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)\n",
+ "\n",
+ "prompt = \"a boxer in a boxing ring, best quality\"\n",
+ "negative_prompt = \"monochrome, lowres, bad anatomy, worst quality, low quality\"\n",
+ "\n",
+ "images = [op_image, dp_image]\n",
+ "\n",
+ "image = pipe(\n",
+ "    prompt,\n",
+ "    images,\n",
+ "    num_inference_steps=20,\n",
+ "    negative_prompt=negative_prompt,\n",
+ "    controlnet_conditioning_scale=[1.0, 0.8],\n",
+ ").images[0]\n",
+ "\n",
+ "# Extract the filename and extension from args.op_image\n",
+ "filename, extension = os.path.splitext(os.path.basename(args.op_image))\n",
+ "\n",
+ "# Construct the full output path, creating the output directory if needed\n",
+ "os.makedirs(args.output_dir, exist_ok=True)\n",
+ "output_path = os.path.join(args.output_dir, filename + extension)\n",
+ "\n",
+ "print(type(image))\n",
+ "# Save the image using PIL\n",
+ "image.save(output_path)  # image is a PIL.Image\n",
+ "print(\"saved in output directory!\")\n"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "55GnGKo74VLg"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title list of commands for multi1.py change to whichever paths needed\n",
+ "#!python /content/multi1.py --op_image \"/content/op/test/frame_0.jpg\" --dp_image \"/content/dp/test/frame_0.jpg\"\n",
+ "#!python /content/multi1.py --op_image \"/content/op/test/frame_1.jpg\" --dp_image \"/content/dp/test/frame_1.jpg\"\n",
+ "#!python /content/multi1.py --op_image \"/content/op/test/frame_2.jpg\" --dp_image \"/content/dp/test/frame_2.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_3.jpg\" --dp_image \"/content/dp/test/frame_3.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_4.jpg\" --dp_image \"/content/dp/test/frame_4.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_5.jpg\" --dp_image \"/content/dp/test/frame_5.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_6.jpg\" --dp_image \"/content/dp/test/frame_6.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_7.jpg\" --dp_image \"/content/dp/test/frame_7.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_8.jpg\" --dp_image \"/content/dp/test/frame_8.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_9.jpg\" --dp_image \"/content/dp/test/frame_9.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_10.jpg\" --dp_image \"/content/dp/test/frame_10.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_11.jpg\" --dp_image \"/content/dp/test/frame_11.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_12.jpg\" --dp_image \"/content/dp/test/frame_12.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_13.jpg\" --dp_image \"/content/dp/test/frame_13.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_14.jpg\" --dp_image \"/content/dp/test/frame_14.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_15.jpg\" --dp_image \"/content/dp/test/frame_15.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_16.jpg\" --dp_image \"/content/dp/test/frame_16.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_17.jpg\" --dp_image \"/content/dp/test/frame_17.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_18.jpg\" --dp_image \"/content/dp/test/frame_18.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_19.jpg\" --dp_image \"/content/dp/test/frame_19.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_20.jpg\" --dp_image \"/content/dp/test/frame_20.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_21.jpg\" --dp_image \"/content/dp/test/frame_21.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_22.jpg\" --dp_image \"/content/dp/test/frame_22.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_23.jpg\" --dp_image \"/content/dp/test/frame_23.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_24.jpg\" --dp_image \"/content/dp/test/frame_24.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_25.jpg\" --dp_image \"/content/dp/test/frame_25.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_26.jpg\" --dp_image \"/content/dp/test/frame_26.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_27.jpg\" --dp_image \"/content/dp/test/frame_27.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_28.jpg\" --dp_image \"/content/dp/test/frame_28.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_29.jpg\" --dp_image \"/content/dp/test/frame_29.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_30.jpg\" --dp_image \"/content/dp/test/frame_30.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_31.jpg\" --dp_image \"/content/dp/test/frame_31.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_32.jpg\" --dp_image \"/content/dp/test/frame_32.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_33.jpg\" --dp_image \"/content/dp/test/frame_33.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_34.jpg\" --dp_image \"/content/dp/test/frame_34.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_35.jpg\" --dp_image \"/content/dp/test/frame_35.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_36.jpg\" --dp_image \"/content/dp/test/frame_36.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_37.jpg\" --dp_image \"/content/dp/test/frame_37.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_38.jpg\" --dp_image \"/content/dp/test/frame_38.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_39.jpg\" --dp_image \"/content/dp/test/frame_39.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_40.jpg\" --dp_image \"/content/dp/test/frame_40.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_41.jpg\" --dp_image \"/content/dp/test/frame_41.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_42.jpg\" --dp_image \"/content/dp/test/frame_42.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_43.jpg\" --dp_image \"/content/dp/test/frame_43.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_44.jpg\" --dp_image \"/content/dp/test/frame_44.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_45.jpg\" --dp_image \"/content/dp/test/frame_45.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_46.jpg\" --dp_image \"/content/dp/test/frame_46.jpg\"\n",
+ "!python /content/multi1.py --op_image \"/content/op/test/frame_47.jpg\" --dp_image \"/content/dp/test/frame_47.jpg\""
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "IQ4wkiE04sN3"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title interpolate processed frames (best to keep fps same as input video)\n",
+ "# point -i at the folder where multi1.py saved its frames (default /content/multi)\n",
+ "!ffmpeg -r 12 -i /content/output_processed/frame_%d.jpg -vf \"format=yuv420p\" -c:v libx264 -crf 1 boxermulti1.mp4\n",
+ "\n"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "fjpga-Ra5-RN"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title display video\n",
+ "from IPython.display import HTML\n",
+ "from base64 import b64encode\n",
+ "\n",
+ "# Open the video file and read its contents\n",
+ "mp4 = open('/content/boxermulti1.mp4', 'rb').read()\n",
+ "\n",
+ "# Encode the video data as a base64 string\n",
+ "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
+ "\n",
+ "# Display the video using an HTML video element\n",
+ "HTML(f\"\"\"\n",
+ "<video width=600 controls>\n",
+ "  <source src=\"{data_url}\" type=\"video/mp4\">\n",
+ "</video>\n",
+ "\"\"\")"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "4PaTDmZw5_2P"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title clear variables and empty vram cache\n",
+ "import gc\n",
+ "import torch\n",
+ "gc.collect()\n",
+ "torch.cuda.empty_cache()"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "30XaQxxq5cU5"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title zip up frames\n",
+ "!zip -r multiframes.zip /content/multi"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "Qjocs_K45A1u"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "FaF3RdKdaFa8",
+ "cellView": "form"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Login to HuggingFace 🤗\n",
+ "\n",
+ "#@markdown You need to accept the model license before downloading or using the Stable Diffusion weights. Please, visit the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5), read the license and tick the checkbox if you agree. You have to be a registered user in 🤗 Hugging Face Hub, and you'll also need to use an access token for the code to work.\n",
+ "# https://huggingface.co/settings/tokens\n",
+ "!mkdir -p ~/.huggingface\n",
+ "HUGGINGFACE_TOKEN = \"\" #@param {type:\"string\"}\n",
+ "!echo -n \"{HUGGINGFACE_TOKEN}\" > ~/.huggingface/token"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "aEJZoFQ2YHIb",
+ "cellView": "form"
+ },
+ "outputs": [],
+ "source": [
+ "#@title upload to huggingface\n",
+ "from huggingface_hub import HfApi\n",
+ "api = HfApi()\n",
+ "api.upload_file(\n",
+ "    path_or_fileobj=\"\",\n",
+ "    path_in_repo=\"\",\n",
+ "    repo_id=\"\",\n",
+ "    repo_type=\"model\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#=============================="
+ ],
+ "metadata": {
+ "id": "LFm40CCy5Upw"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title OG multi code for reference do not change https://huggingface.co/blog/controlnet\n",
+ "from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler\n",
+ "import torch\n",
+ "\n",
+ "controlnet = [\n",
+ "    ControlNetModel.from_pretrained(\"lllyasviel/sd-controlnet-openpose\", torch_dtype=torch.float16),\n",
+ "    ControlNetModel.from_pretrained(\"lllyasviel/sd-controlnet-canny\", torch_dtype=torch.float16),\n",
+ "]\n",
+ "\n",
+ "pipe = StableDiffusionControlNetPipeline.from_pretrained(\n",
+ "    \"runwayml/stable-diffusion-v1-5\", controlnet=controlnet, torch_dtype=torch.float16\n",
+ ")\n",
+ "pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)\n",
+ "\n",
+ "pipe.enable_xformers_memory_efficient_attention()\n",
+ "pipe.enable_model_cpu_offload()\n",
+ "\n",
+ "prompt = \"a giant standing in a fantasy landscape, best quality\"\n",
+ "negative_prompt = \"monochrome, lowres, bad anatomy, worst quality, low quality\"\n",
+ "\n",
+ "generator = torch.Generator(device=\"cpu\").manual_seed(1)\n",
+ "\n",
+ "images = [openpose_image, canny_image]\n",
+ "\n",
+ "image = pipe(\n",
+ "    prompt,\n",
+ "    images,\n",
+ "    num_inference_steps=20,\n",
+ "    generator=generator,\n",
+ "    negative_prompt=negative_prompt,\n",
+ "    controlnet_conditioning_scale=[1.0, 0.8],\n",
+ ").images[0]\n",
+ "\n",
+ "image.save(\"./multi_controlnet_output.png\")\n",
+ "\n"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "a44MnBt-5N6d"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+ }
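
Note on the notebook above: the "list of commands" cell shells out to multi1.py once per frame pair, with one hard-coded line per frame. The same work can be done with a single loop over whatever frames actually exist. A minimal sketch, assuming the /content/op/test and /content/dp/test layout used in the notebook and that the script is saved at /content/multi1.py:

import os
import subprocess

OP_DIR = "/content/op/test"   # openpose frames
DP_DIR = "/content/dp/test"   # depth frames

# Collect frame indices present in the openpose folder, in numeric order.
indices = sorted(
    int(name.split("_")[1].split(".")[0])
    for name in os.listdir(OP_DIR)
    if name.startswith("frame_") and name.endswith(".jpg")
)

for i in indices:
    op = os.path.join(OP_DIR, f"frame_{i}.jpg")
    dp = os.path.join(DP_DIR, f"frame_{i}.jpg")
    if not os.path.exists(dp):
        continue  # skip frames without a matching depth map
    subprocess.run(
        ["python", "/content/multi1.py", "--op_image", op, "--dp_image", dp],
        check=True,
    )

Each invocation still reloads the whole pipeline, exactly as the per-line commands do; importing the pipeline once and looping in-process would be faster, but the sketch above keeps multi1.py unchanged.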
boxermultiframes.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:32113b8ee5a722d171b7d5a5ac35b5bc8ff47d05482d1edad6e653c375f204fa
+ size 1170711
multi1.py ADDED
@@ -0,0 +1,62 @@
+ from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
+ from diffusers.utils import load_image
+ import torch
+ import os
+ import argparse
+ from PIL import Image
+ import cv2
+ import numpy as np
+ import shutil
+ from tqdm import tqdm
+
+ device = "cuda"
+
+ # Initialize the argument parser
+ parser = argparse.ArgumentParser(description='Choose a processor to run.')
+ parser.add_argument('--op_image', type=str, help='path to pose image')
+ parser.add_argument('--dp_image', type=str, help='path to depth image')
+ parser.add_argument('--output_dir', type=str, default='/content/multi', help='The directory to save the output.')
+ # Parse the arguments
+ args = parser.parse_args()
+
+ op_image = load_image(args.op_image)
+ dp_image = load_image(args.dp_image)
+
+ # Load the openpose and depth ControlNets from the local checkpoint folders
+ controlnet = [
+     ControlNetModel.from_pretrained("/content/checkpoints/openpose", torch_dtype=torch.float16).to('cuda'),
+     ControlNetModel.from_pretrained("/content/checkpoints/depth", torch_dtype=torch.float16).to('cuda'),
+ ]
+
+ pipe = StableDiffusionControlNetPipeline.from_pretrained(
+     "SG161222/Realistic_Vision_V4.0_noVAE", controlnet=controlnet, torch_dtype=torch.float16
+ ).to('cuda')
+
+ pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+
+ prompt = "a boxer in a boxing ring, best quality"
+ negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
+
+ images = [op_image, dp_image]
+
+ image = pipe(
+     prompt,
+     images,
+     num_inference_steps=20,
+     negative_prompt=negative_prompt,
+     controlnet_conditioning_scale=[1.0, 0.8],
+ ).images[0]
+
+ # Extract the filename and extension from args.op_image
+ filename, extension = os.path.splitext(os.path.basename(args.op_image))
+
+ # Construct the full output path, creating the output directory if needed
+ os.makedirs(args.output_dir, exist_ok=True)
+ output_path = os.path.join(args.output_dir, filename + extension)
+
+ print(type(image))
+ # Save the image using PIL
+ image.save(output_path)  # image is a PIL.Image
+ print("saved in output directory!")
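
One practical gap worth flagging: multi1.py loads ControlNet weights from /content/checkpoints/openpose and /content/checkpoints/depth, but the notebook's "download models" cell leaves the corresponding wget lines commented out (and from_pretrained also needs a config.json next to the weights, which is presumably why the notebook carries a copy of the openpose config). A minimal sketch of one way to populate those folders, assuming the same lllyasviel repos referenced in the notebook:

import torch
from diffusers import ControlNetModel

# Local folders multi1.py expects -> Hub repos referenced in the notebook.
targets = {
    "/content/checkpoints/openpose": "lllyasviel/control_v11p_sd15_openpose",
    "/content/checkpoints/depth": "lllyasviel/control_v11f1p_sd15_depth",
}

for local_dir, repo_id in targets.items():
    # Download once from the Hub, then write config.json + weights locally
    # so the hard-coded local paths in multi1.py resolve.
    ControlNetModel.from_pretrained(repo_id, torch_dtype=torch.float16).save_pretrained(local_dir)

This fetches the default weights and re-saves them in fp16 alongside their configs, trading some download size for not having to hand-assemble the config/weights layout the wget lines would leave behind.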