Keiser41
/

Example_Based_Manga_Colorization

Model card Files Files and versions Community

Keiser41 commited on Oct 28, 2023

Commit

22d8ab7

•

1 Parent(s): 04d0629

Upload 98 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +28 -0
.gitignore +24 -0
LICENSE +201 -0
README.md +262 -0
__pycache__/distributed.cpython-310.pyc +0 -0
__pycache__/models.cpython-310.pyc +0 -0
__pycache__/models.cpython-38.pyc +0 -0
__pycache__/utils.cpython-310.pyc +0 -0
__pycache__/utils.cpython-38.pyc +0 -0
__pycache__/vgg_model.cpython-310.pyc +0 -0
__pycache__/vgg_model.cpython-38.pyc +0 -0
assets/PFFB.png +0 -0
assets/Pipeline.png +3 -0
assets/network.png +3 -0
data/__pycache__/data_loader.cpython-310.pyc +0 -0
data/__pycache__/tps_transformation.cpython-310.pyc +0 -0
data/data_loader.py +97 -0
data/data_loader_sketch.py +120 -0
data/prepare_data.py +84 -0
data/prepare_data_sketch.py +84 -0
data/thinplate/__init__.py +9 -0
data/thinplate/__pycache__/__init__.cpython-310.pyc +0 -0
data/thinplate/__pycache__/numpy.cpython-310.pyc +0 -0
data/thinplate/__pycache__/pytorch.cpython-310.pyc +0 -0
data/thinplate/numpy.py +115 -0
data/thinplate/pytorch.py +126 -0
data/thinplate/tests/__init__.py +0 -0
data/thinplate/tests/test_tps_numpy.py +85 -0
data/thinplate/tests/test_tps_pytorch.py +43 -0
data/tps_transformation.py +44 -0
discriminator.py +31 -0
distributed.py +126 -0
experiments/Color2Manga_gray/074000_gray.pt +3 -0
experiments/Color2Manga_sketch/116000_sketch.pt +3 -0
experiments/Discriminator/074000_d.pt +3 -0
experiments/Discriminator/116000_d.pt +3 -0
experiments/VGG19/vgg19-dcbb9e9d.pth +3 -0
extractor/Open-Sans-Bold.ttf +0 -0
extractor/__pycache__/manga_panel_extractor.cpython-310.pyc +0 -0
extractor/__pycache__/manga_panel_extractor.cpython-38.pyc +0 -0
extractor/manga_panel_extractor.py +174 -0
inference.py +229 -0
models.py +223 -0
real_manga/class1/Color 1659315.jpg +3 -0
real_manga/class1/Color 3223141571376159.jpg +3 -0
real_manga/class1/Color 3486521.jpg +3 -0
real_manga/class1/Color 5102676.jpg +3 -0
real_manga/class1/Color 5570824.jpg +3 -0
real_manga/class1/Color 5674950.jpg +3 -0
real_manga/class1/Color 5828407151952509.jpg +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,31 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/network.png filter=lfs diff=lfs merge=lfs -text
+assets/Pipeline.png filter=lfs diff=lfs merge=lfs -text
+real_manga/class1/Color[[:space:]]1659315.jpg filter=lfs diff=lfs merge=lfs -text
+real_manga/class1/Color[[:space:]]3223141571376159.jpg filter=lfs diff=lfs merge=lfs -text
+real_manga/class1/Color[[:space:]]3486521.jpg filter=lfs diff=lfs merge=lfs -text
+real_manga/class1/Color[[:space:]]5102676.jpg filter=lfs diff=lfs merge=lfs -text
+real_manga/class1/Color[[:space:]]5570824.jpg filter=lfs diff=lfs merge=lfs -text
+real_manga/class1/Color[[:space:]]5674950.jpg filter=lfs diff=lfs merge=lfs -text
+real_manga/class1/Color[[:space:]]5828407151952509.jpg filter=lfs diff=lfs merge=lfs -text
+real_manga/class1/Color[[:space:]]5851155317235124.jpg filter=lfs diff=lfs merge=lfs -text
+real_manga/class1/Color[[:space:]]6429789966786911.jpg filter=lfs diff=lfs merge=lfs -text
+real_manga/class1/Color[[:space:]]6813581942189493.jpg filter=lfs diff=lfs merge=lfs -text
+real_manga/class1/Color[[:space:]]8096755.jpg filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/001_in.png filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/002_in_ref_b.jpeg filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/004_in.png filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/005_in.png filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/006_in.png filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/006_ref.png filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/out/001_in_color_a.png filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/out/001_in_color_b.png filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/out/002_in_color_a.png filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/out/002_in_color_b.png filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/out/003_in_color_a.png filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/out/003_in_color_b.png filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/out/004_in_color.png filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/out/005_in_color.png filter=lfs diff=lfs merge=lfs -text
+test_datasets/gray_test/out/006_in_color.png filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,24 @@

+### Example user template template
+### Example user template
+# IntelliJ project files
+.idea
+*.iml
+out
+gen
+# Debug file
+datacheck.py
+test_gray2color.py
+val.py
+experiments/
+misc/
+results/
+test_datasets/*
+!/test_datasets/gray_test
+!/test_datasets/gray_test/out
+!/test_datasets/sketch_test
+!/test_datasets/sketch_test/out
+train_datasets/
+training_logs/

LICENSE ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

README.md ADDED Viewed

	@@ -0,0 +1,262 @@

+# Reference-Image-Embed-Manga-Colorization
+An amazing manga colorization project
+You can colorize gray manga or character sketches using any reference image you want; this model will faithfully retain the color features and transfer them to your manga. This is useful when you wish the color of the character's hair or clothes to be consistent.
+If the project is helpful, please leave a ⭐ on this repo. Best of luck, my friend 😊 <br>
+## Overview
+<p align="left">
+  <img src="./assets/network.png">
+</p>
+It's basically a cGAN(Conditional Generative Adversarial Network) architecture.
+### Generator
+Generator is divided into two parts.
+`Color Embedding Layer` consists of part of a pretrained VGG19 net and an MLP (Multilayer Perceptron), which is used to extract the `color embedding` from the reference image (for training, it is the preprocessed ground-truth image).
+Another part is a U-net-like network. The encoder layer extracts `content embedding` from gray input image(only contains L-channel information), and the decoder layer reconstructs the image with `color embedding` through PFFB(Progressive Feature Formalization Block) and outputs the ab_channel information.
+<p align="left">
+  <img src="./assets/PFFB.png">
+</p>
+The figure shows how PFFB works.
+It generates a filter from the color embedding and then convolves it with the content features. The figure is from this [paper](https://arxiv.org/abs/2106.08017); check it for more details.
+### Discriminator
+Discriminator is a PatchGAN, referring to [pix2pix](https://arxiv.org/abs/1611.07004v3). The difference is that there are two conditions used for input. One is the gray image waiting for colorization, and one is the reference image providing color information.
+### Loss
+There are three losses in total, `L1 loss`, `perceptual loss` produced by pretrained vgg19, and `adversarial loss` produced by discriminator. The ratio is `1: 0.1: 0.01`.
+### Pipeline
+<p align="left">
+  <img src="./assets/Pipeline.png">
+</p>
+- a. Segment panels from input manga image, `Manga-Panel-Extractor` is from [here](https://github.com/pvnieo/Manga-Panel-Extractor).
+- b. Select a reference image for each panel, and generator will colorize each panel.
+- c. Concatenate all colorized panels into original format.
+## Results
+### Gray model
+| Original    |                          Reference                           | Colorization |
+|:----------:|:-----------:|:----------:|
+| <img src="test_datasets/gray_test/001_in.png" width="400"> | <img src="test_datasets/gray_test/001_ref_a.png" width="200"> | <img src="test_datasets/gray_test/out/001_in_color_a.png" width="400"> |
+| <img src="test_datasets/gray_test/001_in.png" width="400"> | <img src="test_datasets/gray_test/001_ref_b.png" width="200"> | <img src="test_datasets/gray_test/out/001_in_color_b.png" width="400"> |
+| <img src="test_datasets/gray_test/002_in.jpeg" width="400"> | <img src="test_datasets/gray_test/002_in_ref_a.jpg" width="200"> | <img src="test_datasets/gray_test/out/002_in_color_a.png" width="400"> |
+| <img src="test_datasets/gray_test/002_in.jpeg" width="400"> | <img src="test_datasets/gray_test/002_in_ref_b.jpeg" width="200"> | <img src="test_datasets/gray_test/out/002_in_color_b.png" width="400"> |
+| <img src="test_datasets/gray_test/003_in.jpeg" width="400"> | <img src="test_datasets/gray_test/003_in_ref_a.jpg" width="200"> | <img src="test_datasets/gray_test/out/003_in_color_a.png" width="400"> |
+| <img src="test_datasets/gray_test/003_in.jpeg" width="400"> | <img src="test_datasets/gray_test/003_in_ref_b.jpg" width="200"> | <img src="test_datasets/gray_test/out/003_in_color_b.png" width="400"> |
+| <img src="test_datasets/gray_test/004_in.png" width="400"> |<img src="test_datasets/gray_test/004_ref_1.jpg" width="100"><img src="test_datasets/gray_test/004_ref_2.jpg" width="100">| <img src="test_datasets/gray_test/out/004_in_color.png" width="400">|
+| <img src="test_datasets/gray_test/005_in.png" width="400"> | <img src="test_datasets/gray_test/005_ref_1.jpeg" width="100"><img src="test_datasets/gray_test/005_ref_2.jpg" width="100"><img src="test_datasets/gray_test/005_ref_3.jpeg" width="100"> | <img src="test_datasets/gray_test/out/005_in_color.png" width="400"> |
+| <img src="test_datasets/gray_test/006_in.png" width="400"> | <img src="test_datasets/gray_test/006_ref.png" width="200"> | <img src="test_datasets/gray_test/out/006_in_color.png" width="400"> |
+### sketch model
+| Original                                                     | Reference                                                    | Colorization                                                 |
+| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| <img src="test_datasets/sketch_test/001_in.jpg" width="400"> | <img src="test_datasets/sketch_test/001_ref_a.jpg" width="200"> | <img src="test_datasets/sketch_test/out/001_in_color_a.png" width="400"> |
+| <img src="test_datasets/sketch_test/001_in.jpg" width="400"> | <img src="test_datasets/sketch_test/001_ref_b.jpg" width="200"> | <img src="test_datasets/sketch_test/out/001_in_color_b.png" width="400"> |
+## Dependencies and Installation
+1. Clone this GitHub repo.
+    ```
+    git clone https://github.com/linSensiGit/Example_Based_Manga_Colorization---cGAN.git
+    cd Example_Based_Manga_Colorization---cGAN
+    ```
+2. Create Environment
+   - Python >= 3.6 (Recommend to use [Anaconda](https://www.anaconda.com/download/#linux))
+   - [PyTorch >= 1.5.0](https://pytorch.org/) (Default GPU mode)
+   ```
+   # My environment for reference
+   - Python = 3.9.15
+   - PyTorch = 1.13.0
+   - Torchvision = 0.14.0
+   - Cuda = 11.7
+   - GPU = RTX 3060ti
+   ```
+3. Install Dependencies
+    ```
+    pip3 install -r requirement.txt
+    ```
+## Get Started
+Once you've set up the environment, several things need to be done before colorization.
+### Prepare pretrained models
+1. Download generator. I have trained two generators, for [gray manga](https://drive.google.com/file/d/11RQGvBKySEtRcBdYD8O5ZLb54jB7SAgN/view?usp=drive_link) colorization and [sketch](https://drive.google.com/file/d/1I4XwOYIGAoQwMOicknZl0s6AWcwpARmR/view?usp=drive_link) colorization. Choose what you need.
+2. Download [VGG model](https://drive.google.com/file/d/1S7t3mD-tznEUrMmq5bRsLZk4fkN24QSV/view?usp=drive_link) , it's part of generator.
+3. Download discriminator, for training [gray manga](https://drive.google.com/file/d/1DHHE9um_xOm0brTpbHb_R7K7J4mn37FS/view?usp=drive_link) colorization and [sketch](https://drive.google.com/file/d/1WgIPYY4b4GcpHW9EWFrFoTxL9SlilQbN/view?usp=drive_link) colorization. (optional)
+4. Put the pretrained model in the correct directory:
+   ```
+   Colorful-Manga-GAN
+      	|- experiments
+             |- Color2Manga_gray
+                   |- xxx000_gray.pt
+             |- Color2Manga_sketch
+                   |- xxx000_sketch.pt
+             |- Discriminator
+                   |- xxx000_d.pt
+             |- VGG19
+                   |- vgg19-dcbb9e9d.pth
+   ```
+### Quick test
+I have collected some test datasets which contain manga pages and corresponding reference images. You can check it in the path `./test_datasets`. When you use the file `inference.py` to test, you may need to edit the input file path or pretrained weights path in this file.
+```
+python inference.py
+# If you don't want to segment your manga
+python inference.py -ne
+```
+Initially, `Manga-Panel-Extractor` will segment the manga page into panels.
+Then follow the instructions in the console and you will get the colorized image.
+## Train your Own Model
+###  Prepare Datasets
+There are three datasets I used to train the model.
+For gray model, [Anime Face Dataset](https://www.kaggle.com/datasets/scribbless/another-anime-face-dataset) and Tagged [Anime Illustrations Dataset](https://www.kaggle.com/datasets/mylesoneill/tagged-anime-illustrations) are used. And I only use `danbooru-images` folder in the second Dataset.
+For sketch model, [Anime Sketch Colorization Pair Dataset](https://www.kaggle.com/datasets/ktaebum/anime-sketch-colorization-pair) is used.
+All the datasets are from [Kaggle](https://www.kaggle.com/).
+The following instructions are based on my dataset, but feel free to use your own dataset if you like.
+### Preprocess training data
+```
+cd data
+python prepare_data.py
+```
+If you are using ` Anime Sketch Colorization Pair` dataset :
+```
+python prepare_data_sketch.py
+```
+Several arguments need to be assigned:
+```
+usage: prepare_data.py [-h] [--out OUT] [--size SIZE] [--n_worker N_WORKER]
+                       [--resample RESAMPLE]
+                       path
+positional arguments:
+  path					the path of datasets
+optional arguments:
+  -h, --help           	show this help message and exit
+  --out OUT				the path to save generated lmdb
+  --size SIZE			compressed image size (128, 256, 512, 1024) alternative
+  --n_worker N_WORKER 	The number of threads, depends on your CPU
+  --resample RESAMPLE
+```
+For instance, you can run the command like this:
+```
+python prepare_data.py --out ../train_datasets/Sketch_train_lmdb --n_worker 20 --size 256 E:/Dataset/animefaces256cleaner
+```
+### Training
+There are four scripts in total for training
+`train.py` —— train only generator
+`train_disc` —— train only discriminator
+`train_all_gray.py`—— train both generator and discriminator, under the usual dataset
+`train_all_sketch.py`—— train both generator and discriminator, specifically for the sketch pair dataset
+All of these scripts share similar commands to drive:
+```
+usage: train_all_gray.py [-h] [--datasets DATASETS] [--iter ITER]
+                         [--batch BATCH] [--size SIZE] [--ckpt CKPT]
+                         [--ckpt_disc CKPT_DISC] [--lr LR] [--lr_disc LR_DISC]
+                         [--experiment_name EXPERIMENT_NAME] [--wandb]
+                         [--local_rank LOCAL_RANK]
+optional arguments:
+  -h, --help            show this help message and exit
+  --datasets DATASETS	the path of training dataset
+  --iter ITER			number of iteration in total
+  --batch BATCH			batch size
+  --size SIZE			size of image in dataset, usually 256
+  --ckpt CKPT			path of pretrained generator
+  --ckpt_disc CKPT_DISC	path of pretrained discriminator
+  --lr LR				learning rate of generator
+  --lr_disc LR_DISC		learning rate of discriminator
+  --experiment_name EXPERIMENT_NAME	used to save training_logs and trained model
+  --wandb
+  --local_rank LOCAL_RANK
+```
+There may be a slight difference, you could check the code for more details.
+For instance, you can run the command like this:
+```
+python train_all_gray.py --batch 8 --experiment_name Color2Manga_sketch --ckpt experiments/Color2Manga_sketch/078000.pt --datasets ./train_datasets/Sketch_train_lmdb --ckpt_disc experiments/Discriminator/078000_d.pt
+```
+## Work in Progress
+- [ ] Add an SR model instead of upscaling by direct interpolation
+- [ ] Optimize the generator network (adding L-channel information to the output, which is essential for colorizing sketches)
+- [ ] Better developed manga-panel-extractor(current segmentation is not precise enough)
+- [ ] Develop a front UI and add color hint so that users could adjust the color of a specific area
+## 😁Contact
+If you have any questions, please feel free to contact me via `[email protected]`
+## 🙌 Acknowledgement
+Based on https://github.com/zhaohengyuan1/Color2Embed
+Thx https://github.com/pvnieo/Manga-Panel-Extractor
+## Reference
+[1] Zhao, Hengyuan et al. “Color2Embed: Fast Exemplar-Based Image Colorization using Color Embeddings.” (2021).
+[2] Isola, Phillip et al. “Image-to-Image Translation with Conditional Adversarial Networks.” *2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)* (2016): 5967-5976.
+[3] Furusawa, Chie et al. “Comicolorization: semi-automatic manga colorization.” *SIGGRAPH Asia 2017 Technical Briefs* (2017): n. pag.
+[4] Satoshi Iizuka, Edgar Simo-Serra, and Hiroshi Ishikawa. "Let there be Color!: Joint End-to-end Learning of Global and Local Image Priors for Automatic Image Colorization with Simultaneous Classification". ACM Transaction on Graphics (Proc. of SIGGRAPH), 35(4):110, 2016.

__pycache__/distributed.cpython-310.pyc ADDED Viewed

Binary file (2.98 kB). View file

__pycache__/models.cpython-310.pyc ADDED Viewed

Binary file (6.13 kB). View file

__pycache__/models.cpython-38.pyc ADDED Viewed

Binary file (6.22 kB). View file

__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (2.72 kB). View file

__pycache__/utils.cpython-38.pyc ADDED Viewed

Binary file (2.72 kB). View file

__pycache__/vgg_model.cpython-310.pyc ADDED Viewed

Binary file (3.97 kB). View file

__pycache__/vgg_model.cpython-38.pyc ADDED Viewed

Binary file (3.96 kB). View file

assets/PFFB.png ADDED Viewed

assets/Pipeline.png ADDED Viewed

Git LFS Details

SHA256: 86e46eb233e565ec37601e8af382f3c1fa87e939adce1aa84648485c97a93840
Pointer size: 132 Bytes
Size of remote file: 1.96 MB

assets/network.png ADDED Viewed

Git LFS Details

SHA256: 136ce29bf0db8f123947506ac32db2f13fefd4ad2b7b5f29db0623558ee66fc9
Pointer size: 132 Bytes
Size of remote file: 1.2 MB

data/__pycache__/data_loader.cpython-310.pyc ADDED Viewed

Binary file (3.04 kB). View file

data/__pycache__/tps_transformation.cpython-310.pyc ADDED Viewed

Binary file (1.1 kB). View file

data/data_loader.py ADDED Viewed

	@@ -0,0 +1,97 @@

+from io import BytesIO
+import numpy as np
+import lmdb
+from PIL import Image
+from skimage import color
+import torch
+from torch.utils.data import Dataset
+from data.tps_transformation import tps_transform
+def RGB2Lab(inputs):
+    """Convert an RGB image array to CIE Lab via ``skimage.color.rgb2lab``."""
+    return color.rgb2lab(inputs)
+def Normalize(inputs):
+    """Shift the L channel of a Lab image by -50 and return it as float32.
+
+    Channel 0 of the last axis is treated as L and channels 1-2 as ab; the
+    ab channels are passed through unchanged.
+    """
+    # output l [-50,50] ab[-128,128]
+    l = inputs[:, :, 0:1]
+    ab = inputs[:, :, 1:3]
+    l = l - 50
+    # ab = ab
+    lab = np.concatenate((l, ab), 2)
+    return lab.astype('float32')
+def selfnormalize(inputs):
+    """Divide ``inputs`` by its value range (global max minus global min).
+
+    Only the scale changes; the minimum is not subtracted.
+    """
+    d = torch.max(inputs) - torch.min(inputs)
+    # NOTE(review): d is 0 for a constant tensor and is not guarded here.
+    out = (inputs) / d
+    return out
+def to_gray(inputs):
+    """Build a 3-channel uint8 gray image from channel 0 of ``inputs``.
+
+    Channel 0 is repeated three times along the last axis, mapped with
+    ``(x + 50) / 100 * 255`` and clipped to [0, 255] before the uint8 cast.
+    """
+    img_gray = np.clip((np.concatenate((inputs[:,:,:1], inputs[:,:,:1], inputs[:,:,:1]), 2)+50)/100*255, 0, 255).astype('uint8')
+    return img_gray
+def numpy2tensor(inputs):
+    """Move the last axis of a 3-D array to the front and wrap it as a tensor."""
+    out = torch.from_numpy(inputs.transpose(2,0,1))
+    return out
+class MultiResolutionDataset(Dataset):
+    """Dataset that reads encoded images from an LMDB store.
+
+    Each item yields the source image, a perturbed reference image and the
+    normalized Lab version of the source, each converted by ``numpy2tensor``.
+    """
+    def __init__(self, path, transform, resolution=256):
+        """Open the LMDB environment at ``path`` read-only and read its length.
+
+        ``transform`` is applied to the reference image in ``__getitem__``;
+        ``resolution`` is used as the prefix of the record keys.
+
+        Raises IOError when the opened environment is falsy.
+        """
+        self.env = lmdb.open(
+            path,
+            max_readers=32,
+            readonly=True,
+            lock=False,
+            readahead=False,
+            meminit=False,
+        )
+        if not self.env:
+            raise IOError('Cannot open lmdb dataset', path)
+        with self.env.begin(write=False) as txn:
+            # The record count is stored as a UTF-8 decimal string under the key 'length'.
+            self.length = int(txn.get('length'.encode('utf-8')).decode('utf-8'))
+        self.resolution = resolution
+        self.transform = transform
+    def __len__(self):
+        """Return the record count read from the store in ``__init__``."""
+        return self.length
+    def __getitem__(self, index):
+        """Return ``(img, img_ref, img_lab)`` tensors for record ``index``.
+
+        ``img`` is the decoded source image, ``img_ref`` a noisy,
+        ``tps_transform``-ed and ``self.transform``-ed copy of it, and
+        ``img_lab`` the source converted to Lab with L shifted by -50.
+        """
+        with self.env.begin(write=False) as txn:
+            # Keys have the form '<resolution>-<index zero-padded to 5 digits>'.
+            key = f'{self.resolution}-{str(index).zfill(5)}'.encode('utf-8')
+            img_bytes = txn.get(key)
+        buffer = BytesIO(img_bytes)
+        img = Image.open(buffer)
+        img_src = np.array(img) # [0,255] uint8
+        # ima_a = img_src
+        # ima_a = ima_a.astype('uint8')
+        # ima_a = Image.fromarray(ima_a)
+        # ima_a.show()
+        ## add uniform noise drawn from [-5, 5) (np.random.uniform, not gaussian)
+        noise = np.random.uniform(-5, 5, np.shape(img_src))
+        img_ref = np.clip(np.array(img_src) + noise, 0, 255)
+        img_ref = tps_transform(img_ref) # [0,255] uint8
+        img_ref = np.clip(img_ref, 0, 255)
+        img_ref = img_ref.astype('uint8')
+        img_ref = Image.fromarray(img_ref)
+        img_ref = np.array(self.transform(img_ref)) # [0,255] uint8
+        img_lab = Normalize(RGB2Lab(img_src)) # l [-50,50] ab [-128, 128]
+        img = img_src.astype('float32') # [0,255] float32 RGB
+        img_ref = img_ref.astype('float32') # [0,255] float32 RGB
+        img = numpy2tensor(img)
+        img_ref = numpy2tensor(img_ref) # no batch axis here; presumably [3, 256, 256] — TODO confirm
+        img_lab = numpy2tensor(img_lab)
+        return img, img_ref, img_lab

data/data_loader_sketch.py ADDED Viewed

	@@ -0,0 +1,120 @@

+from io import BytesIO
+import numpy as np
+import lmdb
+from PIL import Image
+from skimage import color
+import torch
+from torch.utils.data import Dataset
+from data.tps_transformation import tps_transform
+def RGB2Lab(inputs):
+    return color.rgb2lab(inputs)
+def Normalize(inputs):
+    # output l [-50,50] ab[-128,128]
+    l = inputs[:, :, 0:1]
+    ab = inputs[:, :, 1:3]
+    l = l - 50
+    # ab = ab
+    lab = np.concatenate((l, ab), 2)
+    return lab.astype('float32')
+def selfnormalize(inputs):
+    d = torch.max(inputs) - torch.min(inputs)
+    out = (inputs) / d
+    return out
+def to_gray(inputs):
+    img_gray = np.clip((np.concatenate((inputs[:,:,:1], inputs[:,:,:1], inputs[:,:,:1]), 2)+50)/100*255, 0, 255).astype('uint8')
+    return img_gray
+def numpy2tensor(inputs):
+    out = torch.from_numpy(inputs.transpose(2,0,1))
+    return out
+class MultiResolutionDataset(Dataset):
+    def __init__(self, path, transform, resolution=256):
+        self.env = lmdb.open(
+            path,
+            max_readers=32,
+            readonly=True,
+            lock=False,
+            readahead=False,
+            meminit=False,
+        )
+        if not self.env:
+            raise IOError('Cannot open lmdb dataset', path)
+        with self.env.begin(write=False) as txn:
+            self.length = int(txn.get('length'.encode('utf-8')).decode('utf-8'))
+        self.resolution = resolution
+        self.transform = transform
+    def __len__(self):
+        return self.length
+    def __getitem__(self, index):
+        with self.env.begin(write=False) as txn:
+            key = f'{self.resolution}-{str(index).zfill(5)}'.encode('utf-8')
+            img_bytes = txn.get(key)
+        buffer = BytesIO(img_bytes)
+        img = Image.open(buffer)
+        img_src = np.array(img) # [0,255] uint8
+        # ima_a = img_src
+        # ima_a = ima_a.astype('uint8')
+        # ima_a = Image.fromarray(ima_a)
+        # ima_a.show()
+        # get the left color image
+        img_ref = img_src[:, :256]
+        ## add gaussian noise
+        noise = np.random.uniform(-5, 5, np.shape(img_ref))
+        img_ref = np.clip(np.array(img_ref) + noise, 0, 255)
+        img_ref = tps_transform(img_ref) # [0,255] uint8
+        img_ref = np.clip(img_ref, 0, 255)
+        img_ref = img_ref.astype('uint8')
+        img_ref = Image.fromarray(img_ref)
+        img_ref = np.array(self.transform(img_ref)) # [0,255] uint8
+        img_lab = img_src[:, :256]
+        img_lab = Normalize(RGB2Lab(img_lab)) # l [-50,50] ab [-128, 128]
+        img_lab_sketch = img_src[:, 256:]
+        img_lab_sketch = Normalize(RGB2Lab(img_lab_sketch)) # l [-50,50] ab [-128, 128]
+        img = img_src[:, :256].astype('float32') # [0,255] float32 RGB
+        img_ref = img_ref.astype('float32') # [0,255] float32 RGB
+        # ima_a = img
+        # ima_a = ima_a.astype('uint8')
+        # ima_a = Image.fromarray(ima_a)
+        # ima_a.show()
+        #
+        # ima_a = img_ref
+        # ima_a = ima_a.astype('uint8')
+        # ima_a = Image.fromarray(ima_a)
+        # ima_a.show()
+        #
+        # ima_a = img_lab
+        # ima_a = ima_a.astype('uint8')
+        # ima_a = Image.fromarray(ima_a)
+        # ima_a.show()
+        img = numpy2tensor(img)
+        img_ref = numpy2tensor(img_ref) # [B, 3, 256, 256]
+        img_lab = numpy2tensor(img_lab)
+        img_lab_sketch = numpy2tensor(img_lab_sketch)
+        return img, img_ref, img_lab, img_lab_sketch

data/prepare_data.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import argparse
+from io import BytesIO
+import multiprocessing
+from functools import partial
+from PIL import Image
+import lmdb
+from tqdm import tqdm
+from torchvision import datasets
+from torchvision.transforms import functional as trans_fn
+def resize_and_convert(img, size, resample, quality=100):
+    img = trans_fn.resize(img, size, resample)
+    img = trans_fn.center_crop(img, size)
+    buffer = BytesIO()
+    img.save(buffer, format='jpeg', quality=quality)
+    val = buffer.getvalue()
+    return val
+def resize_multiple(img, sizes=(128, 256, 512, 1024), resample=Image.LANCZOS, quality=100):
+    imgs = []
+    for size in sizes:
+        imgs.append(resize_and_convert(img, size, resample, quality))
+    return imgs
+def resize_worker(img_file, sizes, resample):
+    i, file = img_file
+    img = Image.open(file)
+    img = img.convert('RGB')
+    out = resize_multiple(img, sizes=sizes, resample=resample)
+    return i, out
+def prepare(env, dataset, n_worker, sizes=(128, 256, 512, 1024), resample=Image.LANCZOS):
+    resize_fn = partial(resize_worker, sizes=sizes, resample=resample)
+    files = sorted(dataset.imgs, key=lambda x: x[0])
+    # print(files)
+    # eixt()
+    files = [(i, file) for i, (file, label) in enumerate(files)]
+    total = 0
+    with multiprocessing.Pool(n_worker) as pool:
+        for i, imgs in tqdm(pool.imap_unordered(resize_fn, files)):
+            for size, img in zip(sizes, imgs):
+                key = f'{size}-{str(i).zfill(5)}'.encode('utf-8')
+                with env.begin(write=True) as txn:
+                    txn.put(key, img)
+            total += 1
+        with env.begin(write=True) as txn:
+            txn.put('length'.encode('utf-8'), str(total).encode('utf-8'))
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--out', type=str)
+    parser.add_argument('--size', type=str, default='128,256,512,1024')
+    parser.add_argument('--n_worker', type=int, default=8)
+    parser.add_argument('--resample', type=str, default='lanczos')
+    parser.add_argument('path', type=str)
+    args = parser.parse_args()
+    resample_map = {'lanczos': Image.LANCZOS, 'bilinear': Image.BILINEAR}
+    resample = resample_map[args.resample]
+    sizes = [int(s.strip()) for s in args.size.split(',')]
+    print(f'Make dataset of image sizes:', ', '.join(str(s) for s in sizes))
+    imgset = datasets.ImageFolder(args.path)
+    with lmdb.open(args.out, map_size=6 * 1024 * 1024 * 1024, readahead=False) as env:
+        prepare(env, imgset, args.n_worker, sizes=sizes, resample=resample)

data/prepare_data_sketch.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import argparse
+from io import BytesIO
+import multiprocessing
+from functools import partial
+from PIL import Image
+import lmdb
+from tqdm import tqdm
+from torchvision import datasets
+from torchvision.transforms import functional as trans_fn
+def resize_and_convert(img, size, resample, quality=100):
+    img = trans_fn.resize(img, size=[256, 512], interpolation=resample)
+    img = trans_fn.center_crop(img, output_size=[256, 512])
+    buffer = BytesIO()
+    img.save(buffer, format='jpeg', quality=quality)
+    val = buffer.getvalue()
+    return val
+def resize_multiple(img, sizes=(128, 256, 512, 1024), resample=Image.LANCZOS, quality=100):
+    imgs = []
+    for size in sizes:
+        imgs.append(resize_and_convert(img, size, resample, quality))
+    return imgs
+def resize_worker(img_file, sizes, resample):
+    i, file = img_file
+    img = Image.open(file)
+    img = img.convert('RGB')
+    out = resize_multiple(img, sizes=sizes, resample=resample)
+    return i, out
+def prepare(env, dataset, n_worker, sizes=(128, 256, 512, 1024), resample=Image.LANCZOS):
+    resize_fn = partial(resize_worker, sizes=sizes, resample=resample)
+    files = sorted(dataset.imgs, key=lambda x: x[0])
+    # print(files)
+    # eixt()
+    files = [(i, file) for i, (file, label) in enumerate(files)]
+    total = 0
+    with multiprocessing.Pool(n_worker) as pool:
+        for i, imgs in tqdm(pool.imap_unordered(resize_fn, files)):
+            for size, img in zip(sizes, imgs):
+                key = f'{size}-{str(i).zfill(5)}'.encode('utf-8')
+                with env.begin(write=True) as txn:
+                    txn.put(key, img)
+            total += 1
+        with env.begin(write=True) as txn:
+            txn.put('length'.encode('utf-8'), str(total).encode('utf-8'))
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--out', type=str)
+    parser.add_argument('--size', type=str, default='128,256,512,1024')
+    parser.add_argument('--n_worker', type=int, default=8)
+    parser.add_argument('--resample', type=str, default='lanczos')
+    parser.add_argument('path', type=str)
+    args = parser.parse_args()
+    resample_map = {'lanczos': Image.LANCZOS, 'bilinear': Image.BILINEAR}
+    resample = resample_map[args.resample]
+    sizes = [int(s.strip()) for s in args.size.split(',')]
+    print(f'Make dataset of image sizes:', ', '.join(str(s) for s in sizes))
+    imgset = datasets.ImageFolder(args.path)
+    with lmdb.open(args.out, map_size=6 * 1024 * 1024 * 1024, readahead=False) as env:
+        prepare(env, imgset, args.n_worker, sizes=sizes, resample=resample)

data/thinplate/__init__.py ADDED Viewed

	@@ -0,0 +1,9 @@

+# Re-export the NumPy implementation at package level.
+from data.thinplate.numpy import *
+try:
+    # The first import only probes that PyTorch is installed; the second then
+    # rebinds the name ``torch`` in this namespace to the package's own
+    # ``pytorch`` submodule.
+    import torch
+    import data.thinplate.pytorch as torch
+except ImportError:
+    # Import failed: only the NumPy API is exposed.
+    pass
+__version__ = '1.0.0'

data/thinplate/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (432 Bytes). View file

data/thinplate/__pycache__/numpy.cpython-310.pyc ADDED Viewed

Binary file (3.59 kB). View file

data/thinplate/__pycache__/pytorch.cpython-310.pyc ADDED Viewed

Binary file (4 kB). View file

data/thinplate/numpy.py ADDED Viewed

	@@ -0,0 +1,115 @@

+# Copyright 2018 Christoph Heindl.
+#
+# Licensed under MIT License
+# ============================================================
+import numpy as np
+class TPS:
+    @staticmethod
+    def fit(c, lambd=0., reduced=False):
+        n = c.shape[0]
+        U = TPS.u(TPS.d(c, c))
+        K = U + np.eye(n, dtype=np.float32)*lambd
+        P = np.ones((n, 3), dtype=np.float32)
+        P[:, 1:] = c[:, :2]
+        v = np.zeros(n+3, dtype=np.float32)
+        v[:n] = c[:, -1]
+        A = np.zeros((n+3, n+3), dtype=np.float32)
+        A[:n, :n] = K
+        A[:n, -3:] = P
+        A[-3:, :n] = P.T
+        theta = np.linalg.solve(A, v) # p has structure w,a
+        return theta[1:] if reduced else theta
+    @staticmethod
+    def d(a, b):
+        return np.sqrt(np.square(a[:, None, :2] - b[None, :, :2]).sum(-1))
+    @staticmethod
+    def u(r):
+        return r**2 * np.log(r + 1e-6)
+    @staticmethod
+    def z(x, c, theta):
+        x = np.atleast_2d(x)
+        U = TPS.u(TPS.d(x, c))
+        w, a = theta[:-3], theta[-3:]
+        reduced = theta.shape[0] == c.shape[0] + 2
+        if reduced:
+            w = np.concatenate((-np.sum(w, keepdims=True), w))
+        b = np.dot(U, w)
+        return a[0] + a[1]*x[:, 0] + a[2]*x[:, 1] + b
+def uniform_grid(shape):
+    '''Uniform grid coordinates.
+    Params
+    ------
+    shape : tuple
+        HxW defining the number of height and width dimension of the grid
+    Returns
+    -------
+    points: HxWx2 tensor
+        Grid coordinates over [0,1] normalized image range.
+    '''
+    H,W = shape[:2]
+    c = np.empty((H, W, 2))
+    c[..., 0] = np.linspace(0, 1, W, dtype=np.float32)
+    c[..., 1] = np.expand_dims(np.linspace(0, 1, H, dtype=np.float32), -1)
+    return c
+def tps_theta_from_points(c_src, c_dst, reduced=False):
+    delta = c_src - c_dst
+    cx = np.column_stack((c_dst, delta[:, 0]))
+    cy = np.column_stack((c_dst, delta[:, 1]))
+    theta_dx = TPS.fit(cx, reduced=reduced)
+    theta_dy = TPS.fit(cy, reduced=reduced)
+    return np.stack((theta_dx, theta_dy), -1)
+def tps_grid(theta, c_dst, dshape):
+    ugrid = uniform_grid(dshape)
+    reduced = c_dst.shape[0] + 2 == theta.shape[0]
+    dx = TPS.z(ugrid.reshape((-1, 2)), c_dst, theta[:, 0]).reshape(dshape[:2])
+    dy = TPS.z(ugrid.reshape((-1, 2)), c_dst, theta[:, 1]).reshape(dshape[:2])
+    dgrid = np.stack((dx, dy), -1)
+    grid = dgrid + ugrid
+    return grid # H'xW'x2 grid[i,j] in range [0..1]
+def tps_grid_to_remap(grid, sshape):
+    '''Convert a dense grid to OpenCV's remap compatible maps.
+    Params
+    ------
+    grid : HxWx2 array
+        Normalized flow field coordinates as computed by compute_densegrid.
+    sshape : tuple
+        Height and width of source image in pixels.
+    Returns
+    -------
+    mapx : HxW array
+    mapy : HxW array
+    '''
+    mx = (grid[:, :, 0] * sshape[1]).astype(np.float32)
+    my = (grid[:, :, 1] * sshape[0]).astype(np.float32)
+    return mx, my

data/thinplate/pytorch.py ADDED Viewed

	@@ -0,0 +1,126 @@

+# Copyright 2018 Christoph Heindl.
+#
+# Licensed under MIT License
+# ============================================================
+import torch
+def tps(theta, ctrl, grid):
+    '''Evaluate the thin-plate-spline (TPS) surface at xy locations arranged in a grid.
+    The TPS surface is a minimum bend interpolation surface defined by a set of control points.
+    The function value for a x,y location is given by
+        TPS(x,y) := theta[-3] + theta[-2]*x + theta[-1]*y + \sum_t=0,T theta[t] U(x,y,ctrl[t])
+    This method computes the TPS value for multiple batches over multiple grid locations for 2
+    surfaces in one go.
+    Params
+    ------
+    theta: Nx(T+3)x2 tensor, or Nx(T+2)x2 tensor
+        Batch size N, T+3 or T+2 (reduced form) model parameters for T control points in dx and dy.
+    ctrl: NxTx2 tensor or Tx2 tensor
+        T control points in normalized image coordinates [0..1]
+    grid: NxHxWx3 tensor
+        Grid locations to evaluate with homogeneous 1 in first coordinate.
+    Returns
+    -------
+    z: NxHxWx2 tensor
+        Function values at each grid location in dx and dy.
+    '''
+    N, H, W, _ = grid.size()
+    if ctrl.dim() == 2:
+        ctrl = ctrl.expand(N, *ctrl.size())
+    T = ctrl.shape[1]
+    diff = grid[...,1:].unsqueeze(-2) - ctrl.unsqueeze(1).unsqueeze(1)
+    D = torch.sqrt((diff**2).sum(-1))
+    U = (D**2) * torch.log(D + 1e-6)
+    w, a = theta[:, :-3, :], theta[:, -3:, :]
+    reduced = T + 2  == theta.shape[1]
+    if reduced:
+        w = torch.cat((-w.sum(dim=1, keepdim=True), w), dim=1)
+    # U is NxHxWxT
+    b = torch.bmm(U.view(N, -1, T), w).view(N,H,W,2)
+    # b is NxHxWx2
+    z = torch.bmm(grid.view(N,-1,3), a).view(N,H,W,2) + b
+    return z
+def tps_grid(theta, ctrl, size):
+    '''Compute a thin-plate-spline grid from parameters for sampling.
+    Params
+    ------
+    theta: Nx(T+3)x2 tensor
+        Batch size N, T+3 model parameters for T control points in dx and dy.
+    ctrl: NxTx2 tensor, or Tx2 tensor
+        T control points in normalized image coordinates [0..1]
+    size: tuple
+        Output grid size as NxCxHxW. C unused. This defines the output image
+        size when sampling.
+    Returns
+    -------
+    grid : NxHxWx2 tensor
+        Grid suitable for sampling in pytorch containing source image
+        locations for each output pixel.
+    '''
+    N, _, H, W = size
+    grid = theta.new(N, H, W, 3)
+    grid[:, :, :, 0] = 1.
+    grid[:, :, :, 1] = torch.linspace(0, 1, W)
+    grid[:, :, :, 2] = torch.linspace(0, 1, H).unsqueeze(-1)
+    z = tps(theta, ctrl, grid)
+    return (grid[...,1:] + z)*2-1 # [-1,1] range required by F.sample_grid
+def tps_sparse(theta, ctrl, xy):
+    if xy.dim() == 2:
+        xy = xy.expand(theta.shape[0], *xy.size())
+    N, M = xy.shape[:2]
+    grid = xy.new(N, M, 3)
+    grid[..., 0] = 1.
+    grid[..., 1:] = xy
+    z = tps(theta, ctrl, grid.view(N,M,1,3))
+    return xy + z.view(N, M, 2)
+def uniform_grid(shape):
+    '''Uniformly places control points aranged in grid accross normalized image coordinates.
+    Params
+    ------
+    shape : tuple
+        HxW defining the number of control points in height and width dimension
+    Returns
+    -------
+    points: HxWx2 tensor
+        Control points over [0,1] normalized image range.
+    '''
+    H,W = shape[:2]
+    c = torch.zeros(H, W, 2)
+    c[..., 0] = torch.linspace(0, 1, W)
+    c[..., 1] = torch.linspace(0, 1, H).unsqueeze(-1)
+    return c
+if __name__ == '__main__':
+    # Smoke check: four unit-square control points, all-zero parameters,
+    # printing the shape of the resulting sampling grid.
+    c = torch.tensor([
+        [0., 0],
+        [1., 0],
+        [1., 1],
+        [0, 1],
+    ]).unsqueeze(0)
+    # 4 control points + 3 affine terms, for dx and dy
+    theta = torch.zeros(1, 4+3, 2)
+    # size is NxCxHxW
+    size= (1,1,6,3)
+    print(tps_grid(theta, c, size).shape)

data/thinplate/tests/__init__.py ADDED Viewed

File without changes

data/thinplate/tests/test_tps_numpy.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import numpy as np
+from numpy.testing import assert_allclose
+import thinplate as tps
+def test_numpy_fit():
+    c = np.array([
+        [0., 0, 0.0],
+        [1., 0, 0.0],
+        [1., 1, 0.0],
+        [0, 1, 0.0],
+    ])
+    theta = tps.TPS.fit(c)
+    assert_allclose(theta, 0)
+    assert_allclose(tps.TPS.z(c, c, theta), c[:, 2])
+    c = np.array([
+        [0., 0, 1.0],
+        [1., 0, 1.0],
+        [1., 1, 1.0],
+        [0, 1, 1.0],
+    ])
+    theta = tps.TPS.fit(c)
+    assert_allclose(theta[:-3], 0)
+    assert_allclose(theta[-3:], [1, 0, 0])
+    assert_allclose(tps.TPS.z(c, c, theta), c[:, 2], atol=1e-3)
+    # reduced form
+    theta = tps.TPS.fit(c, reduced=True)
+    assert len(theta) == c.shape[0] + 2
+    assert_allclose(theta[:-3], 0)
+    assert_allclose(theta[-3:], [1, 0, 0])
+    assert_allclose(tps.TPS.z(c, c, theta), c[:, 2], atol=1e-3)
+    c = np.array([
+        [0., 0, -.5],
+        [1., 0, 0.5],
+        [1., 1, 0.2],
+        [0, 1, 0.8],
+    ])
+    theta = tps.TPS.fit(c)
+    assert_allclose(tps.TPS.z(c, c, theta), c[:, 2], atol=1e-3)
+def test_numpy_densegrid():
+    # enlarges a small rectangle to full view
+    import cv2
+    img = np.zeros((40, 40), dtype=np.uint8)
+    img[10:21, 10:21] = 255
+    c_dst = np.array([
+        [0., 0],
+        [1., 0],
+        [1, 1],
+        [0, 1],
+    ])
+    c_src = np.array([
+        [10., 10],
+        [20., 10],
+        [20, 20],
+        [10, 20],
+    ]) / 40.
+    theta = tps.tps_theta_from_points(c_src, c_dst)
+    theta_r = tps.tps_theta_from_points(c_src, c_dst, reduced=True)
+    grid = tps.tps_grid(theta, c_dst, (20,20))
+    grid_r = tps.tps_grid(theta_r, c_dst, (20,20))
+    mapx, mapy = tps.tps_grid_to_remap(grid, img.shape)
+    warped = cv2.remap(img, mapx, mapy, cv2.INTER_CUBIC)
+    assert img.min() == 0.
+    assert img.max() == 255.
+    assert warped.shape == (20,20)
+    assert warped.min() == 255.
+    assert warped.max() == 255.
+    assert np.linalg.norm(grid.reshape(-1,2) - grid_r.reshape(-1,2)) < 1e-3

data/thinplate/tests/test_tps_pytorch.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import torch
+import torch.optim as optim
+import torch.nn.functional as F
+import numpy as np
+import thinplate as tps
+from numpy.testing import assert_allclose
+def test_pytorch_grid():
+    c_dst = np.array([
+        [0., 0],
+        [1., 0],
+        [1, 1],
+        [0, 1],
+    ], dtype=np.float32)
+    c_src = np.array([
+        [10., 10],
+        [20., 10],
+        [20, 20],
+        [10, 20],
+    ], dtype=np.float32) / 40.
+    theta = tps.tps_theta_from_points(c_src, c_dst)
+    theta_r = tps.tps_theta_from_points(c_src, c_dst, reduced=True)
+    np_grid = tps.tps_grid(theta, c_dst, (20,20))
+    np_grid_r = tps.tps_grid(theta_r, c_dst, (20,20))
+    pth_theta = torch.tensor(theta).unsqueeze(0)
+    pth_grid = tps.torch.tps_grid(pth_theta, torch.tensor(c_dst), (1, 1, 20, 20)).squeeze().numpy()
+    pth_grid = (pth_grid + 1) / 2 # convert [-1,1] range to [0,1]
+    pth_theta_r = torch.tensor(theta_r).unsqueeze(0)
+    pth_grid_r = tps.torch.tps_grid(pth_theta_r, torch.tensor(c_dst), (1, 1, 20, 20)).squeeze().numpy()
+    pth_grid_r = (pth_grid_r + 1) / 2 # convert [-1,1] range to [0,1]
+    assert_allclose(np_grid, pth_grid)
+    assert_allclose(np_grid_r, pth_grid_r)
+    assert_allclose(np_grid_r, np_grid)

data/tps_transformation.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import numpy as np
+import data.thinplate as tps
+import cv2
+import random
+import math
+# Reference : https://github.com/cheind/py-thin-plate-spline
+def tps_transform(img, dshape=None):
+    while True:
+        point1 = round(random.uniform(0.3, 0.7), 2)
+        point2 = round(random.uniform(0.3, 0.7), 2)
+        range_1 = round(random.uniform(-0.25, 0.25), 2)
+        range_2 = round(random.uniform(-0.25, 0.25), 2)
+        if math.isclose(point1 + range_1, point2 + range_2):
+            continue
+        else:
+            break
+    c_src = np.array([
+        [0.0, 0.0],
+        [1., 0],
+        [1, 1],
+        [0, 1],
+        [point1, point1],
+        [point2, point2],
+    ])
+    c_dst = np.array([
+        [0., 0],
+        [1., 0],
+        [1, 1],
+        [0, 1],
+        [point1 + range_1, point1 + range_1],
+        [point2 + range_2, point2 + range_2],
+    ])
+    dshape = dshape or img.shape
+    theta = tps.tps_theta_from_points(c_src, c_dst, reduced=True)
+    grid = tps.tps_grid(theta, c_dst, dshape)
+    mapx, mapy = tps.tps_grid_to_remap(grid, img.shape)
+    return cv2.remap(img, mapx, mapy, cv2.INTER_CUBIC)

discriminator.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import torch.nn as nn
+import torch.nn.functional as F
+import torch
+class Discriminator(nn.Module):
+    def __init__(self, in_channels=3):
+        super(Discriminator, self).__init__()
+        def discriminator_block(in_filters, out_filters, normalization=True):
+            """Returns downsampling layers of each discriminator block"""
+            layers = [nn.Conv2d(in_filters, out_filters, 4, stride=2, padding=1)]
+            if normalization:
+                layers.append(nn.InstanceNorm2d(out_filters))
+            layers.append(nn.LeakyReLU(0.2, inplace=True))
+            return layers
+        self.model = nn.Sequential(
+            *discriminator_block(in_channels * 3, 64, normalization=False),
+            *discriminator_block(64, 128),
+            *discriminator_block(128, 256),
+            *discriminator_block(256, 512),
+            nn.ZeroPad2d((1, 0, 1, 0)),
+            nn.Conv2d(512, 1, 4, padding=1, bias=False)
+        )
+    def forward(self, img_out, img_l, img_ref ):
+        # Concatenate image and condition image by channels to produce input
+        img_input = torch.cat((img_out, img_l, img_ref), 1)
+        return self.model(img_input)

distributed.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import math
+import pickle
+import torch
+from torch import distributed as dist
+from torch.utils.data.sampler import Sampler
+def get_rank():
+    if not dist.is_available():
+        return 0
+    if not dist.is_initialized():
+        return 0
+    return dist.get_rank()
+def synchronize():
+    if not dist.is_available():
+        return
+    if not dist.is_initialized():
+        return
+    world_size = dist.get_world_size()
+    if world_size == 1:
+        return
+    dist.barrier()
+def get_world_size():
+    if not dist.is_available():
+        return 1
+    if not dist.is_initialized():
+        return 1
+    return dist.get_world_size()
+def reduce_sum(tensor):
+    if not dist.is_available():
+        return tensor
+    if not dist.is_initialized():
+        return tensor
+    tensor = tensor.clone()
+    dist.all_reduce(tensor, op=dist.ReduceOp.SUM)
+    return tensor
+def gather_grad(params):
+    world_size = get_world_size()
+    if world_size == 1:
+        return
+    for param in params:
+        if param.grad is not None:
+            dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
+            param.grad.data.div_(world_size)
+def all_gather(data):
+    world_size = get_world_size()
+    if world_size == 1:
+        return [data]
+    buffer = pickle.dumps(data)
+    storage = torch.ByteStorage.from_buffer(buffer)
+    tensor = torch.ByteTensor(storage).to('cuda')
+    local_size = torch.IntTensor([tensor.numel()]).to('cuda')
+    size_list = [torch.IntTensor([0]).to('cuda') for _ in range(world_size)]
+    dist.all_gather(size_list, local_size)
+    size_list = [int(size.item()) for size in size_list]
+    max_size = max(size_list)
+    tensor_list = []
+    for _ in size_list:
+        tensor_list.append(torch.ByteTensor(size=(max_size,)).to('cuda'))
+    if local_size != max_size:
+        padding = torch.ByteTensor(size=(max_size - local_size,)).to('cuda')
+        tensor = torch.cat((tensor, padding), 0)
+    dist.all_gather(tensor_list, tensor)
+    data_list = []
+    for size, tensor in zip(size_list, tensor_list):
+        buffer = tensor.cpu().numpy().tobytes()[:size]
+        data_list.append(pickle.loads(buffer))
+    return data_list
+def reduce_loss_dict(loss_dict):
+    world_size = get_world_size()
+    if world_size < 2:
+        return loss_dict
+    with torch.no_grad():
+        keys = []
+        losses = []
+        for k in sorted(loss_dict.keys()):
+            keys.append(k)
+            losses.append(loss_dict[k])
+        losses = torch.stack(losses, 0)
+        dist.reduce(losses, dst=0)
+        if dist.get_rank() == 0:
+            losses /= world_size
+        reduced_losses = {k: v for k, v in zip(keys, losses)}
+    return reduced_losses

experiments/Color2Manga_gray/074000_gray.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e2f4785d00d4463ecb5f02d79f79f9da747a57179b5b016408e65da0e4f62572
+size 1091510163

experiments/Color2Manga_sketch/116000_sketch.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52505452ec908ffd1ae4499a205a55263a8cd7d7bdf4623b59edccf8e8636d33
+size 1091510163

experiments/Discriminator/074000_d.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e622a55cb8bae33377e85963eaa496d7e9bd9e1f4449b853d41235729cc7d40f
+size 33261919

experiments/Discriminator/116000_d.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:169ae73ef7c788ec3921c918d0a9ebdecc4115492b177dfd98660b7816d6ce5a
+size 33261983

experiments/VGG19/vgg19-dcbb9e9d.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dcbb9e9dad569fff7a846263a77324fc34978fea2bfb039c012d710e1776ae44
+size 574673361

extractor/Open-Sans-Bold.ttf ADDED Viewed

Binary file (225 kB). View file

extractor/__pycache__/manga_panel_extractor.cpython-310.pyc ADDED Viewed

Binary file (5.89 kB). View file

extractor/__pycache__/manga_panel_extractor.cpython-38.pyc ADDED Viewed

Binary file (5.89 kB). View file

extractor/manga_panel_extractor.py ADDED Viewed

	@@ -0,0 +1,174 @@

+# stdlib
+import argparse
+from argparse import RawTextHelpFormatter
+import os
+from os.path import splitext, basename, exists, join
+from os import makedirs
+# 3p
+from tqdm import tqdm
+import numpy as np
+from skimage import measure
+from PIL import Image
+from PIL import ImageFont
+from PIL import ImageDraw
+import cv2
+# project
+from utils import get_files, load_image
+from skimage import io
+class PanelExtractor:
+    def __init__(self, min_pct_panel=2, max_pct_panel=90, paper_th=0.35):
+        assert min_pct_panel < max_pct_panel, "Minimum percentage must be smaller than maximum percentage"
+        self.min_panel = min_pct_panel / 100
+        self.max_panel = max_pct_panel / 100
+        self.paper_th = paper_th
+    def _generate_panel_blocks(self, img):
+        img = img if len(img.shape) == 2 else img[:, :, 0]
+        blur = cv2.GaussianBlur(img, (5, 5), 0)
+        thresh = cv2.threshold(blur, 230, 255, cv2.THRESH_BINARY)[1]
+        cv2.rectangle(thresh, (0, 0), tuple(img.shape[::-1]), (0, 0, 0), 10)
+        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, 4, cv2.CV_32S)
+        ind = np.argsort(stats[:, 4], )[::-1][1]
+        panel_block_mask = ((labels == ind) * 255).astype("uint8")
+        # Image.fromarray(panel_block_mask).show()
+        return panel_block_mask
+    def generate_panels(self, img):
+        block_mask = self._generate_panel_blocks(img)
+        cv2.rectangle(block_mask, (0, 0), tuple(block_mask.shape[::-1]), (255, 255, 255), 10)
+        # Image.fromarray(block_mask).show()
+        # detect contours
+        contours, hierarchy = cv2.findContours(block_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+        panels = []
+        masks = []
+        panel_masks = []
+        # print(len(contours))
+        for i in range(len(contours)):
+            area = cv2.contourArea(contours[i])
+            img_area = img.shape[0] * img.shape[1]
+            # if the contour is very small or very big, it's likely wrongly detected
+            if area < (self.min_panel * img_area) or area > (self.max_panel * img_area):
+                continue
+            x, y, w, h = cv2.boundingRect(contours[i])
+            masks.append(cv2.boundingRect(contours[i]))
+            # create panel mask
+            panel_mask = np.ones_like(block_mask, "int32")
+            cv2.fillPoly(panel_mask, [contours[i].astype("int32")], color=(0, 0, 0))
+            # Image.fromarray(panel_mask).show()
+            panel_mask = panel_mask[y:y + h, x:x + w].copy()
+            # Image.fromarray(panel_mask).show()
+            # apply panel mask
+            panel = img[y:y + h, x:x + w].copy()
+            # Image.fromarray(panel).show()
+            panel[panel_mask == 1] = 255
+            # Image.fromarray(panel).show()
+            panels.append(panel)
+            panel_masks.append(panel_mask)
+        return panels, masks, panel_masks
+    def extract(self, folder):
+        """Split one manga page into panels and preview their numbering.
+
+        Despite its name, ``folder`` is the path of a single page image. Each
+        panel is written to a ``panels`` directory next to that image, and the
+        page is shown with a number drawn at the centre of every panel box so
+        the caller can supply reference images in the same order.
+
+        Returns ``(panels, masks, panel_masks)`` as produced by
+        ``generate_panels``. If the page is rejected by the paper-texture
+        check, the loaded page itself is returned as the only panel:
+        ``(imgs, [], [])``.
+        """
+        print("Loading images ... ", end="")
+        image_list = [folder]
+        imgs = [load_image(x) for x in image_list]
+        print("Done!")
+        # create panels dir next to the input image
+        folder = join(os.path.dirname(folder), "panels")
+        if not exists(folder):
+            makedirs(folder)
+        # remove images with paper texture, not well segmented; keep each image
+        # paired with its own path so the names stay correct after filtering
+        paperless_imgs = []
+        for idx, img in enumerate(tqdm(imgs, desc="Removing images with paper texture")):
+            hist, _ = np.histogram(img.copy().ravel(), 256, [0, 256])
+            if np.sum(hist[50:200]) / np.sum(hist) < self.paper_th:
+                paperless_imgs.append((image_list[idx], img))
+        if not paperless_imgs:
+            return imgs, [], []
+        def flatten(items):
+            # yield the leaves of arbitrarily nested lists (boxes must not be lists)
+            for el in items:
+                if isinstance(el, list):
+                    yield from flatten(el)
+                else:
+                    yield el
+        for img_path, img in tqdm(paperless_imgs, desc="extracting panels"):
+            panels, masks, panel_masks = self.generate_panels(img)
+            name, ext = splitext(basename(img_path))
+            for j, panel in enumerate(panels):
+                # `ext` already starts with a dot, so do not add another one
+                cv2.imwrite(join(folder, f'{name}_{j}{ext}'), panel)
+            # show the order of colorized panels
+            preview = Image.fromarray(img)
+            draw = ImageDraw.Draw(preview)
+            font = ImageFont.truetype('extractor/Open-Sans-Bold.ttf', 160)
+            for number, bbox in enumerate(flatten(masks), start=1):
+                label = str(number)
+                if hasattr(draw, "textbbox"):
+                    # ImageDraw.textsize was removed in Pillow 10; the right and
+                    # bottom edges of the box at the origin match its old result
+                    _, _, text_w, text_h = draw.textbbox((0, 0), label, font=font)
+                else:
+                    text_w, text_h = draw.textsize(label, font=font)
+                text_y = bbox[1] + bbox[3] / 2 - text_h / 2
+                text_x = bbox[0] + bbox[2] / 2 - text_w / 2
+                draw.text((text_x, text_y), label, (255, 215, 0), font=font)
+            preview.show()
+            # only one page is ever loaded, so return after handling it
+            return panels, masks, panel_masks
+    def concatPanels(self, img_file, fake_imgs, masks, panel_masks):
+        """Paste colorized panels back into the page, then show and save it.
+
+        ``img_file`` is the path of the original page. ``fake_imgs[i]`` is the
+        colorized panel for the box ``masks[i]`` (unpacked as ``x, y, w, h``),
+        and only the pixels where ``panel_masks[i] == 0`` are copied. The
+        result is shown and written to ``color/<name>_color.png`` next to
+        ``img_file``.
+        """
+        img = io.imread(img_file)
+        if len(fake_imgs) > 0:
+            # Colorized panels have three channels. Without this, a
+            # single-channel or RGBA page makes the masked assignment below
+            # fail with a shape mismatch.
+            if img.ndim == 2:
+                img = np.stack([img] * 3, axis=-1)
+            elif img.ndim == 3 and img.shape[2] == 4:
+                img = np.ascontiguousarray(img[:, :, :3])
+        for i, fake_img in enumerate(fake_imgs):
+            x, y, w, h = masks[i]
+            inside_panel = panel_masks[i] == 0
+            img[y:y + h, x:x + w][inside_panel] = fake_img[inside_panel]
+        out_name = os.path.splitext(os.path.basename(img_file))[0]
+        folder_path = os.path.join(os.path.dirname(img_file), 'color')
+        out_img_path = os.path.join(folder_path, f'{out_name}_color.png')
+        # show image
+        Image.fromarray(img).show()
+        # save image; exist_ok avoids the check-then-create race
+        os.makedirs(folder_path, exist_ok=True)
+        io.imsave(out_img_path, img)
+def main(args):
+    """Extract the panels of ``args.folder`` and re-save the untouched page.
+
+    No colorized panels are passed to ``concatPanels``, so the page is only
+    shown and written to its ``color`` directory.
+    """
+    extractor = PanelExtractor(min_pct_panel=args.min_panel, max_pct_panel=args.max_panel)
+    _, masks, panel_masks = extractor.extract(args.folder)
+    extractor.concatPanels(args.folder, [], masks, panel_masks)
+# Command-line entry point: parse the panel-size limits and the page path.
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Implementation of a Manga Panel Extractor and dialogue bubble text eraser.",
+        formatter_class=RawTextHelpFormatter
+    )
+    # range(1, 100) matches the advertised [1-99]; range(1, 99) rejected 99
+    parser.add_argument("-minp", "--min_panel", type=int, choices=range(1, 100), default=5, metavar="[1-99]",
+                        help="Percentage of minimum panel area in relation to total page area.")
+    parser.add_argument("-maxp", "--max_panel", type=int, choices=range(1, 100), default=90, metavar="[1-99]",
+                        help="Percentage of maximum panel area in relation to total page area.")
+    # the value is used as the path of one page image, not as a directory
+    parser.add_argument("-f", '--folder', default='./images/002.png', type=str,
+                        help="""path to the input manga page image.
+Panels will be saved to a directory named `panels` next to this image.""")
+    args = parser.parse_args()
+    main(args)

inference.py ADDED Viewed

	@@ -0,0 +1,229 @@

+import os
+import numpy as np
+from skimage import color, io
+import torch
+import torch.nn.functional as F
+from PIL import Image
+from models import ColorEncoder, ColorUNet
+from extractor.manga_panel_extractor import PanelExtractor
+import argparse
+# Expose only CUDA device 0 to this process.
+os.environ["CUDA_VISIBLE_DEVICES"] = '0'
+def mkdirs(path):
+    """Create ``path`` and any missing parent directories unless it already exists."""
+    if os.path.exists(path):
+        return
+    os.makedirs(path)
+def Lab2RGB_out(img_lab):
+    """Convert the first image of a Lab batch tensor to a uint8 RGB array.
+
+    Channel 0 holds L shifted by -50 (see ``Normalize``), so 50 is added back
+    before the conversion; the remaining channels are used as ab. Only batch
+    element 0 is converted.
+    """
+    lab = img_lab.detach().cpu()
+    lightness = lab[:, :1, :, :] + 50
+    chroma = lab[:, 1:, :, :]
+    first_lab = torch.cat((lightness, chroma), 1)[0, ...].numpy()
+    # lab2rgb expects channels last
+    rgb = color.lab2rgb(first_lab.transpose(1, 2, 0))
+    return (np.clip(rgb, 0, 1) * 255).astype("uint8")
+def RGB2Lab(inputs):
+    """Convert an RGB image to Lab with ``skimage.color.rgb2lab``."""
+    lab = color.rgb2lab(inputs)
+    return lab
+def Normalize(inputs):
+    """Shift channel 0 (L) of a channels-last Lab image by -50 and return float32.
+
+    Channels 1 and 2 (ab) are passed through unchanged.
+    """
+    shifted_l = inputs[:, :, 0:1] - 50
+    ab = inputs[:, :, 1:3]
+    return np.concatenate((shifted_l, ab), 2).astype('float32')
+def numpy2tensor(inputs):
+    """Turn a channels-last (H, W, C) array into a channels-first (C, H, W) tensor."""
+    channels_first = inputs.transpose(2, 0, 1)
+    return torch.from_numpy(channels_first)
+def tensor2numpy(inputs):
+    """Return batch element 0 of a (B, C, H, W) tensor as an (H, W, C) array."""
+    first = inputs[0, ...].detach().cpu()
+    return first.numpy().transpose(1, 2, 0)
+def preprocessing(inputs):
+    """Build the RGB and normalized-Lab batch tensors for one image.
+
+    ``inputs`` is an RGB image with values in [0, 255] (uint8). Returns
+    ``(rgb, lab)``, each with a leading batch dimension of 1: ``rgb`` keeps the
+    [0, 255] range as float32, ``lab`` is the output of ``Normalize``.
+    """
+    lab_tensor = numpy2tensor(Normalize(RGB2Lab(inputs)))
+    rgb_tensor = numpy2tensor(np.array(inputs, 'float32'))  # [0, 255]
+    return rgb_tensor.unsqueeze(0), lab_tensor.unsqueeze(0)
+def _colorize_with_reference(manga_img, ref_img, colorEncoder, colorUNet, device, imgsize):
+    """Colorize ``manga_img`` with the colors of ``ref_img``.
+
+    Both images are RGB PIL images. The manga L channel and the reference RGB
+    image are resized to ``imgsize`` x ``imgsize`` for the networks; the
+    predicted ab channels are resized back to the manga image's size and
+    merged with its full-resolution L channel. Returns the uint8 RGB array
+    produced by ``Lab2RGB_out``.
+    """
+    width, height = manga_img.size
+    _, manga_lab = preprocessing(manga_img)
+    manga_l = manga_lab[:, :1, :, :].to(device)
+    # only the RGB tensor of the reference is used, so skip its Lab conversion
+    ref_rgb = numpy2tensor(np.array(ref_img, 'float32')).unsqueeze(0).to(device)
+    with torch.no_grad():
+        ref_resized = F.interpolate(ref_rgb / 255., size=(imgsize, imgsize), mode='bilinear',
+                                    recompute_scale_factor=False, align_corners=False)
+        l_resized = F.interpolate(manga_l / 50., size=(imgsize, imgsize), mode='bilinear',
+                                  recompute_scale_factor=False, align_corners=False)
+        color_vector = colorEncoder(ref_resized)
+        fake_ab = colorUNet((l_resized, color_vector))
+        fake_ab = F.interpolate(fake_ab * 110, size=(height, width), mode='bilinear',
+                                recompute_scale_factor=False, align_corners=False)
+        fake_lab = torch.cat((manga_l, fake_ab), 1)
+    return Lab2RGB_out(fake_lab)
+def _save_colorized(img_path, fake_img):
+    """Show ``fake_img`` and save it as ``color/<name>_color.png`` next to ``img_path``."""
+    out_folder = os.path.join(os.path.dirname(img_path), 'color')
+    out_name = os.path.splitext(os.path.basename(img_path))[0]
+    out_img_path = os.path.join(out_folder, f'{out_name}_color.png')
+    # show image
+    Image.fromarray(fake_img).show()
+    # save image
+    mkdirs(out_folder)
+    io.imsave(out_img_path, fake_img)
+# Interactive entry point: load the models once, then colorize pages until interrupted.
+if __name__ == "__main__":
+    # fall back to the CPU when no CUDA device is available
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    ckpt_path = 'experiments/Color2Manga_gray/074000_gray.pt'
+    test_dir_path = 'test_datasets/gray_test'
+    no_extractor = False
+    imgsize = 256
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--path", type=str, default=None,
+                        help="directory containing the input and reference images")
+    parser.add_argument("--size", type=int, default=None)
+    parser.add_argument("--ckpt", type=str, default=None, help="path of model weight")
+    parser.add_argument("-ne", "--no_extractor", action='store_true',
+                        help="Do not segment the manga panels.")
+    args = parser.parse_args()
+    # --path selects the image directory and --ckpt the weights; the two
+    # assignments used to be swapped
+    if args.path:
+        test_dir_path = args.path
+    if args.size:
+        imgsize = args.size
+    if args.ckpt:
+        ckpt_path = args.ckpt
+    if args.no_extractor:
+        no_extractor = True
+    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints
+    ckpt = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
+    colorEncoder = ColorEncoder().to(device)
+    colorEncoder.load_state_dict(ckpt["colorEncoder"])
+    colorEncoder.eval()
+    colorUNet = ColorUNet().to(device)
+    colorUNet.load_state_dict(ckpt["colorUNet"])
+    colorUNet.eval()
+    try:
+        while True:
+            print(f'make sure both manga image and reference images are under this path: {test_dir_path}')
+            img_path = os.path.join(
+                test_dir_path,
+                input("please input the name of image needed to be colorized(with file extension): "))
+            if no_extractor:
+                # colorize the whole page with a single reference image
+                ref_img_path = os.path.join(test_dir_path, input("1/1 reference image:"))
+                img1 = Image.open(img_path).convert("RGB")
+                img2 = Image.open(ref_img_path).convert("RGB")
+                fake_img = _colorize_with_reference(img1, img2, colorEncoder, colorUNet, device, imgsize)
+                _save_colorized(img_path, fake_img)
+            else:
+                # extract panels from manga
+                panel_extractor = PanelExtractor(min_pct_panel=5, max_pct_panel=90)
+                panels, masks, panel_masks = panel_extractor.extract(img_path)
+                panel_num = len(panels)
+                print("Please enter the name of the reference image in order according to the number prompts on the picture")
+                ref_img_paths = []
+                for i in range(panel_num):
+                    ref_img_paths.append(
+                        os.path.join(test_dir_path, input(f"{i+1}/{panel_num} reference image:")))
+                fake_imgs = []
+                for panel, ref_img_path in zip(panels, ref_img_paths):
+                    img1 = Image.fromarray(panel).convert("RGB")
+                    img2 = Image.open(ref_img_path).convert("RGB")
+                    fake_imgs.append(
+                        _colorize_with_reference(img1, img2, colorEncoder, colorUNet, device, imgsize))
+                if panel_num == 1:
+                    _save_colorized(img_path, fake_imgs[0])
+                else:
+                    panel_extractor.concatPanels(img_path, fake_imgs, masks, panel_masks)
+            # report the directory the result was written to
+            out_dir = os.path.join(os.path.dirname(img_path), 'color')
+            print(f'colored image has been put to: {out_dir}')
+    except (EOFError, KeyboardInterrupt):
+        # Ctrl-D / Ctrl-C is the only way out of the prompt loop; exit quietly
+        print()

models.py ADDED Viewed

	@@ -0,0 +1,223 @@

+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from vgg_model import vgg19
+class DoubleConv(nn.Module):
+    """Two stacked 3x3 convolution -> BatchNorm -> LeakyReLU(0.1) stages.
+
+    ``mid_channels`` is the width between the two stages and falls back to
+    ``out_channels`` when it is not given.
+    """
+    def __init__(self, in_channels, out_channels, mid_channels=None):
+        super().__init__()
+        mid_channels = mid_channels or out_channels
+        stages = []
+        for c_in, c_out in ((in_channels, mid_channels), (mid_channels, out_channels)):
+            stages += [
+                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
+                nn.BatchNorm2d(c_out),
+                nn.LeakyReLU(0.1, True),
+            ]
+        self.double_conv = nn.Sequential(*stages)
+    def forward(self, x):
+        return self.double_conv(x)
+class ResBlock(nn.Module):
+    """1x1 channel projection followed by a residual 3x3 conv branch.
+
+    The branch is conv -> BatchNorm -> LeakyReLU(0.2) -> conv. Its output is
+    added to the projected input and the sum is divided by sqrt(2).
+    """
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+        self.bottle_conv = nn.Conv2d(in_channels, out_channels, 1, 1, 0)
+        branch = [
+            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
+            nn.BatchNorm2d(out_channels),
+            nn.LeakyReLU(0.2, True),
+            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
+        ]
+        self.double_conv = nn.Sequential(*branch)
+    def forward(self, x):
+        projected = self.bottle_conv(x)
+        merged = self.double_conv(projected) + projected
+        return merged / math.sqrt(2)
+class Down(nn.Module):
+    """Halve the spatial size with a strided conv, then apply a ``ResBlock``.
+
+    The stride-2 convolution (kernel 4, padding 1) keeps ``in_channels``; the
+    ``ResBlock`` maps to ``out_channels``.
+    """
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+        downsample = nn.Conv2d(in_channels, in_channels, 4, 2, 1)
+        activation = nn.LeakyReLU(0.1, True)
+        refine = ResBlock(in_channels, out_channels)
+        self.main = nn.Sequential(downsample, activation, refine)
+    def forward(self, x):
+        return self.main(x)
+class SDFT(nn.Module):
+    """Convolution whose shared kernel is modulated per sample by a color vector.
+
+    A 1x1 convolution maps the color vector to one scale per input channel.
+    The scaled kernel is normalized per output channel (demodulation) and
+    applied to the whole batch with a single grouped convolution.
+    """
+    def __init__(self, color_dim, channels, kernel_size = 3):
+        super().__init__()
+        # generate global conv weights
+        self.kernel_size = kernel_size
+        self.padding = kernel_size // 2
+        self.scale = 1 / math.sqrt(channels * kernel_size ** 2)  # 1 / sqrt(fan_in)
+        self.modulation = nn.Conv2d(color_dim, channels, 1)
+        self.weight = nn.Parameter(
+            torch.randn(1, channels, channels, kernel_size, kernel_size)
+        )
+    def forward(self, fea, color_style):
+        batch, chans, height, width = fea.size()
+        k = self.kernel_size
+        # one multiplier per (sample, input channel)
+        style = self.modulation(color_style).view(batch, 1, chans, 1, 1)
+        kernels = self.scale * self.weight * style
+        # demodulation: normalize every output-channel kernel
+        norm = torch.rsqrt(kernels.pow(2).sum([2, 3, 4]) + 1e-8)
+        kernels = kernels * norm.view(batch, chans, 1, 1, 1)
+        kernels = kernels.view(batch * chans, chans, k, k)
+        # fold the batch into the channel axis so each sample is one conv group
+        folded = fea.view(1, batch * chans, height, width)
+        out = F.conv2d(folded, kernels, padding=self.padding, groups=batch)
+        return out.view(batch, chans, height, width)
+class UpBlock(nn.Module):
+    """Decoder stage: upsample, fuse a thinned skip connection, apply ``SDFT``.
+
+    ``forward`` upsamples ``x1`` by 2, concatenates every fourth channel of
+    the skip tensor ``x2``, runs the fusion convolutions and the color-modulated
+    ``SDFT`` convolution, and adds a 1x1-projected copy of the upsampled input.
+    The layer widths assume the upsampled ``x1`` has ``in_channels // 2``
+    channels and the thinned skip has ``in_channels // 8``.
+    """
+    def __init__(self, color_dim, in_channels, out_channels, kernel_size = 3, bilinear=True):
+        super().__init__()
+        half = in_channels // 2
+        eighth = in_channels // 8
+        # bilinear upsampling keeps the channel count; the transposed conv halves it
+        self.up = (
+            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
+            if bilinear
+            else nn.ConvTranspose2d(in_channels, half, kernel_size=2, stride=2)
+        )
+        fusion = [
+            nn.Conv2d(half + eighth, out_channels, 1, 1, 0),
+            nn.LeakyReLU(0.2, True),
+            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
+            nn.LeakyReLU(0.2, True),
+        ]
+        self.conv_cat = nn.Sequential(*fusion)
+        self.conv_s = nn.Conv2d(half, out_channels, 1, 1, 0)
+        # generate global conv weights
+        self.SDFT = SDFT(color_dim, out_channels, kernel_size)
+    def forward(self, x1, x2, color_style):
+        upsampled = self.up(x1)
+        shortcut = self.conv_s(upsampled)
+        thinned_skip = x2[:, ::4, :, :]  # keep every fourth skip channel
+        fused = self.conv_cat(torch.cat([upsampled, thinned_skip], dim=1))
+        # residual connection around the fusion + SDFT path
+        return self.SDFT(fused, color_style) + shortcut
+class ColorEncoder(nn.Module):
+    """Encode a reference RGB image into a ``color_dim``-channel 1x1 color vector.
+
+    Features come from the project's ``vgg19`` network (layer ``relu5_2``) and
+    are reduced by strided convolutions, global average pooling and three 1x1
+    convolutions.
+    """
+    def __init__(self, color_dim=512):
+        super().__init__()
+        self.vgg = vgg19()
+        half_dim = color_dim // 2
+        layers = []
+        # Two (stride-2 conv, 3x3 conv) pairs shrink the feature map. With
+        # kernel 4 and padding 2, a 16x16 map becomes 9x9 and then 5x5.
+        for _ in range(2):
+            layers += [
+                nn.Conv2d(color_dim, color_dim, 4, 2, 2),
+                nn.LeakyReLU(0.2, True),
+                nn.Conv2d(color_dim, color_dim, 3, 1, 1),
+                nn.LeakyReLU(0.2, True),
+            ]
+        layers += [
+            nn.AdaptiveAvgPool2d((1, 1)),  # 1x1
+            nn.Conv2d(color_dim, half_dim, 1),  # linear-1
+            nn.LeakyReLU(0.2, True),
+            nn.Conv2d(half_dim, half_dim, 1),  # linear-2
+            nn.LeakyReLU(0.2, True),
+            nn.Conv2d(half_dim, color_dim, 1),  # linear-3
+        ]
+        self.feature2vector = nn.Sequential(*layers)
+        self.color_dim = color_dim
+    def forward(self, x):
+        # x: RGB in [0, 1]
+        # presumably [B, 512, 16, 16] for a 256x256 input - depends on vgg19, confirm there
+        features = self.vgg(x, layer_name='relu5_2')
+        return self.feature2vector(features[-1])  # [B, color_dim, 1, 1]
+class ColorUNet(nn.Module):
+    """U-Net that predicts the two ab channels from an L image and a color vector.
+
+    ``forward`` takes a pair ``(L image, color vector)``. The output always has
+    2 channels squashed to [-1, 1] by ``Tanh``; ``n_classes`` is stored but does
+    not affect the output layer.
+    """
+    def __init__(self, n_channels=1, n_classes=3, bilinear=True):
+        super().__init__()
+        self.n_channels = n_channels
+        self.n_classes = n_classes
+        self.bilinear = bilinear
+        factor = 2 if bilinear else 1
+        # encoder
+        self.inc = DoubleConv(n_channels, 64)
+        self.down1 = Down(64, 128)
+        self.down2 = Down(128, 256)
+        self.down3 = Down(256, 512)
+        self.down4 = Down(512, 1024 // factor)
+        # decoder; every stage is modulated by the 512-channel color vector
+        self.up1 = UpBlock(512, 1024, 512 // factor, 3, bilinear)
+        self.up2 = UpBlock(512, 512, 256 // factor, 3, bilinear)
+        self.up3 = UpBlock(512, 256, 128 // factor, 5, bilinear)
+        self.up4 = UpBlock(512, 128, 64, 5, bilinear)
+        head = [
+            nn.Conv2d(64, 64, 3, 1, 1),
+            nn.LeakyReLU(0.2, True),
+            nn.Conv2d(64, 2, 3, 1, 1),
+            nn.Tanh(),  # [-1,1]
+        ]
+        self.outc = nn.Sequential(*head)
+    def forward(self, x):
+        x_color = x[1]  # color vector, [B, 512, 1, 1]
+        # encoder activations, shallowest first; for bilinear=True and a 256x256
+        # input: 64@256, 128@128, 256@64, 512@32, 512@16
+        skips = [self.inc(x[0])]
+        for down in (self.down1, self.down2, self.down3, self.down4):
+            skips.append(down(skips[-1]))
+        out = skips.pop()
+        # each decoder stage consumes the next-deepest remaining skip
+        for up in (self.up1, self.up2, self.up3, self.up4):
+            out = up(out, skips.pop(), x_color)
+        return self.outc(out)