{ "cells": [ { "cell_type": "markdown", "id": "1027b46a", "metadata": {}, "source": [ "# Talking Head(?) Anime from a Single Image 3: Now the Body Too (Manual Poser Tool)\n", "\n", "**Instruction**\n", "\n", "1. Run the four cells below, one by one, in order by clicking the \"Play\" button to the left of it. Wait for each cell to finish before going to the next one.\n", "2. Scroll down to the end of the last cell, and play with the GUI.\n", "\n", "**Links**\n", "\n", "* Github repository: http://github.com/pkhungurn/talking-head-anime-3-demo\n", "* Project writeup: http://pkhungurn.github.io/talking-head-anime-3/" ] }, { "cell_type": "code", "execution_count": null, "id": "54cc96d7", "metadata": {}, "outputs": [], "source": [ "# Clone the repository\n", "%cd /content\n", "!git clone https://github.com/pkhungurn/talking-head-anime-3-demo.git" ] }, { "cell_type": "code", "execution_count": null, "id": "77f2016c", "metadata": {}, "outputs": [], "source": [ "# CD into the repository directory.\n", "%cd /content/talking-head-anime-3-demo" ] }, { "cell_type": "code", "execution_count": null, "id": "1771c927", "metadata": {}, "outputs": [], "source": [ "# Download model files\n", "!mkdir -p data/models/standard_float\n", "!wget -O data/models/standard_float/editor.pt https://www.dropbox.com/s/zp3e5ox57sdws3y/editor.pt?dl=0\n", "!wget -O data/models/standard_float/eyebrow_decomposer.pt https://www.dropbox.com/s/bcp42knbrk7egk8/eyebrow_decomposer.pt?dl=0\n", "!wget -O data/models/standard_float/eyebrow_morphing_combiner.pt https://www.dropbox.com/s/oywaiio2s53lc57/eyebrow_morphing_combiner.pt?dl=0\n", "!wget -O data/models/standard_float/face_morpher.pt https://www.dropbox.com/s/8qvo0u5lw7hqvtq/face_morpher.pt?dl=0\n", "!wget -O data/models/standard_float/two_algo_face_body_rotator.pt https://www.dropbox.com/s/qmq1dnxrmzsxb4h/two_algo_face_body_rotator.pt?dl=0\n", "\n", "!mkdir -p data/models/standard_half\n", "!wget -O data/models/standard_half/editor.pt https://www.dropbox.com/s/g21ps8gfuvz4kbo/editor.pt?dl=0\n", "!wget -O data/models/standard_half/eyebrow_decomposer.pt https://www.dropbox.com/s/nwwwevzpmxiilgn/eyebrow_decomposer.pt?dl=0\n", "!wget -O data/models/standard_half/eyebrow_morphing_combiner.pt https://www.dropbox.com/s/z5v0amgqif7yup1/eyebrow_morphing_combiner.pt?dl=0\n", "!wget -O data/models/standard_half/face_morpher.pt https://www.dropbox.com/s/g03sfnd5yfs0m65/face_morpher.pt?dl=0\n", "!wget -O data/models/standard_half/two_algo_face_body_rotator.pt https://www.dropbox.com/s/c5lrn7z34x12317/two_algo_face_body_rotator.pt?dl=0\n", "\n", "!mkdir -p data/models/separable_float \n", "!wget -O data/models/separable_float/editor.pt https://www.dropbox.com/s/nwdxhrpa9fy19r4/editor.pt?dl=0\n", "!wget -O data/models/separable_float/eyebrow_decomposer.pt https://www.dropbox.com/s/hfzjcu9cqr9wm3i/eyebrow_decomposer.pt?dl=0\n", "!wget -O data/models/separable_float/eyebrow_morphing_combiner.pt https://www.dropbox.com/s/g04dyyyavh5o1e2/eyebrow_morphing_combiner.pt?dl=0\n", "!wget -O data/models/separable_float/face_morpher.pt https://www.dropbox.com/s/vgi9dsj95y0rrwv/face_morpher.pt?dl=0\n", "!wget -O data/models/separable_float/two_algo_face_body_rotator.pt https://www.dropbox.com/s/8u0qond8po34l24/two_algo_face_body_rotator.pt?dl=0\n", "\n", "!mkdir -p data/models/separable_half\n", "!wget -O data/models/separable_half/editor.pt https://www.dropbox.com/s/on8kn6z9fj95j0h/editor.pt?dl=0\n", "!wget -O data/models/separable_half/eyebrow_decomposer.pt https://www.dropbox.com/s/0hxu8opu1hmghqe/eyebrow_decomposer.pt?dl=0\n", "!wget -O data/models/separable_half/eyebrow_morphing_combiner.pt https://www.dropbox.com/s/bgz02afp0xojqfs/eyebrow_morphing_combiner.pt?dl=0\n", "!wget -O data/models/separable_half/face_morpher.pt https://www.dropbox.com/s/bgz02afp0xojqfs/eyebrow_morphing_combiner.pt?dl=0\n", "!wget -O data/models/separable_half/two_algo_face_body_rotator.pt https://www.dropbox.com/s/vr8h2xxltszhw7w/two_algo_face_body_rotator.pt?dl=0" ] }, { "cell_type": "code", "execution_count": null, "id": "062014f7", "metadata": { "id": "breeding-extra" }, "outputs": [], "source": [ "# Set this constant to specify which system variant to use.\n", "MODEL_NAME = \"standard_float\" \n", "\n", "# Load the models.\n", "import torch\n", "DEVICE_NAME = 'cuda'\n", "device = torch.device(DEVICE_NAME)\n", "\n", "def load_poser(model: str, device: torch.device):\n", " print(\"Using the %s model.\" % model)\n", " if model == \"standard_float\":\n", " from tha3.poser.modes.standard_float import create_poser\n", " return create_poser(device)\n", " elif model == \"standard_half\":\n", " from tha3.poser.modes.standard_half import create_poser\n", " return create_poser(device)\n", " elif model == \"separable_float\":\n", " from tha3.poser.modes.separable_float import create_poser\n", " return create_poser(device)\n", " elif model == \"separable_half\":\n", " from tha3.poser.modes.separable_half import create_poser\n", " return create_poser(device)\n", " else:\n", " raise RuntimeError(\"Invalid model: '%s'\" % model)\n", " \n", "poser = load_poser(MODEL_NAME, DEVICE_NAME)\n", "poser.get_modules();" ] }, { "cell_type": "code", "execution_count": null, "id": "breeding-extra", "metadata": { "id": "breeding-extra" }, "outputs": [], "source": [ "# Create the GUI for manipulating character images.\n", "import PIL.Image\n", "import io\n", "from io import StringIO, BytesIO\n", "import IPython.display\n", "import numpy\n", "import ipywidgets\n", "import time\n", "import threading\n", "import torch\n", "from tha3.util import resize_PIL_image, extract_PIL_image_from_filelike, \\\n", " extract_pytorch_image_from_PIL_image, convert_output_image_from_torch_to_numpy\n", "\n", "FRAME_RATE = 30.0\n", "\n", "last_torch_input_image = None\n", "torch_input_image = None\n", "\n", "def show_pytorch_image(pytorch_image):\n", " output_image = pytorch_image.detach().cpu()\n", " numpy_image = numpy.uint8(numpy.rint(convert_output_image_from_torch_to_numpy(output_image) * 255.0))\n", " pil_image = PIL.Image.fromarray(numpy_image, mode='RGBA')\n", " IPython.display.display(pil_image)\n", "\n", "upload_input_image_button = ipywidgets.FileUpload(\n", " accept='.png',\n", " multiple=False,\n", " layout={\n", " 'width': '512px'\n", " }\n", ")\n", "\n", "output_image_widget = ipywidgets.Output(\n", " layout={\n", " 'border': '1px solid black',\n", " 'width': '512px',\n", " 'height': '512px'\n", " }\n", ")\n", "\n", "eyebrow_dropdown = ipywidgets.Dropdown(\n", " options=[\"troubled\", \"angry\", \"lowered\", \"raised\", \"happy\", \"serious\"],\n", " value=\"troubled\",\n", " description=\"Eyebrow:\", \n", ")\n", "eyebrow_left_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=0.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"Left:\",\n", " readout=True,\n", " readout_format=\".2f\"\n", ")\n", "eyebrow_right_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=0.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"Right:\",\n", " readout=True,\n", " readout_format=\".2f\"\n", ")\n", "\n", "eye_dropdown = ipywidgets.Dropdown(\n", " options=[\"wink\", \"happy_wink\", \"surprised\", \"relaxed\", \"unimpressed\", \"raised_lower_eyelid\"],\n", " value=\"wink\",\n", " description=\"Eye:\", \n", ")\n", "eye_left_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=0.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"Left:\",\n", " readout=True,\n", " readout_format=\".2f\"\n", ")\n", "eye_right_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=0.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"Right:\",\n", " readout=True,\n", " readout_format=\".2f\"\n", ")\n", "\n", "mouth_dropdown = ipywidgets.Dropdown(\n", " options=[\"aaa\", \"iii\", \"uuu\", \"eee\", \"ooo\", \"delta\", \"lowered_corner\", \"raised_corner\", \"smirk\"],\n", " value=\"aaa\",\n", " description=\"Mouth:\", \n", ")\n", "mouth_left_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=0.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"Value:\",\n", " readout=True,\n", " readout_format=\".2f\"\n", ")\n", "mouth_right_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=0.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\" \",\n", " readout=True,\n", " readout_format=\".2f\",\n", " disabled=True,\n", ")\n", "\n", "def update_mouth_sliders(change):\n", " if mouth_dropdown.value == \"lowered_corner\" or mouth_dropdown.value == \"raised_corner\":\n", " mouth_left_slider.description = \"Left:\"\n", " mouth_right_slider.description = \"Right:\"\n", " mouth_right_slider.disabled = False\n", " else:\n", " mouth_left_slider.description = \"Value:\"\n", " mouth_right_slider.description = \" \"\n", " mouth_right_slider.disabled = True\n", "\n", "mouth_dropdown.observe(update_mouth_sliders, names='value')\n", "\n", "iris_small_left_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=0.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"Left:\",\n", " readout=True,\n", " readout_format=\".2f\"\n", ")\n", "iris_small_right_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=0.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"Right:\",\n", " readout=True,\n", " readout_format=\".2f\", \n", ")\n", "iris_rotation_x_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=-1.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"X-axis:\",\n", " readout=True,\n", " readout_format=\".2f\"\n", ")\n", "iris_rotation_y_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=-1.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"Y-axis:\",\n", " readout=True,\n", " readout_format=\".2f\", \n", ")\n", "\n", "head_x_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=-1.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"X-axis:\",\n", " readout=True,\n", " readout_format=\".2f\"\n", ")\n", "head_y_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=-1.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"Y-axis:\",\n", " readout=True,\n", " readout_format=\".2f\", \n", ")\n", "neck_z_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=-1.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"Z-axis:\",\n", " readout=True,\n", " readout_format=\".2f\", \n", ")\n", "body_y_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=-1.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"Y-axis rotation:\",\n", " readout=True,\n", " readout_format=\".2f\", \n", ")\n", "body_z_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=-1.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"Z-axis rotation:\",\n", " readout=True,\n", " readout_format=\".2f\", \n", ")\n", "breathing_slider = ipywidgets.FloatSlider(\n", " value=0.0,\n", " min=0.0,\n", " max=1.0,\n", " step=0.01,\n", " description=\"Breathing:\",\n", " readout=True,\n", " readout_format=\".2f\", \n", ")\n", "\n", "\n", "control_panel = ipywidgets.VBox([\n", " eyebrow_dropdown,\n", " eyebrow_left_slider,\n", " eyebrow_right_slider,\n", " ipywidgets.HTML(value=\"
\"),\n", " eye_dropdown,\n", " eye_left_slider,\n", " eye_right_slider,\n", " ipywidgets.HTML(value=\"
\"),\n", " mouth_dropdown,\n", " mouth_left_slider,\n", " mouth_right_slider,\n", " ipywidgets.HTML(value=\"
\"),\n", " ipywidgets.HTML(value=\"
Iris Shrinkage
\"),\n", " iris_small_left_slider,\n", " iris_small_right_slider,\n", " ipywidgets.HTML(value=\"
Iris Rotation
\"),\n", " iris_rotation_x_slider,\n", " iris_rotation_y_slider,\n", " ipywidgets.HTML(value=\"
\"),\n", " ipywidgets.HTML(value=\"
Head Rotation
\"),\n", " head_x_slider,\n", " head_y_slider,\n", " neck_z_slider,\n", " ipywidgets.HTML(value=\"
\"),\n", " ipywidgets.HTML(value=\"
Body Rotation
\"),\n", " body_y_slider,\n", " body_z_slider,\n", " ipywidgets.HTML(value=\"
\"),\n", " ipywidgets.HTML(value=\"
Breathing
\"),\n", " breathing_slider,\n", "])\n", "\n", "controls = ipywidgets.HBox([\n", " ipywidgets.VBox([\n", " output_image_widget, \n", " upload_input_image_button\n", " ]),\n", " control_panel,\n", "])\n", "\n", "from tha3.poser.modes.pose_parameters import get_pose_parameters\n", "pose_parameters = get_pose_parameters()\n", "pose_size = poser.get_num_parameters()\n", "last_pose = torch.zeros(1, pose_size, dtype=poser.get_dtype()).to(device)\n", "\n", "iris_small_left_index = pose_parameters.get_parameter_index(\"iris_small_left\")\n", "iris_small_right_index = pose_parameters.get_parameter_index(\"iris_small_right\")\n", "iris_rotation_x_index = pose_parameters.get_parameter_index(\"iris_rotation_x\")\n", "iris_rotation_y_index = pose_parameters.get_parameter_index(\"iris_rotation_y\")\n", "head_x_index = pose_parameters.get_parameter_index(\"head_x\")\n", "head_y_index = pose_parameters.get_parameter_index(\"head_y\")\n", "neck_z_index = pose_parameters.get_parameter_index(\"neck_z\")\n", "body_y_index = pose_parameters.get_parameter_index(\"body_y\")\n", "body_z_index = pose_parameters.get_parameter_index(\"body_z\")\n", "breathing_index = pose_parameters.get_parameter_index(\"breathing\")\n", "\n", "def get_pose():\n", " pose = torch.zeros(1, pose_size, dtype=poser.get_dtype())\n", "\n", " eyebrow_name = f\"eyebrow_{eyebrow_dropdown.value}\"\n", " eyebrow_left_index = pose_parameters.get_parameter_index(f\"{eyebrow_name}_left\")\n", " eyebrow_right_index = pose_parameters.get_parameter_index(f\"{eyebrow_name}_right\")\n", " pose[0, eyebrow_left_index] = eyebrow_left_slider.value\n", " pose[0, eyebrow_right_index] = eyebrow_right_slider.value\n", "\n", " eye_name = f\"eye_{eye_dropdown.value}\"\n", " eye_left_index = pose_parameters.get_parameter_index(f\"{eye_name}_left\")\n", " eye_right_index = pose_parameters.get_parameter_index(f\"{eye_name}_right\")\n", " pose[0, eye_left_index] = eye_left_slider.value\n", " pose[0, eye_right_index] = eye_right_slider.value\n", "\n", " mouth_name = f\"mouth_{mouth_dropdown.value}\"\n", " if mouth_name == \"mouth_lowered_corner\" or mouth_name == \"mouth_raised_corner\":\n", " mouth_left_index = pose_parameters.get_parameter_index(f\"{mouth_name}_left\")\n", " mouth_right_index = pose_parameters.get_parameter_index(f\"{mouth_name}_right\")\n", " pose[0, mouth_left_index] = mouth_left_slider.value\n", " pose[0, mouth_right_index] = mouth_right_slider.value\n", " else:\n", " mouth_index = pose_parameters.get_parameter_index(mouth_name)\n", " pose[0, mouth_index] = mouth_left_slider.value\n", "\n", " pose[0, iris_small_left_index] = iris_small_left_slider.value\n", " pose[0, iris_small_right_index] = iris_small_right_slider.value\n", " pose[0, iris_rotation_x_index] = iris_rotation_x_slider.value\n", " pose[0, iris_rotation_y_index] = iris_rotation_y_slider.value\n", " pose[0, head_x_index] = head_x_slider.value\n", " pose[0, head_y_index] = head_y_slider.value\n", " pose[0, neck_z_index] = neck_z_slider.value\n", " pose[0, body_y_index] = body_y_slider.value\n", " pose[0, body_z_index] = body_z_slider.value\n", " pose[0, breathing_index] = breathing_slider.value\n", "\n", " return pose.to(device)\n", "\n", "display(controls)\n", "\n", "def update(change):\n", " global last_pose\n", " global last_torch_input_image\n", "\n", " if torch_input_image is None:\n", " return\n", "\n", " needs_update = False\n", " if last_torch_input_image is None:\n", " needs_update = True \n", " else:\n", " if (torch_input_image - last_torch_input_image).abs().max().item() > 0:\n", " needs_update = True \n", "\n", " pose = get_pose()\n", " if (pose - last_pose).abs().max().item() > 0:\n", " needs_update = True\n", "\n", " if not needs_update:\n", " return\n", "\n", " output_image = poser.pose(torch_input_image, pose)[0]\n", " with output_image_widget:\n", " output_image_widget.clear_output(wait=True)\n", " show_pytorch_image(output_image) \n", "\n", " last_torch_input_image = torch_input_image\n", " last_pose = pose\n", "\n", "def upload_image(change):\n", " global torch_input_image\n", " for name, file_info in upload_input_image_button.value.items():\n", " content = io.BytesIO(file_info['content'])\n", " if content is not None:\n", " pil_image = resize_PIL_image(extract_PIL_image_from_filelike(content), size=(512,512))\n", " w, h = pil_image.size\n", " if pil_image.mode != 'RGBA':\n", " with output_image_widget:\n", " torch_input_image = None\n", " output_image_widget.clear_output(wait=True)\n", " display(ipywidgets.HTML(\"Image must have an alpha channel!!!\"))\n", " else:\n", " torch_input_image = extract_pytorch_image_from_PIL_image(pil_image).to(device)\n", " if poser.get_dtype() == torch.half:\n", " torch_input_image = torch_input_image.half()\n", " update(None)\n", "\n", "upload_input_image_button.observe(upload_image, names='value')\n", "eyebrow_dropdown.observe(update, 'value')\n", "eyebrow_left_slider.observe(update, 'value')\n", "eyebrow_right_slider.observe(update, 'value')\n", "eye_dropdown.observe(update, 'value')\n", "eye_left_slider.observe(update, 'value')\n", "eye_right_slider.observe(update, 'value')\n", "mouth_dropdown.observe(update, 'value')\n", "mouth_left_slider.observe(update, 'value')\n", "mouth_right_slider.observe(update, 'value')\n", "iris_small_left_slider.observe(update, 'value')\n", "iris_small_right_slider.observe(update, 'value')\n", "iris_rotation_x_slider.observe(update, 'value')\n", "iris_rotation_y_slider.observe(update, 'value')\n", "head_x_slider.observe(update, 'value')\n", "head_y_slider.observe(update, 'value')\n", "neck_z_slider.observe(update, 'value')\n", "body_y_slider.observe(update, 'value')\n", "body_z_slider.observe(update, 'value')\n", "breathing_slider.observe(update, 'value')" ] } ], "metadata": { "accelerator": "GPU", "colab": { "name": "tha3.ipynb", "provenance": [] }, "interpreter": { "hash": "684906ad716c90e6f3397644b72c2a23821e93080f6b0264e4cd74aee22032ce" }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" } }, "nbformat": 4, "nbformat_minor": 5 }