Spaces: Running on Zero
xuefengli committed
Commit 7362797
Parent(s): e0974a9
update
This view is limited to 50 files because it contains too many changes. See the raw diff for the full set of changes.
- README.md +3 -3
- app.py +77 -0
- chameleon/__init__.py +4 -0
- chameleon/download_data.py +88 -0
- chameleon/inference/__init__.py +4 -0
- chameleon/inference/alignment.py +79 -0
- chameleon/inference/chameleon.py +689 -0
- chameleon/inference/cudagraph.py +85 -0
- chameleon/inference/generation.py +162 -0
- chameleon/inference/image_tokenizer.py +125 -0
- chameleon/inference/loader.py +71 -0
- chameleon/inference/logits_processor.py +336 -0
- chameleon/inference/model_adapter.py +118 -0
- chameleon/inference/stopping_criteria.py +55 -0
- chameleon/inference/token_selector.py +47 -0
- chameleon/inference/transformer.py +421 -0
- chameleon/inference/utils.py +34 -0
- chameleon/inference/vocab.py +123 -0
- chameleon/inference/vqgan.py +675 -0
- chameleon/miniviewer/__init__.py +4 -0
- chameleon/miniviewer/__main__.py +9 -0
- chameleon/miniviewer/miniviewer.html +409 -0
- chameleon/miniviewer/miniviewer.py +254 -0
- chameleon/viewer/backend/__init__.py +4 -0
- chameleon/viewer/backend/data_types.py +90 -0
- chameleon/viewer/backend/model_viewer.py +66 -0
- chameleon/viewer/backend/models/__init__.py +4 -0
- chameleon/viewer/backend/models/abstract_model.py +67 -0
- chameleon/viewer/backend/models/chameleon_distributed.py +827 -0
- chameleon/viewer/backend/models/chameleon_local.py +642 -0
- chameleon/viewer/backend/models/service.py +300 -0
- chameleon/viewer/backend/requirements.txt +35 -0
- chameleon/viewer/backend/utils.py +28 -0
- chameleon/viewer/frontend/README.md +11 -0
- chameleon/viewer/frontend/index.html +17 -0
- chameleon/viewer/frontend/package-lock.json +0 -0
- chameleon/viewer/frontend/package.json +62 -0
- chameleon/viewer/frontend/postcss.config.cjs +13 -0
- chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_DisplayVF_W_Wght.woff2 +0 -0
- chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Display_W_Bd.woff +0 -0
- chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Display_W_Bd.woff2 +0 -0
- chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Display_W_Md.woff +0 -0
- chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Display_W_Md.woff2 +0 -0
- chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Display_W_SBd.woff +0 -0
- chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Display_W_SBd.woff2 +0 -0
- chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_TextVF_W_Wght.woff2 +0 -0
- chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Text_W_Bd.woff +0 -0
- chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Text_W_Bd.woff2 +0 -0
- chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Text_W_Md.woff +0 -0
- chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Text_W_Md.woff2 +0 -0
README.md
CHANGED
@@ -1,10 +1,10 @@
 ---
 title: Anole
-emoji:
-colorFrom:
+emoji: 🏆
+colorFrom: green
 colorTo: red
 sdk: gradio
-sdk_version: 4.
+sdk_version: 4.37.2
 app_file: app.py
 pinned: false
 ---
app.py
ADDED
@@ -0,0 +1,77 @@
import spaces
import subprocess
import shutil
import gradio as gr
from PIL import Image
from huggingface_hub import snapshot_download
import json
import os

# Specify the repository ID
repo_id = "GAIR/Anole-7b-v0.1"

if not os.path.exists("./Anole-7b-v0.1"):
    os.system("git lfs install")
    os.system("git clone https://huggingface.co/GAIR/Anole-7b-v0.1")

subprocess.run(["/bin/bash", "install.sh"], capture_output=True, text=True)
result = subprocess.run(["/bin/bash", "install.sh"], capture_output=True, text=True)

@spaces.GPU(duration=90)
def text_to_image(instruction):
    result = subprocess.run(["python", "text2image.py", "-i", instruction, "-b", "1"], capture_output=True, text=True)
    if result.returncode == 0:
        return gr.update(value="Image Generated. Check the display below.", visible=True), "outputs/text2image/1.png"
    else:
        return "Error: " + result.stderr, None

@spaces.GPU(duration=150)
def text_to_interleaved(instruction):
    result = subprocess.run(["python", "interleaved_generation.py", "-i", instruction], capture_output=True, text=True)
    if result.returncode == 0:
        outputs = [None for i in range(7)]
        box_index = 0

        # Read the segments.jsonl file
        with open('./segments.jsonl', 'r') as file:
            for line in file:
                line_dict = json.loads(line.strip())
                if line_dict['type'] == 'text':
                    if box_index % 2 != 0:
                        box_index += 1
                    outputs[box_index] = line_dict['content']
                elif line_dict['type'] == 'image':
                    if box_index % 2 == 0:
                        box_index += 1
                    outputs[box_index] = Image.open(line_dict['content'])
                    box_index += 1

        return outputs[0], outputs[1], outputs[2], outputs[3], outputs[4], outputs[5], outputs[6]
    else:
        return ("Error: " + result.stderr, ) * 7

# Use Blocks to organize the interfaces side by side
with gr.Blocks() as demo:
    # Create a row to place columns side by side
    with gr.Row():
        # First column for the Text-to-Image interface
        with gr.Column():
            gr.Interface(
                fn=text_to_image,  # Function that generates an image from a text instruction
                inputs=gr.Textbox(label="Enter Instruction for Image Generation"),  # Input textbox for user instructions
                outputs=[gr.Text(label="Status"), gr.Image(label="Generated Image")],  # Outputs: status message and generated image
                title="Anole: Text-to-Image",  # Title of the interface
                description="Generate images based on text instructions. Check https://github.com/GAIR-NLP/anole for more information. Model can be downloaded at: https://huggingface.co/GAIR/Anole-7b-v0.1."
            )
        # Second column for the Text-to-Interleaved Image-Text interface
        with gr.Column():
            gr.Interface(
                fn=text_to_interleaved,
                inputs=gr.Textbox(label="Enter Instruction for Interleaved Content"),
                outputs=[gr.Text(label="Text Output 1"), gr.Image(label="Image Output 1"), gr.Text(label="Text Output 2"), gr.Image(label="Image Output 2"), gr.Text(label="Text Output 3"), gr.Image(label="Image Output 3"), gr.Text(label="Text Output 4")],
                title="Anole: Text-to-Interleaved",  # Title of the interface
                description="Generate interleaved text and images based on text instructions. Check https://github.com/GAIR-NLP/anole for more information. Model can be downloaded at: https://huggingface.co/GAIR/Anole-7b-v0.1."
            )

# Launch the entire Blocks interface
demo.launch()
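For reference, text_to_interleaved above reads ./segments.jsonl written by interleaved_generation.py, one JSON object per line with a "type" ("text" or "image") and a "content" field (a text segment or an image path). A hypothetical two-line example of such a file (the wording and path below are illustrative, not taken from this commit):

{"type": "text", "content": "Here is the first generated paragraph."}
{"type": "image", "content": "outputs/interleaved/0.png"}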
chameleon/__init__.py
ADDED
@@ -0,0 +1,4 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.
chameleon/download_data.py
ADDED
@@ -0,0 +1,88 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Chameleon License Agreement.

import hashlib
import subprocess
import sys
from pathlib import Path


def download_file(url: str, output_path: Path):
    print(f"Downloading {output_path}")
    subprocess.check_call(["wget", "--continue", url, "-O", str(output_path)])


def validate_checksum(folder: Path):
    chks_parts = (folder / "checklist.chk").read_text().split()
    for expected_checksum, file in zip(chks_parts[::2], chks_parts[1::2]):
        file_path = folder / file
        checksum = hashlib.md5(file_path.read_bytes()).hexdigest()
        if checksum != expected_checksum:
            print(f"Checksum mismatch for {file_path}")
            sys.exit(1)


def download_tokenizer(presigned_url: str, target_folder: Path):
    tokenizer_folder = target_folder / "tokenizer"
    tokenizer_folder.mkdir(parents=True, exist_ok=True)

    for filename in [
        "text_tokenizer.json",
        "vqgan.ckpt",
        "vqgan.yaml",
        "checklist.chk",
    ]:
        download_file(
            presigned_url.replace("*", f"tokenizer/{filename}"),
            tokenizer_folder / filename,
        )

    validate_checksum(tokenizer_folder)


def download_model(presigned_url: str, target_folder: Path, model: str):
    model_folder = target_folder / "models" / model
    model_folder.mkdir(parents=True, exist_ok=True)

    download_filenames = ["params.json", "consolidate_params.json", "checklist.chk"]

    if model == "7b":
        download_filenames += ["consolidated.pth"]
    elif model == "30b":
        download_filenames += [f"consolidated.{i:02}.pth" for i in range(4)]
    else:
        print(f"Unknown model: {model}")
        sys.exit(1)

    for filename in download_filenames:
        download_file(
            presigned_url.replace("*", f"{model}/{filename}"),
            model_folder / filename,
        )

    validate_checksum(model_folder)


def main():
    presigned_url = (
        sys.argv[1] if len(sys.argv) > 1 else input("Enter the URL from email: ")
    )

    target_folder = Path("./data")
    target_folder.mkdir(parents=True, exist_ok=True)

    download_tokenizer(presigned_url, target_folder)

    model_size = input(
        "Enter the list of models to download without spaces (7B,30B), or press Enter for all: "
    )
    if not model_size:
        model_size = "7B,30B"

    for model in model_size.split(","):
        model = model.strip().lower()
        download_model(presigned_url, target_folder, model)


if __name__ == "__main__":
    main()
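As a usage sketch (not part of this commit; the URL is a placeholder and the helpers are assumed to be importable from the repo root), the downloader can be driven either from the command line via python chameleon/download_data.py <presigned-url> or programmatically:

from pathlib import Path

from chameleon.download_data import download_model, download_tokenizer

presigned_url = "https://example.com/chameleon/*?Signature=PLACEHOLDER"  # placeholder; use the URL from the access email
target = Path("./data")
target.mkdir(parents=True, exist_ok=True)

download_tokenizer(presigned_url, target)    # fetches tokenizer + VQGAN files into ./data/tokenizer
download_model(presigned_url, target, "7b")  # fetches the 7b checkpoint into ./data/models/7b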
chameleon/inference/__init__.py
ADDED
@@ -0,0 +1,4 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.
chameleon/inference/alignment.py
ADDED
@@ -0,0 +1,79 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.

from abc import ABC, abstractmethod

import torch


class PromptAlignment(ABC):
    @abstractmethod
    def start_index(self, input_ids: list[list[int]]) -> int:
        ...

    @abstractmethod
    def prepare_inputs(self, input_ids: list[list[int]]) -> torch.Tensor:
        ...

    @abstractmethod
    def postprocess_inputs(
        self, inputs: torch.Tensor, original_inputs: torch.Tensor
    ) -> torch.Tensor:
        ...


class AlignPromptRight(PromptAlignment):
    def __init__(self, pad_id: int):
        self.pad_id = pad_id

    def start_index(self, input_ids: list[list[int]]) -> int:
        return max(len(sublist) for sublist in input_ids)

    def prepare_inputs(self, input_ids: list[list[int]]) -> torch.LongTensor:
        max_length = max(len(sublist) for sublist in input_ids)
        return torch.tensor(
            [
                ([self.pad_id] * (max_length - len(sublist))) + sublist
                for sublist in input_ids
            ],
            requires_grad=False,
        )

    def postprocess_inputs(
        self,
        inputs: torch.Tensor,
        original_inputs: torch.Tensor,
    ) -> torch.Tensor:
        return inputs


class AlignPromptLeft(PromptAlignment):
    def __init__(self, pad_id: int = -1):
        self.pad_id = pad_id

    def start_index(self, input_ids: list[list[int]]) -> int:
        return min(len(sublist) for sublist in input_ids)

    def prepare_inputs(self, input_ids: list[list[int]]) -> torch.Tensor:
        max_length = max(len(sublist) for sublist in input_ids)
        return torch.tensor(
            [
                sublist + ([self.pad_id] * (max_length - len(sublist)))
                for sublist in input_ids
            ],
            requires_grad=False,
        )

    def postprocess_inputs(
        self,
        inputs: torch.Tensor,
        original_inputs: torch.Tensor,
    ) -> torch.Tensor:
        max_init_len = original_inputs.shape[1]
        if inputs.shape[1] <= max_init_len:
            original_inputs_limited = original_inputs[:, : inputs.shape[1]]
            mask = original_inputs_limited != self.pad_id
            inputs[mask] = original_inputs_limited[mask]
        return inputs
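To make the two strategies concrete, here is a small illustrative snippet (toy token ids, not from this commit): AlignPromptRight left-pads so every prompt ends at the same position, while AlignPromptLeft right-pads so every prompt starts at position zero.

from chameleon.inference.alignment import AlignPromptLeft, AlignPromptRight

batch = [[5, 6, 7], [8, 9]]  # two prompts of unequal length

right = AlignPromptRight(pad_id=0)
print(right.prepare_inputs(batch))  # tensor([[5, 6, 7], [0, 8, 9]])
print(right.start_index(batch))     # 3 -> generation starts after the longest prompt

left = AlignPromptLeft(pad_id=-1)
print(left.prepare_inputs(batch))   # tensor([[ 5,  6,  7], [ 8,  9, -1]])
print(left.start_index(batch))      # 2 -> generation starts after the shortest prompt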
chameleon/inference/chameleon.py
ADDED
@@ -0,0 +1,689 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.

import base64
import io
import json
import math
import queue
import threading
from dataclasses import dataclass, field
from tqdm import tqdm
from enum import Enum
from multiprocessing import managers, queues, synchronize
from typing import Literal, Union

import PIL
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
from PIL.Image import Image
from tokenizers import Tokenizer
from transformers import (
    LogitsProcessor,
    RepetitionPenaltyLogitsProcessor,
    TemperatureLogitsWarper,
    TopPLogitsWarper,
    enable_full_determinism,
)

from chameleon.inference import loader
from chameleon.inference.alignment import AlignPromptRight
from chameleon.inference.generation import ChameleonGenerator
from chameleon.inference.image_tokenizer import ImageTokenizer
from chameleon.inference.logits_processor import (
    AllowOnlyTokensLogitsProcessor,
    DisallowTokensAtOrAfterIndexLogitsProcessor,
    InBatchInstructCFGLogitsProcessor,
)
from chameleon.inference.model_adapter import ChameleonModelAdapter
from chameleon.inference.stopping_criteria import (
    MaxLengthCriteria,
    StopOnEOSAfterBatchIndex,
)
from chameleon.inference.token_selector import (
    ArgmaxTokenSelector,
    MultinomialTokenSelector,
    ReplicatedInputTokenSelector,
)
from chameleon.inference.transformer import Transformer
from chameleon.inference.utils import DynamicGenerator, advance, random_unused_port
from chameleon.inference.vocab import VocabInfo, VocabTranslation


@dataclass
class Options:
    @dataclass
    class Text:
        repetition_penalty: float = 1.2
        temp: float = 1.0
        top_p: float = 0.9
        greedy: bool = False

    @dataclass
    class Image:
        @dataclass
        class CFG:
            guidance_scale_text: float = 3.0
            guidance_scale_image: float = 1.2

        cfg: CFG = field(default_factory=CFG)
        temp: float = 0.7
        top_p: float = 0.9
        greedy: bool = False

    max_seq_len: int = 4096
    max_gen_len: int = 4096
    seed: int | None = None
    txt: Text | bool = True
    img: Image | bool = True
    extra_eos_tokens: list[int | str] = field(default_factory=lambda: [])

    def __post_init__(self):
        if self.txt is True:
            self.txt = Options.Text()
        if self.img is True:
            self.img = Options.Image()


class TokenManager:
    def __init__(
        self,
        tokenizer_path: str,
        vqgan_cfg_path: str,
        vqgan_ckpt_path: str,
        device: str | None = None,
    ):
        self.tokenizer = Tokenizer.from_file(tokenizer_path)
        self.vocab = VocabInfo(json.load(open(tokenizer_path))["model"]["vocab"])
        self.translation = VocabTranslation(self.vocab, device=device)
        self.image_tokenizer = ImageTokenizer(
            cfg_path=vqgan_cfg_path, ckpt_path=vqgan_ckpt_path, device=device
        )

    def pil_from_bpe_tokens(self, bpe_tokens: torch.Tensor) -> PIL.Image:
        image_tensor = self.translation.convert_bpe2img(bpe_tokens)
        if image_tensor.shape[0] < 1024:
            padding = (
                torch.ones(
                    [1024 - image_tensor.shape[0]],
                    dtype=int,
                    device=image_tensor.device,
                )
                * image_tensor[0]
            )
            image_tensor = torch.cat((image_tensor, padding)).unsqueeze(0)

        return self.image_tokenizer.pil_from_img_toks(image_tensor)

    def png_from_bpe_tokens(self, bpe_tokens: torch.Tensor) -> bytes:
        pil = self.pil_from_bpe_tokens(bpe_tokens)
        img_io = io.BytesIO()
        pil.save(img_io, format="PNG")
        return img_io.getvalue()

    def tokenize_text(self, text: str) -> list[int]:
        return self.tokenizer.encode(text).ids

    def tokenize_image(self, img: Image) -> list[int]:
        return (
            [self.vocab.begin_image]
            + self.translation.convert_img2bp2(
                self.image_tokenizer.img_tokens_from_pil(img)  # [0 : 8191], vqgan codebook ids
            ).tolist()
            + [self.vocab.end_image]
        )

    def tokenize_b64img(self, b64img: str) -> list[int]:
        image_data = base64.b64decode(b64img)
        image_file = io.BytesIO(image_data)
        return self.tokenize_image(PIL.Image.open(image_file))

    def tokens_from_ui(self, inputs: list[dict]) -> list[int]:
        tokens = [self.vocab.bos_id]
        for input_ in inputs:
            if input_["type"] == "text":
                tokens += self.tokenize_text(input_["value"])
            elif input_["type"] == "image":
                if isinstance(input_["value"], str):
                    if input_["value"].startswith("data:"):
                        # Value Format: 'data:image/[^;]+;base64,[A-Za-z0-9+/]+={0,2}'
                        tokens += self.tokenize_b64img(input_["value"].split(",", 1)[1])
                    elif input_["value"].startswith("file:"):
                        tokens += self.tokenize_image(
                            PIL.Image.open(input_["value"].split(":", 1)[1])
                        )
                    else:
                        raise ValueError("Unknown image format.")
                elif isinstance(input_["value"], Image):
                    tokens += self.tokenize_image(input_["value"])
                else:
                    raise ValueError("Unknown image type.")
            elif input_["type"] == "sentinel":
                tokens += [
                    {
                        "<START-OF-IMAGE>": self.vocab.begin_image,
                        "<END-OF-TURN>": self.vocab.eot_id,
                    }[input_["value"]]
                ]
            elif input_["type"] == "ids":
                tokens += input_["value"]
            else:
                raise ValueError("Unknown input type.")
        return tokens

    def decode_text(self, ids: torch.LongTensor | list[list[int]]) -> list[str]:
        if isinstance(ids, torch.Tensor):
            ids = ids.tolist()

        for row, values in enumerate(ids):
            try:
                ids[row] = values[: values.index(self.vocab.eos_id)]
            except ValueError:
                pass

        return self.tokenizer.decode_batch(ids)

    def decode_image(self, ids: torch.LongTensor) -> list[PIL.Image]:
        return [self.pil_from_bpe_tokens(sample) for sample in ids]


@dataclass
class DecodePiece:
    token: ChameleonGenerator.Token
    next_decoder: type["Decoder"] | None


class Decoder:
    def __init__(
        self,
        model: Transformer,
        vocab: VocabInfo,
        options: Options,
        input_ids: list[int],
    ): ...

    def __next__(self) -> DecodePiece: ...


class TextDecoder(Decoder):
    def __init__(
        self,
        model: Transformer,
        vocab: VocabInfo,
        options: Options,
        input_ids: list[list[int]],
    ):
        self.vocab = vocab
        self.options = options
        assert vocab.eos_id is not None

        prompt_lens = [len(inp) for inp in input_ids]
        max_prompt_len = max(prompt_lens)
        max_seq_len = min(options.max_seq_len, max_prompt_len + options.max_gen_len)

        self.eos_ids = [vocab.eos_id]
        for extra_eos_token in options.extra_eos_tokens:
            if isinstance(extra_eos_token, str):
                extra_eos_token = vocab.name2val[extra_eos_token]
            assert isinstance(extra_eos_token, int)
            self.eos_ids.append(extra_eos_token)

        stopping_criteria = [
            MaxLengthCriteria(max_seq_len),
        ] + [StopOnEOSAfterBatchIndex(eos_id, [max_prompt_len] * len(prompt_lens)) for eos_id in self.eos_ids]

        self.gen = ChameleonGenerator(
            model=ChameleonModelAdapter(model, max_seq_len=max_seq_len),
            input_ids=input_ids,
            stopping_criteria=stopping_criteria,
            logits_processors=self._logits_processors(),
            alignment=AlignPromptRight(vocab.pad_id),
            token_selector=(
                ArgmaxTokenSelector()
                if options.txt.greedy
                else MultinomialTokenSelector()
            ),
        )
        advance(self.gen, max_prompt_len)

    def _allowed_tokens(self) -> list[int]:
        allowed_tokens = [self.vocab.eos_id]
        if self.options.txt:
            allowed_tokens += self.vocab.text_tokens
        if self.options.img:
            allowed_tokens += [self.vocab.begin_image]
        return allowed_tokens

    def _logits_processors(self) -> list[LogitsProcessor]:
        logits_processors = [
            AllowOnlyTokensLogitsProcessor(self._allowed_tokens()),
        ]
        if isinstance(self.options.img, Options.Image):
            logits_processors += [
                DisallowTokensAtOrAfterIndexLogitsProcessor(
                    [self.vocab.begin_image],
                    self.options.max_seq_len - 1026,
                ),
            ]
        if isinstance(self.options.txt, Options.Text):
            logits_processors += [
                RepetitionPenaltyLogitsProcessor(self.options.txt.repetition_penalty),
                TemperatureLogitsWarper(self.options.txt.temp),
                TopPLogitsWarper(self.options.txt.top_p),
            ]
        return logits_processors

    def __next__(self) -> DecodePiece:
        tok = next(self.gen)
        next_decoder = None
        if (
            self.vocab.begin_image not in self.eos_ids
            and (tok.id == self.vocab.begin_image).all()
        ):
            next_decoder = ImageDecoder
        return DecodePiece(tok, next_decoder)


class ImageDecoder(Decoder):
    def __init__(
        self,
        model: Transformer,
        vocab: VocabInfo,
        options: Options,
        input_ids: list[list[int]],
    ):
        assert isinstance(options.img, Options.Image)
        self.vocab = vocab
        self.options = options
        self.batch_size = len(input_ids)
        logits_processors = [
            InBatchInstructCFGLogitsProcessor(
                options.img.cfg.guidance_scale_text,
                options.img.cfg.guidance_scale_image,
            ),
            AllowOnlyTokensLogitsProcessor(vocab.image_tokens),
            TemperatureLogitsWarper(options.img.temp),
            TopPLogitsWarper(options.img.top_p),
        ]

        for inp in input_ids:
            if inp[-1] != self.vocab.begin_image:
                inp.append(self.vocab.begin_image)

        max_prompt_len = max(len(inp) for inp in input_ids)
        self.gen = ChameleonGenerator(
            model=ChameleonModelAdapter(model, max_seq_len=max_prompt_len + 1024),
            input_ids=self._split_inputs_for_cfg(input_ids),
            logits_processors=logits_processors,
            alignment=AlignPromptRight(vocab.pad_id),
            token_selector=ReplicatedInputTokenSelector(
                (
                    ArgmaxTokenSelector()
                    if options.img.greedy
                    else MultinomialTokenSelector()
                ),
                n=3,
            ),
        )
        advance(self.gen, max_prompt_len)
        self.gen_count = 0

    def _split_inputs_for_cfg(self, input_ids: list[list[int]]) -> list[list[int]]:
        image_conditioned_allowed = set(self.vocab.image_tokens) | {
            self.vocab.bos_id,
            self.vocab.begin_image,
            self.vocab.end_image,
        }

        full_conditioned = input_ids

        image_conditioned = [
            [id for id in sample if id in image_conditioned_allowed]
            for sample in input_ids
        ]

        unconditioned = [
            [
                self.vocab.bos_id,
                self.vocab.begin_image,
            ]
        ] * self.batch_size

        return full_conditioned + image_conditioned + unconditioned

    def __next__(self) -> DecodePiece:
        if self.gen_count == 1024:
            id = torch.tensor([self.vocab.end_image] * self.batch_size)
            logits = torch.full(
                (self.batch_size, len(self.vocab.all_tokens)), -math.inf
            )
            logits[:, self.vocab.end_image] = 0
            return DecodePiece(
                ChameleonGenerator.Token(id=id, logits=logits),
                TextDecoder,
            )

        tok = next(self.gen)
        tok.id = tok.id.chunk(3)[0]
        self.gen_count += 1
        return DecodePiece(tok, None)


class Generator(Decoder):
    def __init__(
        self,
        model: Transformer,
        vocab: VocabInfo,
        options: Options,
        input_ids: list[list[int]],
    ):
        if options.seed is not None:
            enable_full_determinism(options.seed, warn_only=True)

        self.model = model
        self.vocab = vocab
        self.input_ids = input_ids[:]
        self.generated_token_ids: list[torch.LongTensor] = []
        self.options = options
        if not self.options.txt:
            self.dyngen = DynamicGenerator(
                ImageDecoder(model, vocab, options, input_ids)
            )
        else:
            self.dyngen = DynamicGenerator(
                TextDecoder(model, vocab, options, input_ids)
            )

    def __iter__(self):
        return self

    def __next__(self) -> ChameleonGenerator.Token:
        piece = next(self.dyngen)
        self.generated_token_ids.append(piece.token.id)
        if piece.next_decoder is not None:
            if not self.options.txt:
                raise StopIteration

            self.input_ids = [
                old_list + generated
                for old_list, generated in zip(
                    self.input_ids, torch.stack(self.generated_token_ids).T.tolist()
                )
            ]
            self.generated_token_ids = []
            self.dyngen.gen = piece.next_decoder(
                self.model,
                self.vocab,
                self.options,
                self.input_ids,
            )
        return piece.token


class DistributedMode(Enum):
    AUTO = 0
    THREAD = 1
    PROCESS = 2


@dataclass
class _DistributedContext:
    req_q: Union[queue.Queue, queues.Queue]
    res_q: Union[queue.Queue, queues.Queue]
    active_key: Union[dict[int, Literal[True]], managers.DictProxy]
    active_key_lock: Union[threading.Lock, synchronize.Lock]
    ready_barrier: Union[threading.Barrier, synchronize.Barrier]
    worker_launcher: Union[type[threading.Thread], type[mp.Process]]

    @staticmethod
    def make_for_threading(world_size: int):
        return _DistributedContext(
            req_q=queue.Queue(),
            res_q=queue.Queue(),
            active_key={},
            active_key_lock=threading.Lock(),
            ready_barrier=threading.Barrier(world_size + 1),
            worker_launcher=threading.Thread,
        )

    @staticmethod
    def make_for_multiprocessing(world_size: int):
        local_mp = mp.get_context("spawn")
        return _DistributedContext(
            req_q=local_mp.Queue(),
            res_q=local_mp.Queue(),
            active_key=local_mp.Manager().dict(),
            active_key_lock=local_mp.Lock(),
            ready_barrier=local_mp.Barrier(world_size + 1),
            worker_launcher=local_mp.Process,
        )

    @staticmethod
    def make(mode: DistributedMode, world_size: int):
        if mode == DistributedMode.AUTO:
            mode = DistributedMode.PROCESS

        if mode == DistributedMode.THREAD:
            return _DistributedContext.make_for_threading(world_size)
        elif mode == DistributedMode.PROCESS:
            return _DistributedContext.make_for_multiprocessing(world_size)
        else:
            raise ValueError("Unknown DistributedMode")


def _worker_impl(
    init_method: str,
    model: Transformer | str,
    world_size: int,
    rank: int,
    vocab: VocabInfo,
    dctx: _DistributedContext,
):
    dist.init_process_group(
        "nccl",
        init_method=init_method,
        world_size=world_size,
        rank=rank,
    )

    torch.set_default_device(f"cuda:{rank}")
    torch.cuda.set_device(rank)
    if isinstance(model, str):
        model = loader.load_model(model, rank=rank)
    dctx.ready_barrier.wait()

    is_coord = rank == 0

    while True:
        req = [Options(), [], 0, False]
        if is_coord:
            req = dctx.req_q.get()

        dist.broadcast_object_list(req, src=0)
        options, input_ids, key, shutdown = req
        if shutdown:
            break

        for token in Generator(
            model=model,
            vocab=vocab,
            options=options,
            input_ids=input_ids,
        ):
            if is_coord:
                dctx.res_q.put((key, token))

            to_continue = [True]
            if is_coord:
                with dctx.active_key_lock:
                    to_continue = [key in dctx.active_key]
            dist.broadcast_object_list(to_continue, src=0)
            if not to_continue[0]:
                break

        if is_coord:
            dctx.res_q.put((key, None))


class ChameleonInferenceModel:
    def __init__(
        self,
        model: Transformer | str,
        tokenizer_path: str,
        vqgan_cfg_path: str,
        vqgan_ckpt_path: str,
        *,
        options: Options | None = None,
        distributed_mode: DistributedMode = DistributedMode.AUTO,
    ):
        self.options = options or Options()
        self.next_key = 0

        self.token_manager = TokenManager(
            tokenizer_path=tokenizer_path,
            vqgan_cfg_path=vqgan_cfg_path,
            vqgan_ckpt_path=vqgan_ckpt_path,
            device="cuda",
        )
        self.vocab = self.token_manager.vocab

        world_size = 1
        if isinstance(model, str):
            world_size = loader.detect_shard_count(model)
        self.dctx = _DistributedContext.make(distributed_mode, world_size)

        init_method = f"tcp://0.0.0.0:{random_unused_port()}"
        self.workers = [
            self.dctx.worker_launcher(
                target=_worker_impl,
                args=(init_method, model, world_size, i, self.vocab, self.dctx),
                daemon=True,
            )
            for i in range(world_size)
        ]
        for w in self.workers:
            w.start()
        self.dctx.ready_barrier.wait()

    def __del__(self):
        try:
            with self.dctx.active_key_lock:
                self.dctx.active_key.clear()
            self.dctx.req_q.put([None, None, None, True])
            for w in self.workers:
                w.join()
        except FileNotFoundError:
            pass

    def stream(
        self,
        *,
        input_ids: list[int] | None = None,
        prompt_text: str | None = None,
        prompt_ui: list[dict] | None = None,
        batch_input_ids: list[list[int]] | None = None,
        batch_prompt_text: list[str] | None = None,
        batch_prompt_ui: list[list[dict]] | None = None,
        options: Options | None = None,
    ):
        # NOTE: Not thread-safe! Only one instance of generate may be run at a time.

        if (
            sum(
                x is not None
                for x in [
                    input_ids,
                    prompt_text,
                    prompt_ui,
                    batch_input_ids,
                    batch_prompt_text,
                    batch_prompt_ui,
                ]
            )
            != 1
        ):
            raise ValueError(
                "Must specify exactly one of: input_ids, prompt_text, prompt_ui, batch_input_ids, batch_prompt_text, batch_prompt_ui"
            )

        options = options or self.options

        if prompt_text is not None:
            batch_prompt_text = [prompt_text]
        if prompt_ui is not None:
            batch_prompt_ui = [prompt_ui]
        if input_ids is not None:
            batch_input_ids = [input_ids]
        if batch_prompt_text is not None:
            batch_prompt_ui = [
                [{"type": "text", "value": prompt_text}]
                for prompt_text in batch_prompt_text
            ]
        if batch_prompt_ui is not None:
            batch_input_ids = [
                self.token_manager.tokens_from_ui(prompt_ui)
                for prompt_ui in batch_prompt_ui
            ]

        assert batch_input_ids

        if not options.txt and not options.img:
            raise ValueError("Must specify at least one modality.")
        if options.txt and options.img and len(batch_input_ids) > 1:
            raise ValueError(
                "Batch generation only supported for one modality at a time."
            )

        req_key = self.next_key
        self.next_key += 1

        with self.dctx.active_key_lock:
            self.dctx.active_key[req_key] = True

        self.dctx.req_q.put([options, batch_input_ids, req_key, False])

        try:
            while key_token := self.dctx.res_q.get():
                key, token = key_token
                if key != req_key:
                    # Residual from prior calls to generation. Skip.
                    continue
                if token is None:
                    break
                yield token
        finally:
            with self.dctx.active_key_lock:
                del self.dctx.active_key[req_key]

    def step(self, *args, **kwargs) -> ChameleonGenerator.Token:
        return next(self.stream(*args, **kwargs))

    def generate(self, *args, **kwargs) -> torch.LongTensor:
        tokens = [t.id for t in self.stream(*args, **kwargs)]
        if not tokens:
            return torch.LongTensor()
        return torch.stack(tokens).T

    def decode_text(self, ids: torch.LongTensor | list[list[int]]) -> list[str]:
        return self.token_manager.decode_text(ids)

    def decode_image(self, ids: torch.LongTensor) -> list[PIL.Image]:
        return self.token_manager.decode_image(ids)

    def sft_tokenization(self, json_path: str) -> list[dict]:
        with open(json_path, 'r') as input_file:
            jsonl_input = [json.loads(line) for line in input_file]

        output_data = []
        for entry in tqdm(jsonl_input, desc="Tokenize dataset"):
            text_tokens = self.token_manager.tokenize_text(entry['text'])
            image_tokens = self.token_manager.tokenize_image(PIL.Image.open(entry['image']))
            entry['text_tokens'] = text_tokens
            entry['image_tokens'] = image_tokens
            output_data.append(entry)

        return output_data
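A minimal usage sketch for the class above (the checkpoint and tokenizer paths are hypothetical and simply follow the ./data layout produced by chameleon/download_data.py; this snippet is not part of the commit):

from chameleon.inference.chameleon import ChameleonInferenceModel, Options

model = ChameleonInferenceModel(
    "./data/models/7b",                      # sharded checkpoint dir, or an already-loaded Transformer
    "./data/tokenizer/text_tokenizer.json",
    "./data/tokenizer/vqgan.yaml",
    "./data/tokenizer/vqgan.ckpt",
    options=Options(max_gen_len=256),
)

tokens = model.generate(prompt_text="Describe a chameleon in one sentence.")
print(model.decode_text(tokens)[0])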
chameleon/inference/cudagraph.py
ADDED
@@ -0,0 +1,85 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.

import functools
from typing import Any, Callable, TypeVar

import torch

T = TypeVar("T")
FN = Callable[..., T]  # type: ignore


class CUDAGraphWrapper:
    def __init__(
        self,
        fn: FN[T],
        warmup_iter: int = 1,
        debug_dump_path: str | None = None,
    ):
        self.fn = fn
        self.warmup_iter = warmup_iter
        self.debug_dump_path = debug_dump_path
        self.graph: torch.cuda.CUDAGraph | None = None
        self.result: T | None = None

    def __call__(self, *args, **kwargs) -> Any:  # type: ignore
        if self.warmup_iter > 0:
            self.warmup_iter -= 1
            return self.fn(*args, **kwargs)

        if self.graph is None:
            self.graph = torch.cuda.CUDAGraph()
            if self.debug_dump_path is not None:
                self.graph.enable_debug_mode()
            recording_kwargs = {}
            if "capture_error_mode" in torch.cuda.graph.__init__.__annotations__:
                # In PyTorch 2.1+ and nightlies from late Aug 2023,
                # we can do this to maybe avoid watchdog-related crashes
                recording_kwargs["capture_error_mode"] = "thread_local"
            with torch.cuda.graph(self.graph, **recording_kwargs):
                self.result = self.fn(*args, **kwargs)
            torch.cuda.synchronize()
            if self.debug_dump_path is not None:
                self.graph.debug_dump(self.debug_dump_path)

        assert self.graph is not None
        self.graph.replay()
        return self.result


def cudagraph_wrap(
    *args,
    warmup_iter: int = 1,
    debug_dump_path: str | None = None,
) -> Callable[[FN[T]], FN[T]]:
    def wrapper(fn: FN[T]) -> FN[T]:
        graph_wrapper = CUDAGraphWrapper(
            fn, warmup_iter=warmup_iter, debug_dump_path=debug_dump_path
        )

        @functools.wraps(fn)
        def call_wrapper(*inner_args, **inner_kwargs):
            return graph_wrapper(*inner_args, **inner_kwargs)

        return call_wrapper

    # @cudagraph_wrap
    # def fn(...):
    #     ...
    #
    # - or -
    #
    # fast_fn = cudagraph_wrap(slow_fn, warmup_iter=2)
    if len(args) == 1 and callable(args[0]):
        return wrapper(args[0])

    # @cudagraph_wrap(warmup_iter=3)
    # def fn(...):
    #     ...
    def decorator(fn: FN[T]) -> FN[T]:
        return wrapper(fn)

    return decorator
chameleon/inference/generation.py
ADDED
@@ -0,0 +1,162 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.

from dataclasses import dataclass

import torch
from transformers import (
    LogitsProcessor,
    LogitsProcessorList,
)
from transformers.generation.streamers import BaseStreamer

from chameleon.inference.alignment import AlignPromptLeft, PromptAlignment
from chameleon.inference.model_adapter import ModelAdapter
from chameleon.inference.stopping_criteria import StoppingCriteria, StoppingCriteriaList
from chameleon.inference.token_selector import MultinomialTokenSelector, TokenSelector


class ChameleonGenerator:
    @dataclass
    class Token:
        id: torch.LongTensor
        logits: torch.Tensor | None

    def __init__(
        self,
        model: ModelAdapter,
        input_ids: list[list[int]],
        stopping_criteria: StoppingCriteriaList | list[StoppingCriteria] | None = None,
        logits_processors: LogitsProcessorList | list[LogitsProcessor] | None = None,
        probability_processors: LogitsProcessorList
        | list[LogitsProcessor]
        | None = None,
        token_selector: TokenSelector | None = None,
        alignment: PromptAlignment = AlignPromptLeft(),
    ):
        assert model.supports_alignment(alignment)

        self.model = model

        self.stopping_criteria = stopping_criteria
        self.logits_processors = logits_processors
        self.probability_processors = probability_processors
        self.token_selector: TokenSelector = (
            token_selector or MultinomialTokenSelector()
        )

        self.alignment = alignment

        self.model.initialize(input_ids)

        self._inputs = self.alignment.prepare_inputs(
            input_ids
        )  # inputs.shape = [batch, seq-len]

        self._idx = 0
        self._start_idx = self.alignment.start_index(input_ids)

        self._original_inputs = self._inputs.clone()
        self._inputs = self._inputs[:, : self._start_idx]

    def __iter__(self):
        return self

    @torch.inference_mode()
    def __next__(self) -> Token:
        # Are we done?
        if self.stopping_criteria(self._inputs, None):
            raise StopIteration

        # Emit initial tokens.
        # Model is not run for these.
        # If you want the logits, you can do a separate forward pass outside generation.
        if self._idx < self._start_idx:
            idx, self._idx = self._idx, self._idx + 1
            return ChameleonGenerator.Token(id=self._inputs[:, idx], logits=None)

        # Run the model for the next token.
        self._inputs = self._inputs.contiguous()
        outputs = self.model(self._inputs)  # outputs.shape = [batch, seq-len, vocab]

        # Pull out and process the logits.
        logits = outputs[:, -1, :]  # logits.shape = [batch, vocab]
        logits = self.logits_processors(self._inputs, logits)
        probs = logits.softmax(dim=1)  # probs.shape = [batch, vocab]
        probs = self.probability_processors(self._inputs, probs)

        # Select a token and add it to the inputs.
        next_tokens = self.token_selector(
            self._inputs, probs
        )  # next_tokens.shape = [batch]
        self._inputs = torch.cat([self._inputs, next_tokens[:, None]], dim=1)

        # Run alignment specific postprocessing.
        self._inputs = self.alignment.postprocess_inputs(
            self._inputs, self._original_inputs
        )

        # Return the next step result.
        return ChameleonGenerator.Token(id=self._inputs[:, -1], logits=logits)

    @property
    def stopping_criteria(self) -> StoppingCriteriaList:
        return self._stopping_criteria

    @stopping_criteria.setter
    def stopping_criteria(
        self, value: StoppingCriteriaList | list[StoppingCriteria] | None
    ):
        self._stopping_criteria = StoppingCriteriaList(value or [])

    @property
    def logits_processors(self) -> LogitsProcessorList:
        return self._logits_processors

    @logits_processors.setter
    def logits_processors(
        self, value: LogitsProcessorList | list[LogitsProcessor] | None
    ):
        self._logits_processors = LogitsProcessorList(value or [])

    @property
    def probability_processors(self) -> LogitsProcessorList:
        return self._probability_processors

    @probability_processors.setter
    def probability_processors(
        self, value: LogitsProcessorList | list[LogitsProcessor] | None
    ):
        self._probability_processors = LogitsProcessorList(value or [])


def run_generation(
    model: torch.nn.Module,
    input_ids: list[list[int]],
    stopping_criteria: StoppingCriteriaList | list[StoppingCriteria],
    logits_processors: LogitsProcessorList | list[LogitsProcessor] | None = None,
    probability_processors: LogitsProcessorList | list[LogitsProcessor] | None = None,
    token_selector: TokenSelector | None = None,
    alignment: PromptAlignment = AlignPromptLeft(),
    streamer: BaseStreamer | None = None,
) -> torch.LongTensor:
    result = torch.empty((len(input_ids), 0), dtype=int)
    for tok in ChameleonGenerator(
        model=model,
        input_ids=input_ids,
        stopping_criteria=stopping_criteria,
        logits_processors=logits_processors,
        probability_processors=probability_processors,
        token_selector=token_selector,
        alignment=alignment,
    ):
        if streamer is not None:
            streamer.put(tok.id)
        result = torch.cat([result, tok.id.view(-1, 1)], dim=1)

    if streamer is not None:
        streamer.end()

    return result
chameleon/inference/image_tokenizer.py
ADDED
@@ -0,0 +1,125 @@
# Copyright (c) Meta Platforms, Inc. and affiliates
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import PIL
import torch
import yaml
from PIL import Image

from chameleon.inference.vqgan import VQModel


class ImageTokenizer:
    def __init__(
        self,
        cfg_path: str,
        ckpt_path: str,
        device: str | torch.device | None = None,
    ):
        with open(cfg_path) as f:
            config = yaml.safe_load(f)

        params = config["model"]["params"]
        if "lossconfig" in params:
            del params["lossconfig"]
        params["ckpt_path"] = ckpt_path

        self._vq_model = VQModel(**params)
        self._vq_model.eval()

        if device is None:
            devices = {p.device for p in self._vq_model.parameters()}
            assert len(devices) == 1
            device = devices.pop()
        else:
            self._vq_model.to(device)
        self._device = device

        dtypes = {p.dtype for p in self._vq_model.parameters()}
        assert len(dtypes) == 1
        self._dtype = dtypes.pop()

    def _whiten_transparency(self, img: PIL.Image) -> PIL.Image:
        # Check if it's already in RGB format.
        if img.mode == "RGB":
            return img

        vals_rgba = np.array(img.convert("RGBA"))

        # If there is no transparency layer, simply convert and return.
        if not (vals_rgba[:, :, 3] < 255).any():
            return img.convert("RGB")

        # There is a transparency layer, blend it with a white background.

        # Calculate the alpha proportion for blending.
        alpha = vals_rgba[:, :, 3] / 255.0
        # Blend with white background.
        vals_rgb = (1 - alpha[:, :, np.newaxis]) * 255 + alpha[
            :, :, np.newaxis
        ] * vals_rgba[:, :, :3]
        return PIL.Image.fromarray(vals_rgb.astype("uint8"), "RGB")

    def _vqgan_input_from(self, img: PIL.Image, target_image_size=512) -> torch.Tensor:
        # Resize with aspect ratio preservation.
        s = min(img.size)
        scale = target_image_size / s
        new_size = (round(scale * img.size[0]), round(scale * img.size[1]))
        img = img.resize(new_size, PIL.Image.LANCZOS)

        # Center crop.
        x0 = (img.width - target_image_size) // 2
        y0 = (img.height - target_image_size) // 2
        img = img.crop((x0, y0, x0 + target_image_size, y0 + target_image_size))

        # Convert to tensor.
        np_img = np.array(img) / 255.0  # Normalize to [0, 1]
        np_img = np_img * 2 - 1  # Scale to [-1, 1]
        tensor_img = (
            torch.from_numpy(np_img).permute(2, 0, 1).float()
        )  # (Channels, Height, Width) format.

        # Add batch dimension.
        return tensor_img.unsqueeze(0)

    def img_tokens_from_pil(self, image: PIL.Image) -> list[int]:
        image = self._whiten_transparency(image)
        vqgan_input = self._vqgan_input_from(image).to(self._device).to(self._dtype)
        _, _, [_, _, img_toks] = self._vq_model.encode(vqgan_input)
        return img_toks

    def _pil_from_chw_tensor(self, chw_tensor: torch.Tensor) -> PIL.Image:
        # Ensure detachment and move tensor to CPU.
        detached_chw_tensor = chw_tensor.detach().cpu()

        # Normalize tensor to [0, 1] range from [-1, 1] range.
        normalized_chw_tensor = (
            torch.clamp(detached_chw_tensor, -1.0, 1.0) + 1.0
        ) / 2.0

        # Permute CHW tensor to HWC format and convert to NumPy array.
        hwc_array = normalized_chw_tensor.permute(1, 2, 0).numpy()

        # Convert to an 8-bit unsigned integer format.
        image_array_uint8 = (hwc_array * 255).astype(np.uint8)

        # Convert NumPy array to PIL Image.
        pil_image = Image.fromarray(image_array_uint8)

        # Convert image to RGB if it is not already.
        if pil_image.mode != "RGB":
            pil_image = pil_image.convert("RGB")

        return pil_image

    def pil_from_img_toks(self, img_tensor: torch.Tensor) -> PIL.Image:
        emb_dim = self._vq_model.quantize.embedding.weight.shape[-1]
        codebook_entry = self._vq_model.quantize.get_codebook_entry(
            img_tensor, (1, 32, 32, emb_dim)
        )
        pixels = self._vq_model.decode(codebook_entry)
        return self._pil_from_chw_tensor(pixels[0])
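A rough round-trip sketch of the tokenizer above (paths and filenames are hypothetical; this mirrors how TokenManager in chameleon.py uses it): encode a PIL image to its 1024 VQGAN codebook ids (a 32x32 latent grid over a 512x512 crop), then decode them back to pixels.

import PIL.Image
import torch

from chameleon.inference.image_tokenizer import ImageTokenizer

tokenizer = ImageTokenizer(
    cfg_path="./data/tokenizer/vqgan.yaml",
    ckpt_path="./data/tokenizer/vqgan.ckpt",
    device="cuda",
)

img_toks = tokenizer.img_tokens_from_pil(PIL.Image.open("example.png"))  # 1024 codebook ids
reconstruction = tokenizer.pil_from_img_toks(torch.as_tensor(img_toks))  # back to a 512x512 PIL image
reconstruction.save("reconstruction.png")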
chameleon/inference/loader.py
ADDED
@@ -0,0 +1,71 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.

import glob
import inspect
import json
from pathlib import Path

import torch

from chameleon.inference.transformer import ModelArgs, Transformer


def _convert(model_args: ModelArgs, consolidated_path: Path) -> Transformer:
    old_default_dtype = torch.get_default_dtype()
    torch.set_default_dtype(torch.bfloat16)

    model = Transformer(model_args)

    transfer_results = model.load_state_dict(
        torch.load(str(consolidated_path), map_location='cuda'),
        strict=False,
    )

    # TODO: More generally, assert missing or unexpected keys are buffers.
    assert transfer_results.missing_keys == []
    assert transfer_results.unexpected_keys == ["rope.freqs"]

    model.eval()

    torch.set_default_dtype(old_default_dtype)
    return model


def _get_checkpoint_path(src_dir: Path, rank: int | None) -> Path:
    base_path = src_dir / "consolidated.pth"
    if not rank and base_path.exists():
        return base_path

    alt_path = src_dir / f"consolidated.{rank:02}.pth"
    if alt_path.exists():
        return alt_path

    raise ValueError("Consolidated checkpoint not found.")


def load_model(path: str, rank: int | None = None) -> Transformer:
    src_dir = Path(path)

    with open(src_dir / "params.json", "r") as f:
        params = json.loads(f.read())
    with open(src_dir / "consolidate_params.json", "r") as f:
        consolidate_params = json.loads(f.read())
    params = {**params, **params["model"], **consolidate_params}

    known_param = inspect.signature(ModelArgs.__init__).parameters
    filtered_params = {k: v for k, v in params.items() if k in known_param}

    return _convert(
        ModelArgs(**filtered_params),
        _get_checkpoint_path(src_dir, rank),
    )


def detect_shard_count(path: str) -> int:
    src_dir = Path(path)
    if (src_dir / "consolidated.pth").exists():
        return 1
    return len(glob.glob(str(src_dir / "consolidated.*.pth")))
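A short, hypothetical sketch of how `load_model` and `detect_shard_count` are typically combined (the checkpoint directory is a placeholder; it must contain `params.json`, `consolidate_params.json`, and the consolidated weights):

```python
from chameleon.inference.loader import detect_shard_count, load_model

ckpt_dir = "./data/models/7b"  # placeholder path
if detect_shard_count(ckpt_dir) == 1:
    model = load_model(ckpt_dir)          # single consolidated.pth
else:
    model = load_model(ckpt_dir, rank=0)  # one consolidated.XX.pth per model-parallel rank
```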
chameleon/inference/logits_processor.py
ADDED
@@ -0,0 +1,336 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.

import math

import torch
from transformers import LogitsProcessor


class TopPProbabilityProcessor(LogitsProcessor):
    # Modified version of TopPLogitsWarper to act on probabilities.
    # Changes:
    # * filter_value changed from -inf to 0
    # * removed softmax
    # * renormalize L1

    def __init__(
        self,
        top_p: float,
        min_tokens_to_keep: int = 1,
    ):
        top_p = float(top_p)
        if top_p < 0 or top_p > 1.0:
            raise ValueError(f"`top_p` has to be a float > 0 and < 1, but is {top_p}")
        if not isinstance(min_tokens_to_keep, int) or (min_tokens_to_keep < 1):
            raise ValueError(
                f"`min_tokens_to_keep` has to be a positive integer, but is {min_tokens_to_keep}"
            )

        self.top_p = top_p
        self.min_tokens_to_keep = min_tokens_to_keep

    def __call__(
        self, input_ids: torch.LongTensor, probs: torch.FloatTensor
    ) -> torch.FloatTensor:
        # input_ids.shape=[batch, seq-len]
        # probs.shape=[batch, vocab]
        sorted_probs, sorted_indices = torch.sort(probs, descending=False)
        cumulative_probs = sorted_probs.cumsum(dim=-1)

        # Remove tokens with cumulative top_p above the threshold (token with 0 are kept)
        sorted_indices_to_remove = cumulative_probs <= (1 - self.top_p)
        # Keep at least min_tokens_to_keep
        sorted_indices_to_remove[..., -self.min_tokens_to_keep :] = 0

        # scatter sorted tensors to original indexing
        indices_to_remove = sorted_indices_to_remove.scatter(
            1, sorted_indices, sorted_indices_to_remove
        )
        probs = probs.masked_fill(indices_to_remove, 0.0)
        probs = probs / probs.sum(dim=-1, keepdim=True)
        return probs


class DisallowTokensInIndexRangeLogitsProcessor(LogitsProcessor):
    def __init__(
        self, token_ids: list[int], start_index: int, end_index: int | None = None
    ):
        self.token_ids = torch.tensor(token_ids)
        self.start_index = start_index
        self.end_index = end_index if end_index is not None else math.inf

    def __call__(
        self, input_ids: torch.LongTensor, logits: torch.FloatTensor
    ) -> torch.FloatTensor:
        current_index = input_ids.shape[1]
        if self.start_index <= current_index < self.end_index:
            logits[:, self.token_ids] = -math.inf
        return logits


class DisallowTokensLogitsProcessor(DisallowTokensInIndexRangeLogitsProcessor):
    def __init__(self, token_ids: list[int]):
        super().__init__(token_ids, 0)


class DisallowTokensAtIndexLogitsProcessor(DisallowTokensInIndexRangeLogitsProcessor):
    def __init__(self, token_ids: list[int], index: int):
        super().__init__(token_ids, index, index + 1)


class DisallowTokensAfterIndexLogitsProcessor(
    DisallowTokensInIndexRangeLogitsProcessor
):
    def __init__(self, token_ids: list[int], index: int):
        super().__init__(token_ids, index + 1)


class DisallowTokensAtOrAfterIndexLogitsProcessor(
    DisallowTokensInIndexRangeLogitsProcessor
):
    def __init__(self, token_ids: list[int], index: int):
        super().__init__(token_ids, index)


class DisallowTokensInBatchIndexRangeLogitsProcessor(LogitsProcessor):
    def __init__(
        self,
        token_ids: list[int],
        start_indices: list[int],
        end_indices: list[int] | None = None,
    ):
        self.token_ids = torch.tensor(token_ids)
        self.start_indices = torch.tensor(start_indices)
        self.end_indices = (
            torch.tensor(end_indices)
            if end_indices is not None
            else torch.full_like(self.start_indices, math.inf, dtype=torch.float)
        )

    def __call__(
        self, input_ids: torch.LongTensor, logits: torch.FloatTensor
    ) -> torch.FloatTensor:
        # input_ids.shape = [batch, seq_len]
        # logits.shape = [batch, vocab]
        current_index = input_ids.shape[1]
        mask = (self.start_indices <= current_index) & (
            current_index < self.end_indices
        )
        # The following will fail if the mask is all False.
        # logits[mask, self.token_ids] = -math.inf
        logits[torch.where(mask)[0].unsqueeze(1), self.token_ids] = -math.inf
        return logits


class DisallowTokensAtBatchIndexLogitsProcessor(
    DisallowTokensInBatchIndexRangeLogitsProcessor
):
    def __init__(self, token_ids: list[int], batch_index: list[int]):
        super().__init__(token_ids, batch_index, [i + 1 for i in batch_index])


class AllowOnlyTokensInIndexRangeLogitsProcessor(LogitsProcessor):
    def __init__(
        self, token_ids: list[int], start_index: int, end_index: int | None = None
    ):
        self.token_ids = torch.tensor(token_ids)
        self.start_index = start_index
        self.end_index = end_index if end_index is not None else math.inf

    def __call__(
        self, input_ids: torch.LongTensor, logits: torch.FloatTensor
    ) -> torch.FloatTensor:
        current_index = input_ids.shape[1]
        if self.start_index <= current_index < self.end_index:
            replacement = torch.full_like(logits, -math.inf)
            replacement[:, self.token_ids] = logits[:, self.token_ids]
            logits[:] = replacement
        return logits


class AllowOnlyTokensLogitsProcessor(AllowOnlyTokensInIndexRangeLogitsProcessor):
    def __init__(self, token_ids: list[int]):
        super().__init__(token_ids, 0)


class AllowOnlyTokensAtIndexLogitsProcessor(AllowOnlyTokensInIndexRangeLogitsProcessor):
    def __init__(self, token_ids: list[int], index: int):
        super().__init__(token_ids, index, index + 1)


class AllowOnlyTokensAfterIndexLogitsProcessor(
    AllowOnlyTokensInIndexRangeLogitsProcessor
):
    def __init__(self, token_ids: list[int], index: int):
        super().__init__(token_ids, index + 1)


class AllowOnlyTokensAtOrAfterIndexLogitsProcessor(
    AllowOnlyTokensInIndexRangeLogitsProcessor
):
    def __init__(self, token_ids: list[int], index: int):
        super().__init__(token_ids, index)


class AllowOnlyTokensInBatchIndexRangeLogitsProcessor(LogitsProcessor):
    def __init__(
        self,
        token_ids: list[int],
        start_indices: list[int],
        end_indices: list[int] | None = None,
    ):
        self.token_ids = torch.tensor(token_ids)
        self.start_indices = torch.tensor(start_indices)
        self.end_indices = (
            torch.tensor(end_indices)
            if end_indices is not None
            else torch.full_like(self.start_indices, math.inf, dtype=torch.float)
        )

    def __call__(
        self, input_ids: torch.LongTensor, logits: torch.FloatTensor
    ) -> torch.FloatTensor:
        # input_ids.shape = [batch, seq_len]
        # logits.shape = [batch, vocab]
        current_index = input_ids.shape[1]
        mask = (self.start_indices <= current_index) & (
            current_index < self.end_indices
        )

        valid_batch_indices = torch.where(mask)[0].unsqueeze(1)
        full_mask = torch.full_like(logits, -math.inf)
        full_mask[valid_batch_indices, self.token_ids] = logits[
            valid_batch_indices, self.token_ids
        ]

        logits[:] = torch.where(full_mask != -math.inf, full_mask, logits)
        return logits


class AllowOnlyTokensAtRelativeOffsetLogitsProcessor(LogitsProcessor):
    def __init__(
        self, trigger_token_id: int, subsequent_token_ids: list[int], offset: int
    ):
        self.trigger_token_id = trigger_token_id
        self.subsequent_token_ids = torch.tensor(subsequent_token_ids)
        self.offset = offset

    def __call__(
        self, input_ids: torch.LongTensor, logits: torch.FloatTensor
    ) -> torch.FloatTensor:
        # input_ids.shape=[batch, seq_len]
        # logits.shape=[batch, vocab]
        if input_ids.shape[1] < self.offset:
            return logits

        trigger_positions = (
            input_ids[:, -self.offset] == self.trigger_token_id
        ).unsqueeze(-1)

        disallowed_tokens_mask = torch.ones_like(logits, dtype=bool)
        disallowed_tokens_mask[:, self.subsequent_token_ids] = False

        return logits.masked_fill_(
            disallowed_tokens_mask & trigger_positions,
            -math.inf,
        )


class AllowOnlyTokensInRelativeWindowLogitsProcessor(LogitsProcessor):
    def __init__(self, trigger_token_id: int, allowed_token_ids: list[int], width: int):
        self.trigger_token_id = trigger_token_id
        self.allowed_token_ids = torch.tensor(allowed_token_ids).unsqueeze(
            0
        )  # shape: [1, num_allowed_tokens]
        self.width = width

    def __call__(
        self, input_ids: torch.LongTensor, logits: torch.FloatTensor
    ) -> torch.FloatTensor:
        # input_ids.shape=[batch, seq_len]
        # logits.shape=[batch, vocab]
        width = min(self.width, input_ids.shape[1])
        trigger_positions = (
            (input_ids[:, -width:] == self.trigger_token_id).any(dim=1).unsqueeze(-1)
        )

        disallowed_tokens_mask = torch.ones_like(logits, dtype=bool)
        disallowed_tokens_mask[:, self.allowed_token_ids] = False

        return logits.masked_fill_(
            disallowed_tokens_mask & trigger_positions,
            -math.inf,
        )


class CFGLogitsProcessor(LogitsProcessor):
    def __init__(
        self,
        guidance_scale: float,
        unconditional_ids: torch.LongTensor,
        model,
    ):
        self.guidance_scale = guidance_scale
        self.unconditional_ids = unconditional_ids
        self.model = model

    def __call__(
        self, input_ids: torch.LongTensor, logits: torch.FloatTensor
    ) -> torch.FloatTensor:
        conditioned_logits = logits

        self.unconditional_ids = torch.cat(
            [self.unconditional_ids, input_ids[:, -1:]], dim=1
        )
        unconditioned_outputs = self.model(self.unconditional_ids)
        unconditioned_logits = unconditioned_outputs[:, -1, :]
        return (
            self.guidance_scale * (conditioned_logits - unconditioned_logits)
            + unconditioned_logits
        )


class InBatchCFGLogitsProcessor(LogitsProcessor):
    def __init__(self, guidance_scale: float):
        self.guidance_scale = guidance_scale

    def __call__(
        self, input_ids: torch.LongTensor, logits: torch.FloatTensor
    ) -> torch.FloatTensor:
        # input_ids.shape=[2*batch, seq-len]
        # logits.shape=[2*batch, vocab]
        conditioned_logits, unconditioned_logits = torch.chunk(logits, chunks=2, dim=0)
        mixed_logits = unconditioned_logits + self.guidance_scale * (
            conditioned_logits - unconditioned_logits
        )
        return mixed_logits.repeat(2, 1)


class InBatchInstructCFGLogitsProcessor(LogitsProcessor):
    # See https://arxiv.org/abs/2211.09800

    def __init__(self, guidance_scale_text: float, guidance_scale_image: float):
        self.guidance_scale_text = guidance_scale_text
        self.guidance_scale_image = guidance_scale_image

    def __call__(
        self, input_ids: torch.LongTensor, logits: torch.FloatTensor
    ) -> torch.FloatTensor:
        # input_ids.shape=[3*batch, seq-len]
        # logits.shape=[3*batch, vocab]
        (
            full_conditioned_logits,
            image_conditioned_logits,
            unconditioned_logits,
        ) = logits.chunk(3)
        mixed_logits = (
            unconditioned_logits
            + self.guidance_scale_image
            * (image_conditioned_logits - unconditioned_logits)
            + self.guidance_scale_text
            * (full_conditioned_logits - image_conditioned_logits)
        )
        return mixed_logits.repeat(3, 1)
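These processors follow the HuggingFace `LogitsProcessor` calling convention and are meant to be chained; a hypothetical sketch of the pattern (token ids and indices are placeholders):

```python
from chameleon.inference.logits_processor import (
    AllowOnlyTokensInIndexRangeLogitsProcessor,
    InBatchCFGLogitsProcessor,
)

# Hypothetical sketch: restrict a span of positions to a token subset and apply in-batch CFG.
processors = [
    AllowOnlyTokensInIndexRangeLogitsProcessor(
        token_ids=[5, 6, 7], start_index=10, end_index=1034  # placeholder values
    ),
    InBatchCFGLogitsProcessor(guidance_scale=3.0),
]

def process(input_ids, logits):
    for p in processors:
        logits = p(input_ids, logits)
    return logits
```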
chameleon/inference/model_adapter.py
ADDED
@@ -0,0 +1,118 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.

import math
from abc import ABC, abstractmethod

import torch

from chameleon.inference import transformer
from chameleon.inference.alignment import (
    AlignPromptLeft,
    AlignPromptRight,
    PromptAlignment,
)
from chameleon.inference.cudagraph import cudagraph_wrap


class ModelAdapter(ABC):
    @abstractmethod
    def initialize(self, prompt_tokens: list[list[int]]):
        ...

    @abstractmethod
    def supports_alignment(self, alignment: PromptAlignment) -> bool:
        ...

    @abstractmethod
    @torch.inference_mode()
    def __call__(self, inputs: torch.LongTensor) -> torch.FloatTensor:
        ...


class ChameleonModelAdapter(ModelAdapter):
    """Adapter for Chameleon-style model that handles state, such as cache."""

    def __init__(
        self,
        model: transformer.Transformer,
        max_seq_len: int,
        dtype: torch.dtype | None = None,
    ):
        super().__init__()
        self._args = model.args
        self._model = model
        self._max_seq_len = max_seq_len
        self._dtype = dtype or next(model.parameters()).data.dtype

    def initialize(self, prompt_tokens: list[list[int]]):
        self._prompt_lengths = [len(toks) for toks in prompt_tokens]
        batch_size = len(prompt_tokens)

        self._cache = transformer.make_cache(
            args=self._args,
            length=batch_size * self._max_seq_len,
            dtype=self._dtype,
        )

        self._local_inputs = torch.zeros([batch_size], dtype=int, device="cuda")

        self._forward = cudagraph_wrap(self._model.forward_with_attn_bias)

        self._first_pass = True

    def supports_alignment(self, alignment: PromptAlignment) -> bool:
        return isinstance(alignment, AlignPromptLeft) or isinstance(
            alignment, AlignPromptRight
        )

    def __call__(self, inputs: torch.LongTensor) -> torch.FloatTensor:
        # inputs.shape=[batch, seq-len]
        batch_size, seq_len = inputs.shape

        if self._first_pass:
            attn_seqlen = [min(pl, seq_len) for pl in self._prompt_lengths]
            self._bias = transformer.AttnBias.from_seqlens(
                q_seqlen=attn_seqlen,
                kv_seqlen=attn_seqlen,
                kv_padding=self._max_seq_len,
            )

            mask = torch.zeros_like(inputs, dtype=torch.bool)
            for i, k in enumerate(self._prompt_lengths):
                mask[i, -k:] = True

            flat_outputs: torch.Tensor = self._forward(  # type: ignore
                token_values=inputs[mask],
                attn_bias=self._bias,
                cache=self._cache,
            )
            self._local_outputs = torch.full(
                (inputs.shape[0], inputs.shape[1], flat_outputs.shape[-1]),
                -math.inf,
            )
            self._local_outputs[mask] = flat_outputs

            self._vocab_size = self._local_outputs.shape[-1]

            self._bias.q_seqinfo.seqstart.copy_(
                torch.arange(batch_size + 1, dtype=torch.int)
            )
            self._bias.q_seqinfo.max_seqlen = 1
            self._bias.q_seqinfo.seqstart_py = self._bias.q_seqinfo.seqstart.tolist()

            self._first_pass = False

        else:
            self._local_inputs.copy_(inputs[:, -1])  # type: ignore

            self._local_outputs = self._forward(  # type: ignore
                token_values=self._local_inputs,
                attn_bias=self._bias,
                cache=self._cache,
            )

        self._bias.k_seqinfo.seqlen.add_(1)
        return self._local_outputs.view(batch_size, -1, self._vocab_size)
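The adapter is stateful: the first call runs the full (right-aligned) prompt batch and fills the KV cache, while every later call reads only the last token per sequence. A hypothetical sketch of that calling convention (the sequence length and tensors are placeholders):

```python
from chameleon.inference.model_adapter import ChameleonModelAdapter

adapter = ChameleonModelAdapter(model, max_seq_len=4096)  # placeholder length
adapter.initialize(prompt_tokens)    # list[list[int]], one prompt per batch entry
logits = adapter(prompt_tensor)      # first pass: whole prompts, returns [batch, seq, vocab]
logits = adapter(tokens_so_far)      # later passes: only the last column is consumed
```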
chameleon/inference/stopping_criteria.py
ADDED
@@ -0,0 +1,55 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.

import torch


class StoppingCriteria:
    def __call__(
        self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
    ) -> bool:
        raise NotImplementedError("StoppingCriteria needs to be subclassed")


class StoppingCriteriaList(list):
    def __call__(
        self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
    ) -> bool:
        return any(criteria(input_ids, scores, **kwargs) for criteria in self)


class MaxLengthCriteria(StoppingCriteria):
    def __init__(self, max_length: int):
        self.max_length = max_length

    def __call__(
        self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
    ) -> bool:
        cur_len = input_ids.shape[-1]
        return cur_len >= self.max_length


class StopOnEOS(StoppingCriteria):
    def __init__(self, eos_id: int):
        self._eos_id = eos_id

    def __call__(self, input_ids: torch.LongTensor, _: torch.FloatTensor) -> bool:
        # input_ids.shape=[batch, seq_len]
        return (input_ids == self._eos_id).sum(dim=1).all()


class StopOnEOSAfterBatchIndex(StoppingCriteria):
    def __init__(self, eos_id: int, batch_index: list[int]):
        self._eos_id = eos_id
        self.batch_index = torch.tensor(batch_index, dtype=torch.long).unsqueeze(1)

    def __call__(self, input_ids: torch.LongTensor, _: torch.FloatTensor) -> bool:
        # input_ids.shape=[batch, seq_len]
        eos_mask = input_ids == self._eos_id
        consider_eos_mask = (
            torch.arange(input_ids.shape[1]).unsqueeze(0) >= self.batch_index
        )
        valid_eos = eos_mask & consider_eos_mask
        return valid_eos.sum(dim=1).all()
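A hypothetical sketch of combining criteria, mirroring the HuggingFace-style interface these classes follow (the eos id is a placeholder):

```python
from chameleon.inference.stopping_criteria import (
    MaxLengthCriteria,
    StoppingCriteriaList,
    StopOnEOS,
)

stopping = StoppingCriteriaList([
    MaxLengthCriteria(max_length=4096),
    StopOnEOS(eos_id=2),  # placeholder id
])
done = stopping(input_ids, None)  # True once every sequence has emitted eos or hit the cap
```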
chameleon/inference/token_selector.py
ADDED
@@ -0,0 +1,47 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.

import torch


class TokenSelector:
    def __call__(
        self, input_ids: torch.LongTensor, probs: torch.FloatTensor
    ) -> torch.FloatTensor:
        # input_ids.shape=[batch, seq_len]
        # probs.shape=[batch, vocab]
        ...


class ArgmaxTokenSelector(TokenSelector):
    def __call__(
        self, _: torch.LongTensor, probs: torch.FloatTensor
    ) -> torch.LongTensor:
        # probs.shape=[batch, vocab]
        return probs.argmax(dim=1)


class MultinomialTokenSelector(TokenSelector):
    def __call__(
        self, _: torch.LongTensor, probs: torch.FloatTensor
    ) -> torch.LongTensor:
        # probs.shape=[batch, vocab]
        return probs.multinomial(num_samples=1).squeeze(1)


class ReplicatedInputTokenSelector(TokenSelector):
    def __init__(self, token_selector: TokenSelector, n: int):
        self.token_selector = token_selector
        self.n = n

    def __call__(
        self, input_ids: torch.LongTensor, probs: torch.FloatTensor
    ) -> torch.LongTensor:
        # input_ids.shape=[n*batch, seq_len]
        # probs.shape=[n*batch, vocab]
        primary_input_ids = torch.chunk(input_ids, chunks=self.n, dim=0)[0]
        primary_probs = torch.chunk(probs, chunks=self.n, dim=0)[0]
        tokens = self.token_selector(primary_input_ids, primary_probs)
        return tokens.repeat(self.n)
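A hypothetical sketch of sampling with a replicated (e.g. CFG) batch, where only the primary chunk is sampled and the result is copied to the other replicas:

```python
from chameleon.inference.token_selector import (
    MultinomialTokenSelector,
    ReplicatedInputTokenSelector,
)

selector = ReplicatedInputTokenSelector(MultinomialTokenSelector(), n=2)
next_tokens = selector(input_ids, probs)  # shape [2*batch]; both halves get the same tokens
```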
chameleon/inference/transformer.py
ADDED
@@ -0,0 +1,421 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.

from dataclasses import dataclass

import torch
from torch import distributed as dist
from torch import nn
from torch.nn import functional as F
from xformers.ops import RMSNorm, fmha, rope_padded
from xformers.ops.fmha.attn_bias import (
    BlockDiagonalCausalWithOffsetPaddedKeysMask as AttnBias,
)


@dataclass
class ModelArgs:
    model_parallel_size: int = 1
    dim: int = 512
    n_layers: int = 8
    n_heads: int = 8
    n_kv_heads: int | None = None
    vocab_size: int = -1
    ffn_dim_multiplier: float | None = None
    multiple_of: int = 256
    norm_eps: float = 1e-5
    rope_theta: float = 10000.0
    qk_normalization: bool = False
    swin_norm: bool = False


LayerCache = tuple[torch.Tensor, torch.Tensor]


class Attention(nn.Module):
    def __init__(
        self,
        model_parallel_size: int,
        dim: int,
        head_dim: int,
        n_heads: int,
        n_kv_heads: int,
        rope_theta: float,
        qk_normalization: bool = False,
    ):
        super().__init__()

        self.model_parallel_size = model_parallel_size

        self.head_dim = head_dim
        self.rope_theta = rope_theta

        self.n_local_heads = n_heads // model_parallel_size
        self.n_local_kv_heads = n_kv_heads // model_parallel_size

        self.wqkv = nn.Linear(
            dim,
            (self.n_local_heads + 2 * self.n_local_kv_heads) * head_dim,
            bias=False,
            dtype=torch.bfloat16,
        )
        self.wo = nn.Linear(
            self.n_local_heads * head_dim,
            dim,
            bias=False,
            dtype=torch.bfloat16,
        )

        self.qk_normalization = qk_normalization
        if qk_normalization:
            self.q_normalization = torch.nn.LayerNorm(head_dim)
            self.k_normalization = torch.nn.LayerNorm(head_dim)

        self._register_load_state_dict_pre_hook(self.load_hook)

    # This adapter makes sure we can load vanilla
    # Llama checkpoints where wq, wk, and wv are
    # not fused in a single parameter
    def load_hook(
        self,
        state_dict,
        prefix,
        local_metadata,
        strict,
        missing_keys,
        unexpected_keys,
        error_msgs,
    ):
        if prefix + "wq.weight" in state_dict:
            wq = state_dict.pop(prefix + "wq.weight")
            wk = state_dict.pop(prefix + "wk.weight")
            wv = state_dict.pop(prefix + "wv.weight")
            state_dict[prefix + "wqkv.weight"] = torch.cat([wq, wk, wv])

    def forward(
        self,
        x: torch.Tensor,
        cache: LayerCache,
        attn_bias: AttnBias,
        group: dist.ProcessGroup | None = None,
    ) -> torch.Tensor:
        # x.shape is (sum(seq_lens), dim)
        #
        # Since we support heterogeneous sequence
        # lengths, the hidden states are all
        # concatenated together along the usual
        # sequence dimension. The attention below
        # finds out where sequences start & end
        # using the provided attention bias.
        xqkv = self.wqkv(x)
        xq = xqkv[:, : (self.n_local_heads * self.head_dim)]
        xkv = xqkv[:, (self.n_local_heads * self.head_dim) :]
        xk, xv = xkv.chunk(2, 1)

        if self.qk_normalization:
            xq = xq.view(-1, self.n_local_heads, self.head_dim)
            xq = self.q_normalization(xq)
            xq = xq.view(-1, self.n_local_heads * self.head_dim)

            xk = xk.view(-1, self.n_local_kv_heads, self.head_dim)
            xk = self.k_normalization(xk)
            xk = xk.view(-1, self.n_local_kv_heads * self.head_dim)

        output_shape = xq.shape
        xq = xq.view(1, xq.shape[0], self.n_local_heads, self.head_dim)
        xk = xk.view(1, xk.shape[0], self.n_local_kv_heads, self.head_dim)
        xv = xv.view(1, xv.shape[0], self.n_local_kv_heads, self.head_dim)
        cache_k, cache_v = cache

        xq = rope_padded(
            xq=xq,
            xk=xk,
            xv=xv,
            cache_k=cache_k,
            cache_v=cache_v,
            attn_bias=attn_bias,
            theta=self.rope_theta,
        )

        # Handle GQA
        # Q shape: [B, M, Hkv, Hq // Hkv, K]
        heads_per_group = self.n_local_heads // self.n_local_kv_heads
        cache_k = cache_k.unsqueeze(3).expand(-1, -1, -1, heads_per_group, -1)
        cache_v = cache_v.unsqueeze(3).expand(-1, -1, -1, heads_per_group, -1)
        xq = xq.reshape(
            [*xq.shape[:2], self.n_local_kv_heads, heads_per_group, xq.shape[-1]]
        )

        # rope_padded() updated the caches, so we
        # call attention directly
        output = fmha.memory_efficient_attention_forward(
            xq, cache_k, cache_v, attn_bias
        )

        output = self.wo(output.reshape(output_shape))
        if self.model_parallel_size > 1:
            dist.all_reduce(output, group=group)

        return output


class FeedForward(nn.Module):
    def __init__(
        self,
        model_parallel_size: int,
        dim: int,
        hidden_dim: int,
        multiple_of: int,
        ffn_dim_multiplier: float | None,
    ):
        super().__init__()

        self.model_parallel_size = model_parallel_size

        hidden_dim = int(2 * hidden_dim / 3)
        if ffn_dim_multiplier is not None:
            hidden_dim = int(ffn_dim_multiplier * hidden_dim)
        hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of)
        assert hidden_dim % model_parallel_size == 0

        self.w13 = nn.Linear(
            dim,
            2 * hidden_dim // model_parallel_size,
            bias=False,
        )
        self.w2 = nn.Linear(
            hidden_dim // model_parallel_size,
            dim,
            bias=False,
        )
        self._register_load_state_dict_pre_hook(self.load_hook)

    # This adapter makes sure we can load vanilla
    # Llama checkpoints where w1 and w3 are not
    # fused in a single parameter
    def load_hook(
        self,
        state_dict,
        prefix,
        local_metadata,
        strict,
        missing_keys,
        unexpected_keys,
        error_msgs,
    ):
        if prefix + "w1.weight" in state_dict:
            w1 = state_dict.pop(prefix + "w1.weight")
            w3 = state_dict.pop(prefix + "w3.weight")
            state_dict[prefix + "w13.weight"] = torch.cat([w1, w3])

    def forward(
        self, x: torch.Tensor, group: dist.ProcessGroup | None = None
    ) -> torch.Tensor:
        x13 = self.w13(x)
        x1, x3 = x13.chunk(2, -1)
        output = self.w2(F.silu(x1) * x3)
        if self.model_parallel_size > 1:
            dist.all_reduce(output, group=group)
        return output


class TransformerBlock(nn.Module):
    def __init__(self, args: ModelArgs):
        super().__init__()

        assert args.dim % args.n_heads == 0
        head_dim = args.dim // args.n_heads
        if args.n_kv_heads is not None:
            n_kv_heads = args.n_kv_heads
        else:
            n_kv_heads = args.n_heads

        model_parallel_size = args.model_parallel_size
        assert args.n_heads % n_kv_heads == 0
        assert args.n_heads % model_parallel_size == 0
        assert n_kv_heads % model_parallel_size == 0

        self.attention = Attention(
            model_parallel_size=model_parallel_size,
            dim=args.dim,
            head_dim=head_dim,
            n_heads=args.n_heads,
            n_kv_heads=n_kv_heads,
            rope_theta=args.rope_theta,
            qk_normalization=args.qk_normalization,
        )
        self.feed_forward = FeedForward(
            model_parallel_size=model_parallel_size,
            dim=args.dim,
            hidden_dim=4 * args.dim,
            multiple_of=args.multiple_of,
            ffn_dim_multiplier=args.ffn_dim_multiplier,
        )
        self.attention_norm = RMSNorm(args.dim, eps=args.norm_eps)
        self.ffn_norm = RMSNorm(args.dim, eps=args.norm_eps)
        self.swin_norm = args.swin_norm

    def forward(
        self,
        x: torch.Tensor,
        cache: LayerCache,
        attn_bias: AttnBias,
        group: dist.ProcessGroup | None = None,
    ) -> torch.Tensor:
        if self.swin_norm:
            h = x + self.attention_norm(
                self.attention.forward(
                    x,
                    cache,
                    attn_bias,
                    group=group,
                )
            )
            out = h + self.ffn_norm(self.feed_forward(h, group=group))
        else:
            h = x + self.attention.forward(
                self.attention_norm(x),
                cache,
                attn_bias,
                group=group,
            )
            out = h + self.feed_forward(self.ffn_norm(h), group=group)
        return out


class Transformer(nn.Module):
    def __init__(self, args: ModelArgs):
        super().__init__()
        self.args = args

        self.model_parallel_size = args.model_parallel_size
        assert args.dim % self.model_parallel_size == 0
        assert args.vocab_size > 0
        assert args.vocab_size % self.model_parallel_size == 0

        self.tok_embeddings = nn.Embedding(
            num_embeddings=args.vocab_size,
            embedding_dim=args.dim // self.model_parallel_size,
        )

        self.layers = nn.ModuleList()
        for _ in range(args.n_layers):
            self.layers.append(TransformerBlock(args))

        self.norm = RMSNorm(args.dim, eps=args.norm_eps)

        self.output = nn.Linear(
            args.dim,
            args.vocab_size // self.model_parallel_size,
            bias=False,
        )

    @torch.no_grad()
    def forward_with_attn_bias(
        self,
        token_values: torch.Tensor,
        attn_bias: AttnBias,
        cache: list[LayerCache],
        group: dist.ProcessGroup | None = None,
    ) -> torch.Tensor:
        h = self.tok_embeddings(token_values)
        if self.model_parallel_size > 1:
            gather = [torch.empty_like(h) for _ in range(self.model_parallel_size)]
            dist.all_gather(gather, h, group=group)
            h = torch.cat(gather, dim=-1)

        for i, layer in enumerate(self.layers):
            h = layer(h, cache[i], attn_bias, group=group)

        logits = self.output(self.norm(h))
        if self.model_parallel_size > 1:
            gather = [torch.empty_like(logits) for _ in range(self.model_parallel_size)]
            dist.all_gather(gather, logits, group=group)
            logits = torch.cat(gather, dim=-1)
        return logits.float()

    def forward(
        self,
        token_values: torch.Tensor,
        token_lengths: torch.Tensor,
        start_pos: torch.Tensor,
        cache: list[LayerCache],
        kv_padding: int,
        group: dist.ProcessGroup | None = None,
    ) -> torch.Tensor:
        attn_bias = AttnBias.from_seqlens(
            q_seqlen=token_lengths.tolist(),
            kv_seqlen=(start_pos + token_lengths).tolist(),
            kv_padding=kv_padding,
        )
        return self.forward_with_attn_bias(token_values, attn_bias, cache, group=group)


def make_cache(
    args: ModelArgs,
    length: int,
    device: str | torch.device | None = None,
    n_layers: int | None = None,
    dtype: torch.dtype | None = None,
) -> list[LayerCache]:
    """
    Allocate a cache to be used with the Transformer module.

    Args:
        args (ModelArgs): the model configuration.
        length (int): per layer cache size.
            It is usually budgeted as ``max_batch * max_seq``
        device (torch.device, optional): the device on which
            the cache should be allocated.
        n_layers (int, optional): the number of layers to
            allocate a cache for (defaults to the model
            settings).
        dtype (torch.dtype, optional): the dtype to use for
            cache entries (defaults to the default dtype).

    Returns:
        The cache object to pass to ``Transformer.forward``.
    """

    head_dim = args.dim // args.n_heads
    n_kv_heads = args.n_kv_heads
    if n_kv_heads is None:
        n_kv_heads = args.n_heads
    n_local_kv_heads = n_kv_heads // args.model_parallel_size

    if n_layers is None:
        n_layers = args.n_layers

    shape = (1, length, n_local_kv_heads, head_dim)
    return [
        (
            torch.zeros(shape, device=device, dtype=dtype),
            torch.zeros(shape, device=device, dtype=dtype),
        )
        for _ in range(n_layers)
    ]


def cache_prefix(cache: list[LayerCache], length: int) -> list[LayerCache]:
    """
    Take a prefix view of a larger cache.

    The original cache object remains of identical size and valid
    after the shrunk alias has been used. This function is useful
    when a cache was allocated for a larger batch size than what is
    necessary.

    Args:
        cache: the cache to take a view in.
        length (int): the desired length

    Returns:
        A view in the input cache object.
    """

    if len(cache) > 0:
        assert cache[0][0].shape[1] >= length

    return [(ck[:, :length], cv[:, :length]) for ck, cv in cache]
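A hypothetical sketch of allocating and reusing a KV cache with the helpers above (the model sizes and lengths are placeholder values, not the shipped configuration):

```python
import torch
from chameleon.inference.transformer import ModelArgs, Transformer, cache_prefix, make_cache

args = ModelArgs(dim=4096, n_layers=32, n_heads=32, vocab_size=65536)  # placeholder config
model = Transformer(args)
# Budget the per-layer cache as max_batch * max_seq positions.
cache = make_cache(args, length=4 * 4096, device="cuda", dtype=torch.bfloat16)
small_cache = cache_prefix(cache, length=2 * 4096)  # view reused for a smaller batch
```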
chameleon/inference/utils.py
ADDED
@@ -0,0 +1,34 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.

import socket
from typing import Generator, Generic, Iterator, TypeVar

T = TypeVar("T")


class DynamicGenerator(Generic[T]):
    def __init__(self, gen: Generator[T, None, None]):
        self.gen = gen

    def __iter__(self) -> Iterator[T]:
        return self

    def __next__(self) -> T:
        return next(self.gen)


def advance(iterator: Iterator[T], steps: int):
    try:
        for _ in range(steps):
            next(iterator)
    except StopIteration:
        pass


def random_unused_port():
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("", 0))
        return s.getsockname()[1]
chameleon/inference/vocab.py
ADDED
@@ -0,0 +1,123 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Chameleon License found in the
# LICENSE file in the root directory of this source tree.

from functools import cached_property

import torch


class VocabInfo:
    def __init__(self, vocab_map: dict[str, int]):
        self.name2val = vocab_map

        self.bos_id = vocab_map.get("<s>")
        self.eos_id = vocab_map.get("</s>")
        self.boi_id = vocab_map.get("<racm3:break>")
        self.eoi_id = vocab_map.get("<eoss>")
        self.pad_id = vocab_map.get("<pad>")
        self.eot_id = vocab_map.get("<reserved08706>")

    @property
    def begin_sequence(self) -> int:
        return self.bos_id

    @property
    def end_sequence(self) -> int:
        return self.eos_id

    @property
    def begin_image(self) -> int:
        return self.boi_id

    @property
    def end_image(self) -> int:
        return self.eoi_id

    @property
    def padding(self) -> int:
        return self.pad_id

    @property
    def end_turn(self) -> int:
        return self.eot_id

    @cached_property
    def val2name(self) -> dict[int, str]:
        return {v: k for k, v in self.name2val.items()}

    @cached_property
    def all_tokens(self) -> list[int]:
        return sorted(self.name2val.values())

    @cached_property
    def image_tokens(self) -> list[int]:
        return sorted(
            [val for name, val in self.name2val.items() if name.startswith("IMGIMG")]
        )

    @cached_property
    def special_tokens(self) -> list[int]:
        return sorted(
            [
                val
                for name, val in self.name2val.items()
                if name.startswith("<") and name != "<"
            ]
        )

    @cached_property
    def text_tokens(self) -> list[int]:
        return sorted(
            set(self.all_tokens) - set(self.image_tokens) - set(self.special_tokens)
        )


class VocabTranslation:
    def __init__(self, vocab_info: VocabInfo, device: str | None = None):
        self._vocab = vocab_info
        self._device = device

    @cached_property
    def bpe2img(self) -> dict[int, int]:
        # vocab id => codebook id, i.e. [4:8195] => [0:8191]
        img_tkn_chr_mapping = {chr(ord("A") + i): str(i) for i in range(10)}  # A-J: 0-9

        def remap(old_name: str) -> str:
            # e.g.: IMGIMGFDZ => FD => 53
            return "".join(
                img_tkn_chr_mapping.get(c, c) for c in old_name[len("IMGIMG") : -1]  # last chr is 'Z'
            )

        return {
            tok: int(remap(self._vocab.val2name[tok]))
            for tok in self._vocab.image_tokens  # the token starts with 'IMGIMG', value: [4: 8195]
        }

    @cached_property
    def img2bpe(self) -> dict[int, int]:
        # codebook id => vocab id, i.e. [0:8191] => [4:8195]
        return {v: k for k, v in self.bpe2img.items()}

    @cached_property
    def bpe2img_search_tensors(self) -> tuple[torch.Tensor, torch.Tensor]:
        sorted_bpe = torch.tensor(sorted(self.bpe2img.keys()), device=self._device)
        sorted_img = torch.tensor(sorted(self.bpe2img.values()), device=self._device)
        return sorted_bpe, sorted_img

    @cached_property
    def img2bpe_mapping_tensor(self) -> torch.LongTensor:
        mapping = torch.zeros(
            max(self.img2bpe.keys()) + 1,
            dtype=torch.int,
            device=self._device,
        )
        for k, v in self.img2bpe.items():
            mapping[k] = v
        return mapping

    def convert_bpe2img(self, bpe_batch: torch.Tensor) -> torch.Tensor:
        bpe_tok, img_tok = self.bpe2img_search_tensors
        return img_tok[torch.searchsorted(bpe_tok, bpe_batch)]

    def convert_img2bp2(self, img_batch: torch.Tensor) -> torch.Tensor:
        return self.img2bpe_mapping_tensor[img_batch]
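A hypothetical sketch of translating between BPE vocabulary ids and VQGAN codebook ids (it assumes `vocab_info` is a `VocabInfo` built from the tokenizer's vocabulary map):

```python
from chameleon.inference.vocab import VocabTranslation

translation = VocabTranslation(vocab_info, device="cuda")
codebook_ids = translation.convert_bpe2img(bpe_image_tokens)  # vocab ids -> [0, 8191]
bpe_ids = translation.convert_img2bp2(codebook_ids)           # codebook ids -> vocab ids
```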
chameleon/inference/vqgan.py
ADDED
@@ -0,0 +1,675 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2 |
+
|
3 |
+
# This source code is licensed under the Chameleon License found in the
|
4 |
+
# LICENSE file in the root directory of this source tree.
|
5 |
+
|
6 |
+
"""
|
7 |
+
Contents of this file are taken from https://github.com/CompVis/taming-transformers/blob/3ba01b241669f5ade541ce990f7650a3b8f65318/taming/models/vqgan.py
|
8 |
+
[with minimal dependencies]
|
9 |
+
|
10 |
+
This implementation is inference-only -- training steps and optimizer components
|
11 |
+
introduce significant additional dependencies
|
12 |
+
"""
|
13 |
+
|
14 |
+
import numpy as np
|
15 |
+
import torch
|
16 |
+
import torch.nn as nn
|
17 |
+
import torch.nn.functional as F
|
18 |
+
|
19 |
+
|
20 |
+
class VectorQuantizer2(nn.Module):
|
21 |
+
"""
|
22 |
+
Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly
|
23 |
+
avoids costly matrix multiplications and allows for post-hoc remapping of indices.
|
24 |
+
"""
|
25 |
+
|
26 |
+
# NOTE: due to a bug the beta term was applied to the wrong term. for
|
27 |
+
# backwards compatibility we use the buggy version by default, but you can
|
28 |
+
# specify legacy=False to fix it.
|
29 |
+
def __init__(
|
30 |
+
self,
|
31 |
+
n_e,
|
32 |
+
e_dim,
|
33 |
+
beta,
|
34 |
+
remap=None,
|
35 |
+
unknown_index="random",
|
36 |
+
sane_index_shape=False,
|
37 |
+
legacy=True,
|
38 |
+
):
|
39 |
+
super().__init__()
|
40 |
+
self.n_e = n_e
|
41 |
+
self.e_dim = e_dim
|
42 |
+
self.beta = beta
|
43 |
+
self.legacy = legacy
|
44 |
+
|
45 |
+
self.embedding = nn.Embedding(self.n_e, self.e_dim)
|
46 |
+
self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e)
|
47 |
+
|
48 |
+
self.remap = remap
|
49 |
+
if self.remap is not None:
|
50 |
+
self.register_buffer("used", torch.tensor(np.load(self.remap)))
|
51 |
+
self.re_embed = self.used.shape[0]
|
52 |
+
self.unknown_index = unknown_index # "random" or "extra" or integer
|
53 |
+
if self.unknown_index == "extra":
|
54 |
+
self.unknown_index = self.re_embed
|
55 |
+
self.re_embed = self.re_embed + 1
|
56 |
+
print(
|
57 |
+
f"Remapping {self.n_e} indices to {self.re_embed} indices. "
|
58 |
+
f"Using {self.unknown_index} for unknown indices."
|
59 |
+
)
|
60 |
+
else:
|
61 |
+
self.re_embed = n_e
|
62 |
+
|
63 |
+
self.sane_index_shape = sane_index_shape
|
64 |
+
|
65 |
+
def remap_to_used(self, inds):
|
66 |
+
ishape = inds.shape
|
67 |
+
assert len(ishape) > 1
|
68 |
+
inds = inds.reshape(ishape[0], -1)
|
69 |
+
used = self.used.to(inds)
|
70 |
+
match = (inds[:, :, None] == used[None, None, ...]).long()
|
71 |
+
new = match.argmax(-1)
|
72 |
+
unknown = match.sum(2) < 1
|
73 |
+
if self.unknown_index == "random":
|
74 |
+
new[unknown] = torch.randint(0, self.re_embed, size=new[unknown].shape).to(
|
75 |
+
device=new.device
|
76 |
+
)
|
77 |
+
else:
|
78 |
+
new[unknown] = self.unknown_index
|
79 |
+
return new.reshape(ishape)
|
80 |
+
|
81 |
+
def unmap_to_all(self, inds):
|
82 |
+
ishape = inds.shape
|
83 |
+
assert len(ishape) > 1
|
84 |
+
inds = inds.reshape(ishape[0], -1)
|
85 |
+
used = self.used.to(inds)
|
86 |
+
if self.re_embed > self.used.shape[0]: # extra token
|
87 |
+
inds[inds >= self.used.shape[0]] = 0 # simply set to zero
|
88 |
+
back = torch.gather(used[None, :][inds.shape[0] * [0], :], 1, inds)
|
89 |
+
return back.reshape(ishape)
|
90 |
+
|
91 |
+
def forward(self, z, temp=None, rescale_logits=False, return_logits=False):
|
92 |
+
assert temp is None or temp == 1.0, "Only for interface compatible with Gumbel"
|
93 |
+
assert rescale_logits is False, "Only for interface compatible with Gumbel"
|
94 |
+
assert return_logits is False, "Only for interface compatible with Gumbel"
|
95 |
+
# reshape z -> (batch, height, width, channel) and flatten
|
96 |
+
z = z.permute(0, 2, 3, 1).contiguous()
|
97 |
+
z_flattened = z.view(-1, self.e_dim)
|
98 |
+
# distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z
|
99 |
+
|
100 |
+
d = (
|
101 |
+
torch.sum(z_flattened**2, dim=1, keepdim=True)
|
102 |
+
+ torch.sum(self.embedding.weight**2, dim=1)
|
103 |
+
- 2
|
104 |
+
* torch.einsum(
|
105 |
+
"bd,dn->bn", z_flattened, self.embedding.weight.transpose(0, 1)
|
106 |
+
)
|
107 |
+
)
|
108 |
+
|
109 |
+
min_encoding_indices = torch.argmin(d, dim=1)
|
110 |
+
z_q = self.embedding(min_encoding_indices).view(z.shape)
|
111 |
+
perplexity = None
|
112 |
+
min_encodings = None
|
113 |
+
|
114 |
+
# compute loss for embedding
|
115 |
+
if not self.legacy:
|
116 |
+
loss = self.beta * torch.mean((z_q.detach() - z) ** 2) + torch.mean(
|
117 |
+
(z_q - z.detach()) ** 2
|
118 |
+
)
|
119 |
+
else:
|
120 |
+
loss = torch.mean((z_q.detach() - z) ** 2) + self.beta * torch.mean(
|
121 |
+
(z_q - z.detach()) ** 2
|
122 |
+
)
|
123 |
+
|
124 |
+
# preserve gradients
|
125 |
+
z_q = z + (z_q - z).detach()
|
126 |
+
|
127 |
+
# reshape back to match original input shape
|
128 |
+
z_q = z_q.permute(0, 3, 1, 2).contiguous()
|
129 |
+
|
130 |
+
if self.remap is not None:
|
131 |
+
min_encoding_indices = min_encoding_indices.reshape(
|
132 |
+
z.shape[0], -1
|
133 |
+
) # add batch axis
|
134 |
+
min_encoding_indices = self.remap_to_used(min_encoding_indices)
|
135 |
+
min_encoding_indices = min_encoding_indices.reshape(-1, 1) # flatten
|
136 |
+
|
137 |
+
if self.sane_index_shape:
|
138 |
+
min_encoding_indices = min_encoding_indices.reshape(
|
139 |
+
z_q.shape[0], z_q.shape[2], z_q.shape[3]
|
140 |
+
)
|
141 |
+
|
142 |
+
return z_q, loss, (perplexity, min_encodings, min_encoding_indices)
|
143 |
+
|
144 |
+
def get_codebook_entry(self, indices, shape):
|
145 |
+
# shape specifying (batch, height, width, channel)
|
146 |
+
if self.remap is not None:
|
147 |
+
indices = indices.reshape(shape[0], -1) # add batch axis
|
148 |
+
indices = self.unmap_to_all(indices)
|
149 |
+
indices = indices.reshape(-1) # flatten again
|
150 |
+
|
151 |
+
# get quantized latent vectors
|
152 |
+
z_q = self.embedding(indices)
|
153 |
+
|
154 |
+
if shape is not None:
|
155 |
+
z_q = z_q.view(shape)
|
156 |
+
# reshape back to match original input shape
|
157 |
+
z_q = z_q.permute(0, 3, 1, 2).contiguous()
|
158 |
+
|
159 |
+
return z_q
|
160 |
+
|
161 |
+
|
162 |
+
# Alias
|
163 |
+
VectorQuantizer = VectorQuantizer2
|
164 |
+
|
165 |
+
|
166 |
+
def nonlinearity(x):
|
167 |
+
# swish
|
168 |
+
return x * torch.sigmoid(x)
|
169 |
+
|
170 |
+
|
171 |
+
def Normalize(in_channels, num_groups=32):
|
172 |
+
return torch.nn.GroupNorm(
|
173 |
+
num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True
|
174 |
+
)
|
175 |
+
|
176 |
+
|
177 |
+
class Upsample(nn.Module):
|
178 |
+
def __init__(self, in_channels, with_conv):
|
179 |
+
super().__init__()
|
180 |
+
self.with_conv = with_conv
|
181 |
+
if self.with_conv:
|
182 |
+
self.conv = torch.nn.Conv2d(
|
183 |
+
in_channels, in_channels, kernel_size=3, stride=1, padding=1
|
184 |
+
)
|
185 |
+
|
186 |
+
def forward(self, x):
|
187 |
+
x = F.interpolate(x, scale_factor=2.0, mode="nearest")
|
188 |
+
if self.with_conv:
|
189 |
+
x = self.conv(x)
|
190 |
+
return x
|
191 |
+
|
192 |
+
|
193 |
+
class Downsample(nn.Module):
|
194 |
+
def __init__(self, in_channels, with_conv):
|
195 |
+
super().__init__()
|
196 |
+
self.with_conv = with_conv
|
197 |
+
if self.with_conv:
|
198 |
+
# no asymmetric padding in torch conv, must do it ourselves
|
199 |
+
self.conv = torch.nn.Conv2d(
|
200 |
+
in_channels, in_channels, kernel_size=3, stride=2, padding=0
|
201 |
+
)
|
202 |
+
|
203 |
+
def forward(self, x):
|
204 |
+
if self.with_conv:
|
205 |
+
pad = (0, 1, 0, 1)
|
206 |
+
x = F.pad(x, pad, mode="constant", value=0)
|
207 |
+
x = self.conv(x)
|
208 |
+
else:
|
209 |
+
x = F.avg_pool2d(x, kernel_size=2, stride=2)
|
210 |
+
return x
|
211 |
+
|
212 |
+
|
213 |
+
class ResnetBlock(nn.Module):
|
214 |
+
def __init__(
|
215 |
+
self,
|
216 |
+
*,
|
217 |
+
in_channels,
|
218 |
+
out_channels=None,
|
219 |
+
conv_shortcut=False,
|
220 |
+
dropout,
|
221 |
+
temb_channels=512,
|
222 |
+
):
|
223 |
+
super().__init__()
|
224 |
+
self.in_channels = in_channels
|
225 |
+
out_channels = in_channels if out_channels is None else out_channels
|
226 |
+
self.out_channels = out_channels
|
227 |
+
self.use_conv_shortcut = conv_shortcut
|
228 |
+
|
229 |
+
self.norm1 = Normalize(in_channels)
|
230 |
+
self.conv1 = torch.nn.Conv2d(
|
231 |
+
in_channels, out_channels, kernel_size=3, stride=1, padding=1
|
232 |
+
)
|
233 |
+
if temb_channels > 0:
|
234 |
+
self.temb_proj = torch.nn.Linear(temb_channels, out_channels)
|
235 |
+
self.norm2 = Normalize(out_channels)
|
236 |
+
self.dropout = torch.nn.Dropout(dropout)
|
237 |
+
self.conv2 = torch.nn.Conv2d(
|
238 |
+
out_channels, out_channels, kernel_size=3, stride=1, padding=1
|
239 |
+
)
|
240 |
+
if self.in_channels != self.out_channels:
|
241 |
+
if self.use_conv_shortcut:
|
242 |
+
self.conv_shortcut = torch.nn.Conv2d(
|
243 |
+
in_channels, out_channels, kernel_size=3, stride=1, padding=1
|
244 |
+
)
|
245 |
+
else:
|
246 |
+
self.nin_shortcut = torch.nn.Conv2d(
|
247 |
+
in_channels, out_channels, kernel_size=1, stride=1, padding=0
|
248 |
+
)
|
249 |
+
|
250 |
+
def forward(self, x, temb):
|
251 |
+
h = x
|
252 |
+
h = self.norm1(h)
|
253 |
+
h = nonlinearity(h)
|
254 |
+
h = self.conv1(h)
|
255 |
+
|
256 |
+
if temb is not None:
|
257 |
+
h = h + self.temb_proj(nonlinearity(temb))[:, :, None, None]
|
258 |
+
|
259 |
+
h = self.norm2(h)
|
260 |
+
h = nonlinearity(h)
|
261 |
+
h = self.dropout(h)
|
262 |
+
h = self.conv2(h)
|
263 |
+
|
264 |
+
if self.in_channels != self.out_channels:
|
265 |
+
if self.use_conv_shortcut:
|
266 |
+
x = self.conv_shortcut(x)
|
267 |
+
else:
|
268 |
+
x = self.nin_shortcut(x)
|
269 |
+
|
270 |
+
return x + h
|
271 |
+
|
272 |
+
|
273 |
+
class AttnBlock(nn.Module):
|
274 |
+
def __init__(self, in_channels):
|
275 |
+
super().__init__()
|
276 |
+
self.in_channels = in_channels
|
277 |
+
|
278 |
+
self.norm = Normalize(in_channels)
|
279 |
+
self.q = torch.nn.Conv2d(
|
280 |
+
in_channels, in_channels, kernel_size=1, stride=1, padding=0
|
281 |
+
)
|
282 |
+
self.k = torch.nn.Conv2d(
|
283 |
+
in_channels, in_channels, kernel_size=1, stride=1, padding=0
|
284 |
+
)
|
285 |
+
self.v = torch.nn.Conv2d(
|
286 |
+
in_channels, in_channels, kernel_size=1, stride=1, padding=0
|
287 |
+
)
|
288 |
+
self.proj_out = torch.nn.Conv2d(
|
289 |
+
in_channels, in_channels, kernel_size=1, stride=1, padding=0
|
290 |
+
)
|
291 |
+
|
292 |
+
def forward(self, x):
|
293 |
+
h_ = x
|
294 |
+
h_ = self.norm(h_)
|
295 |
+
q = self.q(h_)
|
296 |
+
k = self.k(h_)
|
297 |
+
v = self.v(h_)
|
298 |
+
|
299 |
+
# compute attention
|
300 |
+
b, c, h, w = q.shape
|
301 |
+
q = q.reshape(b, c, h * w)
|
302 |
+
q = q.permute(0, 2, 1) # b,hw,c
|
303 |
+
k = k.reshape(b, c, h * w) # b,c,hw
|
304 |
+
w_ = torch.bmm(q, k) # b,hw,hw w[b,i,j]=sum_c q[b,i,c]k[b,c,j]
|
305 |
+
w_ = w_ * (int(c) ** (-0.5))
|
306 |
+
w_ = F.softmax(w_, dim=2)
|
307 |
+
|
308 |
+
# attend to values
|
309 |
+
v = v.reshape(b, c, h * w)
|
310 |
+
w_ = w_.permute(0, 2, 1) # b,hw,hw (first hw of k, second of q)
|
311 |
+
h_ = torch.bmm(v, w_) # b, c,hw (hw of q) h_[b,c,j] = sum_i v[b,c,i] w_[b,i,j]
|
312 |
+
h_ = h_.reshape(b, c, h, w)
|
313 |
+
|
314 |
+
h_ = self.proj_out(h_)
|
315 |
+
|
316 |
+
return x + h_
|
317 |
+
|
318 |
+
|
319 |
+
def make_attn(in_channels, attn_type="vanilla"):
|
320 |
+
assert attn_type in ["vanilla", "linear", "none"], f"attn_type {attn_type} unknown"
|
321 |
+
# print(f"making attention of type '{attn_type}' with {in_channels} in_channels")
|
322 |
+
if attn_type == "vanilla":
|
323 |
+
return AttnBlock(in_channels)
|
324 |
+
elif attn_type == "none":
|
325 |
+
return nn.Identity(in_channels)
|
326 |
+
else:
|
327 |
+
raise ValueError("Unexpected attention type")
|
328 |
+
|
329 |
+
|
330 |
+
class Encoder(nn.Module):
|
331 |
+
def __init__(
|
332 |
+
self,
|
333 |
+
*,
|
334 |
+
ch,
|
335 |
+
out_ch,
|
336 |
+
ch_mult=(1, 2, 4, 8),
|
337 |
+
num_res_blocks,
|
338 |
+
attn_resolutions,
|
339 |
+
dropout=0.0,
|
340 |
+
resamp_with_conv=True,
|
341 |
+
in_channels,
|
342 |
+
resolution,
|
343 |
+
z_channels,
|
344 |
+
double_z=True,
|
345 |
+
use_linear_attn=False,
|
346 |
+
attn_type="vanilla",
|
347 |
+
**ignore_kwargs,
|
348 |
+
):
|
349 |
+
super().__init__()
|
350 |
+
if use_linear_attn:
|
351 |
+
attn_type = "linear"
|
352 |
+
self.ch = ch
|
353 |
+
self.temb_ch = 0
|
354 |
+
self.num_resolutions = len(ch_mult)
|
355 |
+
self.num_res_blocks = num_res_blocks
|
356 |
+
self.resolution = resolution
|
357 |
+
self.in_channels = in_channels
|
358 |
+
|
359 |
+
# downsampling
|
360 |
+
self.conv_in = torch.nn.Conv2d(
|
361 |
+
in_channels, self.ch, kernel_size=3, stride=1, padding=1
|
362 |
+
)
|
363 |
+
|
364 |
+
curr_res = resolution
|
365 |
+
in_ch_mult = (1,) + tuple(ch_mult)
|
366 |
+
self.in_ch_mult = in_ch_mult
|
367 |
+
self.down = nn.ModuleList()
|
368 |
+
for i_level in range(self.num_resolutions):
|
369 |
+
block = nn.ModuleList()
|
370 |
+
attn = nn.ModuleList()
|
371 |
+
block_in = ch * in_ch_mult[i_level]
|
372 |
+
block_out = ch * ch_mult[i_level]
|
373 |
+
for i_block in range(self.num_res_blocks):
|
374 |
+
block.append(
|
375 |
+
ResnetBlock(
|
376 |
+
in_channels=block_in,
|
377 |
+
out_channels=block_out,
|
378 |
+
temb_channels=self.temb_ch,
|
379 |
+
dropout=dropout,
|
380 |
+
)
|
381 |
+
)
|
382 |
+
block_in = block_out
|
383 |
+
if curr_res in attn_resolutions:
|
384 |
+
attn.append(make_attn(block_in, attn_type=attn_type))
|
385 |
+
down = nn.Module()
|
386 |
+
down.block = block
|
387 |
+
down.attn = attn
|
388 |
+
if i_level != self.num_resolutions - 1:
|
389 |
+
down.downsample = Downsample(block_in, resamp_with_conv)
|
390 |
+
curr_res = curr_res // 2
|
391 |
+
self.down.append(down)
|
392 |
+
|
393 |
+
# middle
|
394 |
+
self.mid = nn.Module()
|
395 |
+
self.mid.block_1 = ResnetBlock(
|
396 |
+
in_channels=block_in,
|
397 |
+
out_channels=block_in,
|
398 |
+
temb_channels=self.temb_ch,
|
399 |
+
dropout=dropout,
|
400 |
+
)
|
401 |
+
self.mid.attn_1 = make_attn(block_in, attn_type=attn_type)
|
402 |
+
self.mid.block_2 = ResnetBlock(
|
403 |
+
in_channels=block_in,
|
404 |
+
out_channels=block_in,
|
405 |
+
temb_channels=self.temb_ch,
|
406 |
+
dropout=dropout,
|
407 |
+
)
|
408 |
+
|
409 |
+
# end
|
410 |
+
self.norm_out = Normalize(block_in)
|
411 |
+
self.conv_out = torch.nn.Conv2d(
|
412 |
+
block_in,
|
413 |
+
2 * z_channels if double_z else z_channels,
|
414 |
+
kernel_size=3,
|
415 |
+
stride=1,
|
416 |
+
padding=1,
|
417 |
+
)
|
418 |
+
|
419 |
+
def forward(self, x):
|
420 |
+
# timestep embedding
|
421 |
+
temb = None
|
422 |
+
|
423 |
+
# downsampling
|
424 |
+
hs = [self.conv_in(x)]
|
425 |
+
for i_level in range(self.num_resolutions):
|
426 |
+
for i_block in range(self.num_res_blocks):
|
427 |
+
h = self.down[i_level].block[i_block](hs[-1], temb)
|
428 |
+
if len(self.down[i_level].attn) > 0:
|
429 |
+
h = self.down[i_level].attn[i_block](h)
|
430 |
+
hs.append(h)
|
431 |
+
if i_level != self.num_resolutions - 1:
|
432 |
+
hs.append(self.down[i_level].downsample(hs[-1]))
|
433 |
+
|
434 |
+
# middle
|
435 |
+
h = hs[-1]
|
436 |
+
h = self.mid.block_1(h, temb)
|
437 |
+
h = self.mid.attn_1(h)
|
438 |
+
h = self.mid.block_2(h, temb)
|
439 |
+
|
440 |
+
# end
|
441 |
+
h = self.norm_out(h)
|
442 |
+
h = nonlinearity(h)
|
443 |
+
h = self.conv_out(h)
|
444 |
+
return h
|
445 |
+
|
446 |
+
|
447 |
+
class Decoder(nn.Module):
|
448 |
+
def __init__(
|
449 |
+
self,
|
450 |
+
*,
|
451 |
+
ch,
|
452 |
+
out_ch,
|
453 |
+
ch_mult=(1, 2, 4, 8),
|
454 |
+
num_res_blocks,
|
455 |
+
attn_resolutions,
|
456 |
+
dropout=0.0,
|
457 |
+
resamp_with_conv=True,
|
458 |
+
in_channels,
|
459 |
+
resolution,
|
460 |
+
z_channels,
|
461 |
+
give_pre_end=False,
|
462 |
+
tanh_out=False,
|
463 |
+
use_linear_attn=False,
|
464 |
+
attn_type="vanilla",
|
465 |
+
**ignorekwargs,
|
466 |
+
):
|
467 |
+
super().__init__()
|
468 |
+
if use_linear_attn:
|
469 |
+
attn_type = "linear"
|
470 |
+
self.ch = ch
|
471 |
+
self.temb_ch = 0
|
472 |
+
self.num_resolutions = len(ch_mult)
|
473 |
+
self.num_res_blocks = num_res_blocks
|
474 |
+
self.resolution = resolution
|
475 |
+
self.in_channels = in_channels
|
476 |
+
self.give_pre_end = give_pre_end
|
477 |
+
self.tanh_out = tanh_out
|
478 |
+
|
479 |
+
# compute in_ch_mult, block_in and curr_res at lowest res
|
480 |
+
block_in = ch * ch_mult[self.num_resolutions - 1]
|
481 |
+
curr_res = resolution // 2 ** (self.num_resolutions - 1)
|
482 |
+
self.z_shape = (1, z_channels, curr_res, curr_res)
|
483 |
+
|
484 |
+
# z to block_in
|
485 |
+
self.conv_in = torch.nn.Conv2d(
|
486 |
+
z_channels, block_in, kernel_size=3, stride=1, padding=1
|
487 |
+
)
|
488 |
+
|
489 |
+
# middle
|
490 |
+
self.mid = nn.Module()
|
491 |
+
self.mid.block_1 = ResnetBlock(
|
492 |
+
in_channels=block_in,
|
493 |
+
out_channels=block_in,
|
494 |
+
temb_channels=self.temb_ch,
|
495 |
+
dropout=dropout,
|
496 |
+
)
|
497 |
+
self.mid.attn_1 = make_attn(block_in, attn_type=attn_type)
|
498 |
+
self.mid.block_2 = ResnetBlock(
|
499 |
+
in_channels=block_in,
|
500 |
+
out_channels=block_in,
|
501 |
+
temb_channels=self.temb_ch,
|
502 |
+
dropout=dropout,
|
503 |
+
)
|
504 |
+
|
505 |
+
# upsampling
|
506 |
+
self.up = nn.ModuleList()
|
507 |
+
for i_level in reversed(range(self.num_resolutions)):
|
508 |
+
block = nn.ModuleList()
|
509 |
+
attn = nn.ModuleList()
|
510 |
+
block_out = ch * ch_mult[i_level]
|
511 |
+
for i_block in range(self.num_res_blocks + 1):
|
512 |
+
block.append(
|
513 |
+
ResnetBlock(
|
514 |
+
in_channels=block_in,
|
515 |
+
out_channels=block_out,
|
516 |
+
temb_channels=self.temb_ch,
|
517 |
+
dropout=dropout,
|
518 |
+
)
|
519 |
+
)
|
520 |
+
block_in = block_out
|
521 |
+
if curr_res in attn_resolutions:
|
522 |
+
attn.append(make_attn(block_in, attn_type=attn_type))
|
523 |
+
up = nn.Module()
|
524 |
+
up.block = block
|
525 |
+
up.attn = attn
|
526 |
+
if i_level != 0:
|
527 |
+
up.upsample = Upsample(block_in, resamp_with_conv)
|
528 |
+
curr_res = curr_res * 2
|
529 |
+
self.up.insert(0, up) # prepend to get consistent order
|
530 |
+
|
531 |
+
# end
|
532 |
+
self.norm_out = Normalize(block_in)
|
533 |
+
self.conv_out = torch.nn.Conv2d(
|
534 |
+
block_in, out_ch, kernel_size=3, stride=1, padding=1
|
535 |
+
)
|
536 |
+
|
537 |
+
def forward(self, z):
|
538 |
+
# assert z.shape[1:] == self.z_shape[1:]
|
539 |
+
self.last_z_shape = z.shape
|
540 |
+
|
541 |
+
# timestep embedding
|
542 |
+
temb = None
|
543 |
+
|
544 |
+
# z to block_in
|
545 |
+
h = self.conv_in(z)
|
546 |
+
|
547 |
+
# middle
|
548 |
+
h = self.mid.block_1(h, temb)
|
549 |
+
h = self.mid.attn_1(h)
|
550 |
+
h = self.mid.block_2(h, temb)
|
551 |
+
|
552 |
+
# upsampling
|
553 |
+
for i_level in reversed(range(self.num_resolutions)):
|
554 |
+
for i_block in range(self.num_res_blocks + 1):
|
555 |
+
h = self.up[i_level].block[i_block](h, temb)
|
556 |
+
if len(self.up[i_level].attn) > 0:
|
557 |
+
h = self.up[i_level].attn[i_block](h)
|
558 |
+
if i_level != 0:
|
559 |
+
h = self.up[i_level].upsample(h)
|
560 |
+
|
561 |
+
# end
|
562 |
+
if self.give_pre_end:
|
563 |
+
return h
|
564 |
+
|
565 |
+
h = self.norm_out(h)
|
566 |
+
h = nonlinearity(h)
|
567 |
+
h = self.conv_out(h)
|
568 |
+
if self.tanh_out:
|
569 |
+
h = torch.tanh(h)
|
570 |
+
return h
|
571 |
+
|
572 |
+
|
573 |
+
class VQModel(nn.Module):
|
574 |
+
def __init__(
|
575 |
+
self,
|
576 |
+
ddconfig,
|
577 |
+
n_embed,
|
578 |
+
embed_dim,
|
579 |
+
ckpt_path=None,
|
580 |
+
ignore_keys=[],
|
581 |
+
image_key="image",
|
582 |
+
colorize_nlabels=None,
|
583 |
+
monitor=None,
|
584 |
+
scheduler_config=None,
|
585 |
+
lr_g_factor=1.0,
|
586 |
+
remap=None,
|
587 |
+
sane_index_shape=False, # tell vector quantizer to return indices as bhw
|
588 |
+
):
|
589 |
+
super().__init__()
|
590 |
+
self.image_key = image_key
|
591 |
+
self.encoder = Encoder(**ddconfig)
|
592 |
+
self.decoder = Decoder(**ddconfig)
|
593 |
+
self.quantize = VectorQuantizer(
|
594 |
+
n_embed,
|
595 |
+
embed_dim,
|
596 |
+
beta=0.25,
|
597 |
+
remap=remap,
|
598 |
+
sane_index_shape=sane_index_shape,
|
599 |
+
)
|
600 |
+
self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1)
|
601 |
+
self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
|
602 |
+
if ckpt_path is not None:
|
603 |
+
self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
|
604 |
+
self.image_key = image_key
|
605 |
+
if colorize_nlabels is not None:
|
606 |
+
assert isinstance(colorize_nlabels, int)
|
607 |
+
self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1))
|
608 |
+
if monitor is not None:
|
609 |
+
self.monitor = monitor
|
610 |
+
self.scheduler_config = scheduler_config
|
611 |
+
self.lr_g_factor = lr_g_factor
|
612 |
+
|
613 |
+
def init_from_ckpt(self, path, ignore_keys=list()):
|
614 |
+
sd = torch.load(path, map_location="cpu")["state_dict"]
|
615 |
+
keys = list(sd.keys())
|
616 |
+
for k in keys:
|
617 |
+
for ik in ignore_keys:
|
618 |
+
if k.startswith(ik):
|
619 |
+
print("Deleting key {} from state_dict.".format(k))
|
620 |
+
del sd[k]
|
621 |
+
self.load_state_dict(sd, strict=False)
|
622 |
+
print(f"VQModel loaded from {path}")
|
623 |
+
|
624 |
+
def encode(self, x):
|
625 |
+
h = self.encoder(x)
|
626 |
+
h = self.quant_conv(h)
|
627 |
+
quant, emb_loss, info = self.quantize(h)
|
628 |
+
return quant, emb_loss, info
|
629 |
+
|
630 |
+
def decode(self, quant):
|
631 |
+
quant = self.post_quant_conv(quant)
|
632 |
+
dec = self.decoder(quant)
|
633 |
+
return dec
|
634 |
+
|
635 |
+
def decode_code(self, code_b):
|
636 |
+
quant_b = self.quantize.embed_code(code_b)
|
637 |
+
dec = self.decode(quant_b)
|
638 |
+
return dec
|
639 |
+
|
640 |
+
def forward(self, input):
|
641 |
+
quant, diff, _ = self.encode(input)
|
642 |
+
dec = self.decode(quant)
|
643 |
+
return dec, diff
|
644 |
+
|
645 |
+
def get_input(self, batch, k):
|
646 |
+
x = batch[k]
|
647 |
+
if len(x.shape) == 3:
|
648 |
+
x = x[..., None]
|
649 |
+
x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format)
|
650 |
+
return x.float()
|
651 |
+
|
652 |
+
def get_last_layer(self):
|
653 |
+
return self.decoder.conv_out.weight
|
654 |
+
|
655 |
+
def log_images(self, batch, **kwargs):
|
656 |
+
log = dict()
|
657 |
+
x = self.get_input(batch, self.image_key)
|
658 |
+
x = x.to(self.device)
|
659 |
+
xrec, _ = self(x)
|
660 |
+
if x.shape[1] > 3:
|
661 |
+
# colorize with random projection
|
662 |
+
assert xrec.shape[1] > 3
|
663 |
+
x = self.to_rgb(x)
|
664 |
+
xrec = self.to_rgb(xrec)
|
665 |
+
log["inputs"] = x
|
666 |
+
log["reconstructions"] = xrec
|
667 |
+
return log
|
668 |
+
|
669 |
+
def to_rgb(self, x):
|
670 |
+
assert self.image_key == "segmentation"
|
671 |
+
if not hasattr(self, "colorize"):
|
672 |
+
self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x))
|
673 |
+
x = F.conv2d(x, weight=self.colorize)
|
674 |
+
x = 2.0 * (x - x.min()) / (x.max() - x.min()) - 1.0
|
675 |
+
return x
|
chameleon/miniviewer/__init__.py
ADDED
@@ -0,0 +1,4 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the Chameleon License found in the
+# LICENSE file in the root directory of this source tree.
chameleon/miniviewer/__main__.py
ADDED
@@ -0,0 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the Chameleon License found in the
+# LICENSE file in the root directory of this source tree.
+
+from chameleon.miniviewer.miniviewer import main
+
+if __name__ == "__main__":
+    main()
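
With this module entry point in place, the viewer is launched via python -m; the flags match the click options defined in miniviewer.py further down (the data path is wherever the downloaded checkpoints live, 7b or 30b selects the model):

python -m chameleon.miniviewer --data-path ./data --model-size 7b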
chameleon/miniviewer/miniviewer.html
ADDED
@@ -0,0 +1,409 @@
1 |
+
<!-- Copyright (c) Meta Platforms, Inc. and affiliates. -->
|
2 |
+
|
3 |
+
<!-- This source code is licensed under the Chameleon License found in the -->
|
4 |
+
<!-- LICENSE file in the root directory of this source tree. -->
|
5 |
+
<h1>
|
6 |
+
<div id="connection-status"></div>
|
7 |
+
MiniViewer:
|
8 |
+
</h1>
|
9 |
+
<div class="container">
|
10 |
+
<div class="sidebar with-padding">
|
11 |
+
<h4>Input Controls</h4>
|
12 |
+
<div class="input-controls-container">
|
13 |
+
<button class="button" onclick="addInput('text')">Add text input</button>
|
14 |
+
<button class="button" onclick="addInput('image')">
|
15 |
+
Add image input
|
16 |
+
</button>
|
17 |
+
<button class="button" onclick="addInput('<END-OF-TURN>')">
|
18 |
+
Add end-of-turn token
|
19 |
+
</button>
|
20 |
+
</div>
|
21 |
+
<hr />
|
22 |
+
<h4>General Options</h4>
|
23 |
+
<div class="option">
|
24 |
+
<label for="seed">seed</label>
|
25 |
+
<input type="number" id="seed" value="0" />
|
26 |
+
</div>
|
27 |
+
<div class="option">
|
28 |
+
<label for="max-seq-len">max sequence length</label>
|
29 |
+
<input type="number" id="max-seq-len" value="4096" />
|
30 |
+
</div>
|
31 |
+
<div class="option">
|
32 |
+
<label for="max-gen-len">max generation length</label>
|
33 |
+
<input type="number" id="max-gen-len" value="4096" />
|
34 |
+
</div>
|
35 |
+
<h4>
|
36 |
+
<input type="checkbox" id="enable-text" name="enable-text" checked />
|
37 |
+
<label for="enable-text">Text Decoder Options</label>
|
38 |
+
</h4>
|
39 |
+
<div class="option">
|
40 |
+
<label for="text-rep-penalty">repetition penalty</label>
|
41 |
+
<input type="number" id="text-rep-penalty" value="1.2" step="0.01" />
|
42 |
+
</div>
|
43 |
+
<div class="option">
|
44 |
+
<label for="text-temp">temperature</label>
|
45 |
+
<input type="number" id="text-temp" value="0.7" step="0.01" />
|
46 |
+
</div>
|
47 |
+
<div class="option">
|
48 |
+
<label for="text-top-p">top-p</label>
|
49 |
+
<input type="number" id="text-top-p" value="0.9" step="0.01" />
|
50 |
+
</div>
|
51 |
+
<h4>
|
52 |
+
<input type="checkbox" id="enable-image" name="enable-image" checked />
|
53 |
+
<label for="enable-image">Image Decoder Options</label>
|
54 |
+
</h4>
|
55 |
+
<div class="option">
|
56 |
+
<label for="img-cfg-gstext">cfg text</label>
|
57 |
+
<input type="number" id="img-cfg-gstext" value="3.0" step="0.01" />
|
58 |
+
</div>
|
59 |
+
<div class="option">
|
60 |
+
<label for="img-cfg-gsimage">cfg image</label>
|
61 |
+
<input type="number" id="img-cfg-gsimage" value="1.2" step="0.01" />
|
62 |
+
</div>
|
63 |
+
<div class="option">
|
64 |
+
<label for="img-temp">temperature</label>
|
65 |
+
<input type="number" id="img-temp" value="0.7" step="0.01" />
|
66 |
+
</div>
|
67 |
+
<div class="option">
|
68 |
+
<label for="img-top-p">top-p</label>
|
69 |
+
<input type="number" id="img-top-p" value="0.9" step="0.01" />
|
70 |
+
</div>
|
71 |
+
</div>
|
72 |
+
<div class="content with-padding">
|
73 |
+
<div class="input-wrapper">
|
74 |
+
Inputs:
|
75 |
+
<div id="inputs" class="with-padding"></div>
|
76 |
+
</div>
|
77 |
+
<h4>
|
78 |
+
<button id="generate" class="button" onclick="generate()">
|
79 |
+
Generate
|
80 |
+
</button>
|
81 |
+
<button
|
82 |
+
id="cancel"
|
83 |
+
class="button"
|
84 |
+
onclick="cancel()"
|
85 |
+
style="display: none"
|
86 |
+
>
|
87 |
+
Cancel
|
88 |
+
</button>
|
89 |
+
</h4>
|
90 |
+
Results:
|
91 |
+
<pre id="results" class="with-padding"></pre>
|
92 |
+
<div id="timing" class="with-padding"></div>
|
93 |
+
<div id="queue" class="with-padding"></div>
|
94 |
+
</div>
|
95 |
+
</div>
|
96 |
+
|
97 |
+
<style>
|
98 |
+
.container {
|
99 |
+
display: inline-flex;
|
100 |
+
}
|
101 |
+
|
102 |
+
.sidebar {
|
103 |
+
flex: 0 0 200px;
|
104 |
+
border-right: 2px solid #ddd;
|
105 |
+
}
|
106 |
+
|
107 |
+
#connection-status {
|
108 |
+
width: 20px;
|
109 |
+
height: 20px;
|
110 |
+
border-radius: 10px;
|
111 |
+
background-color: grey;
|
112 |
+
display: inline-block;
|
113 |
+
}
|
114 |
+
|
115 |
+
.input-controls-container {
|
116 |
+
display: inline-grid;
|
117 |
+
}
|
118 |
+
|
119 |
+
.option {
|
120 |
+
display: flex;
|
121 |
+
margin-bottom: 5px;
|
122 |
+
}
|
123 |
+
|
124 |
+
.option label {
|
125 |
+
white-space: nowrap;
|
126 |
+
margin-right: 10px;
|
127 |
+
}
|
128 |
+
|
129 |
+
.option input {
|
130 |
+
flex-grow: 1;
|
131 |
+
text-align: right;
|
132 |
+
}
|
133 |
+
|
134 |
+
.content {
|
135 |
+
width: 100%;
|
136 |
+
}
|
137 |
+
|
138 |
+
.with-padding {
|
139 |
+
padding: 10px;
|
140 |
+
}
|
141 |
+
|
142 |
+
.input-wrapper {
|
143 |
+
border: dotted;
|
144 |
+
}
|
145 |
+
|
146 |
+
.input-container {
|
147 |
+
display: flex;
|
148 |
+
align-items: center;
|
149 |
+
}
|
150 |
+
|
151 |
+
.input-controls {
|
152 |
+
display: inline-flex;
|
153 |
+
padding: 2px;
|
154 |
+
}
|
155 |
+
|
156 |
+
#results {
|
157 |
+
background: lightgray;
|
158 |
+
}
|
159 |
+
|
160 |
+
button {
|
161 |
+
text-align: left;
|
162 |
+
}
|
163 |
+
|
164 |
+
img {
|
165 |
+
width: 200px;
|
166 |
+
height: 200px;
|
167 |
+
}
|
168 |
+
</style>
|
169 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.6.0/socket.io.min.js"></script>
|
170 |
+
<script>
|
171 |
+
var active_key;
|
172 |
+
var socket;
|
173 |
+
|
174 |
+
function createButton(text, onClick) {
|
175 |
+
var button = document.createElement("button");
|
176 |
+
button.textContent = text;
|
177 |
+
button.onclick = onClick;
|
178 |
+
return button;
|
179 |
+
}
|
180 |
+
|
181 |
+
function removeInput(evt) {
|
182 |
+
var inputWrapper = evt.target.parentNode.parentNode;
|
183 |
+
inputWrapper.parentNode.removeChild(inputWrapper);
|
184 |
+
}
|
185 |
+
|
186 |
+
function moveInputUp(evt) {
|
187 |
+
var inputWrapper = evt.target.parentNode.parentNode;
|
188 |
+
var prev = inputWrapper.previousElementSibling;
|
189 |
+
if (prev) {
|
190 |
+
inputWrapper.parentNode.insertBefore(inputWrapper, prev);
|
191 |
+
}
|
192 |
+
}
|
193 |
+
|
194 |
+
function moveInputDown(evt) {
|
195 |
+
var inputWrapper = evt.target.parentNode.parentNode;
|
196 |
+
var next = inputWrapper.nextElementSibling;
|
197 |
+
if (next) {
|
198 |
+
inputWrapper.parentNode.insertBefore(next, inputWrapper);
|
199 |
+
}
|
200 |
+
}
|
201 |
+
|
202 |
+
function readFileAsync(file) {
|
203 |
+
return new Promise((resolve, reject) => {
|
204 |
+
let reader = new FileReader();
|
205 |
+
reader.onload = () => resolve(reader.result);
|
206 |
+
reader.onerror = reject;
|
207 |
+
reader.readAsDataURL(file);
|
208 |
+
});
|
209 |
+
}
|
210 |
+
|
211 |
+
async function loadImageSource(dataTransfer) {
|
212 |
+
if (dataTransfer.files.length > 0) {
|
213 |
+
return await readFileAsync(dataTransfer.files[0]);
|
214 |
+
}
|
215 |
+
|
216 |
+
let htmlContent = dataTransfer.getData("text/html");
|
217 |
+
if (htmlContent) {
|
218 |
+
const div = document.createElement("div");
|
219 |
+
div.innerHTML = htmlContent;
|
220 |
+
return div.querySelector("img").src;
|
221 |
+
}
|
222 |
+
|
223 |
+
return (
|
224 |
+
dataTransfer.getData("text/uri-list") ||
|
225 |
+
dataTransfer.getData("text/plain")
|
226 |
+
);
|
227 |
+
}
|
228 |
+
|
229 |
+
async function showPreview(evt) {
|
230 |
+
var wrapper = evt.target.parentElement;
|
231 |
+
wrapper.querySelector("img").src = await loadImageSource(evt.target);
|
232 |
+
wrapper.querySelector("img").style.display = "block";
|
233 |
+
wrapper.querySelector("p").style.display = "none";
|
234 |
+
}
|
235 |
+
|
236 |
+
async function handleDrop(evt) {
|
237 |
+
evt.preventDefault();
|
238 |
+
var wrapper = evt.target.parentElement;
|
239 |
+
var file = evt.dataTransfer.files[0];
|
240 |
+
var fileInput = wrapper.querySelector('input[type="file"]');
|
241 |
+
fileInput.files = evt.dataTransfer.files;
|
242 |
+
wrapper.querySelector("img").src = await loadImageSource(evt.dataTransfer);
|
243 |
+
wrapper.querySelector("img").style.display = "block";
|
244 |
+
wrapper.querySelector("p").style.display = "none";
|
245 |
+
}
|
246 |
+
|
247 |
+
function addInput(input_kind) {
|
248 |
+
var inputs_div = document.getElementById("inputs");
|
249 |
+
var wrapper = document.createElement("div");
|
250 |
+
wrapper.kind = input_kind;
|
251 |
+
wrapper.className = "input-container";
|
252 |
+
|
253 |
+
var new_inputs = [];
|
254 |
+
if (input_kind === "text") {
|
255 |
+
new_inputs.push(document.createElement("textarea"));
|
256 |
+
} else if (input_kind === "image") {
|
257 |
+
wrapper.setAttribute("draggable", true);
|
258 |
+
wrapper.ondragover = (evt) => evt.preventDefault();
|
259 |
+
wrapper.ondrop = handleDrop;
|
260 |
+
|
261 |
+
var hiddenImageFromFile = document.createElement("input");
|
262 |
+
hiddenImageFromFile.type = "file";
|
263 |
+
hiddenImageFromFile.accept = "image/*";
|
264 |
+
hiddenImageFromFile.addEventListener("change", showPreview);
|
265 |
+
hiddenImageFromFile.style.display = "none";
|
266 |
+
wrapper.onclick = function () {
|
267 |
+
hiddenImageFromFile.click();
|
268 |
+
};
|
269 |
+
new_inputs.push(hiddenImageFromFile);
|
270 |
+
|
271 |
+
var description = document.createElement("p");
|
272 |
+
description.textContent =
|
273 |
+
"Drag and drop your image here, or click to select.";
|
274 |
+
new_inputs.push(description);
|
275 |
+
|
276 |
+
var preview = document.createElement("img");
|
277 |
+
preview.style.display = "none";
|
278 |
+
new_inputs.push(preview);
|
279 |
+
} else {
|
280 |
+
var span = document.createElement("span");
|
281 |
+
span.textContent = input_kind;
|
282 |
+
new_inputs.push(span);
|
283 |
+
}
|
284 |
+
|
285 |
+
const input_controls = document.createElement("div");
|
286 |
+
input_controls.className = "input-controls";
|
287 |
+
input_controls.appendChild(createButton("-", removeInput));
|
288 |
+
input_controls.appendChild(createButton("↓", moveInputDown));
|
289 |
+
input_controls.appendChild(createButton("↑", moveInputUp));
|
290 |
+
|
291 |
+
wrapper.appendChild(input_controls);
|
292 |
+
for (var new_input of new_inputs) {
|
293 |
+
wrapper.appendChild(new_input);
|
294 |
+
}
|
295 |
+
wrapper.appendChild(document.createElement("br"));
|
296 |
+
|
297 |
+
inputs_div.appendChild(wrapper);
|
298 |
+
}
|
299 |
+
|
300 |
+
async function generate() {
|
301 |
+
document.getElementById("generate").style.display = "none";
|
302 |
+
document.getElementById("cancel").style.display = "block";
|
303 |
+
document.getElementById("results").innerHTML = "";
|
304 |
+
document.getElementById("timing").innerHTML = "";
|
305 |
+
document.getElementById("queue").innerHTML = "";
|
306 |
+
|
307 |
+
active_key = `key_${Math.random()
|
308 |
+
.toString(36)
|
309 |
+
.substring(2, 11)}_${Date.now()}`;
|
310 |
+
|
311 |
+
const user_options = {};
|
312 |
+
for (const option of document.getElementsByClassName("option")) {
|
313 |
+
const input = option.querySelector("input");
|
314 |
+
user_options[input.id] = Number(input.value);
|
315 |
+
}
|
316 |
+
|
317 |
+
user_options["enable-text"] =
|
318 |
+
document.getElementById("enable-text").checked;
|
319 |
+
user_options["enable-image"] =
|
320 |
+
document.getElementById("enable-image").checked;
|
321 |
+
|
322 |
+
const user_inputs = [];
|
323 |
+
const inputs_div = document.getElementById("inputs");
|
324 |
+
|
325 |
+
const input_elems = Array.from(inputs_div.children).map((wrapper) =>
|
326 |
+
wrapper.querySelector("textarea, input, span")
|
327 |
+
);
|
328 |
+
|
329 |
+
const image_promises = Array.from(inputs_div.children)
|
330 |
+
.filter((wrapper) => wrapper.kind === "image")
|
331 |
+
.map((wrapper) => {
|
332 |
+
const file_input = wrapper.querySelector('input[type="file"]');
|
333 |
+
return file_input.files[0]
|
334 |
+
? readFileAsync(file_input.files[0])
|
335 |
+
: Promise.resolve(null);
|
336 |
+
});
|
337 |
+
|
338 |
+
const images = await Promise.all(image_promises);
|
339 |
+
|
340 |
+
for (const wrapper of inputs_div.children) {
|
341 |
+
if (wrapper.kind === "text") {
|
342 |
+
user_inputs.push({
|
343 |
+
type: "text",
|
344 |
+
value: wrapper.querySelector("textarea").value,
|
345 |
+
});
|
346 |
+
} else if (wrapper.kind === "image") {
|
347 |
+
user_inputs.push({ type: "image", value: images.shift() });
|
348 |
+
} else {
|
349 |
+
user_inputs.push({ type: "sentinel", value: wrapper.kind });
|
350 |
+
}
|
351 |
+
}
|
352 |
+
|
353 |
+
socket.emit("generate", active_key, user_options, user_inputs);
|
354 |
+
}
|
355 |
+
|
356 |
+
function cancel() {
|
357 |
+
document.getElementById("generate").style.display = "block";
|
358 |
+
document.getElementById("cancel").style.display = "none";
|
359 |
+
document.getElementById("queue").innerHTML = "";
|
360 |
+
socket.emit("cancel", active_key);
|
361 |
+
active_key = null;
|
362 |
+
}
|
363 |
+
|
364 |
+
function connectSocket() {
|
365 |
+
socket = io();
|
366 |
+
|
367 |
+
socket.on("connect", function() {
|
368 |
+
document.getElementById("connection-status").style.backgroundColor = 'green';
|
369 |
+
});
|
370 |
+
|
371 |
+
socket.on("disconnect", function(reason) {
|
372 |
+
cancel();
|
373 |
+
document.getElementById("connection-status").style.backgroundColor = 'red';
|
374 |
+
});
|
375 |
+
|
376 |
+
socket.on("progress", function (data) {
|
377 |
+
if (data.key != active_key) {
|
378 |
+
return;
|
379 |
+
}
|
380 |
+
|
381 |
+
document.getElementById("queue").innerHTML = "";
|
382 |
+
if (data.type == "queue") {
|
383 |
+
document.getElementById(
|
384 |
+
"queue"
|
385 |
+
).innerHTML = `queue position ${data.value}`;
|
386 |
+
}
|
387 |
+
|
388 |
+
if (data.type == "text") {
|
389 |
+
document.getElementById("results").innerHTML += data.value;
|
390 |
+
} else if (data.type == "image_start") {
|
391 |
+
document.getElementById("results").appendChild(new Image());
|
392 |
+
} else if (data.type == "image") {
|
393 |
+
document.getElementById("results").lastElementChild.src = data.value;
|
394 |
+
} else if (data.type == "image_end") {
|
395 |
+
} else if (data.type == "done") {
|
396 |
+
document.getElementById(
|
397 |
+
"timing"
|
398 |
+
).innerHTML = `Generation time: ${data.value.toFixed(2)} sec`;
|
399 |
+
document.getElementById("generate").style.display = "block";
|
400 |
+
document.getElementById("cancel").style.display = "none";
|
401 |
+
active_key = null;
|
402 |
+
}
|
403 |
+
});
|
404 |
+
}
|
405 |
+
|
406 |
+
window.onload = (evt) => {
|
407 |
+
connectSocket();
|
408 |
+
};
|
409 |
+
</script>
|
chameleon/miniviewer/miniviewer.py
ADDED
@@ -0,0 +1,254 @@
1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2 |
+
#
|
3 |
+
# This source code is licensed under the Chameleon License found in the
|
4 |
+
# LICENSE file in the root directory of this source tree.
|
5 |
+
|
6 |
+
import base64
|
7 |
+
import os
|
8 |
+
import threading
|
9 |
+
import time
|
10 |
+
from dataclasses import dataclass
|
11 |
+
from enum import Enum
|
12 |
+
from pathlib import Path
|
13 |
+
|
14 |
+
import click
|
15 |
+
import torch
|
16 |
+
from flask import Flask, request
|
17 |
+
from flask_socketio import SocketIO
|
18 |
+
|
19 |
+
from chameleon.inference.chameleon import ChameleonInferenceModel, Options, TokenManager
|
20 |
+
|
21 |
+
|
22 |
+
@dataclass
|
23 |
+
class Request:
|
24 |
+
room: str
|
25 |
+
key: str
|
26 |
+
options: dict[str, int | float | bool]
|
27 |
+
prompt_ui: list[dict]
|
28 |
+
|
29 |
+
|
30 |
+
def convert_options(ui_options: dict) -> Options:
|
31 |
+
txt = None
|
32 |
+
if ui_options["enable-text"]:
|
33 |
+
txt = Options.Text(
|
34 |
+
repetition_penalty=ui_options["text-rep-penalty"],
|
35 |
+
temp=ui_options["text-temp"],
|
36 |
+
top_p=ui_options["text-top-p"],
|
37 |
+
)
|
38 |
+
img = None
|
39 |
+
if ui_options["enable-image"]:
|
40 |
+
img = Options.Image(
|
41 |
+
cfg=Options.Image.CFG(
|
42 |
+
guidance_scale_image=ui_options["img-cfg-gsimage"],
|
43 |
+
guidance_scale_text=ui_options["img-cfg-gstext"],
|
44 |
+
),
|
45 |
+
temp=ui_options["img-temp"],
|
46 |
+
top_p=ui_options["img-top-p"],
|
47 |
+
)
|
48 |
+
return Options(
|
49 |
+
max_seq_len=ui_options["max-seq-len"],
|
50 |
+
max_gen_len=ui_options["max-gen-len"],
|
51 |
+
seed=ui_options["seed"],
|
52 |
+
txt=txt,
|
53 |
+
img=img,
|
54 |
+
)
|
55 |
+
|
56 |
+
|
57 |
+
class UIDecoder:
|
58 |
+
class State(Enum):
|
59 |
+
TXT = 1
|
60 |
+
IMG = 2
|
61 |
+
IMG_END = 3
|
62 |
+
|
63 |
+
def __init__(self, token_manager: TokenManager):
|
64 |
+
self.token_manager = token_manager
|
65 |
+
self.state = UIDecoder.State.TXT
|
66 |
+
self.image_builder = []
|
67 |
+
self.image_yield_every_n = 32
|
68 |
+
self.image_has_updated = False
|
69 |
+
|
70 |
+
def _image_progress(self) -> dict:
|
71 |
+
self.image_has_updated = False
|
72 |
+
png = self.token_manager.png_from_bpe_tokens(torch.cat(self.image_builder))
|
73 |
+
return {
|
74 |
+
"type": "image",
|
75 |
+
"value": "data:image/png;base64," + base64.b64encode(png).decode(),
|
76 |
+
}
|
77 |
+
|
78 |
+
def next(self, gpu_token: torch.LongTensor) -> dict | None:
|
79 |
+
if self.state == UIDecoder.State.TXT:
|
80 |
+
cpu_tok = gpu_token.item()
|
81 |
+
|
82 |
+
if cpu_tok == self.token_manager.vocab.begin_image:
|
83 |
+
self.state = UIDecoder.State.IMG
|
84 |
+
return {"type": "image_start"}
|
85 |
+
|
86 |
+
return {
|
87 |
+
"type": "text",
|
88 |
+
"value": self.token_manager.tokenizer.decode([cpu_tok]),
|
89 |
+
}
|
90 |
+
|
91 |
+
elif self.state == UIDecoder.State.IMG:
|
92 |
+
self.image_builder.append(gpu_token)
|
93 |
+
self.image_has_updated = True
|
94 |
+
if len(self.image_builder) == 1024:
|
95 |
+
self.state = UIDecoder.State.IMG_END
|
96 |
+
if len(self.image_builder) % self.image_yield_every_n == 0:
|
97 |
+
return self._image_progress()
|
98 |
+
|
99 |
+
elif self.state == UIDecoder.State.IMG_END:
|
100 |
+
# assert gpu_token == end_image
|
101 |
+
self.state = UIDecoder.State.TXT
|
102 |
+
progress = self._image_progress() if self.image_has_updated else None
|
103 |
+
self.image_builder = []
|
104 |
+
return progress
|
105 |
+
|
106 |
+
|
107 |
+
@dataclass
|
108 |
+
class State:
|
109 |
+
room_keys: dict[str, set[str]]
|
110 |
+
pending_requests: list[Request]
|
111 |
+
cond: threading.Condition
|
112 |
+
|
113 |
+
def __enter__(self, *args, **kwargs):
|
114 |
+
self.cond.__enter__(*args, **kwargs)
|
115 |
+
return self
|
116 |
+
|
117 |
+
def __exit__(self, *args, **kwargs):
|
118 |
+
self.cond.__exit__(*args, **kwargs)
|
119 |
+
return self
|
120 |
+
|
121 |
+
|
122 |
+
GlobalState = State(room_keys={}, pending_requests=[], cond=threading.Condition())
|
123 |
+
|
124 |
+
app = Flask(__name__)
|
125 |
+
socketio = SocketIO(app, max_http_buffer_size=16 * 1024 * 1024)
|
126 |
+
|
127 |
+
|
128 |
+
@app.route("/")
|
129 |
+
def index():
|
130 |
+
with open(Path(__file__).parent / "miniviewer.html") as f:
|
131 |
+
return f.read()
|
132 |
+
|
133 |
+
|
134 |
+
@socketio.on("disconnect")
|
135 |
+
def handle_disconnect():
|
136 |
+
with GlobalState as state:
|
137 |
+
try:
|
138 |
+
del state.room_keys[request.sid]
|
139 |
+
except KeyError:
|
140 |
+
pass
|
141 |
+
|
142 |
+
|
143 |
+
@socketio.on("cancel")
|
144 |
+
def handle_cancel(key):
|
145 |
+
with GlobalState as state:
|
146 |
+
try:
|
147 |
+
state.room_keys[request.sid].remove(key)
|
148 |
+
except KeyError:
|
149 |
+
pass
|
150 |
+
|
151 |
+
|
152 |
+
@socketio.on("generate")
|
153 |
+
def handle_generate(key, options, prompt_ui):
|
154 |
+
with GlobalState as state:
|
155 |
+
if request.sid not in state.room_keys:
|
156 |
+
state.room_keys[request.sid] = set()
|
157 |
+
state.room_keys[request.sid].add(key)
|
158 |
+
state.pending_requests.append(Request(request.sid, key, options, prompt_ui))
|
159 |
+
state.cond.notify_all()
|
160 |
+
|
161 |
+
|
162 |
+
def generation_thread(model: ChameleonInferenceModel):
|
163 |
+
while True:
|
164 |
+
with GlobalState as state:
|
165 |
+
state.cond.wait_for(lambda: state.pending_requests)
|
166 |
+
req = state.pending_requests.pop(0)
|
167 |
+
|
168 |
+
start = time.time()
|
169 |
+
ui_decoder = UIDecoder(model.token_manager)
|
170 |
+
options = convert_options(req.options)
|
171 |
+
|
172 |
+
if not options.txt:
|
173 |
+
progress = ui_decoder.next(
|
174 |
+
torch.tensor([model.token_manager.vocab.begin_image])
|
175 |
+
)
|
176 |
+
socketio.emit(
|
177 |
+
"progress",
|
178 |
+
{"key": req.key, **progress},
|
179 |
+
room=req.room,
|
180 |
+
)
|
181 |
+
|
182 |
+
for token in model.stream(
|
183 |
+
prompt_ui=req.prompt_ui,
|
184 |
+
options=options,
|
185 |
+
):
|
186 |
+
with GlobalState as state:
|
187 |
+
if req.key not in state.room_keys.get(req.room, {}):
|
188 |
+
break
|
189 |
+
|
190 |
+
if progress := ui_decoder.next(token.id):
|
191 |
+
socketio.emit(
|
192 |
+
"progress",
|
193 |
+
{"key": req.key, **progress},
|
194 |
+
room=req.room,
|
195 |
+
)
|
196 |
+
|
197 |
+
timing = time.time() - start
|
198 |
+
socketio.emit(
|
199 |
+
"progress",
|
200 |
+
{"key": req.key, "type": "done", "value": timing},
|
201 |
+
room=req.room,
|
202 |
+
)
|
203 |
+
|
204 |
+
|
205 |
+
def queue_position_thread():
|
206 |
+
local_pending_requests = []
|
207 |
+
while True:
|
208 |
+
with GlobalState as state:
|
209 |
+
state.cond.wait_for(
|
210 |
+
lambda: local_pending_requests != state.pending_requests
|
211 |
+
)
|
212 |
+
local_pending_requests = state.pending_requests[:]
|
213 |
+
|
214 |
+
for i, req in enumerate(local_pending_requests):
|
215 |
+
progress = {
|
216 |
+
"type": "queue",
|
217 |
+
"key": req.key,
|
218 |
+
"value": i + 1,
|
219 |
+
}
|
220 |
+
socketio.emit("progress", progress, room=req.room)
|
221 |
+
|
222 |
+
|
223 |
+
@click.command()
|
224 |
+
@click.option("--data-path", type=click.Path(), default="./data")
|
225 |
+
@click.option(
|
226 |
+
"--model-size", type=click.Choice(["7b", "30b"], case_sensitive=False), default="7b"
|
227 |
+
)
|
228 |
+
def main(data_path, model_size):
|
229 |
+
data_path = Path(data_path)
|
230 |
+
|
231 |
+
model_path = str(data_path / "models" / model_size)
|
232 |
+
tokenizer_path = str(data_path / "tokenizer/text_tokenizer.json")
|
233 |
+
vqgan_cfg_path = str(data_path / "tokenizer/vqgan.yaml")
|
234 |
+
vqgan_ckpt_path = str(data_path / "tokenizer/vqgan.ckpt")
|
235 |
+
|
236 |
+
if not os.path.exists(model_path):
|
237 |
+
raise ValueError(
|
238 |
+
"Model not found. Did you run python -m chameleon.download_data {PRESIGNED_URL}"
|
239 |
+
)
|
240 |
+
|
241 |
+
cm3v2_inference_model = ChameleonInferenceModel(
|
242 |
+
model_path, tokenizer_path, vqgan_cfg_path, vqgan_ckpt_path
|
243 |
+
)
|
244 |
+
threading.Thread(
|
245 |
+
target=generation_thread,
|
246 |
+
args=(cm3v2_inference_model,),
|
247 |
+
daemon=True,
|
248 |
+
).start()
|
249 |
+
threading.Thread(target=queue_position_thread, daemon=True).start()
|
250 |
+
socketio.run(app, debug=False)
|
251 |
+
|
252 |
+
|
253 |
+
if __name__ == "__main__":
|
254 |
+
main()
|
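
generation_thread above just streams raw model tokens; UIDecoder is what turns that flat stream into interleaved text and image events for the browser. A stripped-down, model-free sketch of the same state machine, using hypothetical sentinel ids and an explicit end marker instead of the real begin_image token and fixed 1024-token image blocks:

from enum import Enum

BEGIN_IMAGE, END_IMAGE = -1, -2  # hypothetical sentinel token ids


class State(Enum):
    TXT = 1
    IMG = 2


def split_stream(tokens):
    """Yield ('text', token) and ('image', [tokens]) events from a flat stream."""
    state, image = State.TXT, []
    for tok in tokens:
        if state is State.TXT:
            if tok == BEGIN_IMAGE:
                state = State.IMG
            else:
                yield ("text", tok)
        else:  # collecting image tokens until the closing sentinel
            if tok == END_IMAGE:
                yield ("image", image)
                state, image = State.TXT, []
            else:
                image.append(tok)


print(list(split_stream([5, 6, BEGIN_IMAGE, 1, 2, 3, END_IMAGE, 7])))
# [('text', 5), ('text', 6), ('image', [1, 2, 3]), ('text', 7)]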
chameleon/viewer/backend/__init__.py
ADDED
@@ -0,0 +1,4 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the Chameleon License found in the
+# LICENSE file in the root directory of this source tree.
chameleon/viewer/backend/data_types.py
ADDED
@@ -0,0 +1,90 @@
1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2 |
+
#
|
3 |
+
# This source code is licensed under the Chameleon License found in the
|
4 |
+
# LICENSE file in the root directory of this source tree.
|
5 |
+
|
6 |
+
from enum import Enum
|
7 |
+
from typing import Literal
|
8 |
+
|
9 |
+
from pydantic import BaseModel, Extra, Field
|
10 |
+
|
11 |
+
from chameleon.viewer.backend.models.abstract_model import (
|
12 |
+
DEFAULT_MULTIMODAL_CFG_IMAGE,
|
13 |
+
DEFAULT_MULTIMODAL_CFG_TEXT,
|
14 |
+
)
|
15 |
+
|
16 |
+
|
17 |
+
class WSMessageType(str, Enum):
|
18 |
+
GENERATE_IMAGE = "GENERATE_IMAGE"
|
19 |
+
GENERATE_TEXT = "GENERATE_TEXT"
|
20 |
+
GENERATE_MULTIMODAL = "GENERATE_MULTIMODAL"
|
21 |
+
PARTIAL_OUTPUT = "PARTIAL_OUTPUT"
|
22 |
+
FULL_OUTPUT = "FULL_OUTPUT"
|
23 |
+
COMPLETE = "COMPLETE"
|
24 |
+
ERROR = "ERROR"
|
25 |
+
QUEUE_STATUS = "QUEUE_STATUS"
|
26 |
+
|
27 |
+
|
28 |
+
class ContentType(str, Enum):
|
29 |
+
TEXT = "TEXT"
|
30 |
+
IMAGE = "IMAGE"
|
31 |
+
|
32 |
+
|
33 |
+
class Content(BaseModel):
|
34 |
+
content_type: ContentType
|
35 |
+
content: str
|
36 |
+
|
37 |
+
class Config:
|
38 |
+
extra = Extra.forbid
|
39 |
+
|
40 |
+
|
41 |
+
class NoOptionsForPartial(BaseModel):
|
42 |
+
message_type: Literal[WSMessageType.PARTIAL_OUTPUT] = WSMessageType.PARTIAL_OUTPUT
|
43 |
+
|
44 |
+
|
45 |
+
class NoOptionsForFull(BaseModel):
|
46 |
+
message_type: Literal[WSMessageType.FULL_OUTPUT] = WSMessageType.FULL_OUTPUT
|
47 |
+
|
48 |
+
|
49 |
+
class NoOptionsForComplete(BaseModel):
|
50 |
+
message_type: Literal[WSMessageType.COMPLETE] = WSMessageType.COMPLETE
|
51 |
+
|
52 |
+
|
53 |
+
class NoOptionsForError(BaseModel):
|
54 |
+
message_type: Literal[WSMessageType.ERROR] = WSMessageType.ERROR
|
55 |
+
|
56 |
+
|
57 |
+
class NoOptionsForQueueStatus(BaseModel):
|
58 |
+
message_type: Literal[WSMessageType.QUEUE_STATUS] = WSMessageType.QUEUE_STATUS
|
59 |
+
|
60 |
+
|
61 |
+
class MultimodalGeneratorOptions(BaseModel):
|
62 |
+
message_type: Literal[
|
63 |
+
WSMessageType.GENERATE_MULTIMODAL
|
64 |
+
] = WSMessageType.GENERATE_MULTIMODAL
|
65 |
+
temp: float = 0.7
|
66 |
+
top_p: float = 0.9
|
67 |
+
cfg_image_weight: float = DEFAULT_MULTIMODAL_CFG_IMAGE
|
68 |
+
cfg_text_weight: float = DEFAULT_MULTIMODAL_CFG_TEXT
|
69 |
+
yield_every_n: int = 32
|
70 |
+
max_gen_tokens: int = 4096
|
71 |
+
repetition_penalty: float = 1.2
|
72 |
+
suffix_tokens: list[str] | None = None
|
73 |
+
seed: int | None = None
|
74 |
+
|
75 |
+
class Config:
|
76 |
+
extra = Extra.forbid
|
77 |
+
|
78 |
+
|
79 |
+
class WSMultimodalMessage(BaseModel):
|
80 |
+
message_type: WSMessageType
|
81 |
+
content: list[Content]
|
82 |
+
options: (
|
83 |
+
MultimodalGeneratorOptions
|
84 |
+
| NoOptionsForPartial
|
85 |
+
| NoOptionsForFull
|
86 |
+
| NoOptionsForError
|
87 |
+
| NoOptionsForComplete
|
88 |
+
| NoOptionsForQueueStatus
|
89 |
+
) = Field(..., discriminator="message_type")
|
90 |
+
debug_info: dict[str, str] = {}
|
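
As a quick sanity check of the discriminated union, the message below only validates because the options variant's own message_type agrees with the GENERATE_MULTIMODAL discriminator (pydantic v1-style models, judging by the Extra/Field usage):

from chameleon.viewer.backend.data_types import (
    Content,
    ContentType,
    MultimodalGeneratorOptions,
    WSMessageType,
    WSMultimodalMessage,
)

msg = WSMultimodalMessage(
    message_type=WSMessageType.GENERATE_MULTIMODAL,
    content=[Content(content_type=ContentType.TEXT, content="a red bicycle")],
    options=MultimodalGeneratorOptions(temp=0.7, top_p=0.9, seed=0),
)
print(msg.json(indent=2))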
chameleon/viewer/backend/model_viewer.py
ADDED
@@ -0,0 +1,66 @@
1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates
|
2 |
+
#
|
3 |
+
# This source code is licensed under the Chameleon License found in the
|
4 |
+
# LICENSE file in the root directory of this source tree.
|
5 |
+
|
6 |
+
import hydra
|
7 |
+
import torch
|
8 |
+
from omegaconf import DictConfig
|
9 |
+
|
10 |
+
from chameleon.inference import loader
|
11 |
+
from chameleon.viewer.backend.models.chameleon_distributed import (
|
12 |
+
ChameleonDistributedGenerator,
|
13 |
+
)
|
14 |
+
from chameleon.viewer.backend.models.chameleon_local import ChameleonLocalGenerator
|
15 |
+
from chameleon.viewer.backend.models.service import serve
|
16 |
+
from chameleon.viewer.backend.utils import configure_rich_logging, get_logger
|
17 |
+
|
18 |
+
logger = get_logger(__name__)
|
19 |
+
|
20 |
+
VERSION = "2.0"
|
21 |
+
SEED = 42
|
22 |
+
|
23 |
+
|
24 |
+
def create_chameleon_generator(cfg: DictConfig):
|
25 |
+
world_size = loader.detect_shard_count(cfg.model_path)
|
26 |
+
if world_size > 1:
|
27 |
+
torch.multiprocessing.set_start_method("spawn")
|
28 |
+
generator = ChameleonDistributedGenerator(
|
29 |
+
model_path=cfg.model_path,
|
30 |
+
tokenizer_path=cfg.tokenizer_path,
|
31 |
+
vqgan_config_path=cfg.vqgan_config_path,
|
32 |
+
vqgan_ckpt_path=cfg.vqgan_ckpt_path,
|
33 |
+
additional_eos_tokens=cfg.additional_eos_tokens,
|
34 |
+
world_size=world_size,
|
35 |
+
master_address=cfg.distributed.master_address,
|
36 |
+
master_port=cfg.distributed.master_port,
|
37 |
+
redis_port=cfg.redis_port,
|
38 |
+
)
|
39 |
+
else:
|
40 |
+
generator = ChameleonLocalGenerator(
|
41 |
+
model_path=cfg.model_path,
|
42 |
+
tokenizer_path=cfg.tokenizer_path,
|
43 |
+
vqgan_config_path=cfg.vqgan_config_path,
|
44 |
+
vqgan_ckpt_path=cfg.vqgan_ckpt_path,
|
45 |
+
additional_eos_tokens=cfg.additional_eos_tokens,
|
46 |
+
)
|
47 |
+
return generator
|
48 |
+
|
49 |
+
|
50 |
+
@hydra.main("../../../config", config_name="model_viewer", version_base="1.3.2")
|
51 |
+
def main(cfg: DictConfig) -> None:
|
52 |
+
configure_rich_logging()
|
53 |
+
torch.set_default_tensor_type("torch.cuda.FloatTensor")
|
54 |
+
logger.info("Starting viewer server with hydra cfg: %s", cfg)
|
55 |
+
|
56 |
+
serve(
|
57 |
+
create_chameleon_generator(cfg),
|
58 |
+
cfg.host,
|
59 |
+
cfg.port,
|
60 |
+
debug=cfg.debug,
|
61 |
+
redis_port=cfg.redis_port,
|
62 |
+
)
|
63 |
+
|
64 |
+
|
65 |
+
if __name__ == "__main__":
|
66 |
+
main()
|
chameleon/viewer/backend/models/__init__.py
ADDED
@@ -0,0 +1,4 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates
+#
+# This source code is licensed under the Chameleon License found in the
+# LICENSE file in the root directory of this source tree.
chameleon/viewer/backend/models/abstract_model.py
ADDED
@@ -0,0 +1,67 @@
1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2 |
+
#
|
3 |
+
# This source code is licensed under the Chameleon License found in the
|
4 |
+
# LICENSE file in the root directory of this source tree.
|
5 |
+
|
6 |
+
import abc
|
7 |
+
from dataclasses import dataclass
|
8 |
+
from typing import Generator
|
9 |
+
|
10 |
+
import PIL.Image
|
11 |
+
|
12 |
+
# images, joined retrieval queries, retrieval images
|
13 |
+
MixedTokenType = str | PIL.Image.Image
|
14 |
+
MixedSequenceType = list[MixedTokenType]
|
15 |
+
|
16 |
+
|
17 |
+
@dataclass
|
18 |
+
class StreamingImage:
|
19 |
+
image: PIL.Image.Image
|
20 |
+
final: bool
|
21 |
+
|
22 |
+
|
23 |
+
DEFAULT_MULTIMODAL_CFG_IMAGE = 1.2
|
24 |
+
DEFAULT_MULTIMODAL_CFG_TEXT = 3.0
|
25 |
+
DEFAULT_IMAGE_CFG_IMAGE = 3.0
|
26 |
+
DEFAULT_IMAGE_CFG_TEXT = 3.0
|
27 |
+
|
28 |
+
|
29 |
+
class AbstractMultimodalGenerator(abc.ABC):
|
30 |
+
@abc.abstractmethod
|
31 |
+
def generate_text_streaming(
|
32 |
+
self,
|
33 |
+
prompts: list[MixedSequenceType],
|
34 |
+
temp: float = 1.0,
|
35 |
+
top_p: float = 0.8,
|
36 |
+
seed: int | None = None,
|
37 |
+
) -> Generator[list[str], None, None]:
|
38 |
+
pass
|
39 |
+
|
40 |
+
@abc.abstractmethod
|
41 |
+
def generate_image_streaming(
|
42 |
+
self,
|
43 |
+
prompt: MixedSequenceType,
|
44 |
+
temp: float = 1.0,
|
45 |
+
top_p: float = 0.8,
|
46 |
+
cfg_image_weight: float = DEFAULT_IMAGE_CFG_IMAGE,
|
47 |
+
cfg_text_weight: float = DEFAULT_IMAGE_CFG_TEXT,
|
48 |
+
yield_every_n: int = 32,
|
49 |
+
seed: int | None = None,
|
50 |
+
) -> Generator[PIL.Image.Image, None, None]:
|
51 |
+
pass
|
52 |
+
|
53 |
+
@abc.abstractmethod
|
54 |
+
def generate_multimodal_streaming(
|
55 |
+
self,
|
56 |
+
prompt: MixedSequenceType,
|
57 |
+
temp: float = 1.0,
|
58 |
+
top_p: float = 0.8,
|
59 |
+
cfg_image_weight: float = DEFAULT_MULTIMODAL_CFG_IMAGE,
|
60 |
+
cfg_text_weight: float = DEFAULT_MULTIMODAL_CFG_TEXT,
|
61 |
+
yield_every_n: int = 32,
|
62 |
+
max_gen_tokens: int = 4096,
|
63 |
+
repetition_penalty: float = 1.2,
|
64 |
+
suffix_tokens: list[str] | None = None,
|
65 |
+
seed: int | None = None,
|
66 |
+
) -> Generator[MixedSequenceType, None, None]:
|
67 |
+
pass
|
chameleon/viewer/backend/models/chameleon_distributed.py
ADDED
@@ -0,0 +1,827 @@
1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2 |
+
#
|
3 |
+
# This source code is licensed under the Chameleon License found in the
|
4 |
+
# LICENSE file in the root directory of this source tree.
|
5 |
+
|
6 |
+
import asyncio
|
7 |
+
import json
|
8 |
+
import multiprocessing
|
9 |
+
import os
|
10 |
+
import random
|
11 |
+
import sys
|
12 |
+
import threading
|
13 |
+
import time
|
14 |
+
import traceback
|
15 |
+
from functools import partial
|
16 |
+
from typing import Any, Generator, TypeVar
|
17 |
+
|
18 |
+
import redis
|
19 |
+
import redis.asyncio as async_redis
|
20 |
+
import torch
|
21 |
+
from tokenizers import Tokenizer
|
22 |
+
|
23 |
+
from chameleon.inference.image_tokenizer import ImageTokenizer
|
24 |
+
from chameleon.inference.loader import load_model
|
25 |
+
from chameleon.inference.vocab import VocabInfo
|
26 |
+
from chameleon.viewer.backend.data_types import WSMessageType
|
27 |
+
from chameleon.viewer.backend.models.abstract_model import (
|
28 |
+
DEFAULT_IMAGE_CFG_IMAGE,
|
29 |
+
DEFAULT_IMAGE_CFG_TEXT,
|
30 |
+
DEFAULT_MULTIMODAL_CFG_IMAGE,
|
31 |
+
DEFAULT_MULTIMODAL_CFG_TEXT,
|
32 |
+
AbstractMultimodalGenerator,
|
33 |
+
MixedSequenceType,
|
34 |
+
StreamingImage,
|
35 |
+
)
|
36 |
+
from chameleon.viewer.backend.models.chameleon_local import (
|
37 |
+
ChameleonForwardMixin,
|
38 |
+
ChameleonTokenizationMixin,
|
39 |
+
)
|
40 |
+
from chameleon.viewer.backend.utils import get_logger
|
41 |
+
|
42 |
+
logger = get_logger(__name__)
|
43 |
+
|
44 |
+
START = "START"
|
45 |
+
|
46 |
+
T = TypeVar("T")
|
47 |
+
|
48 |
+
|
49 |
+
def find_any(queue_by_id: dict[str, list]) -> str | None:
|
50 |
+
for candidate_queue_id, candidate_queue in queue_by_id.items():
|
51 |
+
if len(candidate_queue) > 0:
|
52 |
+
return candidate_queue_id
|
53 |
+
return None
|
54 |
+
|
55 |
+
|
56 |
+
class RedisQueue:
|
57 |
+
def __init__(self, redis_client: redis.Redis, name: str, interval: float = 0.1):
|
58 |
+
self.redis_client = redis_client
|
59 |
+
self.name = name
|
60 |
+
self.interval = interval
|
61 |
+
self.lock = redis.lock.Lock(redis_client, f"lock_for_{name}")
|
62 |
+
|
63 |
+
def reset(self):
|
64 |
+
self.redis_client.set(self.name, json.dumps({}))
|
65 |
+
try:
|
66 |
+
self.lock.release()
|
67 |
+
except redis.lock.LockError:
|
68 |
+
pass
|
69 |
+
|
70 |
+
def size(self) -> int:
|
71 |
+
maybe_queue_by_id = self.redis_client.get(self.name)
|
72 |
+
if maybe_queue_by_id is None:
|
73 |
+
return 0
|
74 |
+
else:
|
75 |
+
return len(json.loads(maybe_queue_by_id))
|
76 |
+
|
77 |
+
def clear(self, queue_id: str):
|
78 |
+
with self.lock:
|
79 |
+
maybe_queue_by_id = self.redis_client.get(self.name)
|
80 |
+
if maybe_queue_by_id is None:
|
81 |
+
queue_by_id: dict[str, list] = {}
|
82 |
+
else:
|
83 |
+
queue_by_id: dict[str, list] = json.loads(maybe_queue_by_id)
|
84 |
+
queue_by_id[queue_id] = []
|
85 |
+
self.redis_client.set(self.name, json.dumps(queue_by_id))
|
86 |
+
|
87 |
+
def put(self, queue_id: str, value: T):
|
88 |
+
logger.debug(
|
89 |
+
"Thread %s: Starting PUT(%s) for %s",
|
90 |
+
threading.get_ident(),
|
91 |
+
self.name,
|
92 |
+
queue_id,
|
93 |
+
)
|
94 |
+
with self.lock:
|
95 |
+
maybe_queue_by_id = self.redis_client.get(self.name)
|
96 |
+
if maybe_queue_by_id is None:
|
97 |
+
queue_by_id: dict[str, list[T]] = {}
|
98 |
+
else:
|
99 |
+
queue_by_id: dict[str, list[T]] = json.loads(maybe_queue_by_id)
|
100 |
+
|
101 |
+
if queue_id not in queue_by_id:
|
102 |
+
queue_by_id[queue_id] = []
|
103 |
+
queue_by_id[queue_id] = [value] + queue_by_id[queue_id]
|
104 |
+
self.redis_client.set(self.name, json.dumps(queue_by_id))
|
105 |
+
|
106 |
+
logger.debug(
|
107 |
+
"Thread %s: Finished PUT(%s) for %s",
|
108 |
+
threading.get_ident(),
|
109 |
+
self.name,
|
110 |
+
queue_id,
|
111 |
+
)
|
112 |
+
|
113 |
+
def get(self, queue_id: str | None) -> tuple[str, T]:
|
114 |
+
"""
|
115 |
+
Get the next value in the queue.
|
116 |
+
|
117 |
+
if queue_id is None, will get a value from any queue
|
118 |
+
|
119 |
+
if queue_id is not None, will wait to get a value from that specific queue
|
120 |
+
"""
|
121 |
+
logger.debug(
|
122 |
+
"Thread %s: Starting GET(%s) for %s",
|
123 |
+
threading.get_ident(),
|
124 |
+
self.name,
|
125 |
+
queue_id,
|
126 |
+
)
|
127 |
+
while True:
|
128 |
+
with self.lock:
|
129 |
+
# Initialization hasn't happened, so wait for it to happen
|
130 |
+
maybe_queue_by_id = self.redis_client.get(self.name)
|
131 |
+
if maybe_queue_by_id is None:
|
132 |
+
continue
|
133 |
+
queue_by_id: dict[str, list[T]] = json.loads(maybe_queue_by_id)
|
134 |
+
if queue_id is None:
|
135 |
+
queue_id = find_any(queue_by_id)
|
136 |
+
|
137 |
+
# Ensure a queue_id was found or that it already existed
|
138 |
+
if queue_id is not None and queue_id in queue_by_id:
|
139 |
+
queue = queue_by_id[queue_id]
|
140 |
+
if len(queue) == 0:
|
141 |
+
continue
|
142 |
+
value = queue.pop(-1)
|
143 |
+
# queue is mutated and queue_by_id references it, so this works
|
144 |
+
self.redis_client.set(self.name, json.dumps(queue_by_id))
|
145 |
+
logger.debug(
|
146 |
+
"Thread %s: Finished GET(%s) for %s",
|
147 |
+
threading.get_ident(),
|
148 |
+
self.name,
|
149 |
+
queue_id,
|
150 |
+
)
|
151 |
+
return queue_id, value
|
152 |
+
time.sleep(self.interval)
|
153 |
+
|
154 |
+
|
155 |
+
class AsyncRedisQueue:
|
156 |
+
def __init__(
|
157 |
+
self, redis_client: async_redis.Redis, name: str, interval: float = 0.1
|
158 |
+
) -> None:
|
159 |
+
self.redis_client = redis_client
|
160 |
+
self.name = name
|
161 |
+
self.interval = interval
|
162 |
+
self.lock = async_redis.lock.Lock(redis_client, f"lock_for_{name}")
|
163 |
+
|
164 |
+
async def reset(self):
|
165 |
+
await self.redis_client.set(self.name, json.dumps({}))
|
166 |
+
try:
|
167 |
+
await self.lock.release()
|
168 |
+
except async_redis.lock.LockError:
|
169 |
+
pass
|
170 |
+
|
171 |
+
async def size(self) -> int:
|
172 |
+
maybe_queue_by_id = await self.redis_client.get(self.name)
|
173 |
+
if maybe_queue_by_id is None:
|
174 |
+
return 0
|
175 |
+
else:
|
176 |
+
return len(json.loads(maybe_queue_by_id))
|
177 |
+
|
178 |
+
async def clear(self, queue_id: str):
|
179 |
+
logger.debug(
|
180 |
+
"ASYNC Thread %s: Starting CLEAR(%s) for %s",
|
181 |
+
threading.get_ident(),
|
182 |
+
self.name,
|
183 |
+
queue_id,
|
184 |
+
)
|
185 |
+
async with self.lock:
|
186 |
+
maybe_queue_by_id = await self.redis_client.get(self.name)
|
187 |
+
if maybe_queue_by_id is None:
|
188 |
+
queue_by_id: dict[str, list] = {}
|
189 |
+
else:
|
190 |
+
queue_by_id: dict[str, list] = json.loads(maybe_queue_by_id)
|
191 |
+
queue_by_id[queue_id] = []
|
192 |
+
await self.redis_client.set(self.name, json.dumps(queue_by_id))
|
193 |
+
|
194 |
+
logger.debug(
|
195 |
+
"ASYNC Thread %s: Finished CLEAR(%s) for %s",
|
196 |
+
threading.get_ident(),
|
197 |
+
self.name,
|
198 |
+
queue_id,
|
199 |
+
)
|
200 |
+
|
201 |
+
async def put(self, queue_id: str, value: T):
|
202 |
+
logger.debug(
|
203 |
+
"ASYNC Thread %s: Starting PUT(%s) for %s",
|
204 |
+
threading.get_ident(),
|
205 |
+
self.name,
|
206 |
+
queue_id,
|
207 |
+
)
|
208 |
+
|
209 |
+
async with self.lock:
|
210 |
+
maybe_queue_by_id = await self.redis_client.get(self.name)
|
211 |
+
if maybe_queue_by_id is None:
|
212 |
+
queue_by_id: dict[str, list[T]] = {}
|
213 |
+
else:
|
214 |
+
queue_by_id: dict[str, list[T]] = json.loads(maybe_queue_by_id)
|
215 |
+
|
216 |
+
if queue_id not in queue_by_id:
|
217 |
+
queue_by_id[queue_id] = []
|
218 |
+
queue_by_id[queue_id] = [value] + queue_by_id[queue_id]
|
219 |
+
await self.redis_client.set(self.name, json.dumps(queue_by_id))
|
220 |
+
|
221 |
+
logger.debug(
|
222 |
+
"ASYNC Thread %s: Finished PUT(%s) for %s",
|
223 |
+
threading.get_ident(),
|
224 |
+
self.name,
|
225 |
+
queue_id,
|
226 |
+
)
|
227 |
+
|
228 |
+
async def get(self, queue_id: str | None):
|
229 |
+
"""
|
230 |
+
Get the next value in the queue.
|
231 |
+
|
232 |
+
if queue_id is None, will get a value from any queue
|
233 |
+
|
234 |
+
if queue_id is not None, will wait to get a value from that specific queue
|
235 |
+
"""
|
236 |
+
logger.debug(
|
237 |
+
"ASYNC Thread %s: Starting GET(%s) for %s",
|
238 |
+
threading.get_ident(),
|
239 |
+
self.name,
|
240 |
+
queue_id,
|
241 |
+
)
|
242 |
+
while True:
|
243 |
+
async with self.lock:
|
244 |
+
maybe_queue_by_id = await self.redis_client.get(self.name)
|
245 |
+
if maybe_queue_by_id is None:
|
246 |
+
continue
|
247 |
+
queue_by_id: dict[str, list[T]] = json.loads(maybe_queue_by_id)
|
248 |
+
if queue_id is None:
|
249 |
+
queue_id = find_any(queue_by_id)
|
250 |
+
|
251 |
+
# Ensure a queue_id was found or that it already existed
|
252 |
+
if queue_id is not None and queue_id in queue_by_id:
|
253 |
+
queue: list = queue_by_id[queue_id]
|
254 |
+
if len(queue) == 0:
|
255 |
+
continue
|
256 |
+
value = queue.pop(-1)
|
257 |
+
# queue is mutated and queue_by_id references it, so this works
|
258 |
+
await self.redis_client.set(self.name, json.dumps(queue_by_id))
|
259 |
+
logger.debug(
|
260 |
+
"ASYNC Thread %s: Finished GET(%s) for %s",
|
261 |
+
threading.get_ident(),
|
262 |
+
self.name,
|
263 |
+
queue_id,
|
264 |
+
)
|
265 |
+
return queue_id, value
|
266 |
+
await asyncio.sleep(self.interval)
|
267 |
+
|
268 |
+
|
269 |
+
class AsyncRedisCounter:
|
270 |
+
def __init__(self, redis_client: async_redis.Redis, name: str) -> None:
|
271 |
+
self.redis_client = redis_client
|
272 |
+
self.name = name
|
273 |
+
self.lock = async_redis.lock.Lock(redis_client, f"lock_for_{name}")
|
274 |
+
|
275 |
+
async def reset(self) -> int:
|
276 |
+
try:
|
277 |
+
await self.lock.release()
|
278 |
+
except async_redis.lock.LockError:
|
279 |
+
pass
|
280 |
+
await self.redis_client.set(self.name, 0)
|
281 |
+
|
282 |
+
async def add(self, n: int) -> int:
|
283 |
+
async with self.lock:
|
284 |
+
current_val = await self.redis_client.get(self.name)
|
285 |
+
if current_val is None:
|
286 |
+
current_val = 0
|
287 |
+
else:
|
288 |
+
current_val = int(current_val)
|
289 |
+
new_val = current_val + n
|
290 |
+
await self.redis_client.set(self.name, new_val)
|
291 |
+
return new_val
|
292 |
+
|
293 |
+
async def sub(self, n: int) -> int:
|
294 |
+
async with self.lock:
|
295 |
+
current_val = await self.redis_client.get(self.name)
|
296 |
+
if current_val is None:
|
297 |
+
raise ValueError("Invalid sub counter when counter does not exist")
|
298 |
+
current_val = int(current_val)
|
299 |
+
if current_val <= 0:
|
300 |
+
raise ValueError("Invalid sub counter to counter that is already zero")
|
301 |
+
new_val = current_val - n
|
302 |
+
await self.redis_client.set(self.name, new_val)
|
303 |
+
return new_val
|
304 |
+
|
305 |
+
async def count(self) -> int:
|
306 |
+
value = await self.redis_client.get(self.name)
|
307 |
+
if value is None:
|
308 |
+
return 0
|
309 |
+
else:
|
310 |
+
return int(value)
|
311 |
+
|
312 |
+
|
313 |
+
def distributed_workers(
|
314 |
+
model_args: dict,
|
315 |
+
master_address: str,
|
316 |
+
master_port: str,
|
317 |
+
world_size: int,
|
318 |
+
rank: int,
|
319 |
+
redis_port: int,
|
320 |
+
worker_queues: dict[int, multiprocessing.Queue],
|
321 |
+
) -> None:
|
322 |
+
redis_client = redis.Redis("redis", redis_port)
|
323 |
+
request_queue = RedisQueue(redis_client, "request")
|
324 |
+
response_queue = RedisQueue(redis_client, "response")
|
325 |
+
|
326 |
+
os.environ["MASTER_ADDR"] = master_address
|
327 |
+
os.environ["MASTER_PORT"] = str(master_port)
|
328 |
+
|
329 |
+
torch.set_default_tensor_type("torch.cuda.FloatTensor")
|
330 |
+
|
331 |
+
torch.distributed.init_process_group("nccl", rank=rank, world_size=world_size)
|
332 |
+
assert rank == torch.distributed.get_rank()
|
333 |
+
|
334 |
+
torch.cuda.set_device(rank)
|
335 |
+
|
336 |
+
is_coord = rank == 0
|
337 |
+
|
338 |
+
worker = ChameleonWorker(
|
339 |
+
rank=rank,
|
340 |
+
model_path=model_args["model_path"],
|
341 |
+
tokenizer_path=model_args["tokenizer_path"],
|
342 |
+
additional_eos_tokens=model_args["additional_eos_tokens"],
|
343 |
+
)
|
344 |
+
worker_id = id(worker)
|
345 |
+
logger.info("Rank %s, master_port=%s worker=%s", rank, master_port, worker_id)
|
346 |
+
|
347 |
+
step = 0
|
348 |
+
while True:
|
349 |
+
step += 1
|
350 |
+
redis_client.set(f"status_rank_{rank}", "Pre-coordinator sync")
|
351 |
+
if is_coord:
|
352 |
+
distributed_objs = [request_queue.get(None)]
|
353 |
+
logger.info("Objects from queue: %s", distributed_objs)
|
354 |
+
for worker_rank in range(1, world_size):
|
355 |
+
worker_message = {"message": START, "src": rank, "dst": worker_rank}
|
356 |
+
logger.info("Rank %s Sending: %s", rank, worker_message)
|
357 |
+
worker_queues[worker_rank].put(worker_message)
|
358 |
+
else:
|
359 |
+
distributed_objs = [None]
|
360 |
+
logger.info("Rank %s worker %s waiting for rank 0", rank, worker_id)
|
361 |
+
message_from_rank_0 = worker_queues[rank].get()
|
362 |
+
logger.info(
|
363 |
+
"Received message from rank 0 in rank %s: %s", rank, message_from_rank_0
|
364 |
+
)
|
365 |
+
if message_from_rank_0["message"] != START:
|
366 |
+
raise ValueError(
|
367 |
+
f"Unexpected message from rank 0: {message_from_rank_0['message']}"
|
368 |
+
)
|
369 |
+
redis_client.set(f"status_rank_{rank}", "Post-coordinator sync")
|
370 |
+
|
371 |
+
try:
|
372 |
+
logger.info(
|
373 |
+
"Broadcast Starting: Rank %s, worker %s, step %s",
|
374 |
+
rank,
|
375 |
+
worker_id,
|
376 |
+
step,
|
377 |
+
)
|
378 |
+
redis_client.set(f"status_rank_{rank}", "Pre-torch sync")
|
379 |
+
torch.distributed.broadcast_object_list(distributed_objs, src=0)
|
380 |
+
redis_client.set(f"status_rank_{rank}", "Post-torch sync")
|
381 |
+
logger.info(
|
382 |
+
"Broadcast Complete: Rank %s, worker %s, step %s",
|
383 |
+
rank,
|
384 |
+
worker_id,
|
385 |
+
step,
|
386 |
+
)
|
387 |
+
except RuntimeError as e:
|
388 |
+
logger.error(
|
389 |
+
"Rank %s, worker %s, step %s, Error detected in torch broadcast: %s",
|
390 |
+
rank,
|
391 |
+
worker_id,
|
392 |
+
step,
|
393 |
+
str(e),
|
394 |
+
)
|
395 |
+
raise
|
396 |
+
|
397 |
+
logger.info("rank %s, objs %s", rank, distributed_objs)
|
398 |
+
queue_id, data = distributed_objs[0]
|
399 |
+
mode = data.pop("mode")
|
400 |
+
request_id = data.pop("request_id")
|
401 |
+
assert queue_id == request_id
|
402 |
+
tokenized_prompt = data.pop("tokenized_prompt")
|
403 |
+
try:
|
404 |
+
match mode:
|
405 |
+
case WSMessageType.GENERATE_TEXT:
|
406 |
+
generator_fn = partial(
|
407 |
+
worker._generate_text_streaming, tokenized_prompt, **data
|
408 |
+
)
|
409 |
+
case WSMessageType.GENERATE_IMAGE:
|
410 |
+
generator_fn = partial(
|
411 |
+
worker._generate_image_streaming, tokenized_prompt, **data
|
412 |
+
)
|
413 |
+
case WSMessageType.GENERATE_MULTIMODAL:
|
414 |
+
generator_fn = partial(
|
415 |
+
worker._generate_multimodal_streaming, tokenized_prompt, **data
|
416 |
+
)
|
417 |
+
case _:
|
418 |
+
logger.error(
|
419 |
+
"Encountered unknown mode, crashing the program: %s", mode
|
420 |
+
)
|
421 |
+
response_queue.put(
|
422 |
+
queue_id, {"error": True, "final": True, "message": mode}
|
423 |
+
)
|
424 |
+
raise ValueError("Unknown mode")
|
425 |
+
logger.info("Rank: %s, Processing request: %s", rank, request_id)
|
426 |
+
i = 0
|
427 |
+
redis_client.set(f"status_rank_{rank}", "Pre-generate")
|
428 |
+
for output in generator_fn():
|
429 |
+
i += 1
|
430 |
+
if is_coord:
|
431 |
+
response = {"final": False, "output": output, "error": False}
|
432 |
+
logger.info(
|
433 |
+
"Rank: %s, Adding to response queue: %.100s",
|
434 |
+
rank,
|
435 |
+
response,
|
436 |
+
)
|
437 |
+
redis_client.set(f"status_rank_{rank}", f"Generate Pre Put {i}")
|
438 |
+
response_queue.put(queue_id, response)
|
439 |
+
redis_client.set(f"status_rank_{rank}", f"Generate Post Put {i}")
|
440 |
+
else:
|
441 |
+
redis_client.set(f"status_rank_{rank}", f"Generate {i}")
|
442 |
+
redis_client.set(f"step_on_rank_{rank}", i)
|
443 |
+
redis_client.set(f"status_rank_{rank}", "Post-generate")
|
444 |
+
if is_coord:
|
445 |
+
logger.info("Rank: %s, Adding final result to output queue", rank)
|
446 |
+
response_queue.put(queue_id, {"final": True, "error": False})
|
447 |
+
except torch.cuda.OutOfMemoryError as e:
|
448 |
+
logger.error("Encountered OOM, crashing the program: %s", e)
|
449 |
+
response_queue.put(
|
450 |
+
queue_id, {"error": True, "final": True, "message": str(e)}
|
451 |
+
)
|
452 |
+
crash_program()
|
453 |
+
except RuntimeError as e:
|
454 |
+
message = str(e)
|
455 |
+
if "CUDA" in message:
|
456 |
+
logger.error("Encountered CUDA error, crashing the program: %s", e)
|
457 |
+
response_queue.put(
|
458 |
+
queue_id, {"error": True, "final": True, "message": str(e)}
|
459 |
+
)
|
460 |
+
crash_program()
|
461 |
+
else:
|
462 |
+
logger.error(
|
463 |
+
"Encountered unexpected runtime error, crashing the program: %s %s",
|
464 |
+
e,
|
465 |
+
traceback.format_exc(),
|
466 |
+
)
|
467 |
+
response_queue.put(
|
468 |
+
queue_id, {"error": True, "final": True, "message": str(e)}
|
469 |
+
)
|
470 |
+
crash_program()
|
471 |
+
except Exception as e:
|
472 |
+
logger.error(
|
473 |
+
"Encountered unexpected exception: %s %s",
|
474 |
+
str(e),
|
475 |
+
traceback.format_exc(),
|
476 |
+
)
|
477 |
+
response_queue.put(
|
478 |
+
queue_id, {"error": True, "final": True, "message": str(e)}
|
479 |
+
)
|
480 |
+
crash_program()
|
481 |
+
|
482 |
+
|
483 |
+
class ChameleonWorker(ChameleonForwardMixin):
|
484 |
+
def __init__(
|
485 |
+
self,
|
486 |
+
*,
|
487 |
+
rank: int,
|
488 |
+
model_path: str,
|
489 |
+
tokenizer_path: str,
|
490 |
+
additional_eos_tokens: list[str] | None,
|
491 |
+
) -> None:
|
492 |
+
self.rank = rank
|
493 |
+
self.model_path = model_path
|
494 |
+
self.additional_eos_tokens = additional_eos_tokens
|
495 |
+
torch.set_default_device(f"cuda:{rank}")
|
496 |
+
self.model = load_model(model_path, rank)
|
497 |
+
self.tokenizer = Tokenizer.from_file(str(tokenizer_path))
|
498 |
+
self.vocab = VocabInfo(json.load(open(tokenizer_path))["model"]["vocab"])
|
499 |
+
logger.info(
|
500 |
+
"Rank: %s, Model loaded in worker_obj: %s",
|
501 |
+
rank,
|
502 |
+
id(self),
|
503 |
+
)
|
504 |
+
|
505 |
+
|
506 |
+
def crash_program() -> None:
|
507 |
+
logger.error(
|
508 |
+
"Crashing the program as instructed, likely due to distributed worker failures"
|
509 |
+
)
|
510 |
+
sys.exit(1)
|
511 |
+
|
512 |
+
|
513 |
+
class ChameleonDistributedGenerator(AbstractMultimodalGenerator, ChameleonTokenizationMixin):
|
514 |
+
def __init__(
|
515 |
+
self,
|
516 |
+
*,
|
517 |
+
world_size: int,
|
518 |
+
model_path: str,
|
519 |
+
master_port: int,
|
520 |
+
tokenizer_path: str,
|
521 |
+
vqgan_config_path: str,
|
522 |
+
vqgan_ckpt_path: str | None = None,
|
523 |
+
master_address: str = "0.0.0.0",
|
524 |
+
additional_eos_tokens: list[str] | None = None,
|
525 |
+
redis_port: int | None = None,
|
526 |
+
) -> None:
|
527 |
+
self.master_port = master_port
|
528 |
+
self.master_address = master_address
|
529 |
+
self.additional_eos_tokens = additional_eos_tokens
|
530 |
+
logger.info("Loading tokenizer...")
|
531 |
+
tokenizer_path = tokenizer_path
|
532 |
+
self.tokenizer = Tokenizer.from_file(str(tokenizer_path))
|
533 |
+
self.vocab = VocabInfo(json.load(open(tokenizer_path))["model"]["vocab"])
|
534 |
+
|
535 |
+
logger.info("Loading VQGAN...")
|
536 |
+
self.image_tokenizer = ImageTokenizer(vqgan_config_path, vqgan_ckpt_path)
|
537 |
+
self.redis_port = redis_port
|
538 |
+
self.redis_pool = async_redis.ConnectionPool.from_url(
|
539 |
+
f"redis://redis:{redis_port}"
|
540 |
+
)
|
541 |
+
self.redis_client = async_redis.Redis.from_pool(self.redis_pool)
|
542 |
+
self.request_queue = AsyncRedisQueue(self.redis_client, "request")
|
543 |
+
self.response_queue = AsyncRedisQueue(self.redis_client, "response")
|
544 |
+
self.worker_queues: dict[int, multiprocessing.Queue] = {
|
545 |
+
rank: multiprocessing.Queue() for rank in range(world_size)
|
546 |
+
}
|
547 |
+
self.procs: list[multiprocessing.Process] = []
|
548 |
+
model_args = {
|
549 |
+
"model_path": model_path,
|
550 |
+
"master_address": master_address,
|
551 |
+
"master_port": master_port,
|
552 |
+
"tokenizer_path": tokenizer_path,
|
553 |
+
"additional_eos_tokens": additional_eos_tokens,
|
554 |
+
}
|
555 |
+
logger.info("Launching paralle model with world_size=%s", world_size)
|
556 |
+
for i in range(world_size):
|
557 |
+
proc = multiprocessing.Process(
|
558 |
+
target=distributed_workers,
|
559 |
+
args=(
|
560 |
+
model_args,
|
561 |
+
master_address,
|
562 |
+
master_port,
|
563 |
+
world_size,
|
564 |
+
i,
|
565 |
+
self.redis_port,
|
566 |
+
self.worker_queues,
|
567 |
+
),
|
568 |
+
daemon=True,
|
569 |
+
)
|
570 |
+
self.procs.append(proc)
|
571 |
+
proc.start()
|
572 |
+
|
573 |
+
def check_error(self, output: dict) -> None:
|
574 |
+
if output["error"]:
|
575 |
+
import sys
|
576 |
+
print(f"check_error({output})", file=sys.stderr)
|
577 |
+
self.kill_procs()
|
578 |
+
logger.error(
|
579 |
+
"COORDINATOR: Encountered error in managed processes, exiting: %s",
|
580 |
+
output,
|
581 |
+
)
|
582 |
+
crash_program()
|
583 |
+
|
584 |
+
def __del__(self) -> None:
|
585 |
+
self.kill_procs(error=False)
|
586 |
+
|
587 |
+
def kill_procs(self, error: bool = True) -> None:
|
588 |
+
if error:
|
589 |
+
log_fn = logger.error
|
590 |
+
else:
|
591 |
+
log_fn = logger.info
|
592 |
+
log_fn("Error encountered, killing worker procs: %s", self.procs)
|
593 |
+
for p in self.procs:
|
594 |
+
try:
|
595 |
+
log_fn("Killing: %s", p)
|
596 |
+
p.kill()
|
597 |
+
except:
|
598 |
+
log_fn("Encountered issue killing process and ignoring: %s", p)
|
599 |
+
|
600 |
+
# ALLOW_ANY(get_next_output.return)
|
601 |
+
async def get_next_output(self, request_id: str) -> Any:
|
602 |
+
logger.info("Waiting for response for request_id=%s", request_id)
|
603 |
+
queue_id, output = await self.response_queue.get(request_id)
|
604 |
+
assert queue_id == request_id
|
605 |
+
return output
|
606 |
+
|
607 |
+
async def generate_text_streaming(
|
608 |
+
self,
|
609 |
+
prompt: MixedSequenceType,
|
610 |
+
max_gen_tokens: int = 256,
|
611 |
+
temp: float = 1.0,
|
612 |
+
top_p: float = 0.8,
|
613 |
+
repetition_penalty: float = 1.2,
|
614 |
+
seed: int | None = None,
|
615 |
+
debug: dict | None = None,
|
616 |
+
) -> Generator[str, None, None]:
|
617 |
+
tokenized_prompt = self.tokens_from_inputs(prompt)
|
618 |
+
request_id = f"request_{random.randint(100_000, 200_000)}"
|
619 |
+
if seed is None:
|
620 |
+
seed = random.randint(1, 2048)
|
621 |
+
if debug is not None:
|
622 |
+
debug["seed"] = seed
|
623 |
+
if len(tokenized_prompt) > (4096 - 3):
|
624 |
+
yield "ERROR: Your input exceeds the model's context length of 4096. Note that images consume 1024 tokens whether in input or output."
|
625 |
+
return
|
626 |
+
assert not isinstance(tokenized_prompt, torch.Tensor)
|
627 |
+
request = {
|
628 |
+
"mode": WSMessageType.GENERATE_TEXT.value,
|
629 |
+
"request_id": request_id,
|
630 |
+
"tokenized_prompt": tokenized_prompt,
|
631 |
+
"max_gen_tokens": max_gen_tokens,
|
632 |
+
"temp": temp,
|
633 |
+
"top_p": top_p,
|
634 |
+
"repetition_penalty": repetition_penalty,
|
635 |
+
"seed": seed,
|
636 |
+
}
|
637 |
+
logger.info(
|
638 |
+
"Sending request_id=%s: %s",
|
639 |
+
request_id,
|
640 |
+
request,
|
641 |
+
)
|
642 |
+
await asyncio.gather(
|
643 |
+
self.request_queue.clear(request_id),
|
644 |
+
self.response_queue.clear(request_id),
|
645 |
+
)
|
646 |
+
logger.info("Cleared request/response queue for %s", request_id)
|
647 |
+
await self.request_queue.put(request_id, request)
|
648 |
+
logger.info("Sent request to coordinator %s", request_id)
|
649 |
+
try:
|
650 |
+
while True:
|
651 |
+
output = await self.get_next_output(request_id)
|
652 |
+
logger.info("Received response for %s", request_id)
|
653 |
+
self.check_error(output)
|
654 |
+
if output["final"]:
|
655 |
+
break
|
656 |
+
|
657 |
+
n_outs = len(output["output"])
|
658 |
+
if n_outs != 1:
|
659 |
+
logger.error(
|
660 |
+
"Encountered unexpected number of %s arguments in: %s",
|
661 |
+
n_outs,
|
662 |
+
output["output"],
|
663 |
+
)
|
664 |
+
tokens = output["output"]
|
665 |
+
assert not isinstance(tokens, torch.Tensor)
|
666 |
+
logger.info("output info: type=%s, value=%.20s", type(tokens), tokens)
|
667 |
+
yield self.tokenizer.decode(tokens)
|
668 |
+
finally:
|
669 |
+
logger.info("Cleaning up queues in request_id=%s", request_id)
|
670 |
+
await asyncio.gather(
|
671 |
+
self.request_queue.clear(request_id),
|
672 |
+
self.response_queue.clear(request_id),
|
673 |
+
)
|
674 |
+
logger.info("Completed cleaning for request_id=%s", request_id)
|
675 |
+
|
676 |
+
async def generate_image_streaming(
|
677 |
+
self,
|
678 |
+
prompt: MixedSequenceType,
|
679 |
+
temp: float = 1.0,
|
680 |
+
top_p: float = 0.8,
|
681 |
+
cfg_image_weight: float = DEFAULT_IMAGE_CFG_IMAGE,
|
682 |
+
cfg_text_weight: float = DEFAULT_IMAGE_CFG_TEXT,
|
683 |
+
yield_every_n: int = 32,
|
684 |
+
debug: dict | None = None,
|
685 |
+
seed: int | None = None,
|
686 |
+
) -> Generator[StreamingImage, None, None]:
|
687 |
+
tokenized_prompt = self.tokens_from_inputs(prompt)
|
688 |
+
tokenized_prompt.append(self.vocab.begin_image)
|
689 |
+
assert not isinstance(tokenized_prompt, torch.Tensor)
|
690 |
+
request_id = f"request_{random.randint(100_000, 200_000)}"
|
691 |
+
if seed is None:
|
692 |
+
seed = random.randint(1, 2048)
|
693 |
+
if debug is not None:
|
694 |
+
debug["seed"] = seed
|
695 |
+
if len(tokenized_prompt) > (4096 - 3 - 1024):
|
696 |
+
yield "ERROR: Your input exceeds the model's context length of 4096. Note that images consume 1024 tokens whether in input or output."
|
697 |
+
return
|
698 |
+
request = {
|
699 |
+
"mode": WSMessageType.GENERATE_IMAGE.value,
|
700 |
+
"request_id": request_id,
|
701 |
+
"tokenized_prompt": tokenized_prompt,
|
702 |
+
"cfg_image_weight": cfg_image_weight,
|
703 |
+
"cfg_text_weight": cfg_text_weight,
|
704 |
+
"yield_every_n": yield_every_n,
|
705 |
+
"temp": temp,
|
706 |
+
"top_p": top_p,
|
707 |
+
"seed": seed,
|
708 |
+
}
|
709 |
+
logger.info(
|
710 |
+
"Sending request_id=%s: %s",
|
711 |
+
request_id,
|
712 |
+
request,
|
713 |
+
)
|
714 |
+
await asyncio.gather(
|
715 |
+
self.request_queue.clear(request_id),
|
716 |
+
self.response_queue.clear(request_id),
|
717 |
+
)
|
718 |
+
logger.info("Cleared request/response queue for %s", request_id)
|
719 |
+
await self.request_queue.put(request_id, request)
|
720 |
+
logger.info("Sent request to coordinator %s", request_id)
|
721 |
+
try:
|
722 |
+
while True:
|
723 |
+
output = await self.get_next_output(request_id)
|
724 |
+
logger.info("Received response for %s", request_id)
|
725 |
+
self.check_error(output)
|
726 |
+
if output["final"]:
|
727 |
+
break
|
728 |
+
n_outs = len(output["output"])
|
729 |
+
if n_outs != 2:
|
730 |
+
logger.error(
|
731 |
+
"Encountered unexpected number of %s arguments in: %s",
|
732 |
+
n_outs,
|
733 |
+
output["output"],
|
734 |
+
)
|
735 |
+
tokens, final = output["output"]
|
736 |
+
assert not isinstance(tokens, torch.Tensor)
|
737 |
+
yield StreamingImage(
|
738 |
+
image=self.pillow_from_bpe_tokens(torch.tensor(tokens)), final=final
|
739 |
+
)
|
740 |
+
finally:
|
741 |
+
logger.info("Cleaning up queues in request_id=%s", request_id)
|
742 |
+
await asyncio.gather(
|
743 |
+
self.request_queue.clear(request_id),
|
744 |
+
self.response_queue.clear(request_id),
|
745 |
+
)
|
746 |
+
logger.info("Completed cleaning for request_id=%s", request_id)
|
747 |
+
|
748 |
+
async def generate_multimodal_streaming(
|
749 |
+
self,
|
750 |
+
prompt: MixedSequenceType,
|
751 |
+
temp: float = 1.0,
|
752 |
+
top_p: float = 0.8,
|
753 |
+
cfg_image_weight: float = DEFAULT_MULTIMODAL_CFG_IMAGE,
|
754 |
+
cfg_text_weight: float = DEFAULT_MULTIMODAL_CFG_TEXT,
|
755 |
+
yield_every_n: int = 32,
|
756 |
+
max_gen_tokens: int = 4096,
|
757 |
+
repetition_penalty: float = 1.2,
|
758 |
+
suffix_tokens: list[str] | None = None,
|
759 |
+
seed: int | None = None,
|
760 |
+
debug: dict | None = None,
|
761 |
+
) -> Generator[MixedSequenceType, None, None]:
|
762 |
+
tokenized_prompt = self.tokens_from_inputs(prompt, suffix_tokens=suffix_tokens)
|
763 |
+
assert not isinstance(tokenized_prompt, torch.Tensor)
|
764 |
+
request_id = f"request_{random.randint(100_000, 200_000)}"
|
765 |
+
if seed is None:
|
766 |
+
seed = random.randint(1, 2048)
|
767 |
+
if debug is not None:
|
768 |
+
debug["seed"] = seed
|
769 |
+
if len(tokenized_prompt) > (4096 - 3):
|
770 |
+
yield "ERROR: Your input exceeds the model's context length of 4096. Note that images consume 1024 tokens."
|
771 |
+
return
|
772 |
+
|
773 |
+
request = {
|
774 |
+
"mode": WSMessageType.GENERATE_MULTIMODAL.value,
|
775 |
+
"request_id": request_id,
|
776 |
+
"tokenized_prompt": tokenized_prompt,
|
777 |
+
"cfg_image_weight": cfg_image_weight,
|
778 |
+
"cfg_text_weight": cfg_text_weight,
|
779 |
+
"repetition_penalty": repetition_penalty,
|
780 |
+
"yield_every_n": yield_every_n,
|
781 |
+
"max_gen_tokens": max_gen_tokens,
|
782 |
+
"temp": temp,
|
783 |
+
"top_p": top_p,
|
784 |
+
"seed": seed,
|
785 |
+
}
|
786 |
+
logger.info(
|
787 |
+
"Sending request_id=%s: %s",
|
788 |
+
request_id,
|
789 |
+
request,
|
790 |
+
)
|
791 |
+
await asyncio.gather(
|
792 |
+
self.request_queue.clear(request_id),
|
793 |
+
self.response_queue.clear(request_id),
|
794 |
+
)
|
795 |
+
logger.info("Cleared request/response queue for %s", request_id)
|
796 |
+
await self.request_queue.put(request_id, request)
|
797 |
+
logger.info("Sent request to coordinator %s", request_id)
|
798 |
+
try:
|
799 |
+
while True:
|
800 |
+
output = await self.get_next_output(request_id)
|
801 |
+
logger.info("Received response for %s", request_id)
|
802 |
+
self.check_error(output)
|
803 |
+
if output["final"]:
|
804 |
+
break
|
805 |
+
n_outs = len(output["output"])
|
806 |
+
if n_outs != 3:
|
807 |
+
logger.error(
|
808 |
+
"Encountered unexpected number of %s arguments in: %s",
|
809 |
+
n_outs,
|
810 |
+
output["output"],
|
811 |
+
)
|
812 |
+
token_type, tokens, image_is_final = output["output"]
|
813 |
+
assert not isinstance(tokens, torch.Tensor)
|
814 |
+
match token_type:
|
815 |
+
case "TEXT":
|
816 |
+
yield self.tokenizer.decode(tokens)
|
817 |
+
case "IMAGE":
|
818 |
+
yield StreamingImage(
|
819 |
+
image=self.pillow_from_bpe_tokens(torch.tensor(tokens)),
|
820 |
+
final=image_is_final,
|
821 |
+
)
|
822 |
+
case _:
|
823 |
+
raise ValueError("Unknown token type")
|
824 |
+
finally:
|
825 |
+
logger.info("Cleaning up queues in request_id=%s", request_id)
|
826 |
+
await self.request_queue.clear(request_id)
|
827 |
+
await self.response_queue.clear(request_id)
|
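The RedisQueue / AsyncRedisQueue pair above gives the web process and the rank-0 coordinator a simple JSON-backed request/response channel over Redis. A minimal sketch of that round trip, assuming only a Redis server reachable at localhost:6379 (the "request"/"response" queue names mirror the ones used above; the echo worker stands in for the real coordinator loop):

import threading

import redis

from chameleon.viewer.backend.models.chameleon_distributed import RedisQueue

client = redis.Redis("localhost", 6379)
request_queue = RedisQueue(client, "request")
response_queue = RedisQueue(client, "response")
request_queue.reset()
response_queue.reset()

def echo_worker() -> None:
    # Coordinator side: block until any request arrives, then answer on the same queue_id.
    queue_id, payload = request_queue.get(None)
    response_queue.put(queue_id, {"final": True, "error": False, "echo": payload})

threading.Thread(target=echo_worker, daemon=True).start()

# Client side: enqueue a JSON-serializable request, then block until its response arrives.
request_queue.put("request_1", {"mode": "GENERATE_TEXT", "tokenized_prompt": [0, 1, 2]})
print(response_queue.get("request_1"))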
chameleon/viewer/backend/models/chameleon_local.py
ADDED
@@ -0,0 +1,642 @@
1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2 |
+
#
|
3 |
+
# This source code is licensed under the Chameleon License found in the
|
4 |
+
# LICENSE file in the root directory of this source tree.
|
5 |
+
|
6 |
+
import io
|
7 |
+
import json
|
8 |
+
from typing import Generator
|
9 |
+
|
10 |
+
import PIL.Image
|
11 |
+
import torch
|
12 |
+
import transformers
|
13 |
+
from tokenizers import Tokenizer
|
14 |
+
from transformers import (
|
15 |
+
MaxLengthCriteria,
|
16 |
+
RepetitionPenaltyLogitsProcessor,
|
17 |
+
TemperatureLogitsWarper,
|
18 |
+
TopPLogitsWarper,
|
19 |
+
)
|
20 |
+
|
21 |
+
from chameleon.inference.alignment import AlignPromptRight
|
22 |
+
from chameleon.inference.generation import ChameleonGenerator
|
23 |
+
from chameleon.inference.image_tokenizer import ImageTokenizer
|
24 |
+
from chameleon.inference.loader import load_model
|
25 |
+
from chameleon.inference.logits_processor import (
|
26 |
+
AllowOnlyTokensAfterIndexLogitsProcessor,
|
27 |
+
AllowOnlyTokensLogitsProcessor,
|
28 |
+
InBatchInstructCFGLogitsProcessor,
|
29 |
+
)
|
30 |
+
from chameleon.inference.model_adapter import ChameleonModelAdapter
|
31 |
+
from chameleon.inference.stopping_criteria import StopOnEOS, StopOnEOSAfterBatchIndex
|
32 |
+
from chameleon.inference.token_selector import (
|
33 |
+
MultinomialTokenSelector,
|
34 |
+
ReplicatedInputTokenSelector,
|
35 |
+
)
|
36 |
+
from chameleon.inference.vocab import VocabInfo, VocabTranslation
|
37 |
+
from chameleon.viewer.backend.models.abstract_model import (
|
38 |
+
DEFAULT_IMAGE_CFG_IMAGE,
|
39 |
+
DEFAULT_IMAGE_CFG_TEXT,
|
40 |
+
DEFAULT_MULTIMODAL_CFG_IMAGE,
|
41 |
+
DEFAULT_MULTIMODAL_CFG_TEXT,
|
42 |
+
AbstractMultimodalGenerator,
|
43 |
+
MixedSequenceType,
|
44 |
+
StreamingImage,
|
45 |
+
)
|
46 |
+
from chameleon.viewer.backend.utils import get_logger
|
47 |
+
|
48 |
+
logger = get_logger(__name__)
|
49 |
+
|
50 |
+
|
51 |
+
def set_seed(seed: int) -> None:
|
52 |
+
transformers.enable_full_determinism(seed, warn_only=True)
|
53 |
+
|
54 |
+
|
55 |
+
def get_rank() -> int:
|
56 |
+
if torch.distributed.is_initialized():
|
57 |
+
return torch.distributed.get_rank()
|
58 |
+
else:
|
59 |
+
return 0
|
60 |
+
|
61 |
+
|
62 |
+
class ChameleonTokenizationMixin:
|
63 |
+
def png_from_bpe_tokens(self, bpe_tokens: torch.Tensor) -> bytes:
|
64 |
+
img = self.pillow_from_bpe_tokens(bpe_tokens)
|
65 |
+
|
66 |
+
img_io = io.BytesIO()
|
67 |
+
img.save(img_io, format="PNG")
|
68 |
+
return img_io.getvalue()
|
69 |
+
|
70 |
+
def pillow_from_bpe_tokens(self, bpe_tokens: torch.Tensor) -> PIL.Image.Image:
|
71 |
+
image_tensor = VocabTranslation(self.vocab).convert_bpe2img(bpe_tokens)
|
72 |
+
if image_tensor.shape[0] < 1024:
|
73 |
+
padding = (
|
74 |
+
torch.ones([1024 - image_tensor.shape[0]], dtype=int) * image_tensor[0]
|
75 |
+
)
|
76 |
+
image_tensor = torch.cat((image_tensor, padding)).unsqueeze(0)
|
77 |
+
|
78 |
+
return self.image_tokenizer.pil_from_img_toks(image_tensor)
|
79 |
+
|
80 |
+
def tokens_from_inputs(
|
81 |
+
self,
|
82 |
+
inputs: MixedSequenceType,
|
83 |
+
suffix_tokens: list[str] | None = None,
|
84 |
+
) -> list[int]:
|
85 |
+
tokens = [self.vocab.bos_id]
|
86 |
+
for input_ in inputs:
|
87 |
+
if isinstance(input_, str):
|
88 |
+
tokens.extend(self.tokenizer.encode(input_.strip()).ids)
|
89 |
+
elif isinstance(input_, PIL.Image.Image):
|
90 |
+
tokens.append(self.vocab.begin_image)
|
91 |
+
imgtoks = self.image_tokenizer.img_tokens_from_pil(input_)
|
92 |
+
tokens.extend(VocabTranslation(self.vocab).convert_img2bp2(imgtoks))
|
93 |
+
tokens.append(self.vocab.end_image)
|
94 |
+
else:
|
95 |
+
raise ValueError(f"Unknown input type: {type(input_)}")
|
96 |
+
|
97 |
+
if suffix_tokens is not None:
|
98 |
+
for t in suffix_tokens:
|
99 |
+
tokens.extend(self.tokenizer.encode(t).ids)
|
100 |
+
sanitized_tokens = []
|
101 |
+
for t in tokens:
|
102 |
+
if isinstance(t, torch.Tensor):
|
103 |
+
sanitized_tokens.append(t.item())
|
104 |
+
else:
|
105 |
+
sanitized_tokens.append(t)
|
106 |
+
return sanitized_tokens
|
107 |
+
|
108 |
+
|
109 |
+
class GeneratorWrapper:
|
110 |
+
def __init__(self, gen):
|
111 |
+
self.gen = gen
|
112 |
+
|
113 |
+
def __iter__(self):
|
114 |
+
return self
|
115 |
+
|
116 |
+
def __next__(self):
|
117 |
+
return next(self.gen)
|
118 |
+
|
119 |
+
|
120 |
+
class Decoder:
|
121 |
+
def __init__(
|
122 |
+
self,
|
123 |
+
chameleon_generator: "ChameleonLocalGenerator",
|
124 |
+
input_ids: list[int],
|
125 |
+
):
|
126 |
+
...
|
127 |
+
|
128 |
+
def __next__(self) -> tuple[list[int], dict | None, type["Decoder"] | None]:
|
129 |
+
...
|
130 |
+
|
131 |
+
|
132 |
+
class TextDecoder(Decoder):
|
133 |
+
def __init__(
|
134 |
+
self,
|
135 |
+
chameleon_generator: "ChameleonLocalGenerator",
|
136 |
+
input_ids: list[int],
|
137 |
+
*,
|
138 |
+
temp: float,
|
139 |
+
top_p: float,
|
140 |
+
max_seq_len: int,
|
141 |
+
# TODO: Propagate setting upwards
|
142 |
+
repetition_penalty: float,
|
143 |
+
**kwargs,
|
144 |
+
):
|
145 |
+
self.chameleon_generator = chameleon_generator
|
146 |
+
assert chameleon_generator.vocab.eos_id is not None
|
147 |
+
|
148 |
+
stopping_criteria = [
|
149 |
+
StopOnEOS(chameleon_generator.vocab.eos_id),
|
150 |
+
MaxLengthCriteria(max_seq_len),
|
151 |
+
]
|
152 |
+
if chameleon_generator.additional_eos_tokens is not None:
|
153 |
+
for token in chameleon_generator.additional_eos_tokens:
|
154 |
+
stopping_criteria.append(
|
155 |
+
StopOnEOSAfterBatchIndex(
|
156 |
+
chameleon_generator.tokenizer.token_to_id(token), [len(input_ids)]
|
157 |
+
)
|
158 |
+
)
|
159 |
+
|
160 |
+
logits_processors = [
|
161 |
+
AllowOnlyTokensLogitsProcessor(
|
162 |
+
chameleon_generator.vocab.text_tokens
|
163 |
+
+ [chameleon_generator.vocab.eos_id, chameleon_generator.vocab.begin_image]
|
164 |
+
),
|
165 |
+
# Don't allow any more images near the end since there isn't enough room
|
166 |
+
AllowOnlyTokensAfterIndexLogitsProcessor(
|
167 |
+
chameleon_generator.vocab.text_tokens + [chameleon_generator.vocab.eos_id],
|
168 |
+
# TODO: Calculate exact
|
169 |
+
1024 * 3 - 3,
|
170 |
+
),
|
171 |
+
RepetitionPenaltyLogitsProcessor(repetition_penalty),
|
172 |
+
TemperatureLogitsWarper(temp),
|
173 |
+
TopPLogitsWarper(top_p),
|
174 |
+
]
|
175 |
+
|
176 |
+
self.gen = ChameleonGenerator(
|
177 |
+
model=ChameleonModelAdapter(chameleon_generator.model, max_seq_len=max_seq_len),
|
178 |
+
input_ids=[input_ids],
|
179 |
+
stopping_criteria=stopping_criteria,
|
180 |
+
logits_processors=logits_processors,
|
181 |
+
)
|
182 |
+
for _ in range(len(input_ids)):
|
183 |
+
next(self.gen)
|
184 |
+
|
185 |
+
def __next__(self) -> tuple[list[int], dict | None, type[Decoder] | None]:
|
186 |
+
gpu_tok = next(self.gen).id.item()
|
187 |
+
cpu_tok = gpu_tok
|
188 |
+
if cpu_tok == self.chameleon_generator.vocab.begin_image:
|
189 |
+
# return "TEXT", [cpu_tok], [], False, ImageDecoder
|
190 |
+
raise StopIteration()
|
191 |
+
|
192 |
+
return (
|
193 |
+
"TEXT",
|
194 |
+
[cpu_tok],
|
195 |
+
[cpu_tok],
|
196 |
+
False,
|
197 |
+
None,
|
198 |
+
)
|
199 |
+
|
200 |
+
|
201 |
+
class ImageDecoder(Decoder):
|
202 |
+
def __init__(
|
203 |
+
self,
|
204 |
+
chameleon_generator: "ChameleonLocalGenerator",
|
205 |
+
input_ids: list[int],
|
206 |
+
*,
|
207 |
+
cfg_image_weight: float,
|
208 |
+
cfg_text_weight: float,
|
209 |
+
temp: float,
|
210 |
+
top_p: float,
|
211 |
+
yield_every_n: int,
|
212 |
+
**kwargs,
|
213 |
+
):
|
214 |
+
self.yield_every_n = yield_every_n
|
215 |
+
self.chameleon_generator = chameleon_generator
|
216 |
+
logits_processors = [
|
217 |
+
InBatchInstructCFGLogitsProcessor(cfg_text_weight, cfg_image_weight),
|
218 |
+
AllowOnlyTokensLogitsProcessor(chameleon_generator.vocab.image_tokens),
|
219 |
+
TemperatureLogitsWarper(temp),
|
220 |
+
TopPLogitsWarper(top_p),
|
221 |
+
]
|
222 |
+
|
223 |
+
image_conditioned_allowed = set(chameleon_generator.vocab.image_tokens) | {
|
224 |
+
chameleon_generator.vocab.bos_id,
|
225 |
+
chameleon_generator.vocab.begin_image,
|
226 |
+
chameleon_generator.vocab.end_image,
|
227 |
+
}
|
228 |
+
|
229 |
+
full_conditioned = input_ids
|
230 |
+
image_conditioned = [
|
231 |
+
in_id for in_id in input_ids if in_id in image_conditioned_allowed
|
232 |
+
]
|
233 |
+
unconditioned = [
|
234 |
+
chameleon_generator.vocab.bos_id,
|
235 |
+
chameleon_generator.vocab.begin_image,
|
236 |
+
]
|
237 |
+
|
238 |
+
self.gen = ChameleonGenerator(
|
239 |
+
model=ChameleonModelAdapter(
|
240 |
+
chameleon_generator.model, max_seq_len=len(input_ids) + 1024
|
241 |
+
),
|
242 |
+
input_ids=[full_conditioned, image_conditioned, unconditioned],
|
243 |
+
logits_processors=logits_processors,
|
244 |
+
alignment=AlignPromptRight(chameleon_generator.vocab.pad_id),
|
245 |
+
token_selector=ReplicatedInputTokenSelector(
|
246 |
+
MultinomialTokenSelector(), n=3
|
247 |
+
),
|
248 |
+
)
|
249 |
+
for _ in range(len(input_ids)):
|
250 |
+
next(self.gen)
|
251 |
+
self.image_builder: list[torch.LongTensor] = []
|
252 |
+
self.gpu_tok_batch: list[torch.LongTensor] = []
|
253 |
+
|
254 |
+
def __next__(self) -> tuple[list[int], dict | None, type[Decoder] | None]:
|
255 |
+
while True:
|
256 |
+
gpu_tok = next(self.gen)
|
257 |
+
gpu_tok = torch.chunk(gpu_tok, chunks=3, dim=0)[0]
|
258 |
+
|
259 |
+
self.image_builder.append(gpu_tok)
|
260 |
+
self.gpu_tok_batch.append(gpu_tok)
|
261 |
+
|
262 |
+
if len(self.image_builder) == 1024:
|
263 |
+
return (
|
264 |
+
"IMAGE",
|
265 |
+
torch.tensor(self.gpu_tok_batch).tolist()
|
266 |
+
+ [self.chameleon_generator.vocab.end_image],
|
267 |
+
torch.tensor(self.image_builder).tolist(),
|
268 |
+
True,
|
269 |
+
TextDecoder,
|
270 |
+
)
|
271 |
+
elif len(self.image_builder) % self.yield_every_n == 0:
|
272 |
+
cpu_toks = torch.tensor(self.gpu_tok_batch).tolist()
|
273 |
+
self.gpu_tok_batch = []
|
274 |
+
|
275 |
+
return (
|
276 |
+
"IMAGE",
|
277 |
+
cpu_toks,
|
278 |
+
torch.tensor(self.image_builder).tolist(),
|
279 |
+
False,
|
280 |
+
None,
|
281 |
+
)
|
282 |
+
|
283 |
+
|
284 |
+
class ChameleonForwardMixin:
|
285 |
+
@torch.inference_mode()
|
286 |
+
def _generate_text_streaming(
|
287 |
+
self,
|
288 |
+
input_ids: list[int],
|
289 |
+
max_gen_tokens: int = 256,
|
290 |
+
temp: float = 1.0,
|
291 |
+
top_p: float = 0.8,
|
292 |
+
repetition_penalty: float = 1.2,
|
293 |
+
seed: int | None = None,
|
294 |
+
) -> Generator[str, None, None]:
|
295 |
+
if seed is not None:
|
296 |
+
set_seed(seed)
|
297 |
+
logger.info(
|
298 |
+
"Rank: %s, set seed: %s",
|
299 |
+
get_rank(),
|
300 |
+
seed,
|
301 |
+
)
|
302 |
+
|
303 |
+
logits_processors = [
|
304 |
+
# Only allow text tokens and end-of-sequence.
|
305 |
+
AllowOnlyTokensLogitsProcessor(
|
306 |
+
self.vocab.text_tokens + [self.vocab.eos_id]
|
307 |
+
),
|
308 |
+
# Don't allow the first token to be end-of-sequence.
|
309 |
+
# DisallowTokensAtIndexLogitProcessor([self.vocab.eos_id], len()),
|
310 |
+
RepetitionPenaltyLogitsProcessor(repetition_penalty),
|
311 |
+
TemperatureLogitsWarper(temp),
|
312 |
+
TopPLogitsWarper(top_p),
|
313 |
+
]
|
314 |
+
|
315 |
+
stopping_criteria = [
|
316 |
+
StopOnEOS(self.vocab.eos_id),
|
317 |
+
MaxLengthCriteria(len(input_ids) + max_gen_tokens),
|
318 |
+
]
|
319 |
+
if self.additional_eos_tokens is not None:
|
320 |
+
for token in self.additional_eos_tokens:
|
321 |
+
stopping_criteria.append(
|
322 |
+
StopOnEOSAfterBatchIndex(
|
323 |
+
self.tokenizer.token_to_id(token), [len(input_ids)]
|
324 |
+
)
|
325 |
+
)
|
326 |
+
for tok in ChameleonGenerator(
|
327 |
+
model=ChameleonModelAdapter(
|
328 |
+
self.model,
|
329 |
+
max_seq_len=len(input_ids) + max_gen_tokens,
|
330 |
+
),
|
331 |
+
input_ids=[input_ids],
|
332 |
+
stopping_criteria=stopping_criteria,
|
333 |
+
logits_processors=logits_processors,
|
334 |
+
):
|
335 |
+
yield tok.tolist()
|
336 |
+
|
337 |
+
@torch.inference_mode()
|
338 |
+
def _generate_batched_text_streaming(
|
339 |
+
self,
|
340 |
+
batch: list[list[int]],
|
341 |
+
max_gen_tokens: int = 256,
|
342 |
+
temp: float = 1.0,
|
343 |
+
top_p: float = 0.8,
|
344 |
+
repetition_penalty: float = 1.2,
|
345 |
+
seed: int | None = None,
|
346 |
+
) -> Generator[list[str], None, None]:
|
347 |
+
if seed is not None:
|
348 |
+
set_seed(seed)
|
349 |
+
logits_processors = [
|
350 |
+
# Only allow text tokens and end-of-sequence.
|
351 |
+
AllowOnlyTokensLogitsProcessor(
|
352 |
+
self.vocab.text_tokens + [self.vocab.eos_id]
|
353 |
+
),
|
354 |
+
# Don't allow the first token to be end-of-sequence.
|
355 |
+
# DisallowTokensAtIndexLogitProcessor([self.vocab.eos_id], len()),
|
356 |
+
RepetitionPenaltyLogitsProcessor(repetition_penalty),
|
357 |
+
TemperatureLogitsWarper(temp),
|
358 |
+
TopPLogitsWarper(top_p),
|
359 |
+
]
|
360 |
+
|
361 |
+
max_batch_size = max(len(p) for p in batch)
|
362 |
+
stopping_criteria = [
|
363 |
+
StopOnEOS(self.vocab.eos_id),
|
364 |
+
MaxLengthCriteria(max_batch_size + max_gen_tokens),
|
365 |
+
]
|
366 |
+
if self.additional_eos_tokens is not None:
|
367 |
+
for token in self.additional_eos_tokens:
|
368 |
+
stopping_criteria.append(
|
369 |
+
StopOnEOSAfterBatchIndex(
|
370 |
+
self.tokenizer.token_to_id(token), [len(x) for x in batch]
|
371 |
+
)
|
372 |
+
)
|
373 |
+
for tok in ChameleonGenerator(
|
374 |
+
model=ChameleonModelAdapter(
|
375 |
+
self.model,
|
376 |
+
max_seq_len=max_batch_size + max_gen_tokens,
|
377 |
+
),
|
378 |
+
input_ids=batch,
|
379 |
+
stopping_criteria=stopping_criteria,
|
380 |
+
logits_processors=logits_processors,
|
381 |
+
):
|
382 |
+
yield tok.unsqueeze(1).tolist()
|
383 |
+
|
384 |
+
@torch.inference_mode()
|
385 |
+
def _generate_image_streaming(
|
386 |
+
self,
|
387 |
+
tokenized_prompt: list[int],
|
388 |
+
temp: float = 1.0,
|
389 |
+
top_p: float = 0.8,
|
390 |
+
cfg_image_weight: float = DEFAULT_IMAGE_CFG_IMAGE,
|
391 |
+
cfg_text_weight: float = DEFAULT_IMAGE_CFG_TEXT,
|
392 |
+
yield_every_n: int = 32,
|
393 |
+
seed: int | None = None,
|
394 |
+
) -> Generator[tuple[list[int], bool], None, None]:
|
395 |
+
if seed is not None:
|
396 |
+
set_seed(seed)
|
397 |
+
logger.info(
|
398 |
+
"Rank: %s, set seed: %s",
|
399 |
+
get_rank(),
|
400 |
+
seed,
|
401 |
+
)
|
402 |
+
|
403 |
+
decoder = ImageDecoder(
|
404 |
+
self,
|
405 |
+
tokenized_prompt,
|
406 |
+
cfg_image_weight=cfg_image_weight,
|
407 |
+
cfg_text_weight=cfg_text_weight,
|
408 |
+
temp=temp,
|
409 |
+
top_p=top_p,
|
410 |
+
yield_every_n=yield_every_n,
|
411 |
+
)
|
412 |
+
|
413 |
+
for _, _, frontend_tokens, is_final, next_decoder in GeneratorWrapper(decoder):
|
414 |
+
if next_decoder is not None:
|
415 |
+
break
|
416 |
+
|
417 |
+
yield torch.tensor(frontend_tokens).tolist(), is_final
|
418 |
+
|
419 |
+
@torch.inference_mode()
|
420 |
+
def _generate_multimodal_streaming(
|
421 |
+
self,
|
422 |
+
input_ids: list[int],
|
423 |
+
temp: float = 1.0,
|
424 |
+
top_p: float = 0.8,
|
425 |
+
cfg_image_weight: float = DEFAULT_MULTIMODAL_CFG_IMAGE,
|
426 |
+
cfg_text_weight: float = DEFAULT_MULTIMODAL_CFG_TEXT,
|
427 |
+
yield_every_n: int = 32,
|
428 |
+
max_gen_tokens: int = 4096,
|
429 |
+
repetition_penalty: float = 1.2,
|
430 |
+
seed: int | None = None,
|
431 |
+
) -> Generator[tuple[str, list[int], bool], None, None]:
|
432 |
+
if seed is not None:
|
433 |
+
set_seed(seed)
|
434 |
+
logger.info(
|
435 |
+
"Rank: %s, set seed: %s",
|
436 |
+
get_rank(),
|
437 |
+
seed,
|
438 |
+
)
|
439 |
+
max_seq_len = min(len(input_ids) + max_gen_tokens, 4096)
|
440 |
+
gen_wrapper = GeneratorWrapper(
|
441 |
+
TextDecoder(
|
442 |
+
self,
|
443 |
+
input_ids,
|
444 |
+
temp=temp,
|
445 |
+
top_p=top_p,
|
446 |
+
max_seq_len=max_seq_len,
|
447 |
+
repetition_penalty=repetition_penalty,
|
448 |
+
)
|
449 |
+
)
|
450 |
+
|
451 |
+
for (
|
452 |
+
message_type,
|
453 |
+
cpu_toks,
|
454 |
+
frontend_tokens,
|
455 |
+
is_final,
|
456 |
+
next_decoder,
|
457 |
+
) in gen_wrapper:
|
458 |
+
input_ids.extend(cpu_toks)
|
459 |
+
if len(frontend_tokens) > 0:
|
460 |
+
yield message_type, frontend_tokens, is_final
|
461 |
+
if next_decoder is not None:
|
462 |
+
gen_wrapper.gen = next_decoder(
|
463 |
+
self,
|
464 |
+
input_ids,
|
465 |
+
temp=temp,
|
466 |
+
top_p=top_p,
|
467 |
+
max_seq_len=max_seq_len,
|
468 |
+
cfg_image_weight=cfg_image_weight,
|
469 |
+
cfg_text_weight=cfg_text_weight,
|
470 |
+
yield_every_n=yield_every_n,
|
471 |
+
repetition_penalty=repetition_penalty,
|
472 |
+
)
|
473 |
+
|
474 |
+
|
475 |
+
class ChameleonLocalGenerator(
|
476 |
+
AbstractMultimodalGenerator, ChameleonForwardMixin, ChameleonTokenizationMixin
|
477 |
+
):
|
478 |
+
def __init__(
|
479 |
+
self,
|
480 |
+
model_path: str,
|
481 |
+
tokenizer_path: str,
|
482 |
+
vqgan_config_path: str,
|
483 |
+
vqgan_ckpt_path: str | None = None,
|
484 |
+
additional_eos_tokens: list[str] | None = None,
|
485 |
+
) -> None:
|
486 |
+
super().__init__()
|
487 |
+
logger.info("Loading model...")
|
488 |
+
self.model = load_model(model_path)
|
489 |
+
self.additional_eos_tokens = additional_eos_tokens
|
490 |
+
|
491 |
+
logger.info("Loading tokenizer...")
|
492 |
+
tokenizer_path = tokenizer_path
|
493 |
+
self.tokenizer = Tokenizer.from_file(str(tokenizer_path))
|
494 |
+
self.vocab = VocabInfo(json.load(open(tokenizer_path))["model"]["vocab"])
|
495 |
+
|
496 |
+
logger.info("Loading VQGAN...")
|
497 |
+
self.image_tokenizer = ImageTokenizer(vqgan_config_path, vqgan_ckpt_path)
|
498 |
+
|
499 |
+
@torch.inference_mode()
|
500 |
+
def generate_batched_text(
|
501 |
+
self,
|
502 |
+
prompts: list[MixedSequenceType],
|
503 |
+
max_gen_tokens: int = 256,
|
504 |
+
temp: float = 1.0,
|
505 |
+
top_p: float = 0.8,
|
506 |
+
repetition_penalty: float = 1.2,
|
507 |
+
seed: int | None = None,
|
508 |
+
) -> list[str]:
|
509 |
+
outputs = [""] * len(prompts)
|
510 |
+
for vals in self.generate_batched_text_streaming(
|
511 |
+
prompts,
|
512 |
+
max_gen_tokens=max_gen_tokens,
|
513 |
+
temp=temp,
|
514 |
+
top_p=top_p,
|
515 |
+
repetition_penalty=repetition_penalty,
|
516 |
+
seed=seed,
|
517 |
+
):
|
518 |
+
for idx, val in enumerate(vals):
|
519 |
+
outputs[idx] += val
|
520 |
+
return outputs
|
521 |
+
|
522 |
+
@torch.inference_mode()
|
523 |
+
def generate_batched_text_streaming(
|
524 |
+
self,
|
525 |
+
prompts: list[MixedSequenceType],
|
526 |
+
max_gen_tokens: int = 256,
|
527 |
+
temp: float = 1.0,
|
528 |
+
top_p: float = 0.8,
|
529 |
+
repetition_penalty: float = 1.2,
|
530 |
+
seed: int | None = None,
|
531 |
+
) -> Generator[list[str], None, None]:
|
532 |
+
batch = []
|
533 |
+
for prompt in prompts:
|
534 |
+
batch.append(self.tokens_from_inputs(prompt))
|
535 |
+
|
536 |
+
for tok in self._generate_batched_text_streaming(
|
537 |
+
batch,
|
538 |
+
max_gen_tokens=max_gen_tokens,
|
539 |
+
temp=temp,
|
540 |
+
top_p=top_p,
|
541 |
+
repetition_penalty=repetition_penalty,
|
542 |
+
seed=seed,
|
543 |
+
):
|
544 |
+
yield self.tokenizer.decode_batch(tok)
|
545 |
+
|
546 |
+
@torch.inference_mode()
|
547 |
+
async def generate_text_streaming(
|
548 |
+
self,
|
549 |
+
prompt: MixedSequenceType,
|
550 |
+
max_gen_tokens: int = 256,
|
551 |
+
temp: float = 1.0,
|
552 |
+
top_p: float = 0.8,
|
553 |
+
repetition_penalty: float = 1.2,
|
554 |
+
seed: int | None = None,
|
555 |
+
debug: dict | None = None,
|
556 |
+
) -> Generator[str, None, None]:
|
557 |
+
tokenized_prompt = self.tokens_from_inputs(prompt)
|
558 |
+
if len(tokenized_prompt) > (4096 - 3):
|
559 |
+
yield "ERROR: Your input exceeds the model's context length of 4096. Note that images consume 1024 tokens whether in input or output."
|
560 |
+
return
|
561 |
+
for out in self.generate_batched_text_streaming(
|
562 |
+
[prompt],
|
563 |
+
max_gen_tokens=max_gen_tokens,
|
564 |
+
temp=temp,
|
565 |
+
top_p=top_p,
|
566 |
+
repetition_penalty=repetition_penalty,
|
567 |
+
seed=seed,
|
568 |
+
):
|
569 |
+
yield out[0]
|
570 |
+
|
571 |
+
@torch.inference_mode()
|
572 |
+
async def generate_image_streaming(
|
573 |
+
self,
|
574 |
+
prompt: MixedSequenceType,
|
575 |
+
temp: float = 1.0,
|
576 |
+
top_p: float = 0.8,
|
577 |
+
cfg_image_weight: float = DEFAULT_IMAGE_CFG_IMAGE,
|
578 |
+
cfg_text_weight: float = DEFAULT_IMAGE_CFG_TEXT,
|
579 |
+
yield_every_n: int = 32,
|
580 |
+
seed: int | None = None,
|
581 |
+
debug: dict | None = None,
|
582 |
+
) -> Generator[StreamingImage, None, None]:
|
583 |
+
assert isinstance(prompt, list)
|
584 |
+
tokenized_prompt = self.tokens_from_inputs(prompt)
|
585 |
+
tokenized_prompt.append(self.vocab.begin_image)
|
586 |
+
if len(tokenized_prompt) > (4096 - 3 - 1024):
|
587 |
+
yield "ERROR: Your input exceeds the model's context length of 4096. Note that images consume 1024 tokens whether in input or output."
|
588 |
+
return
|
589 |
+
for tokens, final in self._generate_image_streaming(
|
590 |
+
tokenized_prompt,
|
591 |
+
temp=temp,
|
592 |
+
top_p=top_p,
|
593 |
+
cfg_image_weight=cfg_image_weight,
|
594 |
+
cfg_text_weight=cfg_text_weight,
|
595 |
+
yield_every_n=yield_every_n,
|
596 |
+
seed=seed,
|
597 |
+
):
|
598 |
+
yield StreamingImage(
|
599 |
+
image=self.pillow_from_bpe_tokens(torch.tensor(tokens)), final=final
|
600 |
+
)
|
601 |
+
|
602 |
+
@torch.inference_mode()
|
603 |
+
async def generate_multimodal_streaming(
|
604 |
+
self,
|
605 |
+
prompt: MixedSequenceType,
|
606 |
+
temp: float = 1.0,
|
607 |
+
top_p: float = 0.8,
|
608 |
+
cfg_image_weight: float = DEFAULT_MULTIMODAL_CFG_IMAGE,
|
609 |
+
cfg_text_weight: float = DEFAULT_MULTIMODAL_CFG_TEXT,
|
610 |
+
yield_every_n: int = 32,
|
611 |
+
max_gen_tokens: int = 4096,
|
612 |
+
repetition_penalty: float = 1.2,
|
613 |
+
suffix_tokens: list[str] | None = None,
|
614 |
+
seed: int | None = None,
|
615 |
+
debug: dict | None = None,
|
616 |
+
) -> Generator[MixedSequenceType, None, None]:
|
617 |
+
input_ids = self.tokens_from_inputs(prompt, suffix_tokens=suffix_tokens)
|
618 |
+
if len(input_ids) > (4096 - 3):
|
619 |
+
yield "ERROR: Your input exceeds the model's context length of 4096. Note that images consume 1024 tokens."
|
620 |
+
return
|
621 |
+
|
622 |
+
for token_type, tokens, is_final in self._generate_multimodal_streaming(
|
623 |
+
input_ids,
|
624 |
+
temp=temp,
|
625 |
+
top_p=top_p,
|
626 |
+
cfg_image_weight=cfg_image_weight,
|
627 |
+
cfg_text_weight=cfg_text_weight,
|
628 |
+
yield_every_n=yield_every_n,
|
629 |
+
max_gen_tokens=max_gen_tokens,
|
630 |
+
repetition_penalty=repetition_penalty,
|
631 |
+
seed=seed,
|
632 |
+
):
|
633 |
+
match token_type:
|
634 |
+
case "TEXT":
|
635 |
+
yield self.tokenizer.decode(tokens)
|
636 |
+
case "IMAGE":
|
637 |
+
yield StreamingImage(
|
638 |
+
image=self.pillow_from_bpe_tokens(torch.tensor(tokens)),
|
639 |
+
final=is_final,
|
640 |
+
)
|
641 |
+
case _:
|
642 |
+
raise ValueError("Unknown token type")
|
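ChameleonLocalGenerator above exposes the same streaming interface as the distributed generator, but runs the model in-process. A minimal sketch of driving its streaming text generation, with placeholder paths standing in for a real Chameleon checkpoint, tokenizer, and VQGAN config (generate_text_streaming is an async generator, so it is consumed with async for):

import asyncio

from chameleon.viewer.backend.models.chameleon_local import ChameleonLocalGenerator

async def main() -> None:
    generator = ChameleonLocalGenerator(
        model_path="/path/to/chameleon/model",          # placeholder path
        tokenizer_path="/path/to/text_tokenizer.json",  # placeholder path
        vqgan_config_path="/path/to/vqgan.yaml",        # placeholder path
        vqgan_ckpt_path="/path/to/vqgan.ckpt",          # placeholder path
    )
    # Each iteration yields the text decoded since the previous chunk.
    async for chunk in generator.generate_text_streaming(
        ["Describe a chameleon in one sentence."],
        max_gen_tokens=64,
        temp=1.0,
        top_p=0.8,
    ):
        print(chunk, end="", flush=True)

asyncio.run(main())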
chameleon/viewer/backend/models/service.py
ADDED
@@ -0,0 +1,300 @@
1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2 |
+
#
|
3 |
+
# This source code is licensed under the Chameleon License found in the
|
4 |
+
# LICENSE file in the root directory of this source tree.
|
5 |
+
|
6 |
+
import base64
|
7 |
+
import io
|
8 |
+
import socket
|
9 |
+
import subprocess
|
10 |
+
import time
|
11 |
+
from functools import partial
|
12 |
+
|
13 |
+
import fastapi
|
14 |
+
import PIL
|
15 |
+
import pydantic
|
16 |
+
import redis.asyncio as async_redis
|
17 |
+
import uvicorn
|
18 |
+
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, WebSocketException
|
19 |
+
from websockets.exceptions import ConnectionClosedError, ConnectionClosedOK
|
20 |
+
|
21 |
+
from chameleon.viewer.backend.data_types import (
|
22 |
+
Content,
|
23 |
+
ContentType,
|
24 |
+
NoOptionsForComplete,
|
25 |
+
NoOptionsForFull,
|
26 |
+
NoOptionsForPartial,
|
27 |
+
NoOptionsForQueueStatus,
|
28 |
+
WSMessageType,
|
29 |
+
WSMultimodalMessage,
|
30 |
+
)
|
31 |
+
from chameleon.viewer.backend.models.abstract_model import (
|
32 |
+
AbstractMultimodalGenerator,
|
33 |
+
StreamingImage,
|
34 |
+
)
|
35 |
+
from chameleon.viewer.backend.models.chameleon_distributed import AsyncRedisCounter
|
36 |
+
from chameleon.viewer.backend.utils import get_logger
|
37 |
+
|
38 |
+
logger = get_logger(__name__)
|
39 |
+
|
40 |
+
|
41 |
+
def nvidia_smi() -> str:
|
42 |
+
return subprocess.check_output(["nvidia-smi"], text=True)
|
43 |
+
|
44 |
+
|
45 |
+
async def await_generate_message(websocket: WebSocket) -> WSMultimodalMessage:
|
46 |
+
while True:
|
47 |
+
rec_message = await websocket.receive_json()
|
48 |
+
try:
|
49 |
+
maybe_message = WSMultimodalMessage.parse_obj(rec_message)
|
50 |
+
except pydantic.ValidationError:
|
51 |
+
maybe_message = None
|
52 |
+
logger.info("Got invalid message", maybe_message)
|
53 |
+
if maybe_message is not None:
|
54 |
+
return maybe_message
|
55 |
+
|
56 |
+
|
57 |
+
async def async_acquire_lock(
|
58 |
+
*,
|
59 |
+
websocket: WebSocket,
|
60 |
+
counter: AsyncRedisCounter,
|
61 |
+
lock: async_redis.lock.Lock,
|
62 |
+
interval=0.1,
|
63 |
+
status_interval=1,
|
64 |
+
hostname: str | None = None,
|
65 |
+
):
|
66 |
+
start = time.time()
|
67 |
+
await counter.add(1)
|
68 |
+
while True:
|
69 |
+
acquired = await lock.acquire(blocking_timeout=interval)
|
70 |
+
if acquired:
|
71 |
+
break
|
72 |
+
elapsed = time.time() - start
|
73 |
+
if elapsed > status_interval:
|
74 |
+
n_requests = await counter.count()
|
75 |
+
message = WSMultimodalMessage(
|
76 |
+
message_type=WSMessageType.QUEUE_STATUS,
|
77 |
+
content=[
|
78 |
+
Content(
|
79 |
+
content_type=ContentType.TEXT,
|
80 |
+
content=f"n_requests={n_requests}",
|
81 |
+
)
|
82 |
+
],
|
83 |
+
options=NoOptionsForQueueStatus(),
|
84 |
+
debug_info={"hostname": hostname},
|
85 |
+
).dict()
|
86 |
+
await websocket.send_json(message)
|
87 |
+
start = time.time()
|
88 |
+
await counter.sub(1)
|
89 |
+
|
90 |
+
|
91 |
+
COORDINATOR = "coordinator"
|
92 |
+
|
93 |
+
|
94 |
+
def web_app(
|
95 |
+
generator: AbstractMultimodalGenerator,
|
96 |
+
debug: bool = True,
|
97 |
+
redis_port: int | None = None,
|
98 |
+
) -> FastAPI:
|
99 |
+
app = FastAPI(debug=debug)
|
100 |
+
if redis_port is None:
|
101 |
+
redis_client = None
|
102 |
+
redis_lock = None
|
103 |
+
queue_counter = None
|
104 |
+
else:
|
105 |
+
redis_client = async_redis.Redis.from_url(f"redis://redis:{redis_port}")
|
106 |
+
redis_lock = async_redis.lock.Lock(redis_client, COORDINATOR)
|
107 |
+
queue_counter = AsyncRedisCounter(redis_client, "count_pending")
|
108 |
+
hostname = socket.gethostname()
|
109 |
+
|
110 |
+
@app.get("/api/2.0/status")
|
111 |
+
def alive() -> dict:
|
112 |
+
return {
|
113 |
+
"status": "alive",
|
114 |
+
"hostname": hostname,
|
115 |
+
"nvidia-smi": nvidia_smi(),
|
116 |
+
}
|
117 |
+
|
118 |
+
@app.websocket("/ws/chameleon/v2/{client_id}")
|
119 |
+
async def websocket_chameleon_v2(*, websocket: WebSocket, client_id: str):
|
120 |
+
logger.info("Requested client_id: %s", client_id)
|
121 |
+
await websocket.accept()
|
122 |
+
logger.info("Client opened %s with generator id %s", client_id, id(generator))
|
123 |
+
|
124 |
+
try:
|
125 |
+
while True:
|
126 |
+
generate_message = await await_generate_message(websocket)
|
127 |
+
logger.info("Got generate message: %s", str(generate_message)[:300])
|
128 |
+
parsed_prompt = []
|
129 |
+
for c in generate_message.content:
|
130 |
+
match c.content_type:
|
131 |
+
case ContentType.TEXT:
|
132 |
+
parsed_prompt.append(c.content)
|
133 |
+
case ContentType.IMAGE:
|
134 |
+
image_parts = c.content.split(",", 1)
|
135 |
+
if len(image_parts) < 2:
|
136 |
+
logger.error(
|
137 |
+
"Encountered invalid image: %s", image_parts
|
138 |
+
)
|
139 |
+
raise WebSocketException(
|
140 |
+
code=fastapi.status.WS_1008_POLICY_VIOLATION,
|
141 |
+
reason=f"Invalid image: {image_parts}",
|
142 |
+
)
|
143 |
+
image_data = image_parts[1]
|
144 |
+
base64_image = base64.b64decode(image_data)
|
145 |
+
image_file = io.BytesIO(base64_image)
|
146 |
+
parsed_prompt.append(PIL.Image.open(image_file))
|
147 |
+
case _:
|
148 |
+
raise ValueError("Unknown content type")
|
149 |
+
logger.info("Prompt: %s", parsed_prompt)
|
150 |
+
partial_outputs = []
|
151 |
+
final_contents: list[Content] = []
|
152 |
+
|
153 |
+
match generate_message.message_type:
|
154 |
+
case WSMessageType.GENERATE_TEXT:
|
155 |
+
output_generator = generator.generate_text_streaming
|
156 |
+
case WSMessageType.GENERATE_IMAGE:
|
157 |
+
output_generator = generator.generate_image_streaming
|
158 |
+
case WSMessageType.GENERATE_MULTIMODAL:
|
159 |
+
output_generator = generator.generate_multimodal_streaming
|
160 |
+
case _:
|
161 |
+
raise WebSocketException(
|
162 |
+
code=fastapi.status.WS_1008_POLICY_VIOLATION,
|
163 |
+
reason="Unknown message type",
|
164 |
+
)
|
165 |
+
|
166 |
+
logger.info(
|
167 |
+
"Acquiring lock for client %s generation with options: %s",
|
168 |
+
client_id,
|
169 |
+
generate_message.options,
|
170 |
+
)
|
171 |
+
option_args = generate_message.options.dict()
|
172 |
+
debug_info = {"hostname": hostname}
|
173 |
+
del option_args["message_type"]
|
174 |
+
output_generator = partial(
|
175 |
+
output_generator,
|
176 |
+
**option_args,
|
177 |
+
debug=debug_info,
|
178 |
+
)
|
179 |
+
if redis_lock is not None:
|
180 |
+
await async_acquire_lock(
|
181 |
+
websocket=websocket,
|
182 |
+
lock=redis_lock,
|
183 |
+
hostname=hostname,
|
184 |
+
counter=queue_counter,
|
185 |
+
)
|
186 |
+
await redis_client.set("has_lock", client_id)
|
187 |
+
|
188 |
+
logger.info(
|
189 |
+
"Starting locked generation for client %s with options: %s",
|
190 |
+
client_id,
|
191 |
+
generate_message.options,
|
192 |
+
)
|
193 |
+
try:
|
194 |
+
async for output_token in output_generator(parsed_prompt):
|
195 |
+
if isinstance(output_token, str):
|
196 |
+
content_type = ContentType.TEXT
|
197 |
+
content = output_token
|
198 |
+
message_type = WSMessageType.PARTIAL_OUTPUT
|
199 |
+
options = NoOptionsForPartial()
|
200 |
+
partial_outputs.extend(output_token)
|
201 |
+
elif isinstance(output_token, StreamingImage):
|
202 |
+
content_type = ContentType.IMAGE
|
203 |
+
image = output_token.image
|
204 |
+
img_io = io.BytesIO()
|
205 |
+
image.save(img_io, format="png")
|
206 |
+
content = (
|
207 |
+
"data:image/png;base64,"
|
208 |
+
+ base64.b64encode(img_io.getvalue()).decode()
|
209 |
+
)
|
210 |
+
if output_token.final:
|
211 |
+
message_type = WSMessageType.FULL_OUTPUT
|
212 |
+
options = NoOptionsForFull()
|
213 |
+
else:
|
214 |
+
message_type = WSMessageType.PARTIAL_OUTPUT
|
215 |
+
options = NoOptionsForPartial()
|
216 |
+
|
217 |
+
if output_token.final:
|
218 |
+
partial_outputs.append(output_token.image)
|
219 |
+
else:
|
220 |
+
raise ValueError(f"Invalid output_token: {output_token}")
|
221 |
+
|
222 |
+
message_content = Content(
|
223 |
+
content_type=content_type, content=content
|
224 |
+
)
|
225 |
+
match content_type:
|
226 |
+
case ContentType.TEXT:
|
227 |
+
final_contents.append(message_content)
|
228 |
+
case ContentType.IMAGE:
|
229 |
+
if message_type == WSMessageType.FULL_OUTPUT:
|
230 |
+
final_contents.append(message_content)
|
231 |
+
case _:
|
232 |
+
pass
|
233 |
+
|
234 |
+
message = WSMultimodalMessage(
|
235 |
+
message_type=message_type,
|
236 |
+
content=[message_content],
|
237 |
+
options=options,
|
238 |
+
debug_info=debug_info,
|
239 |
+
).dict()
|
240 |
+
await websocket.send_json(message)
|
241 |
+
finally:
|
242 |
+
if redis_lock is not None:
|
243 |
+
logger.info(
|
244 |
+
"Attempting release of lock for client %s generation with options: %s",
|
245 |
+
client_id,
|
246 |
+
generate_message.options,
|
247 |
+
)
|
248 |
+
owned = await redis_lock.owned()
|
249 |
+
if owned:
|
250 |
+
await redis_client.set("has_lock", "")
|
251 |
+
try:
|
252 |
+
await redis_lock.release()
|
253 |
+
except async_redis.lock.LockError:
|
254 |
+
pass
|
255 |
+
|
256 |
+
logger.info(
|
257 |
+
"Released lock for client %s generation with options: %s",
|
258 |
+
client_id,
|
259 |
+
generate_message.options,
|
260 |
+
)
|
261 |
+
await websocket.send_json(
|
262 |
+
WSMultimodalMessage(
|
263 |
+
message_type=WSMessageType.COMPLETE,
|
264 |
+
content=final_contents,
|
265 |
+
options=NoOptionsForComplete(),
|
266 |
+
debug_info=debug_info,
|
267 |
+
).dict()
|
268 |
+
)
|
269 |
+
except WebSocketDisconnect:
|
270 |
+
logger.info("Client disconnected %s", client_id)
|
271 |
+
except ConnectionClosedError:
|
272 |
+
logger.info("Client forced a close %s", client_id)
|
273 |
+
except ConnectionClosedOK:
|
274 |
+
logger.info("Connection closed ok %s", client_id)
|
275 |
+
finally:
|
276 |
+
if redis_lock is not None:
|
277 |
+
logger.info("Checking for client holding lock: %s", client_id)
|
278 |
+
owned = await redis_lock.owned()
|
279 |
+
if owned:
|
280 |
+
try:
|
281 |
+
logger.info("Attempted to release owned lock: %s", client_id)
|
282 |
+
await redis_lock.release()
|
283 |
+
except async_redis.lock.LockError:
|
284 |
+
pass
|
285 |
+
await redis_client.set("has_lock", "")
|
286 |
+
|
287 |
+
return app
|
288 |
+
|
289 |
+
|
290 |
+
def serve(
|
291 |
+
model: AbstractMultimodalGenerator,
|
292 |
+
host: str,
|
293 |
+
port: int,
|
294 |
+
debug: bool = True,
|
295 |
+
redis_port: int | None = None,
|
296 |
+
) -> None:
|
297 |
+
app = web_app(model, debug=debug, redis_port=redis_port)
|
298 |
+
# TODO: convert this to a subprocess call so enable more
|
299 |
+
# uvicorn features like multiple workers
|
300 |
+
uvicorn.run(app, host=host, port=port)
|
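The endpoint above speaks JSON over a WebSocket, with each message validated against `WSMultimodalMessage` from `chameleon/viewer/backend/data_types.py`. Below is a minimal client sketch using the pinned `websockets` package; the host, port, literal enum strings, and the empty `options` payload are assumptions for illustration and would need to match the actual schemas in `data_types.py`.

```python
import asyncio
import json

import websockets  # pinned in chameleon/viewer/backend/requirements.txt


async def main() -> None:
    # Host/port and the enum values below are assumptions, not part of this commit.
    uri = "ws://localhost:8000/ws/chameleon/v2/demo-client"
    async with websockets.connect(uri) as ws:
        request = {
            "message_type": "GENERATE_TEXT",
            "content": [{"content_type": "TEXT", "content": "Describe a chameleon."}],
            "options": {},  # placeholder; real options are defined in data_types.py
            "debug_info": {},
        }
        await ws.send(json.dumps(request))
        while True:
            # The server streams PARTIAL_OUTPUT / FULL_OUTPUT messages and
            # finishes each request with a COMPLETE message.
            reply = json.loads(await ws.recv())
            print(reply["message_type"], reply.get("content"))
            if reply["message_type"] == "COMPLETE":
                break


if __name__ == "__main__":
    asyncio.run(main())
```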
chameleon/viewer/backend/requirements.txt
ADDED
@@ -0,0 +1,35 @@
+# If black/isort/pytest change, then update `.circleci/config.yml`
+black==23.7.0
+isort==5.12.0
+pytest==7.4.0
+rich==13.5.*
+ipython
+
+# Do not change, python 3.11 needs this
+hydra-core==1.3.2
+typer==0.9.0
+httpx==0.24.1
+pylint==2.17.5
+submitit==1.4.2
+pudb==2022.1.3
+
+# These do/should match dependency versions
+# This is so that the viewer can run without any other deps outside of this file
+Pillow==10.0.*
+fastapi==0.101.1
+pydantic==1.10.*
+requests==2.31.*
+uvicorn==0.23.2
+python-multipart==0.0.6
+ruff==0.1.2
+websockets==12.0
+redis[hiredis]==5.0.1
+psutil==5.9.7
+
+# For inference
+albumentations==1.3.1
+einops==0.7.0
+pytorch_lightning==2.1.2
+transformers==4.36.2
+xformers==0.0.23
+torchvision==0.16.*
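Per the comment above, the viewer backend is meant to run with no dependencies beyond this file, so these pins can presumably be installed on their own with `pip install -r chameleon/viewer/backend/requirements.txt` (path as added in this commit; a dedicated virtual environment is assumed).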
chameleon/viewer/backend/utils.py
ADDED
@@ -0,0 +1,28 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the Chameleon License found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import types
+
+from rich.logging import RichHandler
+
+
+def configure_rich_logging():
+    FORMAT = "%(message)s"
+    logging.basicConfig(
+        level=logging.INFO,
+        handlers=[RichHandler(rich_tracebacks=True)],
+        format=FORMAT,
+        force=True,
+    )
+
+
+configure_rich_logging()
+
+
+def get_logger(module: types.ModuleType) -> logging.Logger:
+    """This forces logging.basicConfig to be called first."""
+    logger = logging.getLogger(module)
+    return logger
chameleon/viewer/frontend/README.md
ADDED
@@ -0,0 +1,11 @@
+# Install
+
+```
+npm install
+```
+
+
+# Run local
+```
+npm run dev
+```
chameleon/viewer/frontend/index.html
ADDED
@@ -0,0 +1,17 @@
+<!-- Copyright (c) Meta Platforms, Inc. and affiliates. -->
+
+<!-- This source code is licensed under the Chameleon License found in the -->
+<!-- LICENSE file in the root directory of this source tree. -->
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <link rel="icon" type="image/svg+xml" href="/vite.svg" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Chameleon Viewer</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.tsx"></script>
+  </body>
+</html>
chameleon/viewer/frontend/package-lock.json
ADDED
The diff for this file is too large to render.
See raw diff
chameleon/viewer/frontend/package.json
ADDED
@@ -0,0 +1,62 @@
+{
+  "name": "chameleon-frontend",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite --host 0.0.0.0 --port 7654",
+    "staging": "vite --mode staging --host 0.0.0.0",
+    "datadev": "vite --mode datadev --host 0.0.0.0",
+    "check-build": "tsc && vite build",
+    "build": "vite build",
+    "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
+    "preview": "vite preview",
+    "check-format": "prettier --check src",
+    "format": "prettier --write src",
+    "test": "vitest"
+  },
+  "dependencies": {
+    "@carbon/icons-react": "^11.25.0",
+    "@lexical/react": "^0.12.2",
+    "axios": "^1.4.0",
+    "lexical": "^0.12.2",
+    "prettier": "^3.0.3",
+    "react": "^18.2.0",
+    "react-cookie": "^6.1.1",
+    "react-daisyui": "^4.1.0",
+    "react-dnd": "^16.0.1",
+    "react-dnd-html5-backend": "^16.0.1",
+    "react-dom": "^18.2.0",
+    "react-dropzone": "^14.2.3",
+    "react-hotkeys-hook": "^4.4.1",
+    "react-markdown": "^9.0.1",
+    "react-router-dom": "^6.15.0",
+    "react-use-websocket": "^4.5.0",
+    "react18-json-view": "^0.2.4",
+    "remark-gfm": "^4.0.0",
+    "unique-username-generator": "^1.2.0",
+    "ws": "^8.14.2",
+    "zod": "^3.22.2",
+    "zustand": "^4.4.1"
+  },
+  "devDependencies": {
+    "@tailwindcss/typography": "^0.5.9",
+    "@types/react": "^18.2.15",
+    "@types/react-dom": "^18.2.7",
+    "@types/ws": "^8.5.9",
+    "@typescript-eslint/eslint-plugin": "^6.0.0",
+    "@typescript-eslint/parser": "^6.0.0",
+    "@vitejs/plugin-react": "^4.0.3",
+    "autoprefixer": "^10.4.15",
+    "daisyui": "^3.9.2",
+    "eslint": "^8.45.0",
+    "eslint-plugin-react-hooks": "^4.6.0",
+    "eslint-plugin-react-refresh": "^0.4.3",
+    "postcss": "^8.4.28",
+    "prettier": "^3.0.3",
+    "tailwindcss": "^3.3.3",
+    "typescript": "^5.0.2",
+    "vite": "^4.4.5",
+    "vitest": "^0.34.6"
+  }
+}
chameleon/viewer/frontend/postcss.config.cjs
ADDED
@@ -0,0 +1,13 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the Chameleon License found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+module.exports = {
+  plugins: {
+    tailwindcss: {},
+    autoprefixer: {},
+  },
+};
chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_DisplayVF_W_Wght.woff2
ADDED
Binary file (36.5 kB)
chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Display_W_Bd.woff
ADDED
Binary file (28.9 kB)
chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Display_W_Bd.woff2
ADDED
Binary file (23.6 kB)
chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Display_W_Md.woff
ADDED
Binary file (28.6 kB)
chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Display_W_Md.woff2
ADDED
Binary file (23.4 kB)
chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Display_W_SBd.woff
ADDED
Binary file (28.7 kB)
chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Display_W_SBd.woff2
ADDED
Binary file (23.6 kB)
chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_TextVF_W_Wght.woff2
ADDED
Binary file (36.3 kB)
chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Text_W_Bd.woff
ADDED
Binary file (28.8 kB)
chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Text_W_Bd.woff2
ADDED
Binary file (23.5 kB)
chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Text_W_Md.woff
ADDED
Binary file (28.7 kB)
chameleon/viewer/frontend/public/fonts/optimistic/Optimistic_Text_W_Md.woff2
ADDED
Binary file (23.5 kB)