Image-Text-to-Text
Transformers
PyTorch
English
doubutsu
conversational
custom_code
Inference Endpoints
File size: 440 Bytes
4b953b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from transformers import PretrainedConfig, Qwen2Config, SiglipVisionConfig


class DoubutsuConfig(PretrainedConfig):
    """Composite configuration holding a text config and a vision config.

    ``text_config`` is a ``Qwen2Config`` and ``vision_config`` is a
    ``SiglipVisionConfig``; both are stored as attributes on the instance.
    """

    model_type = "doubutsu"

    def __init__(self, **kwargs):
        """Build the nested sub-configs, then initialise the base config.

        Args:
            **kwargs: May contain ``text_config`` and/or ``vision_config``.
                Each is removed from ``kwargs`` before the remaining entries
                are forwarded to ``PretrainedConfig.__init__``. Each may be:

                * a dict of keyword arguments for the sub-config class,
                * an already constructed ``PretrainedConfig`` instance
                  (stored as-is), or
                * ``None`` / absent, in which case the sub-config is built
                  with its own defaults.
        """
        # Sub-configs are assigned before the base initialiser runs, matching
        # the original statement order.
        self.text_config = self._build_sub_config(
            Qwen2Config, kwargs.pop("text_config", None)
        )
        self.vision_config = self._build_sub_config(
            SiglipVisionConfig, kwargs.pop("vision_config", None)
        )
        super().__init__(**kwargs)

    @staticmethod
    def _build_sub_config(config_cls, value):
        """Return a sub-config built from ``value`` using ``config_cls``."""
        if value is None:
            # Missing or explicitly-null entry: fall back to class defaults
            # instead of failing on ``**None``.
            return config_cls()
        if isinstance(value, PretrainedConfig):
            # Caller passed a ready-made config object; unpacking it with
            # ``**`` would raise TypeError, so keep it unchanged.
            return value
        return config_cls(**value)