from torch import nn, Tensor
from transformers import PretrainedConfig, PreTrainedModel, AutoConfig, AutoModel

# Optional workflow for publishing this model as custom code on the
# Hugging Face Hub:
#
# from huggingface_hub import notebook_login
# notebook_login()
#
# Register the classes so the Auto* factories can resolve "autoencoder":
# AutoEncoderConfig.register_for_auto_class()
# AutoEncoder.register_for_auto_class("AutoModel")
# AutoConfig.register("autoencoder", AutoEncoderConfig)
# AutoModel.register(AutoEncoderConfig, AutoEncoder)
#
# autoencoder.push_to_hub("autoencoder")
#
# Reload from the Hub (trust_remote_code=True is required for custom code):
# from transformers import AutoConfig, AutoModel
# config = AutoConfig.from_pretrained("amaye15/autoencoder", trust_remote_code=True)
# autoencoder = AutoModel.from_config(config, trust_remote_code=True)


class AutoEncoderConfig(PretrainedConfig):
    """
    Configuration class for AutoEncoder. This class stores the parameters for the autoencoder model.
    
    Attributes:
        input_dim (int): The dimensionality of the input data (default: 128).
        latent_dim (int): The dimensionality of the latent representation (default: 64).
        layer_types (str): The type of layers used, e.g., 'linear', 'lstm', 'gru', 'rnn' (default: 'linear').
        dropout_rate (float): The dropout rate applied after every layer except the last (default: 0.1).
        num_layers (int): The number of layers in the encoder/decoder (default: 3).
        compression_rate (float): Factor by which to compress the dimensions through layers (default: 0.5).
        bidirectional (bool): Whether the sequence layers should be bidirectional (default: False).
    """
    model_type = "autoencoder"

    def __init__(
        self, 
        input_dim: int = 128, 
        latent_dim: int = 64, 
        layer_types: str = 'linear', 
        dropout_rate: float = 0.1, 
        num_layers: int = 3, 
        compression_rate: float = 0.5, 
        bidirectional: bool = False,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.layer_types = layer_types
        self.dropout_rate = dropout_rate
        self.num_layers = num_layers
        self.compression_rate = compression_rate
        self.bidirectional = bidirectional
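
# A minimal usage sketch for the configuration (the values below are
# illustrative, not recommended defaults): with compression_rate=0.5 the
# layer widths halve at each stage until they reach latent_dim.
#
#   config = AutoEncoderConfig(
#       input_dim=128,        # width of each input vector
#       latent_dim=32,        # width of the bottleneck representation
#       layer_types="gru",    # one of 'linear', 'lstm', 'gru', 'rnn'
#       num_layers=2,
#       compression_rate=0.5,
#       bidirectional=True,
#   )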

def create_layers(
    model_section: str, 
    layer_types: str, 
    input_dim: int, 
    latent_dim: int, 
    num_layers: int, 
    dropout_rate: float, 
    compression_rate: float, 
    bidirectional: bool
) -> nn.Sequential:
    """
    Creates a sequence of layers for the encoder or decoder part of the autoencoder.

    Args:
        model_section (str): A string indicating whether this is for 'encoder' or 'decoder'.
        layer_types (str): The type of layers to include in the sequence.
        input_dim (int): The input dimension for the first layer.
        latent_dim (int): The target dimension for the latent representation.
        num_layers (int): The number of layers to create.
        dropout_rate (float): The dropout rate to apply between layers.
        compression_rate (float): The compression rate for reducing dimensions through layers.
        bidirectional (bool): Whether the RNN layers should be bidirectional.
    
    Returns:
        A nn.Sequential module containing the created layers.
    """
    layers = []
    current_dim = input_dim

    input_dimensions = []
    output_dimensions = []

    # Build the compression schedule: each stage shrinks the width by
    # compression_rate, but never below latent_dim.
    for _ in range(num_layers):
        input_dimensions.append(current_dim)
        current_dim = max(int(current_dim * compression_rate), latent_dim)
        output_dimensions.append(current_dim)

    # Pin the final stage to the latent dimension exactly.
    output_dimensions[-1] = latent_dim

    if model_section == "decoder":
        # Mirror the encoder: swap and reverse the dimension lists so the
        # decoder expands from latent_dim back up to input_dim. No extra
        # doubling is needed for bidirectional layers: the hidden size is
        # already halved below, so the concatenated forward/backward
        # outputs come back to the intended width.
        input_dimensions, output_dimensions = output_dimensions, input_dimensions
        input_dimensions.reverse()
        output_dimensions.reverse()

    for idx, (in_dim, out_dim) in enumerate(zip(input_dimensions, output_dimensions)):
        if layer_types == 'linear':
            layers.append(nn.Linear(in_dim, out_dim))
        elif layer_types in ('lstm', 'rnn', 'gru'):
            # Halve the hidden size for bidirectional layers so the
            # concatenated forward/backward outputs still match out_dim.
            rnn_cls = {'lstm': nn.LSTM, 'rnn': nn.RNN, 'gru': nn.GRU}[layer_types]
            hidden_size = out_dim // (2 if bidirectional else 1)
            layers.append(rnn_cls(in_dim, hidden_size, batch_first=True, bidirectional=bidirectional))
        if (idx != num_layers - 1) and (dropout_rate is not None):
            layers.append(nn.Dropout(dropout_rate))
    return nn.Sequential(*layers)
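
# For intuition, the dimension schedule under the default config
# (input_dim=128, latent_dim=64, num_layers=3, compression_rate=0.5):
#
#   encoder: 128 -> 64 -> 64 -> 64    (final stage pinned to latent_dim)
#   decoder:  64 -> 64 -> 64 -> 128   (mirrored back to input_dim)
#
# e.g. create_layers("encoder", "linear", 128, 64, 3, 0.1, 0.5, False)
# yields nn.Sequential(Linear(128, 64), Dropout, Linear(64, 64), Dropout,
# Linear(64, 64)).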

class AutoEncoder(PreTrainedModel):
    """
    AutoEncoder model implementing a symmetric encoder-decoder architecture.

    Inherits from PreTrainedModel so it integrates with Hugging Face
    save/load, configuration, and Hub utilities.
    
    Args:
        config (AutoEncoderConfig): The configuration instance with all model parameters.
    """
    config_class = AutoEncoderConfig
    
    def __init__(self, config: AutoEncoderConfig):
        super().__init__(config)
        
        self.encoder = create_layers(
            "encoder",
            config.layer_types, config.input_dim, config.latent_dim, 
            config.num_layers, config.dropout_rate, config.compression_rate,
            config.bidirectional
        )
        # Assuming symmetry between encoder and decoder
        self.decoder = create_layers(
            "decoder",
            config.layer_types, config.input_dim, config.latent_dim, 
            config.num_layers, config.dropout_rate, config.compression_rate,
            config.bidirectional
        )

    def forward(self, x: Tensor) -> Tensor:
        """
        Forward pass through the autoencoder.

        Args:
            x (Tensor): The input tensor to encode and decode.

        Returns:
            The reconstructed tensor produced by the decoder.
        """
        # self.config.layer_types holds a single layer type as a string.
        # Recurrent layers return (output, hidden) tuples, so they cannot be
        # called through nn.Sequential directly; unpack them layer by layer.
        if self.config.layer_types in ['lstm', 'rnn', 'gru']:
            for layer in self.encoder:
                if isinstance(layer, (nn.LSTM, nn.RNN, nn.GRU)):
                    x, _ = layer(x)  # discard the hidden state(s)
                else:
                    x = layer(x)

            for layer in self.decoder:
                if isinstance(layer, (nn.LSTM, nn.RNN, nn.GRU)):
                    x, _ = layer(x)
                else:
                    x = layer(x)
        else:
            x = self.encoder(x)
            x = self.decoder(x)

        return x
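

if __name__ == "__main__":
    # Smoke test: round-trip random data through the autoencoder. The shapes
    # below are illustrative; any (batch, input_dim) tensor works for the
    # 'linear' variant, and (batch, seq_len, input_dim) for recurrent ones.
    import torch

    config = AutoEncoderConfig(input_dim=128, latent_dim=64, layer_types="linear")
    model = AutoEncoder(config)
    x = torch.randn(4, 128)             # (batch, input_dim)
    print(model(x).shape)               # torch.Size([4, 128])

    seq_config = AutoEncoderConfig(input_dim=128, latent_dim=64, layer_types="lstm")
    seq_model = AutoEncoder(seq_config)
    x = torch.randn(4, 10, 128)         # (batch, seq_len, input_dim)
    print(seq_model(x).shape)           # torch.Size([4, 10, 128])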