from typing import Optional, Sequence

from torch import nn, Tensor
from transformers import PretrainedConfig, PreTrainedModel, AutoConfig, AutoModel

# Steps originally used to register and publish this model on the Hugging Face Hub:
# from huggingface_hub import notebook_login
# notebook_login()
# AutoEncoderConfig.register_for_auto_class()
# AutoEncoder.register_for_auto_class("AutoModel")
# AutoConfig.register("autoencoder", AutoEncoderConfig)
# AutoModel.register(AutoEncoderConfig, AutoModel)
# autoencoder.push_to_hub("autoencoder")
#
# Loading it back through the Auto classes:
# from transformers import AutoConfig, AutoModel
# config = AutoConfig.from_pretrained("amaye15/autoencoder", trust_remote_code=True)
# autoencoder = AutoModel.from_config(config, trust_remote_code=True)


class AutoEncoderConfig(PretrainedConfig):
    """
    Configuration class for AutoEncoder. This class stores the parameters of the autoencoder model.

    Attributes:
        input_dim (int): The dimensionality of the input data (default: 128).
        latent_dim (int): The dimensionality of the latent representation (default: 64).
        layer_types (str): The type of layers used, e.g. 'linear', 'lstm', 'gru', 'rnn' (default: 'linear').
        dropout_rate (float): The dropout rate applied after each layer except the last (default: 0.1).
        num_layers (int): The number of layers in the encoder/decoder (default: 3).
        compression_rate (float): Factor by which the dimensions are compressed from layer to layer (default: 0.5).
        bidirectional (bool): Whether the sequence layers should be bidirectional (default: False).
    """

    model_type = "autoencoder"

    def __init__(
        self,
        input_dim: int = 128,
        latent_dim: int = 64,
        layer_types: str = 'linear',
        dropout_rate: float = 0.1,
        num_layers: int = 3,
        compression_rate: float = 0.5,
        bidirectional: bool = False,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.layer_types = layer_types
        self.dropout_rate = dropout_rate
        self.num_layers = num_layers
        self.compression_rate = compression_rate
        self.bidirectional = bidirectional


def create_layers(
    model_section: str,
    layer_types: str,
    input_dim: int,
    latent_dim: int,
    num_layers: int,
    dropout_rate: float,
    compression_rate: float,
    bidirectional: bool
) -> nn.Sequential:
    """
    Creates a sequence of layers for the encoder or decoder part of the autoencoder.

    Args:
        model_section (str): Either 'encoder' or 'decoder'.
        layer_types (str): The type of layers to include in the sequence.
        input_dim (int): The input dimension for the first layer.
        latent_dim (int): The target dimension of the latent representation.
        num_layers (int): The number of layers to create.
        dropout_rate (float): The dropout rate to apply between layers.
        compression_rate (float): The compression rate used to shrink dimensions from layer to layer.
        bidirectional (bool): Whether the RNN layers should be bidirectional.

    Returns:
        An nn.Sequential module containing the created layers.
""" layers = [] current_dim = input_dim input_dimensions = [] output_dimensions = [] for _ in range(num_layers): input_dimensions.append(current_dim) next_dim = max(int(current_dim * compression_rate), latent_dim) current_dim = next_dim output_dimensions.append(current_dim) output_dimensions[num_layers - 1] = latent_dim if model_section == "decoder": input_dimensions, output_dimensions = output_dimensions, input_dimensions input_dimensions.reverse() output_dimensions.reverse() if bidirectional and (layer_types in ['lstm', 'rnn', 'gru']): output_dimensions = [2 * value for value in output_dimensions] for idx, (input_dim, output_dim) in enumerate(zip(input_dimensions, output_dimensions)): if layer_types == 'linear': layers.append(nn.Linear(input_dim, output_dim)) elif layer_types == 'lstm': layers.append(nn.LSTM(input_dim, output_dim // (2 if bidirectional else 1), batch_first=True, bidirectional=bidirectional)) elif layer_types == 'rnn': layers.append(nn.RNN(input_dim, output_dim // (2 if bidirectional else 1), batch_first=True, bidirectional=bidirectional)) elif layer_types == 'gru': layers.append(nn.GRU(input_dim, output_dim // (2 if bidirectional else 1), batch_first=True, bidirectional=bidirectional)) if (idx != num_layers - 1) and (dropout_rate is not None): layers.append(nn.Dropout(dropout_rate)) return nn.Sequential(*layers) class AutoEncoder(PreTrainedModel): """ AutoEncoder model for creating an encoder-decoder architecture. Inherits from PreTrainedModel to utilize its pretrained model features from the Hugging Face library. Args: config (AutoEncoderConfig): The configuration instance with all model parameters. """ config_class = AutoEncoderConfig def __init__(self, config: AutoEncoderConfig): super(AutoEncoder, self).__init__(config) self.encoder = create_layers( "encoder", config.layer_types, config.input_dim, config.latent_dim, config.num_layers, config.dropout_rate, config.compression_rate, config.bidirectional ) # Assuming symmetry between encoder and decoder self.decoder = create_layers( "decoder", config.layer_types, config.input_dim, config.latent_dim, config.num_layers, config.dropout_rate, config.compression_rate, config.bidirectional ) def forward(self, x: Tensor) -> Tensor: """ Forward pass through the autoencoder. Args: x (Tensor): The input tensor to encode and decode. Returns: A Tensor that is the output of the decoder. """ # Assuming self.config.layer_types contains only a single layer type as a string. # If using sequence models, handle each layer's outputs if self.config.layer_types in ['lstm', 'rnn', 'gru']: for layer in self.encoder: if isinstance(layer, nn.LSTM): x, (h_n, c_n) = layer(x) elif isinstance(layer, nn.RNN) or isinstance(layer, nn.GRU): x, h_o = layer(x) else: x = layer(x) for layer in self.decoder: if isinstance(layer, nn.LSTM): x, (h_n, c_n) = layer(x) elif isinstance(layer, nn.RNN) or isinstance(layer, nn.GRU): x, h_o = layer(x) else: x = layer(x) else: x = self.encoder(x) x = self.decoder(x) return x