from typing import List, Optional, Tuple

import torch
from torch import Tensor
from torch.nn import Module
from transformers import T5ForConditionalGeneration, T5TokenizerFast

# Not referenced by the current code; left disabled.
#import os
#import torch.nn.functional as F


class T5(Module):
    '''
    String-in / string-out wrapper around the Hugging Face T5 model.

    T5 model from: https://huggingface.co/docs/transformers/model_doc/t5

    The module owns the tokenizer, the pretrained seq2seq model and an AdamW
    optimizer built over the model parameters. The learning-rate scheduler
    slot is created empty and is expected to be filled in by the caller.
    '''

    # Checkpoint names accepted by the constructor.
    SUPPORTED_VARIANTS = ("t5-small", "t5-base", "t5-large")

    def __init__(self, 
                 variant:str="t5-small",
                 max_source_length:int=256, 
                 max_target_length:int=128,
                 optimizer_config:Optional[dict]=None,
                ):
        '''
        Load the pretrained tokenizer/model and build the optimizer.

        Args:
            variant: name of the pretrained checkpoint; must be one of
                SUPPORTED_VARIANTS.
            max_source_length: maximum number of tokens kept for each input text.
            max_target_length: maximum number of tokens kept for each label text,
                and maximum number of new tokens produced by generation.
            optimizer_config: keyword arguments forwarded to torch.optim.AdamW
                (e.g. {"lr": 1e-4}). None means "use the AdamW defaults".

        Raises:
            ValueError: if variant is not a supported checkpoint name.
        '''

        # Validate with a real exception: `assert` statements are removed when
        # Python runs with -O, which would silently skip this check.
        if variant not in self.SUPPORTED_VARIANTS:
            raise ValueError(
                f"Unsupported variant {variant!r}; expected one of {list(self.SUPPORTED_VARIANTS)}"
            )

        super().__init__()

        self.variant            = variant
        self.max_source_length  = max_source_length
        self.max_target_length  = max_target_length

        # Tokenizer & model
        self.tokenizer          = T5TokenizerFast.from_pretrained(self.variant,
                                                                  model_max_length=self.max_source_length)
        self.model              = T5ForConditionalGeneration.from_pretrained(self.variant)
        
        # Optimizer (a None config falls back to an empty kwargs dict, so no
        # mutable default object is shared between instances)
        self.optimizer          = torch.optim.AdamW(self.parameters(), **(optimizer_config or {}))
        
        # Scheduler
        self.scheduler          = None


    def _device(self) -> torch.device:
        '''Return the device the model parameters currently live on.'''
        return next(self.parameters()).device


    def tokenize(self, input:List[str], max_length:Optional[int]=None) -> Tuple[Tensor, Tensor]:
        '''
        Convert a batch of strings into padded token ids and the matching attention mask.

        Args:
            input: batch of raw strings.
            max_length: truncation length in tokens; defaults to max_source_length.

        Returns:
            (input_ids, attention_mask), both placed on the same device as the
            model parameters.
        '''

        if max_length is None:
            max_length = self.max_source_length

        out = self.tokenizer(input, max_length=max_length,
                             truncation=True, padding=True, 
                             return_tensors="pt")

        # Follow the model instead of hard-coding CUDA, so the wrapper also
        # works on CPU or on a non-default GPU.
        device = self._device()
        return out.input_ids.to(device), out.attention_mask.to(device)


    def forward(self, input:List[str], label:Optional[List[str]]=None) -> Tuple[Tensor, Optional[Tensor]]:

        '''
        Run the model on a batch of input strings.

        When label is given, a supervised pass is executed and the result is
        (logits, loss). When label is None, sequences are generated and the
        result is (generated_token_ids, None); the ids are not decoded.
        '''
        
        input_ids, input_masks = self.tokenize(input)
        
        if label is not None:
            # Targets are truncated with their own length limit.
            label_ids, label_masks = self.tokenize(label, max_length=self.max_target_length)
            # Padding positions are set to -100 so they are ignored by the loss.
            label_ids = label_ids.masked_fill(label_masks == 0, -100)
            output = self.model(input_ids=input_ids,
                                attention_mask=input_masks,
                                labels=label_ids)
            return output.logits, output.loss
            
        # The attention mask keeps padded positions of shorter inputs from
        # being attended to during generation.
        return self.model.generate(input_ids=input_ids,
                                   attention_mask=input_masks,
                                   max_new_tokens=self.max_target_length), None


    def predict(self, input:List[str]) -> List[str]:
        
        '''
        Generate the target output for each input string and decode it to text.
        '''
        
        # Without labels, forward returns generated token ids and no loss.
        generated_ids, _ = self.forward(input=input)
        return self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        

if __name__ == '__main__':

    # Smoke test: one supervised forward pass (logits + loss), then generation
    # on the same inputs with the decoded result printed next to the target.
    # TODO: implement a tester class similar to T5-old.py to test if it works.

    model = T5('t5-small')
    model.to('cuda')

    # Larger translation examples, kept for manual experiments:
    #inputs = [
        #"translate English to German: Thank you so much, Chris.",
        #"translate English to German: I have been blown away by this conference, and I want to thank all of you for the many nice comments about what I had to say the other night.",
        #"translate German to English: Es ist mir wirklich eine Ehre, zweimal auf dieser Bühne stehen zu dürfen. Tausend Dank dafür.",
    #]

    #targets = [
        #"Vielen Dank, Chris.",
        #"Ich bin wirklich begeistert von dieser Konferenz, und ich danke Ihnen allen für die vielen netten Kommentare zu meiner Rede vorgestern Abend.",
        #"And it's truly a great honor to have the opportunity to come to this stage twice; I'm extremely grateful.",
    #]

    inputs = ["Good Morning, How are you?"]
    targets = ["Buongiorno, come stai?"]

    # Supervised pass: labels are supplied, so both logits and loss come back.
    logits, loss = model.forward(inputs, targets)
    print('Model forward')
    print('logits: ', logits)
    print('loss: ', loss)

    # Generation pass: decoded strings, one per input.
    outputs = model.predict(inputs)

    # Show each source sentence with its prediction and reference side by side.
    for source, predicted, expected in zip(inputs, outputs, targets):
        print(f"Input: \n{source}\n\nOutput: \n{predicted}\n\nTarget: \n{expected}\n\n")