Spaces:

czl
/

Seq2Seq

Build error

App Files Files Community

czl committed on Jul 28, 2023

Commit

93ceca7

•

1 Parent(s): 3159b65

update with 219M models

Browse files

Files changed (3) hide show

app.py +128 -16
vocab219/idx2word.json +0 -0
vocab219/word2idx.json +0 -0

app.py CHANGED Viewed

@@ -60,15 +60,6 @@ def lookup_words(idx2word, indices):
     return [idx2word[str(idx)] for idx in indices]
-params = {'input_dim': len(word2idx),
-            'emb_dim': 128,
-            'enc_hid_dim': 256,
-            'dec_hid_dim': 256,
-            'dropout': 0.5,
-            'attn_dim': 32,
-            'teacher_forcing_ratio': 0.5,
-            'epochs': 35}
 class Encoder(nn.Module):
     """
     GRU RNN Encoder
@@ -292,8 +283,15 @@ class Seq2Seq(nn.Module):
             output = trg[t] if teacher_force else top1
         return outputs
 enc = Encoder(input_dim=params['input_dim'], emb_dim=params['emb_dim'], enc_hid_dim=params['enc_hid_dim'], dec_hid_dim=params['dec_hid_dim'], dropout=params['dropout'])
 attn = Attention(enc_hid_dim=params['enc_hid_dim'], dec_hid_dim=params['dec_hid_dim'], attn_dim=params['attn_dim'])
@@ -308,9 +306,50 @@ norm_model = Seq2Seq(encoder=enc, decoder=dec, device=device)
 norm_model.load_state_dict(torch.load('NormSeq2Seq-188M_epoch35.pt', map_location=torch.device('cpu')))
 norm_model.to(device)
-models_dict = {'AttentionSeq2Seq-188M': attn_model, 'NormalSeq2Seq-188M': norm_model}
-def generateAttn(sentence, history, max_len=12,
              word2idx=word2idx, idx2word=idx2word,
              device=device, tokenize=tokenize, preprocess_text=preprocess_text,
              lookup_words=lookup_words, models_dict=models_dict):
@@ -343,7 +382,7 @@ def generateAttn(sentence, history, max_len=12,
     response = lookup_words(idx2word, outputs)
     return ' '.join(response).replace('<bos>', '').replace('<eos>', '').strip()
-def generateNorm(sentence, history, max_len=12,
              word2idx=word2idx, idx2word=idx2word,
              device=device, tokenize=tokenize, preprocess_text=preprocess_text,
              lookup_words=lookup_words, models_dict=models_dict):
@@ -376,13 +415,86 @@ def generateNorm(sentence, history, max_len=12,
     response = lookup_words(idx2word, outputs)
     return ' '.join(response).replace('<bos>', '').replace('<eos>', '').strip()
-# demo = gr.ChatInterface(generate, title="AttentionSeq2Seq-188M")
 with gr.Blocks() as demo:
-    gr.ChatInterface(generateNorm,
                      title="NormalSeq2Seq-188M")
-    gr.ChatInterface(generateAttn,
                      title="AttentionSeq2Seq-188M")
 if __name__ == "__main__":
     demo.launch()

     return [idx2word[str(idx)] for idx in indices]
 class Encoder(nn.Module):
     """
     GRU RNN Encoder
             output = trg[t] if teacher_force else top1
         return outputs
+params = {'input_dim': len(word2idx),
+            'emb_dim': 128,
+            'enc_hid_dim': 256,
+            'dec_hid_dim': 256,
+            'dropout': 0.5,
+            'attn_dim': 32,
+            'teacher_forcing_ratio': 0.5,
+            'epochs': 35}
 enc = Encoder(input_dim=params['input_dim'], emb_dim=params['emb_dim'], enc_hid_dim=params['enc_hid_dim'], dec_hid_dim=params['dec_hid_dim'], dropout=params['dropout'])
 attn = Attention(enc_hid_dim=params['enc_hid_dim'], dec_hid_dim=params['dec_hid_dim'], attn_dim=params['attn_dim'])
 norm_model.load_state_dict(torch.load('NormSeq2Seq-188M_epoch35.pt', map_location=torch.device('cpu')))
 norm_model.to(device)
+with open('vocab219/word2idx.json', 'r') as f:  # word -> index vocabulary for the 219M models
+    word2idx2 = json.load(f)
+with open('vocab219/idx2word.json', 'r') as f:  # index -> word vocabulary for the 219M models
+    idx2word2 = json.load(f)
+params219 = {'input_dim': len(word2idx2),  # input_dim is the size of the 219M vocabulary
+            'emb_dim': 192,
+            'enc_hid_dim': 256,
+            'dec_hid_dim': 256,
+            'dropout': 0.5,
+            'attn_dim': 64,
+            'teacher_forcing_ratio': 0.5,
+            'epochs': 35}
+enc = Encoder(input_dim=params219['input_dim'], emb_dim=params219['emb_dim'],  # enc/attn/dec are rebound for attn_model219
+              enc_hid_dim=params219['enc_hid_dim'], dec_hid_dim=params219['dec_hid_dim'],
+              dropout=params219['dropout'])
+attn = Attention(enc_hid_dim=params219['enc_hid_dim'], dec_hid_dim=params219['dec_hid_dim'],
+                 attn_dim=params219['attn_dim'])
+dec = AttnDecoder(output_dim=params219['input_dim'], emb_dim=params219['emb_dim'],  # output_dim reuses input_dim (same vocabulary size)
+                  enc_hid_dim=params219['enc_hid_dim'], dec_hid_dim=params219['dec_hid_dim'],
+                  attention=attn, dropout=params219['dropout'])
+attn_model219 = Seq2Seq(encoder=enc, decoder=dec, device=device)
+attn_model219.load_state_dict(torch.load('AttnSeq2Seq-219M_epoch35.pt',  # checkpoint tensors are mapped to CPU on load
+                              map_location=torch.device('cpu')))
+attn_model219.to(device)  # then the model is moved to the configured device
+enc = Encoder(input_dim=params219['input_dim'], emb_dim=params219['emb_dim'],  # separate encoder instance for norm_model219
+              enc_hid_dim=params219['enc_hid_dim'],
+              dec_hid_dim=params219['dec_hid_dim'], dropout=params219['dropout'])
+dec = Decoder(output_dim=params219['input_dim'], emb_dim=params219['emb_dim'],  # Decoder (no attention argument) instead of AttnDecoder
+              enc_hid_dim=params219['enc_hid_dim'],
+              dec_hid_dim=params219['dec_hid_dim'],
+              dropout=params219['dropout'])
+norm_model219 = Seq2Seq(encoder=enc, decoder=dec, device=device)
+norm_model219.load_state_dict(torch.load('NormSeq2Seq-219M_epoch35.pt',  # checkpoint tensors are mapped to CPU on load
+                              map_location=torch.device('cpu')))
+norm_model219.to(device)
+models_dict = {'AttentionSeq2Seq-188M': attn_model, 'NormalSeq2Seq-188M': norm_model,  # model name -> loaded model, looked up by the generate* functions
+               'AttentionSeq2Seq-219M': attn_model219,
+               'NormalSeq2Seq-219M': norm_model219}
+def generateAttn188(sentence, history, max_len=12,
              word2idx=word2idx, idx2word=idx2word,
              device=device, tokenize=tokenize, preprocess_text=preprocess_text,
              lookup_words=lookup_words, models_dict=models_dict):
     response = lookup_words(idx2word, outputs)
     return ' '.join(response).replace('<bos>', '').replace('<eos>', '').strip()
+def generateNorm188(sentence, history, max_len=12,
              word2idx=word2idx, idx2word=idx2word,
              device=device, tokenize=tokenize, preprocess_text=preprocess_text,
              lookup_words=lookup_words, models_dict=models_dict):
     response = lookup_words(idx2word, outputs)
     return ' '.join(response).replace('<bos>', '').replace('<eos>', '').strip()
+def generateAttn219(sentence, history, max_len=12,
+             word2idx=word2idx2, idx2word=idx2word2,
+             device=device, tokenize=tokenize, preprocess_text=preprocess_text,
+             lookup_words=lookup_words, models_dict=models_dict):
+    """
+    Generate response
+    :param model: model
+    :param sentence: sentence
+    :param max_len: maximum length of sequence
+    :param word2idx: word to index mapping
+    :param idx2word: index to word mapping
+    :return: response
+    """
+    history = history
+    model = models_dict['AttentionSeq2Seq-219M']
+    model.eval()
+    sentence = preprocess_text(sentence)
+    tokens = tokenize(sentence)
+    tokens = [word2idx[token] if token in word2idx else word2idx['<unk>'] for token in tokens]
+    tokens = [word2idx['<bos>']] + tokens + [word2idx['<eos>']]
+    tokens = torch.tensor(tokens, dtype=torch.long).unsqueeze(1).to(device)
+    outputs = [word2idx['<bos>']]
+    with torch.no_grad():
+        encoder_outputs, hidden = model.encoder(tokens)
+    for t in range(max_len):
+        output, hidden = model.decoder(torch.tensor([outputs[-1]], dtype=torch.long).to(device), hidden, encoder_outputs)
+        top1 = output.max(1)[1]
+        outputs.append(top1.item())
+        if top1.item() == word2idx['<eos>']:
+            break
+    response = lookup_words(idx2word, outputs)
+    return ' '.join(response).replace('<bos>', '').replace('<eos>', '').strip()
+def generateNorm219(sentence, history, max_len=12,
+             word2idx=word2idx2, idx2word=idx2word2,
+             device=device, tokenize=tokenize, preprocess_text=preprocess_text,
+             lookup_words=lookup_words, models_dict=models_dict):
+    """
+    Generate response
+    :param model: model
+    :param sentence: sentence
+    :param max_len: maximum length of sequence
+    :param word2idx: word to index mapping
+    :param idx2word: index to word mapping
+    :return: response
+    """
+    history = history
+    model = models_dict['NormalSeq2Seq-219M']
+    model.eval()
+    sentence = preprocess_text(sentence)
+    tokens = tokenize(sentence)
+    tokens = [word2idx[token] if token in word2idx else word2idx['<unk>'] for token in tokens]
+    tokens = [word2idx['<bos>']] + tokens + [word2idx['<eos>']]
+    tokens = torch.tensor(tokens, dtype=torch.long).unsqueeze(1).to(device)
+    outputs = [word2idx['<bos>']]
+    with torch.no_grad():
+        encoder_outputs, hidden = model.encoder(tokens)
+    for t in range(max_len):
+        output, hidden = model.decoder(torch.tensor([outputs[-1]], dtype=torch.long).to(device), hidden, encoder_outputs)
+        top1 = output.max(1)[1]
+        outputs.append(top1.item())
+        if top1.item() == word2idx['<eos>']:
+            break
+    response = lookup_words(idx2word, outputs)
+    return ' '.join(response).replace('<bos>', '').replace('<eos>', '').strip()
 with gr.Blocks() as demo:  # page containing all four chat interfaces
+    with gr.Row():  # row holding the two 188M chat interfaces
+        gr.ChatInterface(generateNorm188,
                      title="NormalSeq2Seq-188M")
+        gr.ChatInterface(generateAttn188,
                      title="AttentionSeq2Seq-188M")
+    gr.Markdown("""
+        # Seq2Seq Generative Chatbot with 219M parameters
+        """)
+    with gr.Row():  # row holding the two 219M chat interfaces
+        gr.ChatInterface(generateNorm219,
+                     title="NormalSeq2Seq-219M")
+        gr.ChatInterface(generateAttn219,
+                     title="AttentionSeq2Seq-219M")
 if __name__ == "__main__":
     demo.launch()

vocab219/idx2word.json ADDED Viewed

The diff for this file is too large to render. See raw diff

vocab219/word2idx.json ADDED Viewed

The diff for this file is too large to render. See raw diff