Spaces:

czl
/

Seq2Seq

Build error

App Files Files Community

czl committed on Jul 29, 2023

Commit

808fd7f

•

1 Parent(s): 03031ae

added more models, changed layout

Browse files

Files changed (4) hide show

app.py +160 -17
requirements.txt +2 -1
vocab219SW/idx2word.json +0 -0
vocab219SW/word2idx.json +0 -0

app.py CHANGED Viewed

@@ -1,10 +1,11 @@
 import json
 import re
 import unicodedata
 from typing import Tuple
-import random
 import gradio as gr
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -346,9 +347,52 @@ norm_model219.load_state_dict(torch.load('NormSeq2Seq-219M_epoch35.pt',
                               map_location=torch.device('cpu')))
 norm_model219.to(device)
 models_dict = {'AttentionSeq2Seq-188M': attn_model, 'NormalSeq2Seq-188M': norm_model,
                'AttentionSeq2Seq-219M': attn_model219,
-               'NormalSeq2Seq-219M': norm_model219}
 def generateAttn188(sentence, history, max_len=12,
              word2idx=word2idx, idx2word=idx2word,
@@ -482,23 +526,122 @@ def generateNorm219(sentence, history, max_len=12,
     response = lookup_words(idx2word, outputs)
     return ' '.join(response).replace('<bos>', '').replace('<eos>', '').strip()
 with gr.Blocks() as demo:
-    gr.Markdown("""
-        # Seq2Seq Generative Chatbot with 188M parameters
-        """)
-    with gr.Row():
-        gr.ChatInterface(generateNorm188,
-                     title="NormalSeq2Seq-188M")
-        gr.ChatInterface(generateAttn188,
-                     title="AttentionSeq2Seq-188M")
-    gr.Markdown("""
-        # Seq2Seq Generative Chatbot with 219M parameters
-        """)
     with gr.Row():
-        gr.ChatInterface(generateNorm219,
-                     title="NormalSeq2Seq-219M")
-        gr.ChatInterface(generateAttn219,
-                     title="AttentionSeq2Seq-219M")
 if __name__ == "__main__":
     demo.launch()

 import json
+import random
 import re
 import unicodedata
 from typing import Tuple
 import gradio as gr
+import spacy
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
                               map_location=torch.device('cpu')))
 norm_model219.to(device)
+with open('vocab219SW/word2idx.json', 'r') as f:
+    word2idx3 = json.load(f)
+with open('vocab219SW/idx2word.json', 'r') as f:
+    idx2word3 = json.load(f)
+params219SW = {'input_dim': len(word2idx3),
+            'emb_dim': 192,
+            'enc_hid_dim': 256,
+            'dec_hid_dim': 256,
+            'dropout': 0.5,
+            'attn_dim': 64,
+            'teacher_forcing_ratio': 0.5,
+            'epochs': 35}
+enc = Encoder(input_dim=params219SW['input_dim'], emb_dim=params219SW['emb_dim'],
+              enc_hid_dim=params219SW['enc_hid_dim'], dec_hid_dim=params219SW['dec_hid_dim'],
+              dropout=params219SW['dropout'])
+attn = Attention(enc_hid_dim=params219SW['enc_hid_dim'], dec_hid_dim=params219SW['dec_hid_dim'],
+                 attn_dim=params219SW['attn_dim'])
+dec = AttnDecoder(output_dim=params219SW['input_dim'], emb_dim=params219['emb_dim'],
+                  enc_hid_dim=params219SW['enc_hid_dim'], dec_hid_dim=params219SW['dec_hid_dim'],
+                  attention=attn, dropout=params219SW['dropout'])
+attn_model219SW = Seq2Seq(encoder=enc, decoder=dec, device=device)
+attn_model219SW.load_state_dict(torch.load('AttnSeq2Seq-219M-SW_epoch35.pt',
+                              map_location=torch.device('cpu')))
+attn_model219SW.to(device)
+enc = Encoder(input_dim=params219SW['input_dim'], emb_dim=params219SW['emb_dim'],
+              enc_hid_dim=params219SW['enc_hid_dim'],
+              dec_hid_dim=params219SW['dec_hid_dim'], dropout=params219SW['dropout'])
+dec = Decoder(output_dim=params219SW['input_dim'], emb_dim=params219SW['emb_dim'],
+              enc_hid_dim=params219SW['enc_hid_dim'],
+              dec_hid_dim=params219SW['dec_hid_dim'],
+              dropout=params219SW['dropout'])
+norm_model219SW = Seq2Seq(encoder=enc, decoder=dec, device=device)
+norm_model219SW.load_state_dict(torch.load('NormSeq2Seq-219M-SW_epoch35.pt',
+                              map_location=torch.device('cpu')))
+norm_model219SW.to(device)
+nlp = spacy.load('en_core_web_sm')
+# Registry of loaded models keyed by display name; generateAttn219SW and
+# generateNorm219SW take this dict as their default ``models_dict`` argument.
 models_dict = {'AttentionSeq2Seq-188M': attn_model, 'NormalSeq2Seq-188M': norm_model,
                'AttentionSeq2Seq-219M': attn_model219,
+               'NormalSeq2Seq-219M': norm_model219,
+               'AttentionSeq2Seq-219M-SW': attn_model219SW,
+               'NormalSeq2Seq-219M-SW': norm_model219SW}
 def generateAttn188(sentence, history, max_len=12,
              word2idx=word2idx, idx2word=idx2word,
     response = lookup_words(idx2word, outputs)
     return ' '.join(response).replace('<bos>', '').replace('<eos>', '').strip()
+def tokenize_context(text, nlp=nlp):
+    """
+    Tokenize text and remove stop words
+    :param text: text to be tokenized
+    :return: list of tokens
+    """
+    return [tok.text for tok in nlp.tokenizer(text) if not tok.is_stop]
+def generateAttn219SW(sentence, history, max_len=12,
+             word2idx=word2idx3, idx2word=idx2word3,
+             device=device, tokenize_context=tokenize_context,
+             preprocess_text=preprocess_text,
+             lookup_words=lookup_words, models_dict=models_dict):
+    """
+    Generate response
+    :param model: model
+    :param sentence: sentence
+    :param max_len: maximum length of sequence
+    :param word2idx: word to index mapping
+    :param idx2word: index to word mapping
+    :return: response
+    """
+    history = history
+    model = models_dict['AttentionSeq2Seq-219M']
+    model.eval()
+    sentence = preprocess_text(sentence)
+    tokens = tokenize_context(sentence)
+    tokens = [word2idx[token] if token in word2idx else word2idx['<unk>'] for token in tokens]
+    tokens = [word2idx['<bos>']] + tokens + [word2idx['<eos>']]
+    tokens = torch.tensor(tokens, dtype=torch.long).unsqueeze(1).to(device)
+    outputs = [word2idx['<bos>']]
+    with torch.no_grad():
+        encoder_outputs, hidden = model.encoder(tokens)
+    for t in range(max_len):
+        output, hidden = model.decoder(torch.tensor([outputs[-1]], dtype=torch.long).to(device), hidden, encoder_outputs)
+        top1 = output.max(1)[1]
+        outputs.append(top1.item())
+        if top1.item() == word2idx['<eos>']:
+            break
+    response = lookup_words(idx2word, outputs)
+    return ' '.join(response).replace('<bos>', '').replace('<eos>', '').strip()
+def generateNorm219SW(sentence, history, max_len=12,
+             word2idx=word2idx3, idx2word=idx2word3,
+             device=device, tokenize_context=tokenize_context, preprocess_text=preprocess_text,
+             lookup_words=lookup_words, models_dict=models_dict):
+    """
+    Generate response
+    :param model: model
+    :param sentence: sentence
+    :param max_len: maximum length of sequence
+    :param word2idx: word to index mapping
+    :param idx2word: index to word mapping
+    :return: response
+    """
+    history = history
+    model = models_dict['NormalSeq2Seq-219M']
+    model.eval()
+    sentence = preprocess_text(sentence)
+    tokens = tokenize_context(sentence)
+    tokens = [word2idx[token] if token in word2idx else word2idx['<unk>'] for token in tokens]
+    tokens = [word2idx['<bos>']] + tokens + [word2idx['<eos>']]
+    tokens = torch.tensor(tokens, dtype=torch.long).unsqueeze(1).to(device)
+    outputs = [word2idx['<bos>']]
+    with torch.no_grad():
+        encoder_outputs, hidden = model.encoder(tokens)
+    for t in range(max_len):
+        output, hidden = model.decoder(torch.tensor([outputs[-1]], dtype=torch.long).to(device), hidden, encoder_outputs)
+        top1 = output.max(1)[1]
+        outputs.append(top1.item())
+        if top1.item() == word2idx['<eos>']:
+            break
+    response = lookup_words(idx2word, outputs)
+    return ' '.join(response).replace('<bos>', '').replace('<eos>', '').strip()
+# One chat UI per model. Each is built at module level and later placed into a
+# tab group. The description strings are multi-line literals, so their line
+# breaks are part of the text passed to gr.ChatInterface.
+# --- Models without attention ---
+norm188 = gr.ChatInterface(generateNorm188,
+                     title="NormalSeq2Seq-188M",
+description="""Seq2Seq Generative Chatbot without Attention.
+188,204,500 trainable parameters""")
+norm219 = gr.ChatInterface(generateNorm219,
+                     title="NormalSeq2Seq-219M",
+description="""Seq2Seq Generative Chatbot without Attention.
+219,456,724 trainable parameters""")
+norm219sw = gr.ChatInterface(generateNorm219SW,
+                        title="NormalSeq2Seq-219M-SW",
+description="""Seq2Seq Generative Chatbot without Attention.
+219,451,344 trainable parameters
+Trained with stop words removed for context (input) and more data.""")
+# --- Models with attention ---
+attn188 = gr.ChatInterface(generateAttn188,
+                     title="AttentionSeq2Seq-188M",
+description="""Seq2Seq Generative Chatbot with Attention.
+188,229,108 trainable parameters""")
+# NOTE(review): this description ends with a newline plus indentation, unlike
+# the other descriptions — confirm whether that trailing whitespace is intended.
+attn219 = gr.ChatInterface(generateAttn219,
+                     title="AttentionSeq2Seq-219M",
+description="""Seq2Seq Generative Chatbot with Attention.
+219,505,940 trainable parameters
+                     """)
+attn219sw = gr.ChatInterface(generateAttn219SW,
+                        title="AttentionSeq2Seq-219M-SW",
+description="""Seq2Seq Generative Chatbot with Attention.
+219,500,560 trainable parameters
+Trained with stop words removed for context (input) and more data""")
 with gr.Blocks() as demo:
     with gr.Row():
+        gr.TabbedInterface([norm188, norm219, norm219sw], ["188M", "219M", "219M-SW"])
+        gr.TabbedInterface([attn188, attn219, attn219sw], ["188M", "219M", "219M-SW"])
 if __name__ == "__main__":
     demo.launch()

requirements.txt CHANGED Viewed

@@ -7,4 +7,5 @@ torch
 torchtext
 nltk
 sentence-transformers
-scipy

 torchtext
 nltk
 sentence-transformers
+scipy
+en-core-web-sm @ https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl

vocab219SW/idx2word.json ADDED Viewed

The diff for this file is too large to render. See raw diff

vocab219SW/word2idx.json ADDED Viewed

The diff for this file is too large to render. See raw diff