shujaatalishariati committed
Commit 847e3e1
Parent: 9367038

Initial commit for Gradio app with GECToR

app.py CHANGED
@@ -7,6 +7,8 @@ import nltk
 from nltk.corpus import wordnet
 from textblob import TextBlob
 from pattern.en import conjugate, lemma, pluralize, singularize
+from gector.gec_model import GecBERTModel  # Import GECToR Model
+from utils.helpers import read_lines, normalize  # GECToR utilities
 
 # Initialize the English text classification pipeline for AI detection
 pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -84,29 +86,41 @@ def correct_singular_plural_errors(text):
 
     return ' '.join(corrected_text)
 
-# Function to check and correct article errors
-def correct_article_errors(text):
-    doc = nlp(text)
-    corrected_text = []
-    for token in doc:
-        if token.text in ['a', 'an']:
-            next_token = token.nbor(1)
-            if token.text == "a" and next_token.text[0].lower() in "aeiou":
-                corrected_text.append("an")
-            elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
-                corrected_text.append("a")
-            else:
-                corrected_text.append(token.text)
-        else:
-            corrected_text.append(token.text)
-    return ' '.join(corrected_text)
-
 # Function to correct overall grammar using TextBlob
-def correct_grammar(text):
+def correct_grammar_textblob(text):
     blob = TextBlob(text)
     corrected_text = str(blob.correct())  # TextBlob's built-in grammar correction
     return corrected_text
 
+# Initialize GECToR Model for Grammar Correction
+def load_gector_model():
+    model_path = ["gector/roberta_1_gector.th"]  # Ensure the model file is placed correctly
+    vocab_path = "output_vocabulary"
+    model = GecBERTModel(vocab_path=vocab_path,
+                         model_paths=model_path,
+                         max_len=50,
+                         min_len=3,
+                         iterations=5,
+                         min_error_probability=0.0,
+                         lowercase_tokens=0,
+                         model_name="roberta",
+                         special_tokens_fix=1,
+                         log=False,
+                         confidence=0,
+                         del_confidence=0,
+                         is_ensemble=False,
+                         weigths=None)
+    return model
+
+# Load the GECToR model
+gector_model = load_gector_model()
+
+# Function to correct grammar using GECToR
+def correct_grammar_gector(text):
+    sentences = [text.split()]
+    corrected_sentences, _ = gector_model.handle_batch(sentences)
+    return " ".join(corrected_sentences[0])
+
 # Paraphrasing function using SpaCy and NLTK (Humanifier)
 def paraphrase_with_spacy_nltk(text):
     doc = nlp(text)
@@ -132,28 +146,17 @@ def paraphrase_with_spacy_nltk(text):
         else:
             paraphrased_words.append(token.text)
 
-    # Join the words back into a sentence
-    paraphrased_sentence = ' '.join(paraphrased_words)
-
-    return paraphrased_sentence
+    return ' '.join(paraphrased_words)
 
 # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
 def paraphrase_and_correct(text):
     # Step 1: Paraphrase the text
     paraphrased_text = paraphrase_with_spacy_nltk(text)
 
-    # Step 2: Apply grammatical corrections on the paraphrased text
-    corrected_text = correct_article_errors(paraphrased_text)
-    corrected_text = capitalize_sentences_and_nouns(corrected_text)
-    corrected_text = correct_singular_plural_errors(corrected_text)
-
-    # Step 3: Correct tense errors
-    corrected_text = correct_tense_errors(corrected_text)
-
-    # Step 4: Correct overall grammar using TextBlob
-    final_text = correct_grammar(corrected_text)
-
-    return final_text
+    # Step 2: Apply grammatical corrections using GECToR
+    corrected_text = correct_grammar_gector(paraphrased_text)
+
+    return corrected_text
 
 # Gradio app setup with two tabs
 with gr.Blocks() as demo:
@@ -163,15 +166,13 @@ with gr.Blocks() as demo:
         label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
         score1 = gr.Textbox(lines=1, label='Prob')
 
-        # Connect the prediction function to the button
        button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
 
     with gr.Tab("Humanifier"):
         text_input = gr.Textbox(lines=5, label="Input Text")
         paraphrase_button = gr.Button("Paraphrase & Correct")
-        output_text = gr.Textbox(label="Paraphrased Text")
+        output_text = gr.Textbox(label="Paraphrased and Corrected Text")
 
-        # Connect the paraphrasing function to the button
         paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
 
 # Launch the app
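
A minimal smoke test of the new GECToR path, assuming the checkpoint and vocabulary exist at the paths hard-coded above (the sample sentence is only illustrative):

    # Hypothetical snippet to run inside app.py once gector_model is loaded
    sample = "he go to school yesterday"
    corrected, n_edits = gector_model.handle_batch([sample.split()])
    print(" ".join(corrected[0]), f"({n_edits} edits applied)")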
gector/bert_token_embedder.py ADDED
@@ -0,0 +1,269 @@
"""Tweaked version of corresponding AllenNLP file"""
import logging
from copy import deepcopy
from typing import Dict

import torch
import torch.nn.functional as F
from allennlp.modules.token_embedders.token_embedder import TokenEmbedder
from allennlp.nn import util
from transformers import AutoModel, PreTrainedModel

logger = logging.getLogger(__name__)


class PretrainedBertModel:
    """
    In some instances you may want to load the same BERT model twice
    (e.g. to use as a token embedder and also as a pooling layer).
    This factory provides a cache so that you don't actually have to load the model twice.
    """

    _cache: Dict[str, PreTrainedModel] = {}

    @classmethod
    def load(cls, model_name: str, cache_model: bool = True) -> PreTrainedModel:
        if model_name in cls._cache:
            return PretrainedBertModel._cache[model_name]

        model = AutoModel.from_pretrained(model_name)
        if cache_model:
            cls._cache[model_name] = model

        return model


class BertEmbedder(TokenEmbedder):
    """
    A ``TokenEmbedder`` that produces BERT embeddings for your tokens.
    Should be paired with a ``BertIndexer``, which produces wordpiece ids.
    Most likely you want to use ``PretrainedBertEmbedder``
    for one of the named pretrained models, not this base class.

    Parameters
    ----------
    bert_model: ``BertModel``
        The BERT model being wrapped.
    top_layer_only: ``bool``, optional (default = ``False``)
        If ``True``, then only return the top layer instead of applying the scalar mix.
    max_pieces : int, optional (default: 512)
        The BERT embedder uses positional embeddings and so has a corresponding
        maximum length for its input ids. Assuming the inputs are windowed
        and padded appropriately by this length, the embedder will split them into a
        large batch, feed them into BERT, and recombine the output as if it was a
        longer sequence.
    num_start_tokens : int, optional (default: 1)
        The number of starting special tokens input to BERT (usually 1, i.e., [CLS])
    num_end_tokens : int, optional (default: 1)
        The number of ending tokens input to BERT (usually 1, i.e., [SEP])
    scalar_mix_parameters: ``List[float]``, optional, (default = None)
        If not ``None``, use these scalar mix parameters to weight the representations
        produced by different layers. These mixing weights are not updated during
        training.
    """

    def __init__(
        self,
        bert_model: PreTrainedModel,
        top_layer_only: bool = False,
        max_pieces: int = 512,
        num_start_tokens: int = 1,
        num_end_tokens: int = 1
    ) -> None:
        super().__init__()
        self.bert_model = deepcopy(bert_model)
        self.output_dim = bert_model.config.hidden_size
        self.max_pieces = max_pieces
        self.num_start_tokens = num_start_tokens
        self.num_end_tokens = num_end_tokens
        self._scalar_mix = None

    def set_weights(self, freeze):
        for param in self.bert_model.parameters():
            param.requires_grad = not freeze
        return

    def get_output_dim(self) -> int:
        return self.output_dim

    def forward(
        self,
        input_ids: torch.LongTensor,
        offsets: torch.LongTensor = None
    ) -> torch.Tensor:
        """
        Parameters
        ----------
        input_ids : ``torch.LongTensor``
            The (batch_size, ..., max_sequence_length) tensor of wordpiece ids.
        offsets : ``torch.LongTensor``, optional
            The BERT embeddings are one per wordpiece. However it's possible/likely
            you might want one per original token. In that case, ``offsets``
            represents the indices of the desired wordpiece for each original token.
            Depending on how your token indexer is configured, this could be the
            position of the last wordpiece for each token, or it could be the position
            of the first wordpiece for each token.
            For example, if you had the sentence "Definitely not", and if the corresponding
            wordpieces were ["Def", "##in", "##ite", "##ly", "not"], then the input_ids
            would be 5 wordpiece ids, and the "last wordpiece" offsets would be [3, 4].
            If offsets are provided, the returned tensor will contain only the wordpiece
            embeddings at those positions, and (in particular) will contain one embedding
            per token. If offsets are not provided, the entire tensor of wordpiece embeddings
            will be returned.
        """

        batch_size, full_seq_len = input_ids.size(0), input_ids.size(-1)
        initial_dims = list(input_ids.shape[:-1])

        # The embedder may receive an input tensor that has a sequence length longer than can
        # be fit. In that case, we should expect the wordpiece indexer to create padded windows
        # of length `self.max_pieces` for us, and have them concatenated into one long sequence.
        # E.g., "[CLS] I went to the [SEP] [CLS] to the store to [SEP] ..."
        # We can then split the sequence into sub-sequences of that length, and concatenate them
        # along the batch dimension so we effectively have one huge batch of partial sentences.
        # This can then be fed into BERT without any sentence length issues. Keep in mind
        # that the memory consumption can dramatically increase for large batches with extremely
        # long sentences.
        needs_split = full_seq_len > self.max_pieces
        last_window_size = 0
        if needs_split:
            # Split the flattened list by the window size, `max_pieces`
            split_input_ids = list(input_ids.split(self.max_pieces, dim=-1))

            # We want all sequences to be the same length, so pad the last sequence
            last_window_size = split_input_ids[-1].size(-1)
            padding_amount = self.max_pieces - last_window_size
            split_input_ids[-1] = F.pad(split_input_ids[-1], pad=[0, padding_amount], value=0)

            # Now combine the sequences along the batch dimension
            input_ids = torch.cat(split_input_ids, dim=0)

        input_mask = (input_ids != 0).long()
        # input_ids may have extra dimensions, so we reshape down to 2-d
        # before calling the BERT model and then reshape back at the end.
        all_encoder_layers = self.bert_model(
            input_ids=util.combine_initial_dims(input_ids),
            attention_mask=util.combine_initial_dims(input_mask),
        )[0]
        if len(all_encoder_layers[0].shape) == 3:
            all_encoder_layers = torch.stack(all_encoder_layers)
        elif len(all_encoder_layers[0].shape) == 2:
            all_encoder_layers = torch.unsqueeze(all_encoder_layers, dim=0)

        if needs_split:
            # First, unpack the output embeddings into one long sequence again
            unpacked_embeddings = torch.split(all_encoder_layers, batch_size, dim=1)
            unpacked_embeddings = torch.cat(unpacked_embeddings, dim=2)

            # Next, select indices of the sequence such that it will result in embeddings representing the original
            # sentence. To capture maximal context, the indices will be the middle part of each embedded window
            # sub-sequence (plus any leftover start and final edge windows), e.g.,
            # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
            # "[CLS] I went to the very fine [SEP] [CLS] the very fine store to eat [SEP]"
            # with max_pieces = 8 should produce max context indices [2, 3, 4, 10, 11, 12] with additional start
            # and final windows with indices [0, 1] and [14, 15] respectively.

            # Find the stride as half the max pieces, ignoring the special start and end tokens
            # Calculate an offset to extract the centermost embeddings of each window
            stride = (self.max_pieces - self.num_start_tokens - self.num_end_tokens) // 2
            stride_offset = stride // 2 + self.num_start_tokens

            first_window = list(range(stride_offset))

            max_context_windows = [
                i
                for i in range(full_seq_len)
                if stride_offset - 1 < i % self.max_pieces < stride_offset + stride
            ]

            # Lookback what's left, unless it's the whole self.max_pieces window
            if full_seq_len % self.max_pieces == 0:
                lookback = self.max_pieces
            else:
                lookback = full_seq_len % self.max_pieces

            final_window_start = full_seq_len - lookback + stride_offset + stride
            final_window = list(range(final_window_start, full_seq_len))

            select_indices = first_window + max_context_windows + final_window

            initial_dims.append(len(select_indices))

            recombined_embeddings = unpacked_embeddings[:, :, select_indices]
        else:
            recombined_embeddings = all_encoder_layers

        # Recombine the outputs of all layers
        # (layers, batch_size * d1 * ... * dn, sequence_length, embedding_dim)
        # recombined = torch.cat(combined, dim=2)
        input_mask = (recombined_embeddings != 0).long()

        if self._scalar_mix is not None:
            mix = self._scalar_mix(recombined_embeddings, input_mask)
        else:
            mix = recombined_embeddings[-1]

        # At this point, mix is (batch_size * d1 * ... * dn, sequence_length, embedding_dim)

        if offsets is None:
            # Resize to (batch_size, d1, ..., dn, sequence_length, embedding_dim)
            dims = initial_dims if needs_split else input_ids.size()
            return util.uncombine_initial_dims(mix, dims)
        else:
            # offsets is (batch_size, d1, ..., dn, orig_sequence_length)
            offsets2d = util.combine_initial_dims(offsets)
            # now offsets is (batch_size * d1 * ... * dn, orig_sequence_length)
            range_vector = util.get_range_vector(
                offsets2d.size(0), device=util.get_device_of(mix)
            ).unsqueeze(1)
            # selected embeddings is also (batch_size * d1 * ... * dn, orig_sequence_length)
            selected_embeddings = mix[range_vector, offsets2d]

            return util.uncombine_initial_dims(selected_embeddings, offsets.size())


# @TokenEmbedder.register("bert-pretrained")
class PretrainedBertEmbedder(BertEmbedder):

    """
    Parameters
    ----------
    pretrained_model: ``str``
        Either the name of the pretrained model to use (e.g. 'bert-base-uncased'),
        or the path to the .tar.gz file with the model weights.
        If the name is a key in the list of pretrained models at
        https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/pytorch_pretrained_bert/modeling.py#L41
        the corresponding path will be used; otherwise it will be interpreted as a path or URL.
    requires_grad : ``bool``, optional (default = False)
        If True, compute gradient of BERT parameters for fine tuning.
    top_layer_only: ``bool``, optional (default = ``False``)
        If ``True``, then only return the top layer instead of applying the scalar mix.
    scalar_mix_parameters: ``List[float]``, optional, (default = None)
        If not ``None``, use these scalar mix parameters to weight the representations
        produced by different layers. These mixing weights are not updated during
        training.
    """

    def __init__(
        self,
        pretrained_model: str,
        requires_grad: bool = False,
        top_layer_only: bool = False,
        special_tokens_fix: int = 0,
    ) -> None:
        model = PretrainedBertModel.load(pretrained_model)

        for param in model.parameters():
            param.requires_grad = requires_grad

        super().__init__(
            bert_model=model,
            top_layer_only=top_layer_only
        )

        if special_tokens_fix:
            try:
                vocab_size = self.bert_model.embeddings.word_embeddings.num_embeddings
            except AttributeError:
                # reserve more space
                vocab_size = self.bert_model.word_embedding.num_embeddings + 5
            self.bert_model.resize_token_embeddings(vocab_size + 1)
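
A sketch of the offsets contract documented in ``forward`` above, assuming 'bert-base-uncased' is available (the wordpiece ids below are made up for illustration):

    # Hypothetical illustration of token-level pooling via `offsets`
    import torch
    from gector.bert_token_embedder import PretrainedBertEmbedder

    embedder = PretrainedBertEmbedder("bert-base-uncased")
    input_ids = torch.tensor([[101, 2139, 2378, 4221, 2135, 2025, 102]])  # made-up wordpiece ids
    offsets = torch.tensor([[4, 5]])  # "last wordpiece" position of each original token
    per_token = embedder(input_ids, offsets=offsets)
    print(per_token.shape)  # expected: torch.Size([1, 2, 768])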
gector/datareader.py ADDED
@@ -0,0 +1,151 @@
"""Tweaked AllenNLP dataset reader."""
import logging
import re
from random import random
from typing import Dict, List

from allennlp.common.file_utils import cached_path
from allennlp.data.dataset_readers.dataset_reader import DatasetReader
from allennlp.data.fields import TextField, SequenceLabelField, MetadataField, Field
from allennlp.data.instance import Instance
from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer
from allennlp.data.tokenizers import Token
from overrides import overrides

from utils.helpers import SEQ_DELIMETERS, START_TOKEN

logger = logging.getLogger(__name__)  # pylint: disable=invalid-name


@DatasetReader.register("seq2labels_datareader")
class Seq2LabelsDatasetReader(DatasetReader):
    """
    Reads instances from a pretokenised file where each line is in the following format:

    WORD###TAG [TAB] WORD###TAG [TAB] ..... \n

    and converts it into a ``Dataset`` suitable for sequence tagging. You can also specify
    alternative delimiters in the constructor.

    Parameters
    ----------
    delimeters: ``dict``
        The dictionary with all delimiters.
    token_indexers : ``Dict[str, TokenIndexer]``, optional (default=``{"tokens": SingleIdTokenIndexer()}``)
        We use this to define the input representation for the text. See :class:`TokenIndexer`.
        Note that the `output` tags will always correspond to single token IDs based on how they
        are pre-tokenised in the data file.
    max_len: if set, long sentences will be truncated to this length
    """
    # fix broken sentences mostly in Lang8
    BROKEN_SENTENCES_REGEXP = re.compile(r'\.[a-zA-RT-Z]')

    def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 delimeters: dict = SEQ_DELIMETERS,
                 skip_correct: bool = False,
                 skip_complex: int = 0,
                 lazy: bool = False,
                 max_len: int = None,
                 test_mode: bool = False,
                 tag_strategy: str = "keep_one",
                 tn_prob: float = 0,
                 tp_prob: float = 0,
                 broken_dot_strategy: str = "keep") -> None:
        super().__init__(lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
        self._delimeters = delimeters
        self._max_len = max_len
        self._skip_correct = skip_correct
        self._skip_complex = skip_complex
        self._tag_strategy = tag_strategy
        self._broken_dot_strategy = broken_dot_strategy
        self._test_mode = test_mode
        self._tn_prob = tn_prob
        self._tp_prob = tp_prob

    @overrides
    def _read(self, file_path):
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)
        with open(file_path, "r") as data_file:
            logger.info("Reading instances from lines in file at: %s", file_path)
            for line in data_file:
                line = line.strip("\n")
                # skip blank and broken lines
                if not line or (not self._test_mode and self._broken_dot_strategy == 'skip'
                                and self.BROKEN_SENTENCES_REGEXP.search(line) is not None):
                    continue

                tokens_and_tags = [pair.rsplit(self._delimeters['labels'], 1)
                                   for pair in line.split(self._delimeters['tokens'])]
                try:
                    tokens = [Token(token) for token, tag in tokens_and_tags]
                    tags = [tag for token, tag in tokens_and_tags]
                except ValueError:
                    tokens = [Token(token[0]) for token in tokens_and_tags]
                    tags = None

                if tokens and tokens[0] != Token(START_TOKEN):
                    tokens = [Token(START_TOKEN)] + tokens

                words = [x.text for x in tokens]
                if self._max_len is not None:
                    tokens = tokens[:self._max_len]
                    tags = None if tags is None else tags[:self._max_len]
                instance = self.text_to_instance(tokens, tags, words)
                if instance:
                    yield instance

    def extract_tags(self, tags: List[str]):
        op_del = self._delimeters['operations']

        labels = [x.split(op_del) for x in tags]

        complex_flag_dict = {}
        # get flags
        for i in range(5):
            idx = i + 1
            complex_flag_dict[idx] = sum([len(x) > idx for x in labels])

        if self._tag_strategy == "keep_one":
            # get only first candidates for r_tags in right and the last for left
            labels = [x[0] for x in labels]
        elif self._tag_strategy == "merge_all":
            # consider phrases as words
            pass
        else:
            raise Exception("Incorrect tag strategy")

        detect_tags = ["CORRECT" if label == "$KEEP" else "INCORRECT" for label in labels]
        return labels, detect_tags, complex_flag_dict

    def text_to_instance(self, tokens: List[Token], tags: List[str] = None,
                         words: List[str] = None) -> Instance:  # type: ignore
        """
        We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
        """
        # pylint: disable=arguments-differ
        fields: Dict[str, Field] = {}
        sequence = TextField(tokens, self._token_indexers)
        fields["tokens"] = sequence
        fields["metadata"] = MetadataField({"words": words})
        if tags is not None:
            labels, detect_tags, complex_flag_dict = self.extract_tags(tags)
            if self._skip_complex and complex_flag_dict[self._skip_complex] > 0:
                return None
            rnd = random()
            # skip TN
            if self._skip_correct and all(x == "CORRECT" for x in detect_tags):
                if rnd > self._tn_prob:
                    return None
            # skip TP
            else:
                if rnd > self._tp_prob:
                    return None

            fields["labels"] = SequenceLabelField(labels, sequence,
                                                  label_namespace="labels")
            fields["d_tags"] = SequenceLabelField(detect_tags, sequence,
                                                  label_namespace="d_tags")
        return Instance(fields)
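
For reference, one hypothetical line of training data in the schematic ``WORD###TAG [TAB] WORD###TAG`` format from the class docstring, using tag names from the GECToR inventory seen in gec_model.py (the real delimiters come from utils.helpers.SEQ_DELIMETERS, which is not part of this commit):

    She###$KEEP [TAB] go###$TRANSFORM_VERB_VB_VBZ [TAB] home###$KEEP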
gector/gec_model.py ADDED
@@ -0,0 +1,298 @@
"""Wrapper of AllenNLP model. Fixes errors based on model predictions"""
import logging
import os
import sys
from time import time

import torch
from allennlp.data.dataset import Batch
from allennlp.data.fields import TextField
from allennlp.data.instance import Instance
from allennlp.data.tokenizers import Token
from allennlp.data.vocabulary import Vocabulary
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.nn import util

from gector.bert_token_embedder import PretrainedBertEmbedder
from gector.seq2labels_model import Seq2Labels
from gector.tokenizer_indexer import PretrainedBertIndexer
from utils.helpers import PAD, UNK, get_target_sent_by_edits, START_TOKEN
from utils.helpers import get_weights_name

logging.getLogger("werkzeug").setLevel(logging.ERROR)
logger = logging.getLogger(__file__)


class GecBERTModel(object):
    def __init__(self, vocab_path=None, model_paths=None,
                 weigths=None,  # (sic) spelling kept to match existing callers
                 max_len=50,
                 min_len=3,
                 lowercase_tokens=False,
                 log=False,
                 iterations=3,
                 model_name='roberta',
                 special_tokens_fix=1,
                 is_ensemble=True,
                 min_error_probability=0.0,
                 confidence=0,
                 del_confidence=0,
                 resolve_cycles=False,
                 ):
        self.model_weights = list(map(float, weigths)) if weigths else [1] * len(model_paths)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.max_len = max_len
        self.min_len = min_len
        self.lowercase_tokens = lowercase_tokens
        self.min_error_probability = min_error_probability
        self.vocab = Vocabulary.from_files(vocab_path)
        self.log = log
        self.iterations = iterations
        self.confidence = confidence
        self.del_conf = del_confidence
        self.resolve_cycles = resolve_cycles
        # set training parameters and operations

        self.indexers = []
        self.models = []
        for model_path in model_paths:
            if is_ensemble:
                model_name, special_tokens_fix = self._get_model_data(model_path)
            weights_name = get_weights_name(model_name, lowercase_tokens)
            self.indexers.append(self._get_indexer(weights_name, special_tokens_fix))
            model = Seq2Labels(vocab=self.vocab,
                               text_field_embedder=self._get_embedder(weights_name, special_tokens_fix),
                               confidence=self.confidence,
                               del_confidence=self.del_conf,
                               ).to(self.device)
            if torch.cuda.is_available():
                model.load_state_dict(torch.load(model_path), strict=False)
            else:
                model.load_state_dict(torch.load(model_path,
                                                 map_location=torch.device('cpu')),
                                      strict=False)
            model.eval()
            self.models.append(model)

    @staticmethod
    def _get_model_data(model_path):
        model_name = model_path.split('/')[-1]
        tr_model, stf = model_name.split('_')[:2]
        return tr_model, int(stf)

    def _restore_model(self, input_path):
        if os.path.isdir(input_path):
            print("Model could not be restored from directory", file=sys.stderr)
            filenames = []
        else:
            filenames = [input_path]
        for model_path in filenames:
            try:
                if torch.cuda.is_available():
                    loaded_model = torch.load(model_path)
                else:
                    loaded_model = torch.load(model_path,
                                              map_location=lambda storage,
                                              loc: storage)
            except:
                print(f"{model_path} is not a valid model", file=sys.stderr)
            own_state = self.model.state_dict()
            for name, weights in loaded_model.items():
                if name not in own_state:
                    continue
                try:
                    if len(filenames) == 1:
                        own_state[name].copy_(weights)
                    else:
                        own_state[name] += weights
                except RuntimeError:
                    continue
        print("Model is restored", file=sys.stderr)

    def predict(self, batches):
        t11 = time()
        predictions = []
        for batch, model in zip(batches, self.models):
            batch = util.move_to_device(batch.as_tensor_dict(), 0 if torch.cuda.is_available() else -1)
            with torch.no_grad():
                prediction = model.forward(**batch)
            predictions.append(prediction)

        preds, idx, error_probs = self._convert(predictions)
        t55 = time()
        if self.log:
            print(f"Inference time {t55 - t11}")
        return preds, idx, error_probs

    def get_token_action(self, token, index, prob, sugg_token):
        """Get list of suggested actions for a token."""
        # cases when we don't need to do anything
        if prob < self.min_error_probability or sugg_token in [UNK, PAD, '$KEEP']:
            return None

        if sugg_token.startswith('$REPLACE_') or sugg_token.startswith('$TRANSFORM_') or sugg_token == '$DELETE':
            start_pos = index
            end_pos = index + 1
        elif sugg_token.startswith("$APPEND_") or sugg_token.startswith("$MERGE_"):
            start_pos = index + 1
            end_pos = index + 1

        if sugg_token == "$DELETE":
            sugg_token_clear = ""
        elif sugg_token.startswith('$TRANSFORM_') or sugg_token.startswith("$MERGE_"):
            sugg_token_clear = sugg_token[:]
        else:
            sugg_token_clear = sugg_token[sugg_token.index('_') + 1:]

        return start_pos - 1, end_pos - 1, sugg_token_clear, prob

    def _get_embedder(self, weights_name, special_tokens_fix):
        embedders = {'bert': PretrainedBertEmbedder(
            pretrained_model=weights_name,
            requires_grad=False,
            top_layer_only=True,
            special_tokens_fix=special_tokens_fix)
        }
        text_field_embedder = BasicTextFieldEmbedder(
            token_embedders=embedders,
            embedder_to_indexer_map={"bert": ["bert", "bert-offsets"]},
            allow_unmatched_keys=True)
        return text_field_embedder

    def _get_indexer(self, weights_name, special_tokens_fix):
        bert_token_indexer = PretrainedBertIndexer(
            pretrained_model=weights_name,
            do_lowercase=self.lowercase_tokens,
            max_pieces_per_token=5,
            special_tokens_fix=special_tokens_fix
        )
        return {'bert': bert_token_indexer}

    def preprocess(self, token_batch):
        seq_lens = [len(sequence) for sequence in token_batch if sequence]
        if not seq_lens:
            return []
        max_len = min(max(seq_lens), self.max_len)
        batches = []
        for indexer in self.indexers:
            batch = []
            for sequence in token_batch:
                tokens = sequence[:max_len]
                tokens = [Token(token) for token in ['$START'] + tokens]
                batch.append(Instance({'tokens': TextField(tokens, indexer)}))
            batch = Batch(batch)
            batch.index_instances(self.vocab)
            batches.append(batch)

        return batches

    def _convert(self, data):
        all_class_probs = torch.zeros_like(data[0]['class_probabilities_labels'])
        error_probs = torch.zeros_like(data[0]['max_error_probability'])
        for output, weight in zip(data, self.model_weights):
            all_class_probs += weight * output['class_probabilities_labels'] / sum(self.model_weights)
            error_probs += weight * output['max_error_probability'] / sum(self.model_weights)

        max_vals = torch.max(all_class_probs, dim=-1)
        probs = max_vals[0].tolist()
        idx = max_vals[1].tolist()
        return probs, idx, error_probs.tolist()

    def update_final_batch(self, final_batch, pred_ids, pred_batch,
                           prev_preds_dict):
        new_pred_ids = []
        total_updated = 0
        for i, orig_id in enumerate(pred_ids):
            orig = final_batch[orig_id]
            pred = pred_batch[i]
            prev_preds = prev_preds_dict[orig_id]
            if orig != pred and pred not in prev_preds:
                final_batch[orig_id] = pred
                new_pred_ids.append(orig_id)
                prev_preds_dict[orig_id].append(pred)
                total_updated += 1
            elif orig != pred and pred in prev_preds:
                # update final batch, but stop iterations
                final_batch[orig_id] = pred
                total_updated += 1
            else:
                continue
        return final_batch, new_pred_ids, total_updated

    def postprocess_batch(self, batch, all_probabilities, all_idxs,
                          error_probs):
        all_results = []
        noop_index = self.vocab.get_token_index("$KEEP", "labels")
        for tokens, probabilities, idxs, error_prob in zip(batch,
                                                           all_probabilities,
                                                           all_idxs,
                                                           error_probs):
            length = min(len(tokens), self.max_len)
            edits = []

            # skip the whole sentence if there are no errors
            if max(idxs) == 0:
                all_results.append(tokens)
                continue

            # skip the whole sentence if the probability of correctness is not high
            if error_prob < self.min_error_probability:
                all_results.append(tokens)
                continue

            for i in range(length + 1):
                # because of START token
                if i == 0:
                    token = START_TOKEN
                else:
                    token = tokens[i - 1]
                # skip if there is no error
                if idxs[i] == noop_index:
                    continue

                sugg_token = self.vocab.get_token_from_index(idxs[i],
                                                             namespace='labels')
                action = self.get_token_action(token, i, probabilities[i],
                                               sugg_token)
                if not action:
                    continue

                edits.append(action)
            all_results.append(get_target_sent_by_edits(tokens, edits))
        return all_results

    def handle_batch(self, full_batch):
        """
        Handle batch of requests.
        """
        final_batch = full_batch[:]
        batch_size = len(full_batch)
        prev_preds_dict = {i: [final_batch[i]] for i in range(len(final_batch))}
        short_ids = [i for i in range(len(full_batch))
                     if len(full_batch[i]) < self.min_len]
        pred_ids = [i for i in range(len(full_batch)) if i not in short_ids]
        total_updates = 0

        for n_iter in range(self.iterations):
            orig_batch = [final_batch[i] for i in pred_ids]

            sequences = self.preprocess(orig_batch)

            if not sequences:
                break
            probabilities, idxs, error_probs = self.predict(sequences)

            pred_batch = self.postprocess_batch(orig_batch, probabilities,
                                                idxs, error_probs)
            if self.log:
                print(f"Iteration {n_iter + 1}. Predicted {round(100 * len(pred_ids) / batch_size, 1)}% of sentences.")

            final_batch, pred_ids, cnt = \
                self.update_final_batch(final_batch, pred_ids, pred_batch,
                                        prev_preds_dict)
            total_updates += cnt

            if not pred_ids:
                break

        return final_batch, total_updates
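
Taken together, the wrapper is driven like this (a sketch; the checkpoint and vocabulary paths are the ones assumed in app.py above, and the sample batch is illustrative):

    # Hypothetical direct use of GecBERTModel outside the Gradio app
    from gector.gec_model import GecBERTModel

    model = GecBERTModel(vocab_path="output_vocabulary",
                         model_paths=["gector/roberta_1_gector.th"],
                         model_name="roberta",
                         special_tokens_fix=1,
                         is_ensemble=False)
    batch = [["she", "go", "to", "school", "every", "days"]]  # pre-tokenized sentences
    corrected, total_updates = model.handle_batch(batch)
    print(" ".join(corrected[0]), total_updates)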
gector/seq2labels_model.py ADDED
@@ -0,0 +1,194 @@
"""Basic model. Predicts tags for every token"""
from typing import Dict, Optional, List, Any

import numpy
import torch
import torch.nn.functional as F
from allennlp.data import Vocabulary
from allennlp.models.model import Model
from allennlp.modules import TimeDistributed, TextFieldEmbedder
from allennlp.nn import InitializerApplicator, RegularizerApplicator
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits
from allennlp.training.metrics import CategoricalAccuracy
from overrides import overrides
from torch.nn.modules.linear import Linear


@Model.register("seq2labels")
class Seq2Labels(Model):
    """
    This ``Seq2Labels`` simply encodes a sequence of text with a stacked ``Seq2SeqEncoder``, then
    predicts a tag (or a couple of tags) for each token in the sequence.

    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    text_field_embedder : ``TextFieldEmbedder``, required
        Used to embed the ``tokens`` ``TextField`` we get as input to the model.
    encoder : ``Seq2SeqEncoder``
        The encoder (with its own internal stacking) that we will use in between embedding tokens
        and predicting output tags.
    calculate_span_f1 : ``bool``, optional (default=``None``)
        Calculate span-level F1 metrics during training. If this is ``True``, then
        ``label_encoding`` is required. If ``None`` and
        label_encoding is specified, this is set to ``True``.
        If ``None`` and label_encoding is not specified, it defaults
        to ``False``.
    label_encoding : ``str``, optional (default=``None``)
        Label encoding to use when calculating span f1.
        Valid options are "BIO", "BIOUL", "IOB1", "BMES".
        Required if ``calculate_span_f1`` is true.
    labels_namespace : ``str``, optional (default=``labels``)
        This is needed to compute the SpanBasedF1Measure metric, if desired.
        Unless you did something unusual, the default value should be what you want.
    verbose_metrics : ``bool``, optional (default = False)
        If true, metrics will be returned per label class in addition
        to the overall statistics.
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """

    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 predictor_dropout=0.0,
                 labels_namespace: str = "labels",
                 detect_namespace: str = "d_tags",
                 verbose_metrics: bool = False,
                 label_smoothing: float = 0.0,
                 confidence: float = 0.0,
                 del_confidence: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(Seq2Labels, self).__init__(vocab, regularizer)

        self.label_namespaces = [labels_namespace,
                                 detect_namespace]
        self.text_field_embedder = text_field_embedder
        self.num_labels_classes = self.vocab.get_vocab_size(labels_namespace)
        self.num_detect_classes = self.vocab.get_vocab_size(detect_namespace)
        self.label_smoothing = label_smoothing
        self.confidence = confidence
        self.del_conf = del_confidence
        self.incorr_index = self.vocab.get_token_index("INCORRECT",
                                                       namespace=detect_namespace)

        self._verbose_metrics = verbose_metrics
        self.predictor_dropout = TimeDistributed(torch.nn.Dropout(predictor_dropout))

        self.tag_labels_projection_layer = TimeDistributed(
            Linear(text_field_embedder._token_embedders['bert'].get_output_dim(), self.num_labels_classes))

        self.tag_detect_projection_layer = TimeDistributed(
            Linear(text_field_embedder._token_embedders['bert'].get_output_dim(), self.num_detect_classes))

        self.metrics = {"accuracy": CategoricalAccuracy()}

        initializer(self)

    @overrides
    def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                labels: torch.LongTensor = None,
                d_tags: torch.LongTensor = None,
                metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors. At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence. The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        labels : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels of shape
            ``(batch_size, num_tokens)``.
        d_tags : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels of shape
            ``(batch_size, num_tokens)``.
        metadata : ``List[Dict[str, Any]]``, optional, (default = None)
            metadata containing the original words in the sentence to be tagged under a 'words' key.

        Returns
        -------
        An output dictionary consisting of:
        logits : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            unnormalised log probabilities of the tag classes.
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            a distribution of the tag classes per word.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        encoded_text = self.text_field_embedder(tokens)
        batch_size, sequence_length, _ = encoded_text.size()
        mask = get_text_field_mask(tokens)
        logits_labels = self.tag_labels_projection_layer(self.predictor_dropout(encoded_text))
        logits_d = self.tag_detect_projection_layer(encoded_text)

        class_probabilities_labels = F.softmax(logits_labels, dim=-1).view(
            [batch_size, sequence_length, self.num_labels_classes])
        class_probabilities_d = F.softmax(logits_d, dim=-1).view(
            [batch_size, sequence_length, self.num_detect_classes])
        error_probs = class_probabilities_d[:, :, self.incorr_index] * mask
        incorr_prob = torch.max(error_probs, dim=-1)[0]

        probability_change = [self.confidence, self.del_conf] + [0] * (self.num_labels_classes - 2)
        class_probabilities_labels += torch.FloatTensor(probability_change).repeat(
            (batch_size, sequence_length, 1)).to(class_probabilities_labels.device)

        output_dict = {"logits_labels": logits_labels,
                       "logits_d_tags": logits_d,
                       "class_probabilities_labels": class_probabilities_labels,
                       "class_probabilities_d_tags": class_probabilities_d,
                       "max_error_probability": incorr_prob}
        if labels is not None and d_tags is not None:
            loss_labels = sequence_cross_entropy_with_logits(logits_labels, labels, mask,
                                                             label_smoothing=self.label_smoothing)
            loss_d = sequence_cross_entropy_with_logits(logits_d, d_tags, mask)
            for metric in self.metrics.values():
                metric(logits_labels, labels, mask.float())
                metric(logits_d, d_tags, mask.float())
            output_dict["loss"] = loss_labels + loss_d

        if metadata is not None:
            output_dict["words"] = [x["words"] for x in metadata]
        return output_dict

    @overrides
    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Does a simple position-wise argmax over each token, converts indices to string labels, and
        adds a ``"tags"`` key to the dictionary with the result.
        """
        for label_namespace in self.label_namespaces:
            all_predictions = output_dict[f'class_probabilities_{label_namespace}']
            all_predictions = all_predictions.cpu().data.numpy()
            if all_predictions.ndim == 3:
                predictions_list = [all_predictions[i] for i in range(all_predictions.shape[0])]
            else:
                predictions_list = [all_predictions]
            all_tags = []

            for predictions in predictions_list:
                argmax_indices = numpy.argmax(predictions, axis=-1)
                tags = [self.vocab.get_token_from_index(x, namespace=label_namespace)
                        for x in argmax_indices]
                all_tags.append(tags)
            output_dict[f'{label_namespace}'] = all_tags
        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics_to_return = {metric_name: metric.get_metric(reset) for
                             metric_name, metric in self.metrics.items()}
        return metrics_to_return
gector/tokenization.py ADDED
@@ -0,0 +1,181 @@
import os
from time import time


os.environ['TOKENIZERS_PARALLELISM'] = 'false'


def get_bpe_groups(token_offsets, bpe_offsets, input_ids, max_bpe_pieces=5):
    bpe_groups = []
    last_used_bpe = 0
    # find the size of offsets
    if (0, 0) in bpe_offsets:
        bpe_size = bpe_offsets.index((0, 0))
    else:
        bpe_size = len(bpe_offsets)

    saved_ids = [i for i in range(len(input_ids))]
    redundant_ids = []
    for token_offset in token_offsets:
        start_token, end_token = token_offset
        bpe_group = []
        mapping_is_found = False
        for i in range(last_used_bpe, bpe_size):
            start_bpe, end_bpe = bpe_offsets[i]
            if start_bpe >= start_token and end_bpe <= end_token:
                # check that bpe_group satisfies the max_bpe_pieces constraint
                if len(bpe_group) < max_bpe_pieces:
                    bpe_group.append(i)
                else:
                    redundant_ids.append(i)
                last_used_bpe = i + 1
                mapping_is_found = True
            elif mapping_is_found:
                # stop doing useless iterations
                break
            else:
                continue
        bpe_groups.append(bpe_group)
    saved_ids = [i for i in saved_ids if i not in redundant_ids]
    return bpe_groups, saved_ids


def reduce_input_ids(input_ids, bpe_groups, saved_ids,
                     max_bpe_length=80, max_bpe_pieces=5):
    # check that the sequence satisfies the max_bpe_length constraint
    while len(saved_ids) > max_bpe_length:
        max_bpe_pieces -= 1
        for token_id in range(len(bpe_groups)):
            if len(bpe_groups[token_id]) > max_bpe_pieces:
                redundant_ids = bpe_groups[token_id][max_bpe_pieces:]
                bpe_groups[token_id] = bpe_groups[token_id][:max_bpe_pieces]
                saved_ids = [i for i in saved_ids if i not in redundant_ids]

    # get offsets
    reduced_ids = [input_ids[i] for i in saved_ids]
    correct_offsets = []
    idx = 0
    for i, bpe_group in enumerate(bpe_groups):
        norm_idx = min(idx, len(reduced_ids) - 1)
        correct_offsets.append(norm_idx)
        idx += len(bpe_group)

    return reduced_ids, correct_offsets


def get_offsets_and_reduce_input_ids(tokenizer_output, token_offset_list,
                                     index_name="bert", max_bpe_length=80,
                                     max_bpe_pieces=5):
    timings = {"bpe": 0, "reduce": 0, "mask": 0}
    output_ids, output_offsets, output_masks = [], [], []
    for i, token_offsets in enumerate(token_offset_list):
        input_ids = tokenizer_output['input_ids'][i]

        t0 = time()
        # get bpe level offsets
        bpe_offsets = tokenizer_output['offset_mapping'][i]
        bpe_groups, saved_ids = get_bpe_groups(token_offsets, bpe_offsets,
                                               input_ids,
                                               max_bpe_pieces=max_bpe_pieces)
        t1 = time()
        timings["bpe"] += t1 - t0

        # reduce sequence length
        reduced_ids, correct_offsets = reduce_input_ids(input_ids, bpe_groups,
                                                        saved_ids,
                                                        max_bpe_length=max_bpe_length,
                                                        max_bpe_pieces=max_bpe_pieces)

        t2 = time()
        timings["reduce"] += t2 - t1

        # get mask
        bpe_mask = [1 for _ in correct_offsets]
        output_ids.append(reduced_ids)
        output_offsets.append(correct_offsets)
        output_masks.append(bpe_mask)

        t3 = time()
        timings["mask"] += t3 - t2

    # tt = sum(timings.values())
    # timings = {k: f"{round(v * 100 / tt, 2)}%" for k, v in timings.items()}
    # print(timings)

    output = {index_name: output_ids,
              f"{index_name}-offsets": output_offsets,
              "mask": output_masks}
    return output


def get_offset_for_tokens(tokens):
    sentence = " ".join(tokens)
    token_offsets = []
    end_idx = 0
    for token in tokens:
        idx = sentence[end_idx:].index(token) + end_idx
        end_idx = idx + len(token)
        offset = (idx, end_idx)
        token_offsets.append(offset)
    return token_offsets


def get_token_offsets(batch):
    token_offset_list = []
    for tokens in batch:
        token_offsets = get_offset_for_tokens(tokens)
        token_offset_list.append(token_offsets)
    return token_offset_list


def pad_output(output, pad_idx=0):
    padded_output = {}
    for input_key in output.keys():
        indexes = output[input_key]
        max_len = max([len(x) for x in indexes])
        padded_indexes = []
        for index_list in indexes:
            cur_len = len(index_list)
            pad_len = max_len - cur_len
            padded_indexes.append(index_list + [pad_idx] * pad_len)
        padded_output[input_key] = padded_indexes
    return padded_output


def tokenize_batch(tokenizer, batch_tokens, index_name="bert",
                   max_bpe_length=80, max_bpe_pieces=5):
    timings = {}
    t0 = time()
    # get batch with sentences
    batch_sentences = [" ".join(x) for x in batch_tokens]
    # get token level offsets
    token_offset_list = get_token_offsets(batch_tokens)
    # token_offset_list = get_token_offsets_multi(batch_tokens)
    t1 = time()
    timings["offset_time"] = t1 - t0
    # tokenize batch
    tokenizer_output = tokenizer.batch_encode_plus(batch_sentences,
                                                   pad_to_max_length=False,
                                                   return_offsets_mapping=True,
                                                   add_special_tokens=False)

    t2 = time()
    timings["tokenize_time"] = t2 - t1
    # postprocess batch
    output = get_offsets_and_reduce_input_ids(tokenizer_output,
                                              token_offset_list,
                                              index_name=index_name,
                                              max_bpe_length=max_bpe_length,
                                              max_bpe_pieces=max_bpe_pieces)

    t3 = time()
    timings["reduce_time"] = t3 - t2
    # pad output
    output = pad_output(output)
    t4 = time()
    timings["padding_time"] = t4 - t3
    # tt = sum(timings.values())
    # timings = {k: f"{round(v * 100 / tt, 2)}%" for k, v in timings.items()}
    # print(timings)

    return output
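
A standalone check of ``tokenize_batch`` (any fast Hugging Face tokenizer with offset mapping should do; 'roberta-base' here is an assumption, not something this commit pins down):

    # Hypothetical sanity check for the tokenization helpers
    from transformers import AutoTokenizer
    from gector.tokenization import tokenize_batch

    tok = AutoTokenizer.from_pretrained("roberta-base", use_fast=True)
    out = tokenize_batch(tok, [["Hello", "world"]], index_name="bert")
    print(out["bert"], out["bert-offsets"], out["mask"])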
gector/tokenizer_indexer.py ADDED
@@ -0,0 +1,161 @@
1
+ """Tweaked version of corresponding AllenNLP file"""
2
+ import logging
3
+ from collections import defaultdict
4
+ from typing import Dict, List, Callable
5
+
6
+ from allennlp.common.util import pad_sequence_to_length
7
+ from allennlp.data.token_indexers.token_indexer import TokenIndexer
8
+ from allennlp.data.tokenizers.token import Token
9
+ from allennlp.data.vocabulary import Vocabulary
10
+ from overrides import overrides
11
+ from transformers import AutoTokenizer
12
+
13
+ from utils.helpers import START_TOKEN
14
+
15
+ from gector.tokenization import tokenize_batch
16
+ import copy
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ # TODO(joelgrus): Figure out how to generate token_type_ids out of this token indexer.
22
+
23
+
24
+ class TokenizerIndexer(TokenIndexer[int]):
25
+ """
26
+ A token indexer that does the wordpiece-tokenization (e.g. for BERT embeddings).
27
+ If you are using one of the pretrained BERT models, you'll want to use the ``PretrainedBertIndexer``
28
+ subclass rather than this base class.
29
+
30
+ Parameters
31
+ ----------
32
+     tokenizer : ``Callable[[str], List[str]]``
+         A function that does the actual tokenization.
+     max_pieces : int, optional (default: 512)
+         The BERT embedder uses positional embeddings and so has a corresponding
+         maximum length for its input ids. Any inputs longer than this will
+         either be truncated (default), or be split apart and batched using a
+         sliding window.
+     token_min_padding_length : ``int``, optional (default=``0``)
+         See :class:`TokenIndexer`.
+     """
+ 
+     def __init__(self,
+                  tokenizer: Callable[[str], List[str]],
+                  max_pieces: int = 512,
+                  max_pieces_per_token: int = 3,
+                  token_min_padding_length: int = 0) -> None:
+         super().__init__(token_min_padding_length)
+ 
+         # The BERT code itself does a two-step tokenization:
+         #    sentence -> [words], and then word -> [wordpieces]
+         # In AllenNLP, the first step is implemented as the ``BertBasicWordSplitter``,
+         # and this token indexer handles the second.
+ 
+         self.tokenizer = tokenizer
+         self.max_pieces_per_token = max_pieces_per_token
+         self.max_pieces = max_pieces
+         self.max_pieces_per_sentence = 80
+ 
+     @overrides
+     def tokens_to_indices(self, tokens: List[Token],
+                           vocabulary: Vocabulary,
+                           index_name: str) -> Dict[str, List[int]]:
+         text = [token.text for token in tokens]
+         batch_tokens = [text]
+ 
+         output_fast = tokenize_batch(self.tokenizer,
+                                      batch_tokens,
+                                      max_bpe_length=self.max_pieces,
+                                      max_bpe_pieces=self.max_pieces_per_token)
+         output_fast = {k: v[0] for k, v in output_fast.items()}
+         return output_fast
+ 
+     @overrides
+     def count_vocab_items(self, token: Token, counter: Dict[str, Dict[str, int]]):
+         # If we only use pretrained models, we don't need to do anything here.
+         pass
+ 
+     @overrides
+     def get_padding_token(self) -> int:
+         return 0
+ 
+     @overrides
+     def get_padding_lengths(self, token: int) -> Dict[str, int]:  # pylint: disable=unused-argument
+         return {}
+ 
+     @overrides
+     def pad_token_sequence(self,
+                            tokens: Dict[str, List[int]],
+                            desired_num_tokens: Dict[str, int],
+                            padding_lengths: Dict[str, int]) -> Dict[str, List[int]]:  # pylint: disable=unused-argument
+         return {key: pad_sequence_to_length(val, desired_num_tokens[key])
+                 for key, val in tokens.items()}
+ 
+     @overrides
+     def get_keys(self, index_name: str) -> List[str]:
+         """
+         We need to override this because the indexer generates multiple keys.
+         """
+         # pylint: disable=no-self-use
+         return [index_name, f"{index_name}-offsets", f"{index_name}-type-ids", "mask"]
+ 
+ 
+ class PretrainedBertIndexer(TokenizerIndexer):
+     # pylint: disable=line-too-long
+     """
+     A ``TokenIndexer`` corresponding to a pretrained BERT model.
+ 
+     Parameters
+     ----------
+     pretrained_model : ``str``
+         Either the name of the pretrained model to use (e.g. 'bert-base-uncased'),
+         or the path to the .txt file with its vocabulary.
+         If the name is a key in the list of pretrained models at
+         https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/pytorch_pretrained_bert/tokenization.py#L33
+         the corresponding path will be used; otherwise it will be interpreted as a path or URL.
+     do_lowercase : ``bool``, optional (default = True)
+         Whether to lowercase the tokens before converting to wordpiece ids.
+     max_pieces : int, optional (default: 512)
+         The BERT embedder uses positional embeddings and so has a corresponding
+         maximum length for its input ids. Any inputs longer than this will
+         either be truncated (default), or be split apart and batched using a
+         sliding window.
+     """
+ 
+     def __init__(self,
+                  pretrained_model: str,
+                  do_lowercase: bool = True,
+                  max_pieces: int = 512,
+                  max_pieces_per_token: int = 5,
+                  special_tokens_fix: int = 0) -> None:
+ 
+         if pretrained_model.endswith("-cased") and do_lowercase:
+             logger.warning("Your BERT model appears to be cased, "
+                            "but your indexer is lowercasing tokens.")
+         elif pretrained_model.endswith("-uncased") and not do_lowercase:
+             logger.warning("Your BERT model appears to be uncased, "
+                            "but your indexer is not lowercasing tokens.")
+ 
+         model_name = copy.deepcopy(pretrained_model)
+ 
+         model_tokenizer = AutoTokenizer.from_pretrained(
+             model_name, do_lower_case=do_lowercase, do_basic_tokenize=False, use_fast=True)
+ 
+         # to adjust all tokenizers
+         if hasattr(model_tokenizer, 'encoder'):
+             model_tokenizer.vocab = model_tokenizer.encoder
+         if hasattr(model_tokenizer, 'sp_model'):
+             model_tokenizer.vocab = defaultdict(lambda: 1)
+             for i in range(model_tokenizer.sp_model.get_piece_size()):
+                 model_tokenizer.vocab[model_tokenizer.sp_model.id_to_piece(i)] = i
+ 
+         if special_tokens_fix:
+             model_tokenizer.add_tokens([START_TOKEN])
+             model_tokenizer.vocab[START_TOKEN] = len(model_tokenizer) - 1
+ 
+         super().__init__(tokenizer=model_tokenizer,
+                          max_pieces=max_pieces,
+                          max_pieces_per_token=max_pieces_per_token)
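Note on the indexer above: ``tokens_to_indices`` delegates the word -> wordpiece step to ``tokenize_batch`` from the repo's helper utilities, capping each word at ``max_pieces_per_token`` pieces and recording per-word offsets so the embedder can later pick one vector per original token. A minimal sketch of that offsets scheme, assuming a toy tokenizer and vocabulary (the function and key names here are illustrative, not the repo's API):

    from typing import Callable, Dict, List

    def index_with_offsets(tokenizer: Callable[[str], List[str]],
                           vocab: Dict[str, int],
                           words: List[str],
                           max_bpe_pieces: int = 5) -> Dict[str, List[int]]:
        """Toy wordpiece indexing: flat piece ids plus one offset per source word."""
        ids: List[int] = []
        offsets: List[int] = []
        for word in words:
            pieces = tokenizer(word)[:max_bpe_pieces]   # cap pieces per token
            offsets.append(len(ids))                    # first piece of this word
            ids.extend(vocab.get(piece, 1) for piece in pieces)  # 1 stands in for [UNK]
        return {"input_ids": ids, "offsets": offsets, "mask": [1] * len(words)}

    # index_with_offsets(lambda w: list(w), {"h": 5, "i": 6}, ["hi"])
    # -> {"input_ids": [5, 6], "offsets": [0], "mask": [1]}

The offsets list is what makes it possible to run BERT over the wordpiece sequence while keeping one label per original token, which is what the GECToR tagging heads require.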
gector/trainer.py ADDED
@@ -0,0 +1,845 @@
+ """Tweaked version of corresponding AllenNLP file"""
+ import datetime
+ import logging
+ import math
+ import os
+ import time
+ import traceback
+ from typing import Dict, Optional, List, Tuple, Union, Iterable, Any
+ 
+ import torch
+ import torch.optim.lr_scheduler
+ from allennlp.common import Params
+ from allennlp.common.checks import ConfigurationError, parse_cuda_device
+ from allennlp.common.tqdm import Tqdm
+ from allennlp.common.util import dump_metrics, gpu_memory_mb, peak_memory_mb, lazy_groups_of
+ from allennlp.data.instance import Instance
+ from allennlp.data.iterators.data_iterator import DataIterator, TensorDict
+ from allennlp.models.model import Model
+ from allennlp.nn import util as nn_util
+ from allennlp.training import util as training_util
+ from allennlp.training.checkpointer import Checkpointer
+ from allennlp.training.learning_rate_schedulers import LearningRateScheduler
+ from allennlp.training.metric_tracker import MetricTracker
+ from allennlp.training.momentum_schedulers import MomentumScheduler
+ from allennlp.training.moving_average import MovingAverage
+ from allennlp.training.optimizers import Optimizer
+ from allennlp.training.tensorboard_writer import TensorboardWriter
+ from allennlp.training.trainer_base import TrainerBase
+ 
+ logger = logging.getLogger(__name__)
+ 
+ 
+ class Trainer(TrainerBase):
+     def __init__(
+         self,
+         model: Model,
+         optimizer: torch.optim.Optimizer,
+         scheduler: torch.optim.lr_scheduler,
+         iterator: DataIterator,
+         train_dataset: Iterable[Instance],
+         validation_dataset: Optional[Iterable[Instance]] = None,
+         patience: Optional[int] = None,
+         validation_metric: str = "-loss",
+         validation_iterator: DataIterator = None,
+         shuffle: bool = True,
+         num_epochs: int = 20,
+         accumulated_batch_count: int = 1,
+         serialization_dir: Optional[str] = None,
+         num_serialized_models_to_keep: int = 20,
+         keep_serialized_model_every_num_seconds: int = None,
+         checkpointer: Checkpointer = None,
+         model_save_interval: float = None,
+         cuda_device: Union[int, List] = -1,
+         grad_norm: Optional[float] = None,
+         grad_clipping: Optional[float] = None,
+         learning_rate_scheduler: Optional[LearningRateScheduler] = None,
+         momentum_scheduler: Optional[MomentumScheduler] = None,
+         summary_interval: int = 100,
+         histogram_interval: int = None,
+         should_log_parameter_statistics: bool = True,
+         should_log_learning_rate: bool = False,
+         log_batch_size_period: Optional[int] = None,
+         moving_average: Optional[MovingAverage] = None,
+         cold_step_count: int = 0,
+         cold_lr: float = 1e-3,
+         cuda_verbose_step=None,
+     ) -> None:
+         """
+         A trainer for doing supervised learning. It just takes a labeled dataset
+         and a ``DataIterator``, and uses the supplied ``Optimizer`` to learn the weights
+         for your model over some fixed number of epochs. You can also pass in a validation
+         dataset and enable early stopping. There are many other bells and whistles as well.
+ 
+         Parameters
+         ----------
+         model : ``Model``, required.
+             An AllenNLP model to be optimized. Pytorch Modules can also be optimized if
+             their ``forward`` method returns a dictionary with a "loss" key, containing a
+             scalar tensor representing the loss function to be optimized.
+ 
+             If you are training your model using GPUs, your model should already be
+             on the correct device. (If you use `Trainer.from_params` this will be
+             handled for you.)
+         optimizer : ``torch.optim.Optimizer``, required.
+             An instance of a Pytorch Optimizer, instantiated with the parameters of the
+             model to be optimized.
+         scheduler : ``torch.optim.lr_scheduler``, required.
+             A PyTorch learning rate scheduler that is stepped on the validation loss
+             at the end of every epoch (e.g. ``ReduceLROnPlateau``).
+         iterator : ``DataIterator``, required.
+             A method for iterating over a ``Dataset``, yielding padded indexed batches.
+         train_dataset : ``Dataset``, required.
+             A ``Dataset`` to train on. The dataset should have already been indexed.
+         validation_dataset : ``Dataset``, optional, (default = None).
+             A ``Dataset`` to evaluate on. The dataset should have already been indexed.
+         patience : Optional[int] > 0, optional (default=None)
+             Number of epochs to be patient before early stopping: the training is stopped
+             after ``patience`` epochs with no improvement. If given, it must be ``> 0``.
+             If None, early stopping is disabled.
+         validation_metric : str, optional (default="-loss")
+             Validation metric to measure for whether to stop training using patience
+             and whether to serialize an ``is_best`` model each epoch. The metric name
+             must be prepended with either "+" or "-", which specifies whether the metric
+             is an increasing or decreasing function.
+         validation_iterator : ``DataIterator``, optional (default=None)
+             An iterator to use for the validation set. If ``None``, then
+             use the training `iterator`.
+         shuffle : ``bool``, optional (default=True)
+             Whether to shuffle the instances in the iterator or not.
+         num_epochs : int, optional (default = 20)
+             Number of training epochs.
+         accumulated_batch_count : int, optional (default = 1)
+             Number of batches over which gradients are accumulated before an
+             optimizer step is taken.
+         serialization_dir : str, optional (default=None)
+             Path to directory for saving and loading model files. Models will not be saved if
+             this parameter is not passed.
+         num_serialized_models_to_keep : ``int``, optional (default=20)
+             Number of previous model checkpoints to retain. Default is to keep 20 checkpoints.
+             A value of None or -1 means all checkpoints will be kept.
+         keep_serialized_model_every_num_seconds : ``int``, optional (default=None)
+             If num_serialized_models_to_keep is not None, then occasionally it's useful to
+             save models at a given interval in addition to the last num_serialized_models_to_keep.
+             To do so, specify keep_serialized_model_every_num_seconds as the number of seconds
+             between permanently saved checkpoints. Note that this option is only used if
+             num_serialized_models_to_keep is not None, otherwise all checkpoints are kept.
+         checkpointer : ``Checkpointer``, optional (default=None)
+             An instance of class Checkpointer to use instead of the default. If a checkpointer is specified,
+             the arguments num_serialized_models_to_keep and keep_serialized_model_every_num_seconds should
+             not be specified. The caller is responsible for initializing the checkpointer so that it is
+             consistent with serialization_dir.
+         model_save_interval : ``float``, optional (default=None)
+             If provided, then serialize models every ``model_save_interval``
+             seconds within single epochs. In all cases, models are also saved
+             at the end of every epoch if ``serialization_dir`` is provided.
+         cuda_device : ``Union[int, List[int]]``, optional (default = -1)
+             An integer or list of integers specifying the CUDA device(s) to use. If -1, the CPU is used.
+         grad_norm : ``float``, optional, (default = None).
+             If provided, gradient norms will be rescaled to have a maximum of this value.
+         grad_clipping : ``float``, optional (default = ``None``).
+             If provided, gradients will be clipped `during the backward pass` to have an (absolute)
+             maximum of this value. If you are getting ``NaNs`` in your gradients during training
+             that are not solved by using ``grad_norm``, you may need this.
+         learning_rate_scheduler : ``LearningRateScheduler``, optional (default = None)
+             If specified, the learning rate will be decayed with respect to
+             this schedule at the end of each epoch (or batch, if the scheduler implements
+             the ``step_batch`` method). If you use :class:`torch.optim.lr_scheduler.ReduceLROnPlateau`,
+             this will use the ``validation_metric`` provided to determine if learning has plateaued.
+             To support updating the learning rate on every batch, this can optionally implement
+             ``step_batch(batch_num_total)`` which updates the learning rate given the batch number.
+         momentum_scheduler : ``MomentumScheduler``, optional (default = None)
+             If specified, the momentum will be updated at the end of each batch or epoch
+             according to the schedule.
+         summary_interval : ``int``, optional, (default = 100)
+             Number of batches between logging scalars to tensorboard
+         histogram_interval : ``int``, optional, (default = ``None``)
+             If not None, then log histograms to tensorboard every ``histogram_interval`` batches.
+             When this parameter is specified, the following additional logging is enabled:
+                 * Histograms of model parameters
+                 * The ratio of parameter update norm to parameter norm
+                 * Histogram of layer activations
+             We log histograms of the parameters returned by
+             ``model.get_parameters_for_histogram_tensorboard_logging``.
+             The layer activations are logged for any modules in the ``Model`` that have
+             the attribute ``should_log_activations`` set to ``True``. Logging
+             histograms requires a number of GPU-CPU copies during training and is typically
+             slow, so we recommend logging histograms relatively infrequently.
+             Note: only Modules that return tensors, tuples of tensors or dicts
+             with tensors as values currently support activation logging.
+         should_log_parameter_statistics : ``bool``, optional, (default = True)
+             Whether to send parameter statistics (mean and standard deviation
+             of parameters and gradients) to tensorboard.
+         should_log_learning_rate : ``bool``, optional, (default = False)
+             Whether to send parameter specific learning rate to tensorboard.
+         log_batch_size_period : ``int``, optional, (default = ``None``)
+             If defined, how often to log the average batch size.
+         moving_average : ``MovingAverage``, optional, (default = None)
+             If provided, we will maintain moving averages for all parameters. During training, we
+             employ a shadow variable for each parameter, which maintains the moving average. During
+             evaluation, we backup the original parameters and assign the moving averages to corresponding
+             parameters. Be careful that when saving the checkpoint, we will save the moving averages of
+             parameters. This is necessary because we want the saved model to perform as well as the validated
+             model if we load it later. But this may cause problems if you restart the training from checkpoint.
+         cold_step_count : int, optional (default = 0)
+             Number of initial epochs during which the BERT encoder is frozen and the
+             remaining parameters are trained with ``cold_lr``.
+         cold_lr : float, optional (default = 1e-3)
+             Learning rate used during the cold (frozen-encoder) epochs.
+         cuda_verbose_step : int, optional (default = None)
+             If set, print CUDA memory statistics every ``cuda_verbose_step`` batches.
+         """
+         super().__init__(serialization_dir, cuda_device)
+ 
+         # I am not calling move_to_gpu here, because if the model is
+         # not already on the GPU then the optimizer is going to be wrong.
+         self.model = model
+ 
+         self.iterator = iterator
+         self._validation_iterator = validation_iterator
+         self.shuffle = shuffle
+         self.optimizer = optimizer
+         self.scheduler = scheduler
+         self.train_data = train_dataset
+         self._validation_data = validation_dataset
+         self.accumulated_batch_count = accumulated_batch_count
+         self.cold_step_count = cold_step_count
+         self.cold_lr = cold_lr
+         self.cuda_verbose_step = cuda_verbose_step
+ 
+         if patience is None:  # no early stopping
+             if validation_dataset:
+                 logger.warning(
+                     "You provided a validation dataset but patience was set to None, "
+                     "meaning that early stopping is disabled"
+                 )
+         elif (not isinstance(patience, int)) or patience <= 0:
+             raise ConfigurationError(
+                 '{} is an invalid value for "patience": it must be a positive integer '
+                 "or None (if you want to disable early stopping)".format(patience)
+             )
+ 
+         # For tracking is_best_so_far and should_stop_early
+         self._metric_tracker = MetricTracker(patience, validation_metric)
+         # Get rid of + or -
+         self._validation_metric = validation_metric[1:]
+ 
+         self._num_epochs = num_epochs
+ 
+         if checkpointer is not None:
+             # We can't easily check if these parameters were passed in, so check against their default values.
+             # We don't check against serialization_dir since it is also used by the parent class.
+             if num_serialized_models_to_keep != 20 \
+                     or keep_serialized_model_every_num_seconds is not None:
+                 raise ConfigurationError(
+                     "When passing a custom Checkpointer, you may not also pass in separate checkpointer "
+                     "args 'num_serialized_models_to_keep' or 'keep_serialized_model_every_num_seconds'."
+                 )
+             self._checkpointer = checkpointer
+         else:
+             self._checkpointer = Checkpointer(
+                 serialization_dir,
+                 keep_serialized_model_every_num_seconds,
+                 num_serialized_models_to_keep,
+             )
+ 
+         self._model_save_interval = model_save_interval
+ 
+         self._grad_norm = grad_norm
+         self._grad_clipping = grad_clipping
+ 
+         self._learning_rate_scheduler = learning_rate_scheduler
+         self._momentum_scheduler = momentum_scheduler
+         self._moving_average = moving_average
+ 
+         # We keep the total batch number as an instance variable because it
+         # is used inside a closure for the hook which logs activations in
+         # ``_enable_activation_logging``.
+         self._batch_num_total = 0
+ 
+         self._tensorboard = TensorboardWriter(
+             get_batch_num_total=lambda: self._batch_num_total,
+             serialization_dir=serialization_dir,
+             summary_interval=summary_interval,
+             histogram_interval=histogram_interval,
+             should_log_parameter_statistics=should_log_parameter_statistics,
+             should_log_learning_rate=should_log_learning_rate,
+         )
+ 
+         self._log_batch_size_period = log_batch_size_period
+ 
+         self._last_log = 0.0  # time of last logging
+ 
+         # Enable activation logging.
+         if histogram_interval is not None:
+             self._tensorboard.enable_activation_logging(self.model)
+ 
+     def rescale_gradients(self) -> Optional[float]:
+         return training_util.rescale_gradients(self.model, self._grad_norm)
+ 
+     def batch_loss(self, batch_group: List[TensorDict], for_training: bool) -> torch.Tensor:
+         """
+         Does a forward pass on the given batches and returns the ``loss`` value in the result.
+         If ``for_training`` is `True` also applies regularization penalty.
+         """
+         if self._multiple_gpu:
+             output_dict = training_util.data_parallel(batch_group, self.model, self._cuda_devices)
+         else:
+             assert len(batch_group) == 1
+             batch = batch_group[0]
+             batch = nn_util.move_to_device(batch, self._cuda_devices[0])
+             output_dict = self.model(**batch)
+ 
+         try:
+             loss = output_dict["loss"]
+             if for_training:
+                 loss += self.model.get_regularization_penalty()
+         except KeyError:
+             if for_training:
+                 raise RuntimeError(
+                     "The model you are trying to optimize does not contain a"
+                     " 'loss' key in the output of model.forward(inputs)."
+                 )
+             loss = None
+ 
+         return loss
+ 
+     def _train_epoch(self, epoch: int) -> Dict[str, float]:
+         """
+         Trains one epoch and returns metrics.
+         """
+         logger.info("Epoch %d/%d", epoch, self._num_epochs - 1)
+         peak_cpu_usage = peak_memory_mb()
+         logger.info(f"Peak CPU memory usage MB: {peak_cpu_usage}")
+         gpu_usage = []
+         for gpu, memory in gpu_memory_mb().items():
+             gpu_usage.append((gpu, memory))
+             logger.info(f"GPU {gpu} memory usage MB: {memory}")
+ 
+         train_loss = 0.0
+         # Set the model to "train" mode.
+         self.model.train()
+ 
+         num_gpus = len(self._cuda_devices)
+ 
+         # Get tqdm for the training batches
+         raw_train_generator = self.iterator(self.train_data, num_epochs=1, shuffle=self.shuffle)
+         train_generator = lazy_groups_of(raw_train_generator, num_gpus)
+         num_training_batches = math.ceil(self.iterator.get_num_batches(self.train_data) / num_gpus)
+         residue = num_training_batches % self.accumulated_batch_count
+         self._last_log = time.time()
+         last_save_time = time.time()
+ 
+         batches_this_epoch = 0
+         if self._batch_num_total is None:
+             self._batch_num_total = 0
+ 
+         histogram_parameters = set(self.model.get_parameters_for_histogram_tensorboard_logging())
+ 
+         logger.info("Training")
+         train_generator_tqdm = Tqdm.tqdm(train_generator, total=num_training_batches)
+         cumulative_batch_size = 0
+         self.optimizer.zero_grad()
+         for batch_group in train_generator_tqdm:
+             batches_this_epoch += 1
+             self._batch_num_total += 1
+             batch_num_total = self._batch_num_total
+ 
+             iter_len = self.accumulated_batch_count \
+                 if batches_this_epoch <= (num_training_batches - residue) else residue
+ 
+             if self.cuda_verbose_step is not None and batch_num_total % self.cuda_verbose_step == 0:
+                 print(f'Before forward pass - Cuda memory allocated: {torch.cuda.memory_allocated() / 1e9}')
+                 print(f'Before forward pass - Cuda memory cached: {torch.cuda.memory_cached() / 1e9}')
+             try:
+                 loss = self.batch_loss(batch_group, for_training=True) / iter_len
+             except RuntimeError as e:
+                 print(e)
+                 for x in batch_group:
+                     all_words = [len(y['words']) for y in x['metadata']]
+                     print(f"Total sents: {len(all_words)}. "
+                           f"Min {min(all_words)}. Max {max(all_words)}")
+                     for elem in ['labels', 'd_tags']:
+                         tt = x[elem]
+                         print(
+                             f"{elem} shape {list(tt.shape)} and min {tt.min().item()} and {tt.max().item()}")
+                     for elem in ["bert", "mask", "bert-offsets"]:
+                         tt = x['tokens'][elem]
+                         print(
+                             f"{elem} shape {list(tt.shape)} and min {tt.min().item()} and {tt.max().item()}")
+                 raise e
+ 
+             if self.cuda_verbose_step is not None and batch_num_total % self.cuda_verbose_step == 0:
+                 print(f'After forward pass - Cuda memory allocated: {torch.cuda.memory_allocated() / 1e9}')
+                 print(f'After forward pass - Cuda memory cached: {torch.cuda.memory_cached() / 1e9}')
+ 
+             if torch.isnan(loss):
+                 raise ValueError("nan loss encountered")
+ 
+             loss.backward()
+ 
+             if self.cuda_verbose_step is not None and batch_num_total % self.cuda_verbose_step == 0:
+                 print(f'After backprop - Cuda memory allocated: {torch.cuda.memory_allocated() / 1e9}')
+                 print(f'After backprop - Cuda memory cached: {torch.cuda.memory_cached() / 1e9}')
+ 
+             train_loss += loss.item() * iter_len
+ 
+             # NOTE: batch_group is still needed below for batch-size logging,
+             # so only the loss (and its graph) is freed here.
+             del loss
+             torch.cuda.empty_cache()
+ 
+             if self.cuda_verbose_step is not None and batch_num_total % self.cuda_verbose_step == 0:
+                 print(f'After collecting garbage - Cuda memory allocated: {torch.cuda.memory_allocated() / 1e9}')
+                 print(f'After collecting garbage - Cuda memory cached: {torch.cuda.memory_cached() / 1e9}')
+ 
+             batch_grad_norm = self.rescale_gradients()
+ 
+             # This does nothing if batch_num_total is None or you are using a
+             # scheduler which doesn't update per batch.
+             if self._learning_rate_scheduler:
+                 self._learning_rate_scheduler.step_batch(batch_num_total)
+             if self._momentum_scheduler:
+                 self._momentum_scheduler.step_batch(batch_num_total)
+ 
+             if self._tensorboard.should_log_histograms_this_batch():
+                 # get the magnitude of parameter updates for logging
+                 # We need a copy of current parameters to compute magnitude of updates,
+                 # and copy them to CPU so large models won't go OOM on the GPU.
+                 param_updates = {
+                     name: param.detach().cpu().clone()
+                     for name, param in self.model.named_parameters()
+                 }
+                 if batches_this_epoch % self.accumulated_batch_count == 0 or \
+                         batches_this_epoch == num_training_batches:
+                     self.optimizer.step()
+                     self.optimizer.zero_grad()
+                 for name, param in self.model.named_parameters():
+                     param_updates[name].sub_(param.detach().cpu())
+                     update_norm = torch.norm(param_updates[name].view(-1))
+                     param_norm = torch.norm(param.view(-1)).cpu()
+                     self._tensorboard.add_train_scalar(
+                         "gradient_update/" + name, update_norm / (param_norm + 1e-7)
+                     )
+             else:
+                 if batches_this_epoch % self.accumulated_batch_count == 0 or \
+                         batches_this_epoch == num_training_batches:
+                     self.optimizer.step()
+                     self.optimizer.zero_grad()
+ 
+             # Update moving averages
+             if self._moving_average is not None:
+                 self._moving_average.apply(batch_num_total)
+ 
+             # Update the description with the latest metrics
+             metrics = training_util.get_metrics(self.model, train_loss, batches_this_epoch)
+             description = training_util.description_from_metrics(metrics)
+ 
+             train_generator_tqdm.set_description(description, refresh=False)
+ 
+             # Log parameter values to Tensorboard
+             if self._tensorboard.should_log_this_batch():
+                 self._tensorboard.log_parameter_and_gradient_statistics(self.model, batch_grad_norm)
+                 self._tensorboard.log_learning_rates(self.model, self.optimizer)
+ 
+                 self._tensorboard.add_train_scalar("loss/loss_train", metrics["loss"])
+                 self._tensorboard.log_metrics({"epoch_metrics/" + k: v for k, v in metrics.items()})
+ 
+             if self._tensorboard.should_log_histograms_this_batch():
+                 self._tensorboard.log_histograms(self.model, histogram_parameters)
+ 
+             if self._log_batch_size_period:
+                 cur_batch = sum([training_util.get_batch_size(batch) for batch in batch_group])
+                 cumulative_batch_size += cur_batch
+                 if (batches_this_epoch - 1) % self._log_batch_size_period == 0:
+                     average = cumulative_batch_size / batches_this_epoch
+                     logger.info(f"current batch size: {cur_batch} mean batch size: {average}")
+                     self._tensorboard.add_train_scalar("current_batch_size", cur_batch)
+                     self._tensorboard.add_train_scalar("mean_batch_size", average)
+ 
+             # Save model if needed.
+             if self._model_save_interval is not None and (
+                 time.time() - last_save_time > self._model_save_interval
+             ):
+                 last_save_time = time.time()
+                 self._save_checkpoint(
+                     "{0}.{1}".format(epoch, training_util.time_to_str(int(last_save_time)))
+                 )
+ 
+         metrics = training_util.get_metrics(self.model, train_loss, batches_this_epoch, reset=True)
+         metrics["cpu_memory_MB"] = peak_cpu_usage
+         for (gpu_num, memory) in gpu_usage:
+             metrics["gpu_" + str(gpu_num) + "_memory_MB"] = memory
+         return metrics
+ 
+     def _validation_loss(self) -> Tuple[float, int]:
+         """
+         Computes the validation loss. Returns it and the number of batches.
+         """
+         logger.info("Validating")
+ 
+         self.model.eval()
+ 
+         # Replace parameter values with the shadow values from the moving averages.
+         if self._moving_average is not None:
+             self._moving_average.assign_average_value()
+ 
+         if self._validation_iterator is not None:
+             val_iterator = self._validation_iterator
+         else:
+             val_iterator = self.iterator
+ 
+         num_gpus = len(self._cuda_devices)
+ 
+         raw_val_generator = val_iterator(self._validation_data, num_epochs=1, shuffle=False)
+         val_generator = lazy_groups_of(raw_val_generator, num_gpus)
+         num_validation_batches = math.ceil(
+             val_iterator.get_num_batches(self._validation_data) / num_gpus
+         )
+         val_generator_tqdm = Tqdm.tqdm(val_generator, total=num_validation_batches)
+         batches_this_epoch = 0
+         val_loss = 0
+         for batch_group in val_generator_tqdm:
+ 
+             loss = self.batch_loss(batch_group, for_training=False)
+             if loss is not None:
+                 # You shouldn't necessarily have to compute a loss for validation, so we allow for
+                 # `loss` to be None. We need to be careful, though - `batches_this_epoch` is
+                 # currently only used as the divisor for the loss function, so we can safely only
+                 # count those batches for which we actually have a loss. If this variable ever
+                 # gets used for something else, we might need to change things around a bit.
+                 batches_this_epoch += 1
+                 val_loss += loss.detach().cpu().numpy()
+ 
+             # Update the description with the latest metrics
+             val_metrics = training_util.get_metrics(self.model, val_loss, batches_this_epoch)
+             description = training_util.description_from_metrics(val_metrics)
+             val_generator_tqdm.set_description(description, refresh=False)
+ 
+         # Now restore the original parameter values.
+         if self._moving_average is not None:
+             self._moving_average.restore()
+ 
+         return val_loss, batches_this_epoch
+ 
+     def train(self) -> Dict[str, Any]:
+         """
+         Trains the supplied model with the supplied parameters.
+         """
+         try:
+             epoch_counter = self._restore_checkpoint()
+         except RuntimeError:
+             traceback.print_exc()
+             raise ConfigurationError(
+                 "Could not recover training from the checkpoint. Did you mean to output to "
+                 "a different serialization directory or delete the existing serialization "
+                 "directory?"
+             )
+ 
+         training_util.enable_gradient_clipping(self.model, self._grad_clipping)
+ 
+         logger.info("Beginning training.")
+ 
+         train_metrics: Dict[str, float] = {}
+         val_metrics: Dict[str, float] = {}
+         this_epoch_val_metric: float = None
+         metrics: Dict[str, Any] = {}
+         epochs_trained = 0
+         training_start_time = time.time()
+ 
+         if self.cold_step_count > 0:
+             base_lr = self.optimizer.param_groups[0]['lr']
+             for param_group in self.optimizer.param_groups:
+                 param_group['lr'] = self.cold_lr
+             self.model.text_field_embedder._token_embedders['bert'].set_weights(freeze=True)
+ 
+         metrics["best_epoch"] = self._metric_tracker.best_epoch
+         for key, value in self._metric_tracker.best_epoch_metrics.items():
+             metrics["best_validation_" + key] = value
+ 
+         for epoch in range(epoch_counter, self._num_epochs):
+             if epoch == self.cold_step_count and epoch != 0:
+                 for param_group in self.optimizer.param_groups:
+                     param_group['lr'] = base_lr
+                 self.model.text_field_embedder._token_embedders['bert'].set_weights(freeze=False)
+ 
+             epoch_start_time = time.time()
+             train_metrics = self._train_epoch(epoch)
+ 
+             # get peak of memory usage
+             if "cpu_memory_MB" in train_metrics:
+                 metrics["peak_cpu_memory_MB"] = max(
+                     metrics.get("peak_cpu_memory_MB", 0), train_metrics["cpu_memory_MB"]
+                 )
+             for key, value in train_metrics.items():
+                 if key.startswith("gpu_"):
+                     metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value)
+ 
+             # clear cache before validation
+             torch.cuda.empty_cache()
+             if self._validation_data is not None:
+                 with torch.no_grad():
+                     # We have a validation set, so compute all the metrics on it.
+                     val_loss, num_batches = self._validation_loss()
+                     val_metrics = training_util.get_metrics(
+                         self.model, val_loss, num_batches, reset=True
+                     )
+ 
+                     # Check validation metric for early stopping
+                     this_epoch_val_metric = val_metrics[self._validation_metric]
+                     self._metric_tracker.add_metric(this_epoch_val_metric)
+ 
+                     if self._metric_tracker.should_stop_early():
+                         logger.info("Ran out of patience. Stopping training.")
+                         break
+ 
+             self._tensorboard.log_metrics(
+                 train_metrics, val_metrics=val_metrics, log_to_console=True, epoch=epoch + 1
+             )  # +1 because tensorboard doesn't like 0
+ 
+             # Create overall metrics dict
+             training_elapsed_time = time.time() - training_start_time
+             metrics["training_duration"] = str(datetime.timedelta(seconds=training_elapsed_time))
+             metrics["training_start_epoch"] = epoch_counter
+             metrics["training_epochs"] = epochs_trained
+             metrics["epoch"] = epoch
+ 
+             for key, value in train_metrics.items():
+                 metrics["training_" + key] = value
+             for key, value in val_metrics.items():
+                 metrics["validation_" + key] = value
+ 
+             # if self.cold_step_count <= epoch:
+             self.scheduler.step(metrics['validation_loss'])
+ 
+             if self._metric_tracker.is_best_so_far():
+                 # Update all the best_ metrics.
+                 # (Otherwise they just stay the same as they were.)
+                 metrics["best_epoch"] = epoch
+                 for key, value in val_metrics.items():
+                     metrics["best_validation_" + key] = value
+ 
+                 self._metric_tracker.best_epoch_metrics = val_metrics
+ 
+             if self._serialization_dir:
+                 dump_metrics(
+                     os.path.join(self._serialization_dir, f"metrics_epoch_{epoch}.json"), metrics
+                 )
+ 
+             # The Scheduler API is agnostic to whether your schedule requires a validation metric -
+             # if it doesn't, the validation metric passed here is ignored.
+             if self._learning_rate_scheduler:
+                 self._learning_rate_scheduler.step(this_epoch_val_metric, epoch)
+             if self._momentum_scheduler:
+                 self._momentum_scheduler.step(this_epoch_val_metric, epoch)
+ 
+             self._save_checkpoint(epoch)
+ 
+             epoch_elapsed_time = time.time() - epoch_start_time
+             logger.info("Epoch duration: %s", datetime.timedelta(seconds=epoch_elapsed_time))
+ 
+             if epoch < self._num_epochs - 1:
+                 training_elapsed_time = time.time() - training_start_time
+                 estimated_time_remaining = training_elapsed_time * (
+                     (self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1
+                 )
+                 formatted_time = str(datetime.timedelta(seconds=int(estimated_time_remaining)))
+                 logger.info("Estimated training time remaining: %s", formatted_time)
+ 
+             epochs_trained += 1
+ 
+         # make sure pending events are flushed to disk and files are closed properly
+         # self._tensorboard.close()
+ 
+         # Load the best model state before returning
+         best_model_state = self._checkpointer.best_model_state()
+         if best_model_state:
+             self.model.load_state_dict(best_model_state)
+ 
+         return metrics
+ 
+     def _save_checkpoint(self, epoch: Union[int, str]) -> None:
+         """
+         Saves a checkpoint of the model to self._serialization_dir.
+         Is a no-op if self._serialization_dir is None.
+ 
+         Parameters
+         ----------
+         epoch : Union[int, str], required.
+             The epoch of training. If the checkpoint is saved in the middle
+             of an epoch, the parameter is a string with the epoch and timestamp.
+         """
+         # If moving averages are used for parameters, we save
+         # the moving average values into checkpoint, instead of the current values.
+         if self._moving_average is not None:
+             self._moving_average.assign_average_value()
+ 
+         # These are the training states we need to persist.
+         training_states = {
+             "metric_tracker": self._metric_tracker.state_dict(),
+             "optimizer": self.optimizer.state_dict(),
+             "batch_num_total": self._batch_num_total,
+         }
+ 
+         # If we have a learning rate or momentum scheduler, we should persist them too.
+         if self._learning_rate_scheduler is not None:
+             training_states["learning_rate_scheduler"] = self._learning_rate_scheduler.state_dict()
+         if self._momentum_scheduler is not None:
+             training_states["momentum_scheduler"] = self._momentum_scheduler.state_dict()
+ 
+         self._checkpointer.save_checkpoint(
+             model_state=self.model.state_dict(),
+             epoch=epoch,
+             training_states=training_states,
+             is_best_so_far=self._metric_tracker.is_best_so_far(),
+         )
+ 
+         # Restore the original values for parameters so that training will not be affected.
+         if self._moving_average is not None:
+             self._moving_average.restore()
+ 
+     def _restore_checkpoint(self) -> int:
+         """
+         Restores the model and training state from the last saved checkpoint.
+         This includes an epoch count and optimizer state, which is serialized separately
+         from model parameters. This function should only be used to continue training -
+         if you wish to load a model for inference/load parts of a model into a new
+         computation graph, you should use the native Pytorch functions:
+         ``model.load_state_dict(torch.load("/path/to/model/weights.th"))``
+ 
+         If ``self._serialization_dir`` does not exist or does not contain any checkpointed weights,
+         this function will do nothing and return 0.
+ 
+         Returns
+         -------
+         epoch: int
+             The epoch at which to resume training, which should be one after the epoch
+             in the saved training state.
+         """
+         model_state, training_state = self._checkpointer.restore_checkpoint()
+ 
+         if not training_state:
+             # No checkpoint to restore, start at 0
+             return 0
+ 
+         self.model.load_state_dict(model_state)
+         self.optimizer.load_state_dict(training_state["optimizer"])
+         if self._learning_rate_scheduler is not None \
+                 and "learning_rate_scheduler" in training_state:
+             self._learning_rate_scheduler.load_state_dict(training_state["learning_rate_scheduler"])
+         if self._momentum_scheduler is not None and "momentum_scheduler" in training_state:
+             self._momentum_scheduler.load_state_dict(training_state["momentum_scheduler"])
+         training_util.move_optimizer_to_cuda(self.optimizer)
+ 
+         # Currently the ``training_state`` contains a serialized ``MetricTracker``.
+         if "metric_tracker" in training_state:
+             self._metric_tracker.load_state_dict(training_state["metric_tracker"])
+         # It used to be the case that we tracked ``val_metric_per_epoch``.
+         elif "val_metric_per_epoch" in training_state:
+             self._metric_tracker.clear()
+             self._metric_tracker.add_metrics(training_state["val_metric_per_epoch"])
+         # And before that we didn't track anything.
+         else:
+             self._metric_tracker.clear()
+ 
+         if isinstance(training_state["epoch"], int):
+             epoch_to_return = training_state["epoch"] + 1
+         else:
+             epoch_to_return = int(training_state["epoch"].split(".")[0]) + 1
+ 
+         # For older checkpoints with batch_num_total missing, default to old behavior where
+         # it is unchanged.
+         batch_num_total = training_state.get("batch_num_total")
+         if batch_num_total is not None:
+             self._batch_num_total = batch_num_total
+ 
+         return epoch_to_return
+ 
+     # Requires custom from_params.
+     @classmethod
+     def from_params(  # type: ignore
+         cls,
+         model: Model,
+         serialization_dir: str,
+         iterator: DataIterator,
+         train_data: Iterable[Instance],
+         validation_data: Optional[Iterable[Instance]],
+         params: Params,
+         validation_iterator: DataIterator = None,
+     ) -> "Trainer":
+ 
+         patience = params.pop_int("patience", None)
+         validation_metric = params.pop("validation_metric", "-loss")
+         shuffle = params.pop_bool("shuffle", True)
+         num_epochs = params.pop_int("num_epochs", 20)
+         cuda_device = parse_cuda_device(params.pop("cuda_device", -1))
+         grad_norm = params.pop_float("grad_norm", None)
+         grad_clipping = params.pop_float("grad_clipping", None)
+         lr_scheduler_params = params.pop("learning_rate_scheduler", None)
+         momentum_scheduler_params = params.pop("momentum_scheduler", None)
+ 
+         if isinstance(cuda_device, list):
+             model_device = cuda_device[0]
+         else:
+             model_device = cuda_device
+         if model_device >= 0:
+             # Moving model to GPU here so that the optimizer state gets constructed on
+             # the right device.
+             model = model.cuda(model_device)
+ 
+         parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
+         optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))
+         if "moving_average" in params:
+             moving_average = MovingAverage.from_params(
+                 params.pop("moving_average"), parameters=parameters
+             )
+         else:
+             moving_average = None
+ 
+         if lr_scheduler_params:
+             lr_scheduler = LearningRateScheduler.from_params(optimizer, lr_scheduler_params)
+         else:
+             lr_scheduler = None
+         if momentum_scheduler_params:
+             momentum_scheduler = MomentumScheduler.from_params(optimizer, momentum_scheduler_params)
+         else:
+             momentum_scheduler = None
+ 
+         if "checkpointer" in params:
+             if "keep_serialized_model_every_num_seconds" in params \
+                     or "num_serialized_models_to_keep" in params:
+                 raise ConfigurationError(
+                     "Checkpointer may be initialized either from the 'checkpointer' key or from the "
+                     "keys 'num_serialized_models_to_keep' and 'keep_serialized_model_every_num_seconds'"
+                     " but the passed config uses both methods."
+                 )
+             checkpointer = Checkpointer.from_params(params.pop("checkpointer"))
+         else:
+             num_serialized_models_to_keep = params.pop_int("num_serialized_models_to_keep", 20)
+             keep_serialized_model_every_num_seconds = params.pop_int(
+                 "keep_serialized_model_every_num_seconds", None
+             )
+             checkpointer = Checkpointer(
+                 serialization_dir=serialization_dir,
+                 num_serialized_models_to_keep=num_serialized_models_to_keep,
+                 keep_serialized_model_every_num_seconds=keep_serialized_model_every_num_seconds,
+             )
+         model_save_interval = params.pop_float("model_save_interval", None)
+         summary_interval = params.pop_int("summary_interval", 100)
+         histogram_interval = params.pop_int("histogram_interval", None)
+         should_log_parameter_statistics = params.pop_bool("should_log_parameter_statistics", True)
+         should_log_learning_rate = params.pop_bool("should_log_learning_rate", False)
+         log_batch_size_period = params.pop_int("log_batch_size_period", None)
+ 
+         params.assert_empty(cls.__name__)
+         return cls(
+             model,
+             optimizer,
+             None,  # NOTE: this factory builds no scheduler; assign one before calling train()
+             iterator,
+             train_data,
+             validation_data,
+             patience=patience,
+             validation_metric=validation_metric,
+             validation_iterator=validation_iterator,
+             shuffle=shuffle,
+             num_epochs=num_epochs,
+             serialization_dir=serialization_dir,
+             cuda_device=cuda_device,
+             grad_norm=grad_norm,
+             grad_clipping=grad_clipping,
+             learning_rate_scheduler=lr_scheduler,
+             momentum_scheduler=momentum_scheduler,
+             checkpointer=checkpointer,
+             model_save_interval=model_save_interval,
+             summary_interval=summary_interval,
+             histogram_interval=histogram_interval,
+             should_log_parameter_statistics=should_log_parameter_statistics,
+             should_log_learning_rate=should_log_learning_rate,
+             log_batch_size_period=log_batch_size_period,
+             moving_average=moving_average,
+         )
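For reference, the core accumulation policy in ``_train_epoch`` above: each micro-batch loss is divided by the size of its accumulation group (``accumulated_batch_count``, or the shorter trailing ``residue`` group), and the optimizer steps once per group. A self-contained sketch of just that logic, assuming a list of loss tensors that still carry gradients (hypothetical helper, not part of this commit):

    from typing import List

    import torch

    def train_with_accumulation(losses: List[torch.Tensor],
                                optimizer: torch.optim.Optimizer,
                                accumulated_batch_count: int = 4) -> None:
        """Step once per group of micro-batches; scale each loss by its group size."""
        n = len(losses)
        residue = n % accumulated_batch_count
        optimizer.zero_grad()
        for i, loss in enumerate(losses, start=1):
            # the trailing short group (if any) is scaled by its own size
            group = accumulated_batch_count if i <= n - residue else residue
            (loss / group).backward()
            if i % accumulated_batch_count == 0 or i == n:
                optimizer.step()
                optimizer.zero_grad()

Scaling by the true group size keeps each accumulated gradient equal to the mean over its group even when the epoch length is not a multiple of the accumulation count.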
output_vocabulary/d_tags.txt ADDED
@@ -0,0 +1,4 @@
+ CORRECT
+ INCORRECT
+ @@UNKNOWN@@
+ @@PADDING@@
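The file above lists the detection-head classes (per-token CORRECT/INCORRECT plus AllenNLP's unknown/padding sentinels); the next file, output_vocabulary/labels.txt, lists the token-level edit operations ($KEEP, $DELETE, $APPEND_*, $REPLACE_*, and morphological $TRANSFORM_* tags) that the tagging head chooses from. A rough sketch of how one predicted label per source token could be applied in a single pass (simplified; per the GECToR paper, the real decoder also handles a $START token and applies corrections iteratively):

    from typing import List

    def apply_labels(tokens: List[str], labels: List[str]) -> List[str]:
        """Apply one edit label per source token (single pass, no morphology)."""
        out: List[str] = []
        for token, label in zip(tokens, labels):
            if label == "$DELETE":
                continue  # drop the token entirely
            if label.startswith("$REPLACE_"):
                out.append(label[len("$REPLACE_"):])           # swap in the new word
            elif label.startswith("$APPEND_"):
                out.extend([token, label[len("$APPEND_"):]])   # keep, then insert after
            else:
                # $KEEP and the $TRANSFORM_* tags (case, verb form, agreement);
                # transforms would need morphology helpers, so the token is kept here.
                out.append(token)
        return out

    # apply_labels(["he", "go", "home"], ["$KEEP", "$TRANSFORM_VERB_VB_VBZ", "$APPEND_."])
    # -> ["he", "go", "home", "."]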
output_vocabulary/labels.txt ADDED
@@ -0,0 +1,5002 @@
+ $KEEP
+ $DELETE
+ $TRANSFORM_CASE_CAPITAL
+ $APPEND_the
+ $APPEND_,
+ $APPEND_a
+ $TRANSFORM_VERB_VB_VBZ
+ $TRANSFORM_AGREEMENT_PLURAL
+ $TRANSFORM_CASE_LOWER
+ $TRANSFORM_VERB_VB_VBN
+ $REPLACE_the
+ $REPLACE_a
+ $REPLACE_to
+ $TRANSFORM_VERB_VB_VBG
+ $REPLACE_.
+ $APPEND_to
+ $REPLACE_,
+ $REPLACE_in
+ $REPLACE_was
+ $TRANSFORM_VERB_VBZ_VB
+ $TRANSFORM_AGREEMENT_SINGULAR
+ $APPEND_I
+ $APPEND_.
+ $REPLACE_for
+ $REPLACE_I
+ $APPEND_(
+ $TRANSFORM_VERB_VBG_VB
+ $REPLACE_is
+ $REPLACE_have
+ $REPLACE_on
+ $REPLACE_are
+ $REPLACE_of
+ $REPLACE_it
+ $TRANSFORM_VERB_VBN_VB
+ $REPLACE_that
+ $APPEND_in
+ $REPLACE_and
+ $APPEND_it
+ $APPEND_that
+ $REPLACE_at
+ $APPEND_for
+ $APPEND_of
+ $APPEND_and
+ $REPLACE_an
+ $REPLACE_my
+ $REPLACE_had
+ $APPEND_have
+ $APPEND_an
+ $REPLACE_has
+ $APPEND_my
+ $APPEND_is
+ $APPEND_The
+ $APPEND_will
+ $REPLACE_with
+ $REPLACE_were
+ $REPLACE_be
+ $TRANSFORM_VERB_VBN_VBG
+ $APPEND_``
+ $REPLACE_do
+ $TRANSFORM_VERB_VBG_VBN
+ $REPLACE_this
+ $REPLACE_will
+ $TRANSFORM_VERB_VB_VBD
+ $APPEND_was
+ $REPLACE_n't
+ $APPEND_about
+ $REPLACE_from
+ $REPLACE_about
+ $REPLACE_It
+ $APPEND_on
+ $REPLACE_would
+ $MERGE_SPACE
+ $APPEND_at
+ $APPEND_'s
+ $REPLACE_as
+ $REPLACE_'s
+ $REPLACE_could
+ $APPEND_with
+ $REPLACE_did
+ $REPLACE_them
+ $REPLACE_The
+ $REPLACE_by
+ $REPLACE_so
+ $REPLACE_not
+ $REPLACE_can
+ $APPEND_am
+ $APPEND_be
+ $REPLACE_because
+ $APPEND_/
+ $REPLACE_they
+ $REPLACE_am
+ $APPEND_are
+ $TRANSFORM_VERB_VBZ_VBN
+ $REPLACE_'m
+ $REPLACE_their
+ $TRANSFORM_VERB_VBN_VBZ
+ $APPEND_had
+ $APPEND_would
+ $APPEND_-
+ $REPLACE_(
+ $TRANSFORM_VERB_VBN_VBD
+ $REPLACE_very
+ $REPLACE_people
+ $REPLACE_get
+ $REPLACE_there
+ $REPLACE_?
+ $APPEND_do
+ $REPLACE_;
+ $REPLACE_me
+ $REPLACE_one
+ $REPLACE_been
+ $APPEND_so
+ $APPEND_)
+ $APPEND_'m
+ $REPLACE_or
+ $REPLACE_some
+ $REPLACE_you
+ $TRANSFORM_VERB_VBD_VBN
+ $APPEND_as
+ $REPLACE_like
+ $TRANSFORM_VERB_VBD_VB
+ $REPLACE_which
+ $APPEND_has
+ $REPLACE_these
+ $REPLACE_This
+ $APPEND_from
+ $REPLACE_when
+ $APPEND_'ve
+ $REPLACE_``
+ $APPEND_there
+ $REPLACE_does
+ $APPEND_also
+ $APPEND_It
+ $APPEND_can
+ $REPLACE_:
+ $REPLACE_other
+ $APPEND_more
+ $REPLACE_want
+ $REPLACE_we
+ $REPLACE_'ve
+ $REPLACE_what
+ $REPLACE_more
+ $REPLACE_many
+ $REPLACE_into
+ $APPEND_been
+ $APPEND_by
+ $APPEND_this
+ $REPLACE_went
+ $REPLACE_time
+ $APPEND_only
+ $TRANSFORM_VERB_VBG_VBZ
+ $REPLACE_go
+ $REPLACE_while
+ $REPLACE_but
+ $APPEND_all
+ $APPEND_if
+ $REPLACE_should
+ $REPLACE_out
+ $APPEND_'
+ $REPLACE_during
+ $REPLACE_much
+ $APPEND_like
+ $REPLACE_!
+ $APPEND_but
+ $REPLACE_if
+ $REPLACE_since
+ $APPEND_people
+ $APPEND_because
+ $REPLACE_any
+ $APPEND_A
+ $REPLACE_another
+ $REPLACE_They
+ $APPEND_you
+ $REPLACE_ca
+ $REPLACE_our
+ $REPLACE_who
+ $APPEND_now
+ $REPLACE_really
+ $REPLACE_make
+ $APPEND_me
+ $APPEND_who
+ $REPLACE_In
+ $REPLACE_her
+ $REPLACE_English
+ $APPEND_some
+ $APPEND_when
+ $APPEND_still
+ $APPEND_them
+ $REPLACE_use
+ $APPEND_just
+ $REPLACE_things
+ $REPLACE_/
+ $REPLACE_got
+ $REPLACE_My
+ $APPEND_were
+ $REPLACE_he
+ $REPLACE_countries
+ $APPEND_their
+ $REPLACE_using
+ $TRANSFORM_VERB_VBZ_VBG
+ $APPEND_'ll
+ $REPLACE_being
+ $REPLACE_too
+ $APPEND_we
+ $APPEND_they
+ $REPLACE_lot
+ $REPLACE_-
+ $REPLACE_all
+ $REPLACE_good
+ $APPEND_[
+ $REPLACE_every
+ $REPLACE_)
+ $REPLACE_your
+ $APPEND_My
+ $APPEND_even
+ $APPEND_out
+ $REPLACE_his
+ $REPLACE_made
+ $APPEND_any
+ $REPLACE_where
+ $APPEND_which
+ $REPLACE_work
+ $REPLACE_used
+ $APPEND_one
+ $REPLACE_take
+ $APPEND_In
+ $REPLACE_There
+ $REPLACE_up
+ $REPLACE_how
+ $REPLACE_myself
+ $APPEND_what
+ $APPEND_very
+ $APPEND_?
+ $REPLACE_become
+ $REPLACE_think
+ $REPLACE_going
+ $REPLACE_Japanese
+ $REPLACE_well
+ $APPEND_being
+ $APPEND_or
+ $REPLACE_just
+ $REPLACE_write
+ $REPLACE_those
+ $REPLACE_feel
+ $REPLACE_until
+ $APPEND_However
+ $APPEND_our
+ $REPLACE_something
+ $APPEND_get
+ $REPLACE_diary
+ $REPLACE_no
+ $REPLACE_over
+ $APPEND_time
+ $APPEND_then
+ $REPLACE_see
+ $REPLACE_writing
+ $REPLACE_wo
+ $REPLACE_only
+ $REPLACE_'ll
+ $REPLACE_after
+ $REPLACE_know
+ $REPLACE_anything
+ $REPLACE_now
+ $REPLACE_That
+ $REPLACE_first
+ $REPLACE_than
+ $APPEND_up
+ $REPLACE_better
+ $REPLACE_hope
+ $REPLACE_through
+ $REPLACE_doing
+ $APPEND_go
+ $REPLACE_then
+ $APPEND_too
+ $REPLACE_studying
+ $REPLACE_its
+ $REPLACE_learn
+ $REPLACE_lives
+ $REPLACE_having
+ $REPLACE_told
+ $REPLACE_What
+ $REPLACE_she
+ $REPLACE_thought
+ $APPEND_not
+ $REPLACE_around
+ $REPLACE_him
+ $REPLACE_different
+ $APPEND_could
+ $APPEND_such
+ $REPLACE_able
+ $REPLACE_On
+ $REPLACE_before
+ $REPLACE_though
+ $REPLACE_also
+ $APPEND_entry
+ $REPLACE_learned
+ $TRANSFORM_CASE_UPPER
+ $APPEND_again
+ $REPLACE_friends
+ $APPEND_This
+ $REPLACE_might
+ $REPLACE_A
+ $REPLACE_However
+ $APPEND_really
+ $REPLACE_started
+ $REPLACE_improve
+ $APPEND_English
+ $REPLACE_years
+ $REPLACE_'
+ $REPLACE_most
+ $APPEND_how
+ $REPLACE_day
+ $APPEND_:
+ $APPEND_today
+ $REPLACE_find
+ $REPLACE_help
+ $APPEND_should
+ $REPLACE_We
+ $REPLACE_even
+ $REPLACE_may
+ $REPLACE_left
+ $REPLACE_called
+ $APPEND_did
+ $REPLACE_course
+ $REPLACE_These
+ $REPLACE_understand
+ $REPLACE_So
+ $REPLACE_said
+ $REPLACE_took
+ $REPLACE_person
+ $REPLACE_school
+ $REPLACE_such
+ $APPEND_called
+ $REPLACE_At
+ $APPEND_before
+ $REPLACE_way
+ $APPEND_he
+ $REPLACE_everyone
+ $REPLACE_here
+ $REPLACE_When
+ $REPLACE_everything
+ $REPLACE_need
+ $APPEND_her
+ $REPLACE_Because
+ $TRANSFORM_VERB_VBD_VBG
+ $REPLACE_say
+ $REPLACE_study
+ $APPEND_much
+ $REPLACE_still
+ $REPLACE_found
+ $APPEND_always
+ $REPLACE_last
+ $APPEND_other
+ $TRANSFORM_VERB_VBG_VBD
+ $REPLACE_learning
+ $REPLACE_correct
+ $REPLACE_two
+ $REPLACE_days
+ $REPLACE_difficult
+ $REPLACE_never
+ $APPEND__
+ $REPLACE_'d
+ $APPEND_your
+ $REPLACE_us
+ $REPLACE_foreign
+ $REPLACE_entry
+ $APPEND_!
+ $REPLACE_Japan
+ $APPEND_;
+ $REPLACE_tell
+ $REPLACE_give
+ $REPLACE_decided
+ $APPEND_during
+ $REPLACE_Also
+ $APPEND_his
+ $REPLACE_speak
+ $REPLACE_came
+ $REPLACE_little
+ $APPEND_while
+ $TRANSFORM_VERB_VBZ_VBD
+ $APPEND_things
+ $REPLACE_especially
+ $REPLACE_Recently
+ $REPLACE_come
+ $APPEND_especially
+ $REPLACE_needed
+ $APPEND_make
+ $REPLACE_whether
+ $REPLACE_felt
+ $REPLACE_Although
+ $REPLACE_someone
+ $REPLACE_As
+ $REPLACE_great
+ $REPLACE_today
+ $APPEND_since
+ $REPLACE_hard
+ $REPLACE_For
+ $REPLACE_became
+ $REPLACE_between
+ $REPLACE_beautiful
+ $REPLACE_life
+ $REPLACE_why
+ $APPEND_though
+ $APPEND_There
+ $APPEND_going
+ $REPLACE_long
+ $APPEND_where
+ $REPLACE_believe
+ $REPLACE_website
+ $REPLACE_heard
+ $REPLACE_job
+ $REPLACE_home
+ $REPLACE_'re
+ $REPLACE_But
+ $REPLACE_anyone
+ $REPLACE_again
+ $REPLACE_bad
+ $REPLACE_recently
+ $APPEND_here
+ $REPLACE_practice
+ $REPLACE_often
+ $APPEND_got
+ $APPEND_feel
+ $REPLACE_saw
+ $REPLACE_quickly
+ $REPLACE_language
+ $REPLACE_wanted
428
+ $APPEND_each
429
+ $REPLACE_put
430
+ $REPLACE_done
431
+ $REPLACE_minutes
432
+ $REPLACE_each
433
+ $APPEND_she
434
+ $REPLACE_grammar
435
+ $REPLACE_watch
436
+ $REPLACE_happy
437
+ $REPLACE_back
438
+ $REPLACE_friend
439
+ $REPLACE_off
440
+ $REPLACE_He
441
+ $REPLACE_Since
442
+ $APPEND_something
443
+ $APPEND_using
444
+ $APPEND_At
445
+ $REPLACE_university
446
+ $REPLACE_country
447
+ $REPLACE_watching
448
+ $REPLACE_received
449
+ $REPLACE_enough
450
+ $REPLACE_weather
451
+ $REPLACE_usually
452
+ $APPEND_back
453
+ $REPLACE_happened
454
+ $APPEND_having
455
+ $REPLACE_always
456
+ $APPEND_does
457
+ $REPLACE_After
458
+ $REPLACE_try
459
+ $REPLACE_start
460
+ $APPEND_already
461
+ $REPLACE_talk
462
+ $REPLACE_thing
463
+ $APPEND_But
464
+ $APPEND_For
465
+ $REPLACE_Then
466
+ $REPLACE_fun
467
+ $REPLACE_soon
468
+ $REPLACE_starting
469
+ $REPLACE_away
470
+ $APPEND_want
471
+ $REPLACE_asked
472
+ $APPEND_went
473
+ $REPLACE_trip
474
+ $REPLACE_new
475
+ $REPLACE_right
476
+ $APPEND_after
477
+ $REPLACE_keep
478
+ $REPLACE_interesting
479
+ $REPLACE_together
480
+ $REPLACE_Do
481
+ $APPEND_So
482
+ $REPLACE_beginning
483
+ $APPEND_myself
484
+ $REPLACE_getting
485
+ $APPEND_On
486
+ $REPLACE_restaurant
487
+ $REPLACE_looking
488
+ $REPLACE_children
489
+ $APPEND_last
490
+ $REPLACE_college
491
+ $APPEND_right
492
+ $REPLACE_stay
493
+ $REPLACE_year
494
+ $REPLACE_live
495
+ $REPLACE_travel
496
+ $REPLACE_favorite
497
+ $REPLACE_read
498
+ $APPEND_well
499
+ $REPLACE_written
500
+ $REPLACE_months
501
+ $APPEND_yet
502
+ $APPEND_first
503
+ $APPEND_most
504
+ $REPLACE_look
505
+ $REPLACE_tried
506
+ $REPLACE_clothes
507
+ $REPLACE_[
508
+ $REPLACE_kind
509
+ $APPEND_its
510
+ $REPLACE_&
511
+ $REPLACE_remember
512
+ $APPEND_him
513
+ $REPLACE_problem
514
+ $APPEND_*
515
+ $REPLACE_meet
516
+ $REPLACE_gave
517
+ $REPLACE_either
518
+ $REPLACE_makes
519
+ $REPLACE_elderly
520
+ $REPLACE_hobbies
521
+ $REPLACE_easily
522
+ $REPLACE_important
523
+ $APPEND_take
524
+ $APPEND_thing
525
+ $REPLACE_vocabulary
526
+ $REPLACE_listening
527
+ $REPLACE_must
528
+ $REPLACE_hours
529
+ $REPLACE_place
530
+ $REPLACE_While
531
+ $REPLACE_without
532
+ $REPLACE_end
533
+ $REPLACE_Korean
534
+ $REPLACE_Therefore
535
+ $REPLACE_working
536
+ $REPLACE_high
537
+ $REPLACE_house
538
+ $REPLACE_already
539
+ $APPEND_good
540
+ $REPLACE_opportunity
541
+ $APPEND_many
542
+ $REPLACE_family
543
+ $REPLACE_During
544
+ $REPLACE_First
545
+ $APPEND_both
546
+ $REPLACE_once
547
+ $REPLACE_experience
548
+ $REPLACE_tomorrow
549
+ $APPEND_these
550
+ $REPLACE_true
551
+ $APPEND_day
552
+ $REPLACE_leave
553
+ $APPEND_When
554
+ $REPLACE_watched
555
+ $APPEND_person
556
+ $REPLACE_best
557
+ $REPLACE_harder
558
+ $REPLACE_Today
559
+ $REPLACE_morning
560
+ $REPLACE_If
561
+ $REPLACE_woke
562
+ $APPEND_into
563
+ $APPEND_made
564
+ $REPLACE_foreigners
565
+ $REPLACE_part
566
+ $APPEND_ever
567
+ $APPEND_probably
568
+ $APPEND_way
569
+ $APPEND_over
570
+ $APPEND_n't
571
+ $REPLACE_towards
572
+ $REPLACE_three
573
+ $REPLACE_One
574
+ $REPLACE_studied
575
+ $REPLACE_nervous
576
+ $REPLACE_forward
577
+ $REPLACE_seen
578
+ $REPLACE_Chinese
579
+ $REPLACE_night
580
+ $APPEND_own
581
+ $REPLACE_taught
582
+ $APPEND_usually
583
+ $REPLACE_To
584
+ $REPLACE_communicate
585
+ $APPEND_Japanese
586
+ $REPLACE_entries
587
+ $REPLACE_traveling
588
+ $REPLACE_site
589
+ $REPLACE_difference
590
+ $APPEND_those
591
+ $TRANSFORM_VERB_VBD_VBZ
592
+ $REPLACE_rainy
593
+ $REPLACE_play
594
+ $REPLACE_comfortable
595
+ $REPLACE_recommend
596
+ $REPLACE_coming
597
+ $REPLACE_Is
598
+ $REPLACE_asleep
599
+ $REPLACE_realized
600
+ $APPEND_recently
601
+ $APPEND_around
602
+ $REPLACE_men
603
+ $REPLACE_Finally
604
+ $REPLACE_excited
605
+ $REPLACE_near
606
+ $APPEND_often
607
+ $REPLACE_t
608
+ $REPLACE_next
609
+ $REPLACE_ever
610
+ $APPEND_Today
611
+ $REPLACE_taking
612
+ $APPEND_started
613
+ $REPLACE_please
614
+ $APPEND_than
615
+ $REPLACE_sentences
616
+ $APPEND_What
617
+ $REPLACE_She
618
+ $APPEND_work
619
+ $REPLACE_visit
620
+ $REPLACE_surprised
621
+ $REPLACE_show
622
+ $REPLACE_You
623
+ $APPEND_used
624
+ $REPLACE_ago
625
+ $APPEND_Even
626
+ $APPEND_That
627
+ $REPLACE_similar
628
+ $APPEND_soon
629
+ $REPLACE_less
630
+ $REPLACE_enjoy
631
+ $REPLACE_diaries
632
+ $REPLACE_speaking
633
+ $REPLACE_past
634
+ $APPEND_through
635
+ $REPLACE_women
636
+ $REPLACE_planned
637
+ $REPLACE_later
638
+ $REPLACE_looked
639
+ $REPLACE_yet
640
+ $APPEND_us
641
+ $REPLACE_And
642
+ $APPEND_'d
643
+ $APPEND_As
644
+ $REPLACE_healthy
645
+ $APPEND_might
646
+ $REPLACE_class
647
+ $REPLACE_Now
648
+ $REPLACE_outside
649
+ $REPLACE_tired
650
+ $APPEND_else
651
+ $REPLACE_Please
652
+ $REPLACE_problems
653
+ $APPEND_They
654
+ $REPLACE_food
655
+ $REPLACE_reading
656
+ $APPEND_&
657
+ $APPEND_think
658
+ $REPLACE_finished
659
+ $REPLACE_popular
660
+ $REPLACE_Are
661
+ $APPEND_2
662
+ $APPEND_may
663
+ $APPEND_found
664
+ $APPEND_whether
665
+ $APPEND_We
666
+ $REPLACE_How
667
+ $REPLACE_continue
668
+ $REPLACE_everyday
669
+ $REPLACE_daily
670
+ $REPLACE_talked
671
+ $APPEND_new
672
+ $REPLACE_reason
673
+ $REPLACE_means
674
+ $REPLACE_opportunities
675
+ $APPEND_different
676
+ $REPLACE_business
677
+ $REPLACE_making
678
+ $APPEND_ago
679
+ $REPLACE_favourite
680
+ $REPLACE_bit
681
+ $REPLACE_delicious
682
+ $APPEND_every
683
+ $REPLACE_spend
684
+ $APPEND_finally
685
+ $APPEND_part
686
+ $REPLACE_yesterday
687
+ $REPLACE_down
688
+ $REPLACE_times
689
+ $REPLACE_holiday
690
+ $REPLACE_nice
691
+ $REPLACE_although
692
+ $REPLACE_earlier
693
+ $REPLACE_Can
694
+ $REPLACE_due
695
+ $APPEND_help
696
+ $REPLACE_caught
697
+ $REPLACE_quite
698
+ $APPEND_kind
699
+ $REPLACE_words
700
+ $REPLACE_movie
701
+ $REPLACE_else
702
+ $APPEND_together
703
+ $REPLACE_advertisement
704
+ $APPEND_Is
705
+ $APPEND_between
706
+ $APPEND_enough
707
+ $REPLACE_let
708
+ $REPLACE_instead
709
+ $REPLACE_disappointed
710
+ $REPLACE_Have
711
+ $APPEND_After
712
+ $APPEND_no
713
+ $APPEND_doing
714
+ $REPLACE_skills
715
+ $APPEND_instead
716
+ $REPLACE_Some
717
+ $REPLACE_Actually
718
+ $APPEND_3
719
+ $REPLACE_choose
720
+ $REPLACE_An
721
+ $APPEND_away
722
+ $REPLACE_Does
723
+ $REPLACE_played
724
+ $APPEND_Because
725
+ $REPLACE_both
726
+ $REPLACE_easier
727
+ $REPLACE_others
728
+ $REPLACE_eat
729
+ $REPLACE_onto
730
+ $REPLACE_sometimes
731
+ $REPLACE_began
732
+ $REPLACE_usual
733
+ $REPLACE_expensive
734
+ $APPEND_To
735
+ $APPEND_actually
736
+ $REPLACE_old
737
+ $APPEND_see
738
+ $APPEND_know
739
+ $REPLACE_few
740
+ $APPEND_why
741
+ $APPEND_sometimes
742
+ $REPLACE_Unfortunately
743
+ $APPEND_use
744
+ $REPLACE_older
745
+ $REPLACE_joined
746
+ $REPLACE_own
747
+ $REPLACE_raining
748
+ $REPLACE_themselves
749
+ $REPLACE_example
750
+ $APPEND_able
751
+ $REPLACE_arrived
752
+ $REPLACE_whom
753
+ $REPLACE_nothing
754
+ $REPLACE_fluently
755
+ $APPEND_getting
756
+ $REPLACE_convenient
757
+ $REPLACE_met
758
+ $REPLACE_becoming
759
+ $APPEND_better
760
+ $APPEND_become
761
+ $REPLACE_lots
762
+ $REPLACE_fast
763
+ $REPLACE_memories
764
+ $REPLACE_worse
765
+ $REPLACE_interested
766
+ $REPLACE_hear
767
+ $REPLACE_Secondly
768
+ $REPLACE_thoughts
769
+ $REPLACE_journal
770
+ $REPLACE_bought
771
+ $REPLACE_useful
772
+ $REPLACE_teach
773
+ $APPEND_learn
774
+ $REPLACE_throughout
775
+ $REPLACE_money
776
+ $REPLACE_change
777
+ $REPLACE_imagine
778
+ $REPLACE_late
779
+ $REPLACE_mine
780
+ $REPLACE_same
781
+ $REPLACE_future
782
+ $REPLACE_sure
783
+ $REPLACE_students
784
+ $REPLACE_along
785
+ $REPLACE_exercise
786
+ $REPLACE_opinion
787
+ $REPLACE_return
788
+ $REPLACE_cause
789
+ $REPLACE_month
790
+ $REPLACE_stop
791
+ $REPLACE_worried
792
+ $REPLACE_trying
793
+ $REPLACE_health
794
+ $REPLACE_American
795
+ $APPEND_writing
796
+ $REPLACE_enjoyed
797
+ $REPLACE_second
798
+ $APPEND_look
799
+ $APPEND_old
800
+ $REPLACE_finally
801
+ $REPLACE_wish
802
+ $REPLACE_famous
803
+ $REPLACE_talking
804
+ $REPLACE_abroad
805
+ $REPLACE_information
806
+ $APPEND_And
807
+ $REPLACE_stopped
808
+ $REPLACE_lose
809
+ $REPLACE_sentence
810
+ $REPLACE_pronunciation
811
+ $REPLACE_feeling
812
+ $REPLACE_younger
813
+ $REPLACE_passed
814
+ $REPLACE_among
815
+ $REPLACE_paid
816
+ $REPLACE_playing
817
+ $REPLACE_attend
818
+ $REPLACE_early
819
+ $REPLACE_All
820
+ $REPLACE_Maybe
821
+ $APPEND_high
822
+ $REPLACE_child
823
+ $APPEND_anything
824
+ $REPLACE_order
825
+ $REPLACE_saying
826
+ $REPLACE_families
827
+ $REPLACE_special
828
+ $REPLACE_spent
829
+ $REPLACE_appreciate
830
+ $REPLACE_successful
831
+ $APPEND_If
832
+ $REPLACE_turned
833
+ $REPLACE_cities
834
+ $REPLACE_definitely
835
+ $REPLACE_fell
836
+ $APPEND_try
837
+ $APPEND_skills
838
+ $REPLACE_world
839
+ $REPLACE_technology
840
+ $REPLACE_small
841
+ $REPLACE_wrote
842
+ $REPLACE_takes
843
+ $REPLACE_seems
844
+ $REPLACE_various
845
+ $REPLACE_actually
846
+ $REPLACE_exam
847
+ $REPLACE_free
848
+ $REPLACE_gone
849
+ $REPLACE_strong
850
+ $REPLACE_receive
851
+ $REPLACE_Yesterday
852
+ $REPLACE_probably
853
+ $REPLACE_Every
854
+ $REPLACE_normal
855
+ $REPLACE_ask
856
+ $REPLACE_company
857
+ $REPLACE_environment
858
+ $REPLACE_buy
859
+ $REPLACE_shows
860
+ $REPLACE_easy
861
+ $REPLACE_sincerely
862
+ $REPLACE_vacation
863
+ $REPLACE_far
864
+ $REPLACE_sauce
865
+ $REPLACE_teacher
866
+ $REPLACE_living
867
+ $REPLACE_parties
868
+ $REPLACE_goes
869
+ $REPLACE_Christmas
870
+ $REPLACE_Hello
871
+ $APPEND_start
872
+ $REPLACE_hurt
873
+ $REPLACE_wonder
874
+ $REPLACE_mind
875
+ $REPLACE_possible
876
+ $REPLACE_thinking
877
+ $APPEND_Now
878
+ $REPLACE_relationship
879
+ $REPLACE_plan
880
+ $REPLACE_man
881
+ $REPLACE_woman
882
+ $REPLACE_activities
883
+ $APPEND_down
884
+ $REPLACE_returned
885
+ $REPLACE_pay
886
+ $REPLACE_ability
887
+ $REPLACE_exciting
888
+ $REPLACE_safe
889
+ $APPEND_off
890
+ $APPEND_until
891
+ $REPLACE_goal
892
+ $APPEND_either
893
+ $REPLACE_mistakes
894
+ $REPLACE_within
895
+ $REPLACE_etc
896
+ $REPLACE_cost
897
+ $REPLACE_particular
898
+ $REPLACE_sense
899
+ $REPLACE_longer
900
+ $REPLACE_advice
901
+ $REPLACE_several
902
+ $APPEND_Do
903
+ $APPEND_currently
904
+ $REPLACE_answer
905
+ $REPLACE_Even
906
+ $REPLACE_held
907
+ $REPLACE_online
908
+ $APPEND_life
909
+ $REPLACE_Firstly
910
+ $REPLACE_close
911
+ $APPEND_study
912
+ $REPLACE_wear
913
+ $APPEND_language
914
+ $REPLACE_number
915
+ $REPLACE_young
916
+ $APPEND_whole
917
+ $APPEND_two
918
+ $APPEND_Then
919
+ $REPLACE_large
920
+ $REPLACE_eating
921
+ $REPLACE_given
922
+ $REPLACE_video
923
+ $REPLACE_spoke
924
+ $REPLACE_Another
925
+ $APPEND_rather
926
+ $APPEND_Of
927
+ $APPEND_once
928
+ $REPLACE_wonderful
929
+ $APPEND_must
930
+ $REPLACE_tonight
931
+ $REPLACE_Their
932
+ $REPLACE_languages
933
+ $REPLACE_big
934
+ $REPLACE_break
935
+ $REPLACE_government
936
+ $REPLACE_staff
937
+ $REPLACE_prepare
938
+ $REPLACE_quit
939
+ $REPLACE_completely
940
+ $REPLACE_ourselves
941
+ $APPEND_He
942
+ $REPLACE_nor
943
+ $APPEND_someone
944
+ $REPLACE_sad
945
+ $REPLACE_against
946
+ $REPLACE_anymore
947
+ $APPEND_give
948
+ $REPLACE_stayed
949
+ $REPLACE_achieve
950
+ $APPEND_An
951
+ $APPEND_Right
952
+ $REPLACE_maybe
953
+ $REPLACE_lost
954
+ $APPEND_came
955
+ $REPLACE_accommodation
956
+ $APPEND_One
957
+ $APPEND_"
958
+ $REPLACE_daughter
959
+ $APPEND_next
960
+ $REPLACE_love
961
+ $REPLACE_cheap
962
+ $REPLACE_poor
963
+ $REPLACE_By
964
+ $REPLACE_whole
965
+ $REPLACE_bring
966
+ $REPLACE_real
967
+ $REPLACE_worked
968
+ $REPLACE_almost
969
+ $REPLACE_prefer
970
+ $APPEND_find
971
+ $REPLACE_everybody
972
+ $APPEND_another
973
+ $REPLACE_create
974
+ $REPLACE_addition
975
+ $REPLACE_turn
976
+ $REPLACE_situation
977
+ $APPEND_due
978
+ $REPLACE_boyfriend
979
+ $APPEND_home
980
+ $REPLACE_finish
981
+ $REPLACE_rather
982
+ $APPEND_said
983
+ $APPEND_'re
984
+ $REPLACE_careful
985
+ $APPEND_long
986
+ $REPLACE_recommended
987
+ $REPLACE_customers
988
+ $REPLACE_level
989
+ $REPLACE_died
990
+ $REPLACE_comes
991
+ $APPEND_You
992
+ $REPLACE_glad
993
+ $APPEND_come
994
+ $REPLACE_short
995
+ $REPLACE_knowledge
996
+ $REPLACE_set
997
+ $REPLACE_Lang
998
+ $REPLACE_planning
999
+ $REPLACE_confidence
1000
+ $REPLACE_gain
1001
+ $REPLACE_check
1002
+ $REPLACE_immediately
1003
+ $REPLACE_scared
1004
+ $REPLACE_conversation
1005
+ $REPLACE_native
1006
+ $REPLACE_His
1007
+ $REPLACE_full
1008
+ $REPLACE_express
1009
+ $REPLACE_married
1010
+ $REPLACE_shopping
1011
+ $APPEND_working
1012
+ $APPEND_food
1013
+ $REPLACE_research
1014
+ $REPLACE_whenever
1015
+ $REPLACE_corrections
1016
+ $REPLACE_weird
1017
+ $APPEND_quite
1018
+ $REPLACE_necessary
1019
+ $REPLACE_Korea
1020
+ $REPLACE_whose
1021
+ $REPLACE_higher
1022
+ $APPEND_entries
1023
+ $REPLACE_Starting
1024
+ $REPLACE_attended
1025
+ $APPEND_5
1026
+ $APPEND_past
1027
+ $REPLACE_realize
1028
+ $REPLACE_cold
1029
+ $APPEND_playing
1030
+ $REPLACE_ended
1031
+ $REPLACE_inside
1032
+ $APPEND_please
1033
+ $REPLACE_coffee
1034
+ $REPLACE_enjoyable
1035
+ $APPEND_took
1036
+ $REPLACE_economic
1037
+ $APPEND_member
1038
+ $REPLACE_natural
1039
+ $REPLACE_registered
1040
+ $REPLACE_idea
1041
+ $APPEND_Recently
1042
+ $APPEND_<
1043
+ $REPLACE_caused
1044
+ $REPLACE_student
1045
+ $REPLACE_questions
1046
+ $REPLACE_music
1047
+ $REPLACE_story
1048
+ $REPLACE_happiness
1049
+ $REPLACE_gives
1050
+ $APPEND_She
1051
+ $REPLACE_Especially
1052
+ $REPLACE_energy
1053
+ $REPLACE_available
1054
+ $REPLACE_anywhere
1055
+ $REPLACE_taken
1056
+ $REPLACE_four
1057
+ $REPLACE_sleep
1058
+ $REPLACE_afraid
1059
+ $REPLACE_Everyone
1060
+ $APPEND_learning
1061
+ $REPLACE_ate
1062
+ $APPEND_few
1063
+ $APPEND_Since
1064
+ $REPLACE_helps
1065
+ $REPLACE_vegetables
1066
+ $REPLACE_kept
1067
+ $REPLACE_gets
1068
+ $REPLACE_explain
1069
+ $REPLACE_girlfriend
1070
+ $REPLACE_choice
1071
+ $REPLACE_waiting
1072
+ $APPEND_put
1073
+ $APPEND_yesterday
1074
+ $APPEND_During
1075
+ $REPLACE_From
1076
+ $APPEND_starting
1077
+ $REPLACE_scary
1078
+ $REPLACE_program
1079
+ $REPLACE_fish
1080
+ $REPLACE_hand
1081
+ $REPLACE_enter
1082
+ $APPEND_friends
1083
+ $REPLACE_decide
1084
+ $REPLACE_score
1085
+ $REPLACE_lonely
1086
+ $APPEND_easily
1087
+ $REPLACE_discovered
1088
+ $REPLACE_seeing
1089
+ $REPLACE_message
1090
+ $REPLACE_week
1091
+ $APPEND_studying
1092
+ $REPLACE_universities
1093
+ $REPLACE_introduce
1094
+ $REPLACE_common
1095
+ $REPLACE_heavily
1096
+ $REPLACE_People
1097
+ $REPLACE_care
1098
+ $APPEND_hard
1099
+ $REPLACE_hit
1100
+ $REPLACE_America
1101
+ $REPLACE_point
1102
+ $APPEND_need
1103
+ $REPLACE_funny
1104
+ $APPEND_almost
1105
+ $REPLACE_pass
1106
+ $REPLACE_temperature
1107
+ $REPLACE_performance
1108
+ $REPLACE_call
1109
+ $REPLACE_extremely
1110
+ $REPLACE_chance
1111
+ $REPLACE_main
1112
+ $REPLACE_season
1113
+ $REPLACE_series
1114
+ $REPLACE_nearby
1115
+ $REPLACE_license
1116
+ $REPLACE_expected
1117
+ $REPLACE_Last
1118
+ $REPLACE_picture
1119
+ $REPLACE_movies
1120
+ $APPEND_Also
1121
+ $REPLACE_seriously
1122
+ $REPLACE_via
1123
+ $REPLACE_running
1124
+ $REPLACE_run
1125
+ $REPLACE_regarding
1126
+ $REPLACE_chose
1127
+ $REPLACE_moment
1128
+ $APPEND_feeling
1129
+ $APPEND_bit
1130
+ $REPLACE_occurred
1131
+ $REPLACE_travelling
1132
+ $REPLACE_brought
1133
+ $APPEND_makes
1134
+ $REPLACE_amount
1135
+ $REPLACE_speakers
1136
+ $REPLACE_scenery
1137
+ $APPEND_year
1138
+ $APPEND_quickly
1139
+ $REPLACE_grateful
1140
+ $REPLACE_character
1141
+ $REPLACE_sleepy
1142
+ $REPLACE_bed
1143
+ $REPLACE_increase
1144
+ $APPEND_Good
1145
+ $REPLACE_area
1146
+ $REPLACE_certain
1147
+ $REPLACE_ways
1148
+ $REPLACE_looks
1149
+ $REPLACE_Nowadays
1150
+ $REPLACE_lucky
1151
+ $REPLACE_current
1152
+ $REPLACE_traditional
1153
+ $APPEND_write
1154
+ $APPEND_anymore
1155
+ $REPLACE_noticed
1156
+ $REPLACE_Did
1157
+ $REPLACE_matter
1158
+ $REPLACE_worry
1159
+ $REPLACE_angry
1160
+ $REPLACE_With
1161
+ $REPLACE_biggest
1162
+ $REPLACE_alcohol
1163
+ $APPEND_left
1164
+ $REPLACE_move
1165
+ $REPLACE_succeed
1166
+ $REPLACE_post
1167
+ $REPLACE_]
1168
+ $REPLACE_abilities
1169
+ $REPLACE_earthquake
1170
+ $REPLACE_visited
1171
+ $APPEND_]
1172
+ $REPLACE_speech
1173
+ $REPLACE_Thank
1174
+ $REPLACE_fewer
1175
+ $REPLACE_happen
1176
+ $APPEND_tomorrow
1177
+ $REPLACE_dinner
1178
+ $REPLACE_quiet
1179
+ $APPEND_type
1180
+ $REPLACE_previous
1181
+ $REPLACE_Furthermore
1182
+ $REPLACE_colleagues
1183
+ $REPLACE_present
1184
+ $REPLACE_No
1185
+ $REPLACE_chicken
1186
+ $REPLACE_city
1187
+ $REPLACE_weeks
1188
+ $REPLACE_develop
1189
+ $REPLACE_join
1190
+ $APPEND_Last
1191
+ $REPLACE_except
1192
+ $REPLACE_economy
1193
+ $REPLACE_sang
1194
+ $REPLACE_phrase
1195
+ $REPLACE_provide
1196
+ $REPLACE_lately
1197
+ $REPLACE_experienced
1198
+ $REPLACE_won
1199
+ $REPLACE_Though
1200
+ $APPEND_Therefore
1201
+ $APPEND_piece
1202
+ $REPLACE_including
1203
+ $REPLACE_husband
1204
+ $REPLACE_changed
1205
+ $REPLACE_view
1206
+ $REPLACE_becomes
1207
+ $REPLACE_share
1208
+ $APPEND_place
1209
+ $REPLACE_test
1210
+ $APPEND_4
1211
+ $APPEND_years
1212
+ $REPLACE_Our
1213
+ $REPLACE_wrong
1214
+ $REPLACE_seemed
1215
+ $REPLACE_wondering
1216
+ $REPLACE_computer
1217
+ $REPLACE_known
1218
+ $REPLACE_culture
1219
+ $REPLACE_Hong
1220
+ $REPLACE_clear
1221
+ $REPLACE_birthday
1222
+ $REPLACE_despite
1223
+ $REPLACE_front
1224
+ $REPLACE_sound
1225
+ $REPLACE_thankful
1226
+ $REPLACE_practise
1227
+ $REPLACE_Will
1228
+ $REPLACE_atmosphere
1229
+ $REPLACE_activity
1230
+ $APPEND_movie
1231
+ $REPLACE_China
1232
+ $REPLACE_reasons
1233
+ $REPLACE_name
1234
+ $REPLACE_serious
1235
+ $REPLACE_2
1236
+ $REPLACE_warm
1237
+ $REPLACE_depressed
1238
+ $REPLACE_simple
1239
+ $APPEND_trying
1240
+ $REPLACE_alone
1241
+ $APPEND_`
1242
+ $REPLACE_listen
1243
+ $REPLACE__
1244
+ $REPLACE_faithfully
1245
+ $REPLACE_Which
1246
+ $REPLACE_relieved
1247
+ $APPEND_1
1248
+ $REPLACE_price
1249
+ $REPLACE_store
1250
+ $REPLACE_lower
1251
+ $REPLACE_strange
1252
+ $REPLACE_game
1253
+ $REPLACE_sick
1254
+ $REPLACE_focus
1255
+ $REPLACE_suddenly
1256
+ $APPEND_Please
1257
+ $REPLACE_Would
1258
+ $REPLACE_traveled
1259
+ $REPLACE_event
1260
+ $REPLACE_ones
1261
+ $APPEND_Yesterday
1262
+ $APPEND_making
1263
+ $REPLACE_remembered
1264
+ $REPLACE_s
1265
+ $REPLACE_Lately
1266
+ $APPEND_S
1267
+ $REPLACE_member
1268
+ $APPEND_decided
1269
+ $REPLACE_across
1270
+ $REPLACE_entered
1271
+ $APPEND_maybe
1272
+ $REPLACE_University
1273
+ $REPLACE_difficulties
1274
+ $REPLACE_terrible
1275
+ $REPLACE_places
1276
+ $REPLACE_pretty
1277
+ $REPLACE_weekend
1278
+ $REPLACE_decision
1279
+ $APPEND_later
1280
+ $REPLACE_anybody
1281
+ $REPLACE_result
1282
+ $REPLACE_buses
1283
+ $REPLACE_Fortunately
1284
+ $APPEND_suddenly
1285
+ $REPLACE_slept
1286
+ $APPEND_school
1287
+ $REPLACE_group
1288
+ $REPLACE_electricity
1289
+ $REPLACE_fan
1290
+ $REPLACE_supposed
1291
+ $REPLACE_recent
1292
+ $REPLACE_wants
1293
+ $APPEND_10
1294
+ $REPLACE_low
1295
+ $APPEND_continue
1296
+ $APPEND_keep
1297
+ $APPEND_words
1298
+ $APPEND_Sometimes
1299
+ $REPLACE_type
1300
+ $REPLACE_Tomorrow
1301
+ $REPLACE_okay
1302
+ $APPEND_class
1303
+ $REPLACE_Her
1304
+ $APPEND_everything
1305
+ $APPEND_university
1306
+ $REPLACE_behind
1307
+ $REPLACE_clean
1308
+ $REPLACE_anxious
1309
+ $REPLACE_follow
1310
+ $APPEND_amount
1311
+ $REPLACE_parents
1312
+ $APPEND_While
1313
+ $REPLACE_email
1314
+ $REPLACE_mean
1315
+ $REPLACE_Most
1316
+ $APPEND_watching
1317
+ $REPLACE_taste
1318
+ $APPEND_taking
1319
+ $REPLACE_Sometimes
1320
+ $REPLACE_French
1321
+ $REPLACE_wearing
1322
+ $APPEND_weather
1323
+ $REPLACE_law
1324
+ $REPLACE_difficulty
1325
+ $APPEND_job
1326
+ $REPLACE_training
1327
+ $REPLACE_crowded
1328
+ $APPEND_All
1329
+ $REPLACE_gotten
1330
+ $REPLACE_catch
1331
+ $REPLACE_method
1332
+ $REPLACE_public
1333
+ $REPLACE_classes
1334
+ $REPLACE_seem
1335
+ $APPEND_show
1336
+ $REPLACE_question
1337
+ $REPLACE_development
1338
+ $REPLACE_says
1339
+ $REPLACE_faster
1340
+ $REPLACE_mother
1341
+ $REPLACE_guitar
1342
+ $REPLACE_teeth
1343
+ $REPLACE_song
1344
+ $REPLACE_lesson
1345
+ $REPLACE_knew
1346
+ $REPLACE_sent
1347
+ $REPLACE_unable
1348
+ $REPLACE_alot
1349
+ $REPLACE_Those
1350
+ $REPLACE_concert
1351
+ $APPEND_speak
1352
+ $REPLACE_software
1353
+ $REPLACE_German
1354
+ $REPLACE_Currently
1355
+ $REPLACE_yourself
1356
+ $REPLACE_fact
1357
+ $REPLACE_major
1358
+ $REPLACE_snowboarding
1359
+ $REPLACE_apartment
1360
+ $REPLACE_none
1361
+ $REPLACE_Here
1362
+ $REPLACE_reply
1363
+ $REPLACE_lived
1364
+ $APPEND_site
1365
+ $REPLACE_introduction
1366
+ $REPLACE_exchange
1367
+ $APPEND_level
1368
+ $REPLACE_iPhone
1369
+ $REPLACE_consider
1370
+ $REPLACE_leaves
1371
+ $APPEND_early
1372
+ $REPLACE_requires
1373
+ $REPLACE_Saturday
1374
+ $TRANSFORM_CASE_CAPITAL_1
1375
+ $REPLACE_further
1376
+ $REPLACE_absolutely
1377
+ $REPLACE_realised
1378
+ $APPEND_heard
1379
+ $REPLACE_following
1380
+ $REPLACE_doctor
1381
+ $REPLACE_beginner
1382
+ $APPEND_against
1383
+ $REPLACE_embarrassed
1384
+ $REPLACE_correctly
1385
+ $REPLACE_half
1386
+ $REPLACE_dangerous
1387
+ $REPLACE_moved
1388
+ $REPLACE_complete
1389
+ $REPLACE_perfect
1390
+ $REPLACE_Anyway
1391
+ $REPLACE_hold
1392
+ $REPLACE_differences
1393
+ $REPLACE_lunch
1394
+ $REPLACE_himself
1395
+ $REPLACE_based
1396
+ $APPEND_thought
1397
+ $REPLACE_reach
1398
+ $REPLACE_cheaper
1399
+ $REPLACE_loud
1400
+ $APPEND_By
1401
+ $APPEND_everyone
1402
+ $REPLACE_leaving
1403
+ $REPLACE_released
1404
+ $REPLACE_fine
1405
+ $REPLACE_Australia
1406
+ $REPLACE_style
1407
+ $REPLACE_deal
1408
+ $APPEND_along
1409
+ $REPLACE_satisfied
1410
+ $REPLACE_Of
1411
+ $REPLACE_variety
1412
+ $APPEND_improve
1413
+ $REPLACE_under
1414
+ $REPLACE_giving
1415
+ $REPLACE_party
1416
+ $APPEND_understand
1417
+ $REPLACE_everywhere
1418
+ $REPLACE_confident
1419
+ $APPEND_play
1420
+ $REPLACE_slow
1421
+ $REPLACE_centre
1422
+ $REPLACE_light
1423
+ $REPLACE_trouble
1424
+ $REPLACE_Its
1425
+ $APPEND_became
1426
+ $REPLACE_begin
1427
+ $REPLACE_grade
1428
+ $REPLACE_exams
1429
+ $REPLACE_busy
1430
+ $REPLACE_nbsp
1431
+ $REPLACE_3
1432
+ $REPLACE_control
1433
+ $REPLACE_characters
1434
+ $REPLACE_needs
1435
+ $REPLACE_pictures
1436
+ $APPEND_New
1437
+ $APPEND_test
1438
+ $REPLACE_currently
1439
+ $REPLACE_describe
1440
+ $REPLACE_uncomfortable
1441
+ $REPLACE_affected
1442
+ $REPLACE_songs
1443
+ $REPLACE_helped
1444
+ $REPLACE_head
1445
+ $APPEND_let
1446
+ $REPLACE_costs
1447
+ $REPLACE_five
1448
+ $REPLACE_slowly
1449
+ $REPLACE_1
1450
+ $REPLACE_causes
1451
+ $REPLACE_ashamed
1452
+ $APPEND_coming
1453
+ $APPEND_everyday
1454
+ $REPLACE_products
1455
+ $REPLACE_dishes
1456
+ $REPLACE_least
1457
+ $REPLACE_wore
1458
+ $REPLACE_internet
1459
+ $REPLACE_mentioned
1460
+ $APPEND_began
1461
+ $REPLACE_word
1462
+ $REPLACE_service
1463
+ $REPLACE_workers
1464
+ $REPLACE_continued
1465
+ $REPLACE_sounds
1466
+ $REPLACE_hour
1467
+ $REPLACE_jobs
1468
+ $REPLACE_career
1469
+ $REPLACE_personal
1470
+ $REPLACE_piece
1471
+ $REPLACE_per
1472
+ $REPLACE_Regarding
1473
+ $REPLACE_entrance
1474
+ $REPLACE_improving
1475
+ $APPEND_=
1476
+ $REPLACE_areas
1477
+ $REPLACE_1st
1478
+ $REPLACE_mostly
1479
+ $REPLACE_lessons
1480
+ $REPLACE_drink
1481
+ $REPLACE_hair
1482
+ $APPEND_exactly
1483
+ $REPLACE_e
1484
+ $REPLACE_luck
1485
+ $REPLACE_members
1486
+ $APPEND_means
1487
+ $REPLACE_mistake
1488
+ $REPLACE_somewhere
1489
+ $APPEND_pair
1490
+ $REPLACE_tomatoes
1491
+ $APPEND_definitely
1492
+ $REPLACE_swimming
1493
+ $REPLACE_perform
1494
+ $REPLACE_compared
1495
+ $REPLACE_unfortunately
1496
+ $REPLACE_however
1497
+ $REPLACE_twice
1498
+ $REPLACE_society
1499
+ $APPEND_20
1500
+ $REPLACE_preparing
1501
+ $REPLACE_Two
1502
+ $APPEND_Japan
1503
+ $REPLACE_nobody
1504
+ $REPLACE_environmental
1505
+ $REPLACE_till
1506
+ $REPLACE_fall
1507
+ $REPLACE_spoken
1508
+ $REPLACE_forget
1509
+ $REPLACE_form
1510
+ $APPEND_number
1511
+ $APPEND_watch
1512
+ $APPEND_live
1513
+ $REPLACE_include
1514
+ $REPLACE_related
1515
+ $REPLACE_wait
1516
+ $APPEND_These
1517
+ $REPLACE_European
1518
+ $APPEND_tell
1519
+ $REPLACE_meeting
1520
+ $REPLACE_evening
1521
+ $REPLACE_nowadays
1522
+ $REPLACE_northern
1523
+ $REPLACE_convenience
1524
+ $REPLACE_performed
1525
+ $REPLACE_plans
1526
+ $REPLACE_competition
1527
+ $REPLACE_open
1528
+ $REPLACE_confused
1529
+ $REPLACE_practicing
1530
+ $REPLACE_quality
1531
+ $REPLACE_professional
1532
+ $REPLACE_maintain
1533
+ $REPLACE_pain
1534
+ $REPLACE_familiar
1535
+ $REPLACE_classical
1536
+ $REPLACE_shop
1537
+ $REPLACE_filled
1538
+ $REPLACE_improved
1539
+ $REPLACE_meant
1540
+ $APPEND_listening
1541
+ $REPLACE_ceremony
1542
+ $REPLACE_increasing
1543
+ $REPLACE_drove
1544
+ $APPEND_completely
1545
+ $REPLACE_account
1546
+ $REPLACE_developed
1547
+ $REPLACE_lack
1548
+ $REPLACE_purpose
1549
+ $REPLACE_upon
1550
+ $REPLACE_tasted
1551
+ $REPLACE_crazy
1552
+ $REPLACE_summer
1553
+ $REPLACE_regret
1554
+ $REPLACE_born
1555
+ $REPLACE_rain
1556
+ $REPLACE_weight
1557
+ $REPLACE_required
1558
+ $REPLACE_accept
1559
+ $REPLACE_cut
1560
+ $REPLACE_flew
1561
+ $REPLACE_waste
1562
+ $APPEND_ca
1563
+ $APPEND_trip
1564
+ $REPLACE_Going
1565
+ $REPLACE_excellent
1566
+ $REPLACE_created
1567
+ $REPLACE_reality
1568
+ $REPLACE_cultural
1569
+ $REPLACE_save
1570
+ $REPLACE_programs
1571
+ $REPLACE_painful
1572
+ $REPLACE_Many
1573
+ $REPLACE_dish
1574
+ $REPLACE_teaching
1575
+ $REPLACE_Studying
1576
+ $REPLACE_water
1577
+ $REPLACE_happens
1578
+ $REPLACE_pleased
1579
+ $REPLACE_ordinary
1580
+ $APPEND_practice
1581
+ $REPLACE_train
1582
+ $REPLACE_results
1583
+ $REPLACE_Italian
1584
+ $REPLACE_weak
1585
+ $REPLACE_period
1586
+ $REPLACE_above
1587
+ $REPLACE_hot
1588
+ $REPLACE_Not
1589
+ $REPLACE_feelings
1590
+ $REPLACE_mobile
1591
+ $REPLACE_walk
1592
+ $APPEND_game
1593
+ $REPLACE_impressed
1594
+ $APPEND_same
1595
+ $REPLACE_Germany
1596
+ $REPLACE_girl
1597
+ $REPLACE_closer
1598
+ $REPLACE_communication
1599
+ $REPLACE_worst
1600
+ $APPEND_No
1601
+ $REPLACE_located
1602
+ $REPLACE_phone
1603
+ $REPLACE_sit
1604
+ $REPLACE_Lastly
1605
+ $REPLACE_feels
1606
+ $APPEND_listen
1607
+ $APPEND_done
1608
+ $REPLACE_subtitles
1609
+ $REPLACE_Whenever
1610
+ $REPLACE_potatoes
1611
+ $REPLACE_fluent
1612
+ $REPLACE_amazing
1613
+ $REPLACE_neither
1614
+ $APPEND_With
1615
+ $APPEND_never
1616
+ $REPLACE_stressed
1617
+ $REPLACE_prevent
1618
+ $REPLACE_photos
1619
+ $APPEND_$
1620
+ $REPLACE_non
1621
+ $REPLACE_agree
1622
+ $REPLACE_Moreover
1623
+ $REPLACE_restaurants
1624
+ $REPLACE_types
1625
+ $REPLACE_office
1626
+ $REPLACE_studies
1627
+ $REPLACE_history
1628
+ $REPLACE_calm
1629
+ $REPLACE_walked
1630
+ $REPLACE_modern
1631
+ $APPEND_three
1632
+ $REPLACE_clothing
1633
+ $REPLACE_private
1634
+ $APPEND_little
1635
+ $APPEND_outside
1636
+ $APPEND_OR
1637
+ $REPLACE_simply
1638
+ $REPLACE_particularly
1639
+ $REPLACE_notice
1640
+ $REPLACE_side
1641
+ $APPEND_looked
1642
+ $REPLACE_YouTube
1643
+ $APPEND_students
1644
+ $REPLACE_afterwards
1645
+ $APPEND_reading
1646
+ $REPLACE_graduate
1647
+ $REPLACE_library
1648
+ $REPLACE_gained
1649
+ $REPLACE_bicycle
1650
+ $REPLACE_son
1651
+ $APPEND_compared
1652
+ $REPLACE_events
1653
+ $APPEND_Although
1654
+ $REPLACE_US
1655
+ $REPLACE_properly
1656
+ $APPEND_Maybe
1657
+ $APPEND_Can
1658
+ $APPEND_best
1659
+ $REPLACE_wondered
1660
+ $REPLACE_arrive
1661
+ $APPEND_say
1662
+ $REPLACE_considered
1663
+ $REPLACE_dream
1664
+ $REPLACE_feet
1665
+ $REPLACE_broke
1666
+ $APPEND_From
1667
+ $REPLACE_southern
1668
+ $REPLACE_hometown
1669
+ $APPEND_journal
1670
+ $REPLACE_Everything
1671
+ $APPEND_money
1672
+ $REPLACE_concentrate
1673
+ $REPLACE_stories
1674
+ $REPLACE_teachers
1675
+ $APPEND_happened
1676
+ $REPLACE_New
1677
+ $REPLACE_transport
1678
+ $REPLACE_stronger
1679
+ $REPLACE_heart
1680
+ $REPLACE_staying
1681
+ $REPLACE_honest
1682
+ $REPLACE_sold
1683
+ $APPEND_wrong
1684
+ $APPEND_Or
1685
+ $REPLACE_relax
1686
+ $REPLACE_heavy
1687
+ $REPLACE_*
1688
+ $REPLACE_speaker
1689
+ $REPLACE_limited
1690
+ $APPEND_speaking
1691
+ $APPEND_e
1692
+ $REPLACE_countryside
1693
+ $REPLACE_heat
1694
+ $REPLACE_prepared
1695
+ $REPLACE_truth
1696
+ $REPLACE_books
1697
+ $REPLACE_drank
1698
+ $REPLACE_nuclear
1699
+ $REPLACE_title
1700
+ $REPLACE_6
1701
+ $REPLACE_boring
1702
+ $REPLACE_totally
1703
+ $REPLACE_practiced
1704
+ $REPLACE_therefore
1705
+ $REPLACE_book
1706
+ $REPLACE_regularly
1707
+ $REPLACE_safety
1708
+ $REPLACE_normally
1709
+ $REPLACE_visiting
1710
+ $APPEND_kinds
1711
+ $REPLACE_impressive
1712
+ $REPLACE_final
1713
+ $REPLACE_driving
1714
+ $REPLACE_stuff
1715
+ $REPLACE_guess
1716
+ $REPLACE_avoid
1717
+ $REPLACE_answered
1718
+ $REPLACE_pleasant
1719
+ $APPEND_times
1720
+ $APPEND_without
1721
+ $REPLACE_focused
1722
+ $REPLACE_badly
1723
+ $REPLACE_solve
1724
+ $REPLACE_grow
1725
+ $REPLACE_drive
1726
+ $APPEND_although
1727
+ $REPLACE_news
1728
+ $REPLACE_Afterwards
1729
+ $APPEND_6
1730
+ $REPLACE_Learning
1731
+ $REPLACE_Thanks
1732
+ $REPLACE_flight
1733
+ $REPLACE_building
1734
+ $REPLACE_opened
1735
+ $REPLACE_shocked
1736
+ $REPLACE_volleyball
1737
+ $REPLACE_accepted
1738
+ $APPEND_exam
1739
+ $REPLACE_team
1740
+ $REPLACE_system
1741
+ $APPEND_ones
1742
+ $REPLACE_goals
1743
+ $REPLACE_Before
1744
+ $REPLACE_meat
1745
+ $APPEND_Does
1746
+ $REPLACE_schedule
1747
+ $REPLACE_cream
1748
+ $REPLACE_listened
1749
+ $REPLACE_Why
1750
+ $REPLACE_worth
1751
+ $APPEND_members
1752
+ $REPLACE_strength
1753
+ $REPLACE_works
1754
+ $APPEND_m
1755
+ $REPLACE_surprise
1756
+ $REPLACE_holidays
1757
+ $REPLACE_7
1758
+ $APPEND_written
1759
+ $REPLACE_medicine
1760
+ $REPLACE_contact
1761
+ $REPLACE_position
1762
+ $APPEND_tried
1763
+ $REPLACE_highly
1764
+ $REPLACE_missed
1765
+ $REPLACE_typhoon
1766
+ $REPLACE_celebrate
1767
+ $REPLACE_February
1768
+ $REPLACE_greater
1769
+ $REPLACE_support
1770
+ $REPLACE_allow
1771
+ $REPLACE_appeared
1772
+ $REPLACE_naturally
1773
+ $REPLACE_breakfast
1774
+ $REPLACE_afternoon
1775
+ $REPLACE_dead
1776
+ $REPLACE_proud
1777
+ $REPLACE_stuck
1778
+ $APPEND_half
1779
+ $REPLACE_lyrics
1780
+ $APPEND_based
1781
+ $REPLACE_sing
1782
+ $REPLACE_process
1783
+ $REPLACE_search
1784
+ $REPLACE_sell
1785
+ $REPLACE_learnt
1786
+ $REPLACE_responsibility
1787
+ $REPLACE_field
1788
+ $REPLACE_lifestyle
1789
+ $REPLACE_helpful
1790
+ $REPLACE_Koreans
1791
+ $REPLACE_awake
1792
+ $REPLACE_success
1793
+ $APPEND_living
1794
+ $REPLACE_latest
1795
+ $REPLACE_corrected
1796
+ $REPLACE_communicating
1797
+ $REPLACE_raise
1798
+ $REPLACE_showed
1799
+ $REPLACE_father
1800
+ $REPLACE_marriage
1801
+ $REPLACE_elementary
1802
+ $REPLACE_allows
1803
+ $APPEND_lot
1804
+ $REPLACE_eventually
1805
+ $REPLACE_customer
1806
+ $REPLACE_unusual
1807
+ $REPLACE_advise
1808
+ $REPLACE_letter
1809
+ $REPLACE_clearly
1810
+ $REPLACE_essay
1811
+ $REPLACE_bigger
1812
+ $REPLACE_habit
1813
+ $APPEND_system
1814
+ $REPLACE_ran
1815
+ $APPEND_speakers
1816
+ $REPLACE_bored
1817
+ $REPLACE_whatever
1818
+ $REPLACE_fourth
1819
+ $REPLACE_chosen
1820
+ $REPLACE_room
1821
+ $REPLACE_30
1822
+ $REPLACE_carefully
1823
+ $REPLACE_loss
1824
+ $REPLACE_ingredients
1825
+ $REPLACE_singing
1826
+ $REPLACE_ride
1827
+ $REPLACE_build
1828
+ $REPLACE_cooking
1829
+ $REPLACE_add
1830
+ $REPLACE_mom
1831
+ $REPLACE_sign
1832
+ $REPLACE_chatting
1833
+ $REPLACE_happier
1834
+ $REPLACE_seat
1835
+ $REPLACE_affect
1836
+ $REPLACE_appropriate
1837
+ $REPLACE_named
1838
+ $APPEND_30
1839
+ $REPLACE_female
1840
+ $REPLACE_fashion
1841
+ $REPLACE_attending
1842
+ $REPLACE_Tonight
1843
+ $REPLACE_role
1844
+ $REPLACE_somebody
1845
+ $APPEND_Unfortunately
1846
+ $REPLACE_employees
1847
+ $REPLACE_face
1848
+ $REPLACE_middle
1849
+ $REPLACE_junior
1850
+ $REPLACE_lovely
1851
+ $REPLACE_reduce
1852
+ $REPLACE_positive
1853
+ $REPLACE_concerned
1854
+ $REPLACE_overseas
1855
+ $REPLACE_"
1856
+ $REPLACE_Second
1857
+ $APPEND_Our
1858
+ $APPEND_named
1859
+ $REPLACE_mountain
1860
+ $APPEND_eating
1861
+ $REPLACE_warmer
1862
+ $REPLACE_death
1863
+ $REPLACE_electronic
1864
+ $REPLACE_figure
1865
+ $REPLACE_frequently
1866
+ $REPLACE_pair
1867
+ $REPLACE_Americans
1868
+ $REPLACE_rest
1869
+ $REPLACE_TV
1870
+ $APPEND_themselves
1871
+ $APPEND_however
1872
+ $REPLACE_subject
1873
+ $APPEND_music
1874
+ $REPLACE_dormitory
1875
+ $APPEND_forward
1876
+ $REPLACE_department
1877
+ $REPLACE_pronounce
1878
+ $REPLACE_wake
1879
+ $REPLACE_cook
1880
+ $APPEND_visit
1881
+ $REPLACE_raised
1882
+ $REPLACE_smaller
1883
+ $REPLACE_stressful
1884
+ $APPEND_lately
1885
+ $REPLACE_completed
1886
+ $REPLACE_photography
1887
+ $REPLACE_10
1888
+ $APPEND_saying
1889
+ $REPLACE_dropped
1890
+ $REPLACE_laughed
1891
+ $APPEND_read
1892
+ $REPLACE_complain
1893
+ $REPLACE_Usually
1894
+ $APPEND_felt
1895
+ $REPLACE_Thus
1896
+ $REPLACE_foreigner
1897
+ $REPLACE_theatre
1898
+ $APPEND_website
1899
+ $APPEND_days
1900
+ $REPLACE_slightly
1901
+ $REPLACE_incorrect
1902
+ $REPLACE_frustrated
1903
+ $REPLACE_grandmother
1904
+ $REPLACE_forty
1905
+ $REPLACE_signed
1906
+ $APPEND_book
1907
+ $REPLACE_sore
1908
+ $REPLACE_classmates
1909
+ $REPLACE_equipment
1910
+ $REPLACE_memory
1911
+ $REPLACE_ordered
1912
+ $APPEND_stay
1913
+ $REPLACE_expect
1914
+ $REPLACE_drunk
1915
+ $APPEND_gave
1916
+ $REPLACE_midnight
1917
+ $APPEND_seem
1918
+ $APPEND_cut
1919
+ $REPLACE_address
1920
+ $REPLACE_couple
1921
+ $REPLACE_Compared
1922
+ $REPLACE_friendly
1923
+ $REPLACE_rode
1924
+ $REPLACE_losing
1925
+ $REPLACE_nearly
1926
+ $REPLACE_six
1927
+ $REPLACE_speeches
1928
+ $REPLACE_international
1929
+ $REPLACE_understood
1930
+ $REPLACE_thank
1931
+ $REPLACE_rarely
1932
+ $REPLACE_match
1933
+ $REPLACE_uploaded
1934
+ $REPLACE_Luckily
1935
+ $REPLACE_failed
1936
+ $REPLACE_hamburger
1937
+ $REPLACE_sleeping
1938
+ $REPLACE_tongue
1939
+ $REPLACE_colleague
1940
+ $REPLACE_require
1941
+ $REPLACE_terribly
1942
+ $REPLACE_case
1943
+ $APPEND_traditional
1944
+ $REPLACE_graduation
1945
+ $REPLACE_offer
1946
+ $REPLACE_respond
1947
+ $REPLACE_perfectly
1948
+ $REPLACE_businesses
1949
+ $REPLACE_8
1950
+ $APPEND_s
1951
+ $REPLACE_understanding
1952
+ $REPLACE_hungry
1953
+ $REPLACE_conclusion
1954
+ $REPLACE_homework
1955
+ $REPLACE_design
1956
+ $REPLACE_British
1957
+ $REPLACE_peaceful
1958
+ $REPLACE_forgot
1959
+ $REPLACE_suitable
1960
+ $REPLACE_soccer
1961
+ $REPLACE_tells
1962
+ $REPLACE_third
1963
+ $REPLACE_exactly
1964
+ $REPLACE_term
1965
+ $REPLACE_drinking
1966
+ $REPLACE_searching
1967
+ $REPLACE_hung
1968
+ $REPLACE_air
1969
+ $REPLACE_strongly
1970
+ $APPEND_looking
1971
+ $REPLACE_band
1972
+ $REPLACE_checked
1973
+ $REPLACE_send
1974
+ $REPLACE_Zealand
1975
+ $REPLACE_draw
1976
+ $REPLACE_educational
1977
+ $REPLACE_incident
1978
+ $APPEND_Some
1979
+ $APPEND_friend
1980
+ $APPEND_free
1981
+ $REPLACE_toward
1982
+ $REPLACE_interview
1983
+ $APPEND_>
1984
+ $REPLACE_tough
1985
+ $REPLACE_canceled
1986
+ $REPLACE_memorize
1987
+ $REPLACE_historical
1988
+ $REPLACE_slang
1989
+ $REPLACE_replied
1990
+ $REPLACE_considering
1991
+ $REPLACE_skill
1992
+ $REPLACE_musical
1993
+ $REPLACE_improvement
1994
+ $REPLACE_carry
1995
+ $REPLACE_education
1996
+ $APPEND_great
1997
+ $REPLACE_companies
1998
+ $REPLACE_cool
1999
+ $APPEND_comes
2000
+ $REPLACE_employee
2001
+ $REPLACE_age
2002
+ $APPEND_Yes
2003
+ $REPLACE_Could
2004
+ $REPLACE_relaxed
2005
+ $REPLACE_greatest
2006
+ $REPLACE_total
2007
+ $REPLACE_ready
2008
+ $REPLACE_guy
2009
+ $REPLACE_chocolate
2010
+ $APPEND_tense
2011
+ $REPLACE_earn
2012
+ $REPLACE_topic
2013
+ $REPLACE_beat
2014
+ $REPLACE_date
2015
+ $REPLACE_illnesses
2016
+ $REPLACE_conditioner
2017
+ $APPEND_inside
2018
+ $REPLACE_suggested
2019
+ $REPLACE_drama
2020
+ $REPLACE_pick
2021
+ $REPLACE_starts
2022
+ $REPLACE_manage
2023
+ $APPEND_anyway
2024
+ $REPLACE_Thailand
2025
+ $REPLACE_McDonald
2026
+ $REPLACE_Writing
2027
+ $APPEND_Are
2028
+ $REPLACE_2nd
2029
+ $APPEND_fall
2030
+ $REPLACE_flu
2031
+ $REPLACE_websites
2032
+ $REPLACE_snowy
2033
+ $APPEND_diary
2034
+ $REPLACE_road
2035
+ $REPLACE_professor
2036
+ $REPLACE_exhausted
2037
+ $APPEND_held
2038
+ $REPLACE_colored
2039
+ $REPLACE_sitting
2040
+ $REPLACE_wanna
2041
+ $REPLACE_according
2042
+ $REPLACE_lead
2043
+ $REPLACE_scene
2044
+ $REPLACE_hardly
2045
+ $REPLACE_ticket
2046
+ $REPLACE_remain
2047
+ $REPLACE_worrying
2048
+ $REPLACE_patience
2049
+ $REPLACE_Having
2050
+ $REPLACE_allowed
2051
+ $REPLACE_whilst
2052
+ $REPLACE_entire
2053
+ $REPLACE_promised
2054
+ $REPLACE_photo
2055
+ $REPLACE_motivated
2056
+ $REPLACE_dairy
2057
+ $APPEND_full
2058
+ $REPLACE_points
2059
+ $REPLACE_Soon
2060
+ $REPLACE_messages
2061
+ $APPEND_alone
2062
+ $REPLACE_alive
2063
+ $APPEND_Every
2064
+ $APPEND_entire
2065
+ $REPLACE_programme
2066
+ $REPLACE_fully
2067
+ $REPLACE_cloudy
2068
+ $REPLACE_occur
2069
+ $REPLACE_meaning
2070
+ $APPEND_area
2071
+ $REPLACE_liked
2072
+ $REPLACE_sweet
2073
+ $REPLACE_act
2074
+ $REPLACE_graduated
2075
+ $REPLACE_childhood
2076
+ $APPEND_available
2077
+ $REPLACE_believed
2078
+ $REPLACE_newspaper
2079
+ $REPLACE_enjoying
2080
+ $REPLACE_riding
2081
+ $APPEND_Not
2082
+ $REPLACE_body
2083
+ $REPLACE_beneficial
2084
+ $REPLACE_recognize
2085
+ $APPEND_native
2086
+ $REPLACE_attention
2087
+ $REPLACE_Until
2088
+ $REPLACE_struck
2089
+ $REPLACE_Just
2090
+ $REPLACE_correcting
2091
+ $REPLACE_interest
2092
+ $REPLACE_changing
2093
+ $REPLACE_pollution
2094
+ $APPEND_pieces
2095
+ $REPLACE_According
2096
+ $REPLACE_autumn
2097
+ $APPEND_problem
2098
+ $REPLACE_gym
2099
+ $REPLACE_basic
2100
+ $REPLACE_includes
2101
+ $REPLACE_games
2102
+ $APPEND_seeing
2103
+ $REPLACE_sunny
2104
+ $REPLACE_5
2105
+ $APPEND_learned
2106
+ $REPLACE_stage
2107
+ $REPLACE_touch
2108
+ $REPLACE_discuss
2109
+ $REPLACE_airplane
2110
+ $REPLACE_Has
2111
+ $REPLACE_die
2112
+ $REPLACE_relationships
2113
+ $REPLACE_effects
2114
+ $REPLACE_sat
2115
+ $REPLACE_parts
2116
+ $REPLACE_tsunami
2117
+ $REPLACE_response
2118
+ $REPLACE_teaches
2119
+ $REPLACE_self
2120
+ $REPLACE_thanks
2121
+ $REPLACE_rained
2122
+ $REPLACE_laundry
2123
+ $REPLACE_dependent
2124
+ $APPEND_near
2125
+ $REPLACE_below
2126
+ $REPLACE_custom
2127
+ $REPLACE_inconvenient
2128
+ $REPLACE_relaxing
2129
+ $REPLACE_wedding
2130
+ $REPLACE_challenge
2131
+ $APPEND_set
2132
+ $REPLACE_chatted
2133
+ $APPEND_immediately
2134
+ $REPLACE_attractive
2135
+ $REPLACE_translate
2136
+ $APPEND_Just
2137
+ $APPEND_TV
2138
+ $REPLACE_win
2139
+ $REPLACE_museum
2140
+ $REPLACE_neighborhood
2141
+ $REPLACE_Right
2142
+ $REPLACE_regular
2143
+ $REPLACE_experiences
2144
+ $APPEND_word
2145
+ $APPEND_played
2146
+ $REPLACE_hobby
2147
+ $REPLACE_developing
2148
+ $REPLACE_truly
2149
+ $APPEND_ended
2150
+ $REPLACE_issue
2151
+ $APPEND_correct
2152
+ $REPLACE_impossible
2153
+ $REPLACE_concerning
2154
+ $REPLACE_realise
2155
+ $REPLACE_brings
2156
+ $APPEND_room
2157
+ $REPLACE_advised
2158
+ $REPLACE_workplace
2159
+ $REPLACE_surfing
2160
+ $APPEND_Let
2161
+ $APPEND_daily
2162
+ $REPLACE_stomach
2163
+ $APPEND_night
2164
+ $REPLACE_meal
2165
+ $REPLACE_disadvantages
2166
+ $REPLACE_loudly
2167
+ $REPLACE_prize
2168
+ $REPLACE_besides
2169
+ $APPEND_experience
2170
+ $REPLACE_Despite
2171
+ $REPLACE_4
2172
+ $APPEND_concert
2173
+ $REPLACE_3rd
2174
+ $REPLACE_power
2175
+ $REPLACE_`
2176
+ $APPEND_lots
2177
+ $REPLACE_changes
2178
+ $REPLACE_kindergarten
2179
+ $REPLACE_sweat
2180
+ $REPLACE_ten
2181
+ $APPEND_wo
2182
+ $REPLACE_overcome
2183
+ $REPLACE_effective
2184
+ $REPLACE_terms
2185
+ $REPLACE_shown
2186
+ $REPLACE_chat
2187
+ $APPEND_team
2188
+ $REPLACE_sorry
2189
+ $APPEND_7
2190
+ $REPLACE_station
2191
+ $APPEND_man
2192
+ $REPLACE_produce
2193
+ $REPLACE_technological
2194
+ $REPLACE_differently
2195
+ $REPLACE_transferred
2196
+ $APPEND_told
2197
+ $APPEND_late
2198
+ $REPLACE_laugh
2199
+ $REPLACE_worker
2200
+ $REPLACE_space
2201
+ $REPLACE_introduced
2202
+ $REPLACE_single
2203
+ $REPLACE_cancelled
2204
+ $REPLACE_methods
2205
+ $REPLACE_transportation
2206
+ $REPLACE_Philippines
2207
+ $REPLACE_possibility
2208
+ $REPLACE_tasty
2209
+ $REPLACE_location
2210
+ $REPLACE_male
2211
+ $APPEND_simply
2212
+ $REPLACE_tastes
2213
+ $REPLACE_ease
2214
+ $REPLACE_straight
2215
+ $REPLACE_uses
2216
+ $REPLACE_participate
2217
+ $REPLACE_discover
2218
+ $APPEND_co
2219
+ $REPLACE_details
2220
+ $REPLACE_logged
2221
+ $REPLACE_bright
2222
+ $REPLACE_Once
2223
+ $REPLACE_walking
2224
+ $APPEND_spent
2225
+ $MERGE_HYPHEN
2226
+ $REPLACE_growing
2227
+ $REPLACE_slight
2228
+ $APPEND_current
2229
+ $REPLACE_moving
2230
+ $REPLACE_spring
2231
+ $REPLACE_August
2232
+ $REPLACE_fans
2233
+ $REPLACE_Well
2234
+ $APPEND_nervous
2235
+ $REPLACE_version
2236
+ $REPLACE_upset
2237
+ $REPLACE_stress
2238
+ $REPLACE_appointment
2239
+ $REPLACE_tasks
2240
+ $REPLACE_Being
2241
+ $REPLACE_encouraged
2242
+ $REPLACE_town
2243
+ $REPLACE_eight
2244
+ $REPLACE_mood
2245
+ $REPLACE_forecast
2246
+ $APPEND_lessons
2247
+ $APPEND_finished
2248
+ $REPLACE_increased
2249
+ $REPLACE_blossoms
2250
+ $REPLACE_aware
2251
+ $REPLACE_Besides
2252
+ $REPLACE_Taiwanese
2253
+ $REPLACE_someday
2254
+ $REPLACE_happening
2255
+ $REPLACE_volunteer
2256
+ $REPLACE_fireworks
2257
+ $REPLACE_ideas
2258
+ $REPLACE_curious
2259
+ $REPLACE_responsible
2260
+ $REPLACE_voice
2261
+ $REPLACE_covered
2262
+ $APPEND_ice
2263
+ $REPLACE_rang
2264
+ $REPLACE_items
2265
+ $REPLACE_apart
2266
+ $APPEND_program
2267
+ $REPLACE_bye
2268
+ $REPLACE_Next
2269
+ $REPLACE_complicated
2270
+ $REPLACE_Someone
2271
+ $APPEND_earlier
2272
+ $APPEND_difficult
2273
+ $REPLACE_invited
2274
+ $REPLACE_applied
2275
+ $APPEND_anyone
2276
+ $REPLACE_gaining
2277
+ $REPLACE_cute
2278
+ $REPLACE_line
2279
+ $REPLACE_partner
2280
+ $REPLACE_regretted
2281
+ $REPLACE_clock
2282
+ $APPEND_according
2283
+ $REPLACE_greatly
2284
+ $REPLACE_appear
2285
+ $REPLACE_opposite
2286
+ $REPLACE_Like
2287
+ $REPLACE_patient
2288
+ $REPLACE_spread
2289
+ $REPLACE_dollars
2290
+ $REPLACE_relieve
2291
+ $REPLACE_article
2292
+ $REPLACE_benefits
2293
+ $APPEND_American
2294
+ $REPLACE_Looking
2295
+ $REPLACE_Who
2296
+ $REPLACE_fix
2297
+ $REPLACE_human
2298
+ $REPLACE_technologies
2299
+ $REPLACE_breathe
2300
+ $REPLACE_strict
2301
+ $REPLACE_opinions
2302
+ $APPEND_possibly
2303
+ $REPLACE_appearance
2304
+ $REPLACE_explanation
2305
+ $REPLACE_herself
2306
+ $APPEND_student
2307
+ $REPLACE_plane
2308
+ $REPLACE_hearing
2309
+ $REPLACE_personality
2310
+ $REPLACE_attitude
2311
+ $REPLACE_journey
2312
+ $REPLACE_recover
2313
+ $REPLACE_magazine
2314
+ $REPLACE_disappeared
2315
+ $APPEND_taken
2316
+ $REPLACE_Me
2317
+ $REPLACE_efficiently
2318
+ $REPLACE_strawberries
2319
+ $APPEND_becoming
2320
+ $REPLACE_October
2321
+ $REPLACE_social
2322
+ $REPLACE_suicide
2323
+ $REPLACE_reached
2324
+ $REPLACE_damaged
2325
+ $REPLACE_personalities
2326
+ $REPLACE_valuable
2327
+ $REPLACE_height
2328
+ $REPLACE_Asian
2329
+ $REPLACE_sight
2330
+ $REPLACE_issues
2331
+ $REPLACE_titled
2332
+ $REPLACE_science
2333
+ $REPLACE_cell
2334
+ $REPLACE_amongst
2335
+ $APPEND_movies
2336
+ $REPLACE_June
2337
+ $REPLACE_policies
2338
+ $REPLACE_silent
2339
+ $REPLACE_girls
2340
+ $APPEND_company
2341
+ $APPEND_second
2342
+ $APPEND_ability
2343
+ $APPEND_hope
2344
+ $REPLACE_former
2345
+ $APPEND_GOOD
2346
+ $REPLACE_fashionable
2347
+ $REPLACE_club
2348
+ $APPEND_end
2349
+ $REPLACE_path
2350
+ $APPEND_+
2351
+ $REPLACE_top
2352
+ $APPEND_happy
2353
+ $REPLACE_lay
2354
+ $REPLACE_accident
2355
+ $REPLACE_festival
2356
+ $REPLACE_Later
2357
+ $REPLACE_destroyed
2358
+ $APPEND_plan
2359
+ $APPEND_famous
2360
+ $REPLACE_safely
2361
+ $APPEND_related
2362
+ $REPLACE_suit
2363
+ $REPLACE_stand
2364
+ $REPLACE_contrast
2365
+ $APPEND_period
2366
+ $REPLACE_highest
2367
+ $REPLACE_habits
2368
+ $APPEND_First
2369
+ $REPLACE_January
2370
+ $REPLACE_putting
2371
+ $REPLACE_grew
2372
+ $REPLACE_degrees
2373
+ $REPLACE_latter
2374
+ $REPLACE_extent
2375
+ $REPLACE_lang
2376
+ $REPLACE_episode
2377
+ $REPLACE_physically
2378
+ $APPEND_types
2379
+ $REPLACE_cooked
2380
+ $REPLACE_original
2381
+ $REPLACE_fresh
2382
+ $APPEND_world
2383
+ $REPLACE_l
2384
+ $REPLACE_Year
2385
+ $APPEND_wanted
2386
+ $REPLACE_Wednesday
2387
+ $REPLACE_unique
2388
+ $REPLACE_active
2389
+ $REPLACE_center
2390
+ $APPEND_problems
2391
+ $REPLACE_encourage
2392
+ $APPEND_8
2393
+ $REPLACE_individual
2394
+ $REPLACE_included
2395
+ $REPLACE_suggestions
2396
+ $REPLACE_sea
2397
+ $REPLACE_smoothly
2398
+ $REPLACE_headache
2399
+ $REPLACE_Was
2400
+ $REPLACE_Internet
2401
+ $REPLACE_pleasure
2402
+ $REPLACE_Thursday
2403
+ $REPLACE_board
2404
+ $REPLACE_phrases
2405
+ $REPLACE_built
2406
+ $APPEND_caused
2407
+ $REPLACE_subjects
2408
+ $APPEND_places
2409
+ $REPLACE_grammatical
2410
+ $REPLACE_suggest
2411
+ $APPEND_big
2412
+ $REPLACE_bath
2413
+ $APPEND_train
2414
+ $REPLACE_hesitant
2415
+ $APPEND_seriously
2416
+ $REPLACE_deep
2417
+ $APPEND_children
2418
+ $REPLACE_refreshed
2419
+ $APPEND_Correct
2420
+ $APPEND_yourself
2421
+ $APPEND_THE
2422
+ $REPLACE_reasonable
2423
+ $APPEND_spend
2424
+ $APPEND_skill
2425
+ $REPLACE_obvious
2426
+ $REPLACE_Friday
2427
+ $REPLACE_soup
2428
+ $REPLACE_basketball
2429
+ $REPLACE_Your
2430
+ $REPLACE_drawing
2431
+ $REPLACE_m
2432
+ $APPEND_sentences
2433
+ $REPLACE_english
2434
+ $APPEND_fell
2435
+ $REPLACE_colder
2436
+ $REPLACE_car
2437
+ $APPEND_group
2438
+ $REPLACE_receiving
2439
+ $REPLACE_sun
2440
+ $APPEND_15
2441
+ $APPEND_hot
2442
+ $APPEND_verb
2443
+ $REPLACE_technical
2444
+ $REPLACE_Through
2445
+ $APPEND_buy
2446
+ $REPLACE_route
2447
+ $REPLACE_Vietnamese
2448
+ $REPLACE_grandfather
2449
+ $REPLACE_April
2450
+ $REPLACE_lasts
2451
+ $REPLACE_environmentally
2452
+ $REPLACE_progress
2453
+ $REPLACE_telling
2454
+ $REPLACE_preparation
2455
+ $REPLACE_supermarket
2456
+ $REPLACE_Perhaps
2457
+ $REPLACE_plays
2458
+ $REPLACE_driver
2459
+ $REPLACE_anyway
2460
+ $APPEND_within
2461
+ $REPLACE_Vietnam
2462
+ $REPLACE_green
2463
+ $REPLACE_access
2464
+ $APPEND_t
2465
+ $REPLACE_concerns
2466
+ $REPLACE_laptop
2467
+ $APPEND_eventually
2468
+ $REPLACE_fried
2469
+ $REPLACE_pieces
2470
+ $REPLACE_security
2471
+ $REPLACE_condition
2472
+ $REPLACE_dreams
2473
+ $REPLACE_reminded
2474
+ $REPLACE_December
2475
+ $REPLACE_finding
2476
+ $REPLACE_produced
2477
+ $REPLACE_broken
2478
+ $REPLACE_raising
2479
+ $REPLACE_specific
2480
+ $REPLACE_humid
2481
+ $APPEND_reason
2482
+ $REPLACE_programming
2483
+ $REPLACE_brush
2484
+ $REPLACE_powerful
2485
+ $REPLACE_shape
2486
+ $REPLACE_involves
2487
+ $APPEND_summer
2488
+ $REPLACE_kinds
2489
+ $APPEND_eat
2490
+ $REPLACE_market
2491
+ $REPLACE_Introducing
2492
+ $APPEND_kept
2493
+ $APPEND_information
2494
+ $REPLACE_Filipino
2495
+ $REPLACE_hang
2496
+ $REPLACE_nature
2497
+ $REPLACE_stood
2498
+ $REPLACE_oldest
2499
+ $APPEND_books
2500
+ $APPEND_top
2501
+ $REPLACE_physical
2502
+ $REPLACE_Thai
2503
+ $REPLACE_effort
2504
+ $REPLACE_U
2505
+ $APPEND_phone
2506
+ $REPLACE_author
2507
+ $REPLACE_imagined
2508
+ $REPLACE_request
2509
+ $REPLACE_Australian
2510
+ $REPLACE_didn
2511
+ $REPLACE_Something
2512
+ $REPLACE_translator
2513
+ $REPLACE_text
2514
+ $APPEND_account
2515
+ $REPLACE_protect
2516
+ $REPLACE_resources
2517
+ $REPLACE_Additionally
2518
+ $APPEND_afterwards
2519
+ $APPEND_Should
2520
+ $REPLACE_awhile
2521
+ $REPLACE_meanings
2522
+ $APPEND_pictures
2523
+ $REPLACE_benefit
2524
+ $REPLACE_exist
2525
+ $REPLACE_connection
2526
+ $REPLACE_impression
2527
+ $APPEND_meeting
2528
+ $REPLACE_electrical
2529
+ $APPEND_style
2530
+ $REPLACE_larger
2531
+ $REPLACE_hotter
2532
+ $REPLACE_foot
2533
+ $APPEND_further
2534
+ $REPLACE_described
2535
+ $REPLACE_note
2536
+ $REPLACE_football
2537
+ $APPEND_ourselves
2538
+ $REPLACE_searched
2539
+ $REPLACE_temporary
2540
+ $REPLACE_semester
2541
+ $REPLACE_announced
2542
+ $REPLACE_Suddenly
2543
+ $APPEND_others
2544
+ $APPEND_goes
2545
+ $REPLACE_sort
2546
+ $REPLACE_itself
2547
+ $REPLACE_rich
2548
+ $APPEND_song
2549
+ $REPLACE_memorable
2550
+ $REPLACE_Europe
2551
+ $REPLACE_features
2552
+ $REPLACE_apply
2553
+ $REPLACE_celebrated
2554
+ $REPLACE_delivery
2555
+ $REPLACE_winter
2556
+ $REPLACE_miss
2557
+ $REPLACE_application
2558
+ $APPEND_onwards
2559
+ $REPLACE_population
2560
+ $REPLACE_failure
2561
+ $REPLACE_lazy
2562
+ $REPLACE_scored
2563
+ $REPLACE_November
2564
+ $APPEND_travel
2565
+ $REPLACE_Let
2566
+ $REPLACE_alcoholic
2567
+ $REPLACE_disappointment
2568
+ $REPLACE_severe
2569
+ $REPLACE_effect
2570
+ $REPLACE_speed
2571
+ $APPEND_How
2572
+ $APPEND_sounds
2573
+ $REPLACE_cooler
2574
+ $REPLACE_'cause
2575
+ $APPEND_mean
2576
+ $REPLACE_divided
2577
+ $REPLACE_ha
2578
+ $REPLACE_9
2579
+ $REPLACE_advantages
2580
+ $APPEND_call
2581
+ $REPLACE_21st
2582
+ $REPLACE_fit
2583
+ $REPLACE_lit
2584
+ $REPLACE_directly
2585
+ $REPLACE_videos
2586
+ $REPLACE_pressure
2587
+ $REPLACE_pursue
2588
+ $REPLACE_forgotten
2589
+ $REPLACE_industry
2590
+ $REPLACE_Speaking
2591
+ $APPEND_enjoy
2592
+ $REPLACE_Should
2593
+ $REPLACE_grown
2594
+ $REPLACE_participated
2595
+ $REPLACE_treat
2596
+ $REPLACE_expression
2597
+ $REPLACE_fly
2598
+ $REPLACE_tall
2599
+ $REPLACE_situations
2600
+ $REPLACE_host
2601
+ $REPLACE_visitors
2602
+ $APPEND_hear
2603
+ $REPLACE_Instead
2604
+ $REPLACE_agreed
2605
+ $REPLACE_affects
2606
+ $REPLACE_drew
2607
+ $REPLACE_spending
2608
+ $REPLACE_huge
2609
+ $REPLACE_ill
2610
+ $REPLACE_tradition
2611
+ $REPLACE_argue
2612
+ $REPLACE_turns
2613
+ $REPLACE_ground
2614
+ $REPLACE_sometime
2615
+ $REPLACE_Italy
2616
+ $APPEND_works
2617
+ $REPLACE_likely
2618
+ $REPLACE_Madam
2619
+ $APPEND_questions
2620
+ $REPLACE_ceremonies
2621
+ $APPEND_turn
2622
+ $APPEND_Korean
2623
+ $REPLACE_gradually
2624
+ $REPLACE_financial
2625
+ $REPLACE_involved
2626
+ $REPLACE_throw
2627
+ $REPLACE_advertising
2628
+ $REPLACE_tend
2629
+ $REPLACE_characteristics
2630
+ $APPEND_among
2631
+ $REPLACE_electric
2632
+ $REPLACE_sister
2633
+ $APPEND_car
2634
+ $REPLACE_fantastic
2635
+ $REPLACE_examination
2636
+ $APPEND_city
2637
+ $REPLACE_eaten
2638
+ $REPLACE_film
2639
+ $APPEND_small
2640
+ $REPLACE_players
2641
+ $REPLACE_stores
2642
+ $REPLACE_machine
2643
+ $REPLACE_managed
2644
+ $REPLACE_tour
2645
+ $APPEND_video
2646
+ $REPLACE_journals
2647
+ $REPLACE_guys
2648
+ $APPEND_meet
2649
+ $REPLACE_deeply
2650
+ $REPLACE_floor
2651
+ $REPLACE_keeps
2652
+ $REPLACE_talks
2653
+ $REPLACE_focusing
2654
+ $REPLACE_mysterious
2655
+ $APPEND_less
2656
+ $REPLACE_rice
2657
+ $REPLACE_recovered
2658
+ $REPLACE_injured
2659
+ $REPLACE_poorly
2660
+ $REPLACE_comedy
2661
+ $REPLACE_cigarettes
2662
+ $REPLACE_anime
2663
+ $REPLACE_influence
2664
+ $REPLACE_Eventually
2665
+ $REPLACE_offered
2666
+ $REPLACE_sale
2667
+ $REPLACE_effectively
2668
+ $REPLACE_disappointing
2669
+ $REPLACE_illness
2670
+ $REPLACE_comments
2671
+ $APPEND_talk
2672
+ $REPLACE_contains
2673
+ $APPEND_People
2674
+ $APPEND_power
2675
+ $REPLACE_31st
2676
+ $REPLACE_distance
2677
+ $REPLACE_appears
2678
+ $REPLACE_importance
2679
+ $REPLACE_choosing
2680
+ $APPEND_interesting
2681
+ $REPLACE_snow
2682
+ $APPEND_o
2683
+ $REPLACE_tennis
2684
+ $REPLACE_continues
2685
+ $REPLACE_dress
2686
+ $REPLACE_percent
2687
+ $REPLACE_size
2688
+ $REPLACE_dictionaries
2689
+ $APPEND_seems
2690
+ $REPLACE_fever
2691
+ $APPEND_etc
2692
+ $APPEND_Though
2693
+ $REPLACE_whereas
2694
+ $APPEND_several
2695
+ $APPEND_far
2696
+ $APPEND_classes
2697
+ $APPEND_public
2698
+ $REPLACE_traffic
2699
+ $REPLACE_damage
2700
+ $APPEND_nothing
2701
+ $REPLACE_worthwhile
2702
+ $REPLACE_appreciated
2703
+ $REPLACE_articles
2704
+ $APPEND_begin
2705
+ $APPEND_needed
2706
+ $REPLACE_recommendations
2707
+ $REPLACE_don
2708
+ $REPLACE_buildings
2709
+ $APPEND_four
2710
+ $REPLACE_jealous
2711
+ $REPLACE_seminar
2712
+ $APPEND_gradually
2713
+ $REPLACE_complaints
2714
+ $REPLACE_Nothing
2715
+ $REPLACE_advance
2716
+ $REPLACE_flowers
2717
+ $APPEND_Starting
2718
+ $REPLACE_beyond
2719
+ $REPLACE_advertised
2720
+ $APPEND_mainly
2721
+ $APPEND_possible
2722
+ $REPLACE_suffering
2723
+ $APPEND_12
2724
+ $REPLACE_Hopefully
2725
+ $APPEND_countries
2726
+ $APPEND_similar
2727
+ $REPLACE_quick
2728
+ $REPLACE_general
2729
+ $REPLACE_successfully
2730
+ $REPLACE_dark
2731
+ $REPLACE_unbelievable
2732
+ $REPLACE_causing
2733
+ $REPLACE_13th
2734
+ $REPLACE_unexpected
2735
+ $REPLACE_begins
2736
+ $REPLACE_tea
2737
+ $REPLACE_Sunday
2738
+ $APPEND_somewhere
2739
+ $REPLACE_digital
2740
+ $APPEND_stories
2741
+ $APPEND_idea
2742
+ $APPEND_tired
2743
+ $APPEND_family
2744
+ $REPLACE_animation
2745
+ $REPLACE_shot
2746
+ $REPLACE_Or
2747
+ $APPEND_managed
2748
+ $REPLACE_bus
2749
+ $APPEND_close
2750
+ $REPLACE_disease
2751
+ $REPLACE_desire
2752
+ $REPLACE_carried
2753
+ $REPLACE_disappear
2754
+ $REPLACE_essential
2755
+ $APPEND_news
2756
+ $REPLACE_forced
2757
+ $REPLACE_fault
2758
+ $REPLACE_translation
2759
+ $REPLACE_television
2760
+ $REPLACE_cried
2761
+ $REPLACE_freely
2762
+ $REPLACE_Valentine
2763
+ $REPLACE_somewhat
2764
+ $REPLACE_operation
2765
+ $REPLACE_conversational
2766
+ $APPEND_absolutely
2767
+ $APPEND_properly
2768
+ $REPLACE_sites
2769
+ $REPLACE_allergies
2770
+ $REPLACE_salary
2771
+ $REPLACE_rise
2772
+ $REPLACE_entertainment
2773
+ $REPLACE_kitchen
2774
+ $REPLACE_emotional
2775
+ $REPLACE_McDonalds
2776
+ $REPLACE_extra
2777
+ $APPEND_nearby
2778
+ $REPLACE_mention
2779
+ $APPEND_Here
2780
+ $APPEND_nice
2781
+ $APPEND_college
2782
+ $APPEND_Before
2783
+ $APPEND_form
2784
+ $REPLACE_likes
2785
+ $APPEND_turned
2786
+ $REPLACE_rent
2787
+ $REPLACE_tourists
2788
+ $REPLACE_unknown
2789
+ $REPLACE_actors
2790
+ $APPEND_longer
2791
+ $REPLACE_fill
2792
+ $REPLACE_Nobody
2793
+ $REPLACE_Singapore
2794
+ $REPLACE_helping
2795
+ $REPLACE_exercises
2796
+ $APPEND_real
2797
+ $APPEND_located
2798
+ $APPEND_received
2799
+ $APPEND_gets
2800
+ $APPEND_bad
2801
+ $REPLACE_doubt
2802
+ $REPLACE_sweaty
2803
+ $REPLACE_prefecture
2804
+ $REPLACE_audience
2805
+ $REPLACE_sports
2806
+ $REPLACE_minute
2807
+ $REPLACE_product
2808
+ $REPLACE_buying
2809
+ $REPLACE_exact
2810
+ $REPLACE_temporarily
2811
+ $REPLACE_Avatar
2812
+ $REPLACE_Skype
2813
+ $REPLACE_discussion
2814
+ $REPLACE_item
2815
+ $REPLACE_gon
2816
+ $REPLACE_accessories
2817
+ $REPLACE_incredibly
2818
+ $REPLACE_Where
2819
+ $REPLACE_World
2820
+ $REPLACE_advantage
2821
+ $REPLACE_ridiculous
2822
+ $REPLACE_wherever
2823
+ $REPLACE_shook
2824
+ $REPLACE_global
2825
+ $REPLACE_entitled
2826
+ $REPLACE_Working
2827
+ $APPEND_hours
2828
+ $REPLACE_Starbucks
2829
+ $REPLACE_routine
2830
+ $REPLACE_flavored
2831
+ $APPEND_item
2832
+ $REPLACE_techniques
2833
+ $REPLACE_creates
2834
+ $REPLACE_peace
2835
+ $REPLACE_annoyed
2836
+ $REPLACE_rate
2837
+ $REPLACE_September
2838
+ $REPLACE_Russian
2839
+ $REPLACE_assistant
2840
+ $REPLACE_plenty
2841
+ $REPLACE_local
2842
+ $APPEND_store
2843
+ $REPLACE_sooner
2844
+ $REPLACE_overslept
2845
+ $REPLACE_Everybody
2846
+ $REPLACE_selling
2847
+ $REPLACE_negative
2848
+ $REPLACE_setting
2849
+ $APPEND_helps
2850
+ $REPLACE_lecture
2851
+ $APPEND_happen
2852
+ $REPLACE_survive
2853
+ $REPLACE_art
2854
+ $APPEND_certainly
2855
+ $APPEND_fully
2856
+ $APPEND_above
2857
+ $REPLACE_speaks
2858
+ $REPLACE_asking
2859
+ $REPLACE_economical
2860
+ $REPLACE_salaries
2861
+ $APPEND_clearly
2862
+ $REPLACE_mail
2863
+ $REPLACE_holding
2864
+ $REPLACE_organise
2865
+ $REPLACE_efficient
2866
+ $APPEND_name
2867
+ $REPLACE_constantly
2868
+ $REPLACE_overtime
2869
+ $REPLACE_grandma
2870
+ $REPLACE_returning
2871
+ $REPLACE_laziness
2872
+ $REPLACE_importantly
2873
+ $APPEND_true
2874
+ $APPEND_series
2875
+ $REPLACE_converse
2876
+ $APPEND_session
2877
+ $REPLACE_sugar
2878
+ $APPEND_Currently
2879
+ $REPLACE_mentally
2880
+ $APPEND_starts
2881
+ $REPLACE_theater
2882
+ $APPEND_tonight
2883
+ $REPLACE_succeeded
2884
+ $REPLACE_awful
2885
+ $REPLACE_political
2886
+ $APPEND_important
2887
+ $REPLACE_log
2888
+ $REPLACE_awesome
2889
+ $REPLACE_00
2890
+ $APPEND_Did
2891
+ $REPLACE_announcement
2892
+ $REPLACE_addicted
2893
+ $REPLACE_disaster
2894
+ $REPLACE_page
2895
+ $REPLACE_blossom
2896
+ $REPLACE_stars
2897
+ $REPLACE_presentation
2898
+ $REPLACE_Nevertheless
2899
+ $APPEND_talking
2900
+ $APPEND_Instead
2901
+ $APPEND_Chinese
2902
+ $REPLACE_Festival
2903
+ $REPLACE_reasonably
2904
+ $APPEND_someday
2905
+ $REPLACE_expressions
2906
+ $APPEND_Lately
2907
+ $REPLACE_average
2908
+ $APPEND_season
2909
+ $REPLACE_cover
2910
+ $REPLACE_manager
2911
+ $REPLACE_wife
2912
+ $REPLACE_12
2913
+ $REPLACE_possibly
2914
+ $REPLACE_approaching
2915
+ $REPLACE_keeping
2916
+ $REPLACE_motorcycle
2917
+ $REPLACE_happily
2918
+ $APPEND_items
2919
+ $REPLACE_cherry
2920
+ $REPLACE_shall
2921
+ $REPLACE_determined
2922
+ $REPLACE_cheerful
2923
+ $REPLACE_ahead
2924
+ $REPLACE_solution
2925
+ $REPLACE_patients
2926
+ $REPLACE_unforgettable
2927
+ $REPLACE_decreasing
2928
+ $REPLACE_laid
2929
+ $REPLACE_arrange
2930
+ $REPLACE_content
2931
+ $REPLACE_starring
2932
+ $REPLACE_opening
2933
+ $REPLACE_continuing
2934
+ $REPLACE_bloom
2935
+ $REPLACE_concern
2936
+ $APPEND_towards
2937
+ $REPLACE_extreme
2938
+ $APPEND_Will
2939
+ $REPLACE_tests
2940
+ $REPLACE_replace
2941
+ $APPEND_mostly
2942
+ $REPLACE_inform
2943
+ $REPLACE_lying
2944
+ $REPLACE_barely
2945
+ $REPLACE_unpleasant
2946
+ $REPLACE_brand
2947
+ $REPLACE_turning
2948
+ $REPLACE_added
2949
+ $APPEND_age
2950
+ $REPLACE_wide
2951
+ $REPLACE_passing
2952
+ $REPLACE_production
2953
+ $REPLACE_23rd
2954
+ $REPLACE_ramen
2955
+ $REPLACE_occasionally
2956
+ $REPLACE_borrowed
2957
+ $REPLACE_comparison
2958
+ $REPLACE_curry
2959
+ $REPLACE_upcoming
2960
+ $REPLACE_begun
2961
+ $APPEND_mistakes
2962
+ $REPLACE_mouth
2963
+ $REPLACE_scenes
2964
+ $REPLACE_accidentally
2965
+ $REPLACE_gases
2966
+ $REPLACE_blog
2967
+ $REPLACE_Disney
2968
+ $APPEND_straight
2969
+ $REPLACE_topics
2970
+ $REPLACE_register
2971
+ $REPLACE_color
2972
+ $REPLACE_explained
2973
+ $APPEND_shopping
2974
+ $REPLACE_Taiwan
2975
+ $REPLACE_sales
2976
+ $REPLACE_dictionary
2977
+ $REPLACE_inexpensive
2978
+ $APPEND_directly
2979
+ $REPLACE_comfortably
2980
+ $REPLACE_suprised
2981
+ $APPEND_AM
2982
+ $REPLACE_dance
2983
+ $REPLACE_eager
2984
+ $REPLACE_envious
2985
+ $REPLACE_lie
2986
+ $REPLACE_Apart
2987
+ $REPLACE_closed
2988
+ $REPLACE_brother
2989
+ $REPLACE_hopefully
2990
+ $APPEND_caught
2991
+ $REPLACE_background
2992
+ $REPLACE_conditions
2993
+ $REPLACE_attracted
2994
+ $REPLACE_aim
2995
+ $REPLACE_twenty
2996
+ $REPLACE_Each
2997
+ $APPEND_air
2998
+ $REPLACE_technique
2999
+ $REPLACE_umbrella
3000
+ $REPLACE_Buddhist
3001
+ $REPLACE_yen
3002
+ $APPEND_clothes
3003
+ $APPEND_open
3004
+ $REPLACE_originally
3005
+ $APPEND_OK
3006
+ $REPLACE_complex
3007
+ $APPEND_upon
3008
+ $REPLACE_<
3009
+ $REPLACE_showing
3010
+ $REPLACE_weaknesses
3011
+ $REPLACE_OR
3012
+ $REPLACE_potato
3013
+ $APPEND_photo
3014
+ $REPLACE_flavor
3015
+ $REPLACE_Tuesday
3016
+ $REPLACE_organized
3017
+ $REPLACE_preferred
3018
+ $REPLACE_state
3019
+ $APPEND_normally
3020
+ $APPEND_areas
3021
+ $REPLACE_arranged
3022
+ $REPLACE_embarrassing
3023
+ $REPLACE_positively
3024
+ $REPLACE_coworkers
3025
+ $APPEND_host
3026
+ $REPLACE_influenced
3027
+ $REPLACE_respect
3028
+ $REPLACE_separate
3029
+ $REPLACE_comedies
3030
+ $APPEND_listened
3031
+ $REPLACE_report
3032
+ $REPLACE_Using
3033
+ $REPLACE_performing
3034
+ $REPLACE_construction
3035
+ $REPLACE_trees
3036
+ $REPLACE_conversations
3037
+ $REPLACE_western
3038
+ $APPEND_drinking
3039
+ $APPEND_Next
3040
+ $APPEND_points
3041
+ $APPEND_young
3042
+ $REPLACE_provides
3043
+ $REPLACE_motivation
3044
+ $REPLACE_muscle
3045
+ $REPLACE_diet
3046
+ $APPEND_fluently
3047
+ $REPLACE_Such
3048
+ $REPLACE_task
3049
+ $REPLACE_sounded
3050
+ $REPLACE_schools
3051
+ $REPLACE_park
3052
+ $APPEND_various
3053
+ $APPEND_five
3054
+ $REPLACE_unhappy
3055
+ $REPLACE_Due
3056
+ $REPLACE_alright
3057
+ $REPLACE_campus
3058
+ $APPEND_foreign
3059
+ $APPEND_studies
3060
+ $REPLACE_handle
3061
+ $REPLACE_continuous
3062
+ $REPLACE_drug
3063
+ $REPLACE_expenses
3064
+ $REPLACE_aged
3065
+ $REPLACE_surrounded
3066
+ $REPLACE_thus
3067
+ $REPLACE_noise
3068
+ $REPLACE_healthier
3069
+ $REPLACE_potential
3070
+ $REPLACE_Potter
3071
+ $APPEND_self
3072
+ $APPEND_picture
3073
+ $REPLACE_None
3074
+ $REPLACE_sudden
3075
+ $REPLACE_lifestyles
3076
+ $APPEND_given
3077
+ $REPLACE_aspects
3078
+ $REPLACE_specifically
3079
+ $REPLACE_destination
3080
+ $REPLACE_followed
3081
+ $REPLACE_Other
3082
+ $REPLACE_horrible
3083
+ $REPLACE_radiation
3084
+ $REPLACE_essays
3085
+ $REPLACE_apologize
3086
+ $REPLACE_placed
3087
+ $APPEND_future
3088
+ $REPLACE_awkward
3089
+ $REPLACE_thirty
3090
+ $REPLACE_kids
3091
+ $REPLACE_responsibilities
3092
+ $REPLACE_Generally
3093
+ $REPLACE_relatives
3094
+ $APPEND_More
3095
+ $REPLACE_safer
3096
+ $REPLACE_hoping
3097
+ $REPLACE_heroes
3098
+ $REPLACE_psychological
3099
+ $REPLACE_posted
3100
+ $REPLACE_treatment
3101
+ $REPLACE_glasses
3102
+ $REPLACE_souvenirs
3103
+ $REPLACE_entertaining
3104
+ $APPEND_Tomorrow
3105
+ $APPEND_activities
3106
+ $REPLACE_serve
3107
+ $REPLACE_actions
3108
+ $APPEND_teacher
3109
+ $REPLACE_o
3110
+ $REPLACE_forever
3111
+ $REPLACE_colour
3112
+ $APPEND_change
3113
+ $REPLACE_plants
3114
+ $REPLACE_fulfill
3115
+ $REPLACE_animated
3116
+ $REPLACE_textbook
3117
+ $REPLACE_mathematics
3118
+ $REPLACE_figured
3119
+ $APPEND_running
3120
+ $REPLACE_computers
3121
+ $REPLACE_Singaporean
3122
+ $REPLACE_imagination
3123
+ $REPLACE_runny
3124
+ $REPLACE_bill
3125
+ $REPLACE_meals
3126
+ $APPEND_perhaps
3127
+ $REPLACE_stupid
3128
+ $REPLACE_worries
3129
+ $APPEND_bought
3130
+ $APPEND_article
3131
+ $REPLACE_wasted
3132
+ $REPLACE_falling
3133
+ $REPLACE_necessity
3134
+ $APPEND_common
3135
+ $REPLACE_Tokyo
3136
+ $REPLACE_fascinating
3137
+ $REPLACE_Only
3138
+ $REPLACE_tense
3139
+ $APPEND_Ever
3140
+ $REPLACE_behaviour
3141
+ $REPLACE_magazines
3142
+ $REPLACE_cultures
3143
+ $REPLACE_rid
3144
+ $REPLACE_choices
3145
+ $REPLACE_track
3146
+ $REPLACE_complaint
3147
+ $REPLACE_white
3148
+ $REPLACE_approximately
3149
+ $REPLACE_largest
3150
+ $REPLACE_smart
3151
+ $APPEND_finish
3152
+ $REPLACE_acting
3153
+ $REPLACE_foolish
3154
+ $REPLACE_prices
3155
+ $REPLACE_r
3156
+ $REPLACE_swim
3157
+ $REPLACE_review
3158
+ $REPLACE_shameful
3159
+ $APPEND_Anyway
3160
+ $REPLACE_senior
3161
+ $REPLACE_proper
3162
+ $REPLACE_provided
3163
+ $REPLACE_troublesome
3164
+ $APPEND_known
3165
+ $REPLACE_homes
3166
+ $REPLACE_spirit
3167
+ $REPLACE_ga
3168
+ $REPLACE_Michael
3169
+ $APPEND_wish
3170
+ $APPEND_performance
3171
+ $REPLACE_typical
3172
+ $APPEND_Well
3173
+ $REPLACE_previously
3174
+ $REPLACE_fail
3175
+ $APPEND_itself
3176
+ $REPLACE_sung
3177
+ $REPLACE_citizens
3178
+ $REPLACE_rapidly
3179
+ $REPLACE_stadium
3180
+ $APPEND_page
3181
+ $APPEND_unfortunately
3182
+ $REPLACE_surprising
3183
+ $REPLACE_unfamiliar
3184
+ $REPLACE_repair
3185
+ $REPLACE_escape
3186
+ $REPLACE_actor
3187
+ $REPLACE_Almost
3188
+ $REPLACE_shoes
3189
+ $REPLACE_disagree
3190
+ $REPLACE_co
3191
+ $REPLACE_attempt
3192
+ $REPLACE_instance
3193
+ $REPLACE_lasted
3194
+ $APPEND_connect
3195
+ $APPEND_San
3196
+ $REPLACE_hairstyle
3197
+ $REPLACE_internship
3198
+ $REPLACE_Bye
3199
+ $REPLACE_tourist
3200
+ $REPLACE_5th
3201
+ $REPLACE_cousin
3202
+ $REPLACE_beside
3203
+ $REPLACE_facilities
3204
+ $REPLACE_yummy
3205
+ $REPLACE_prove
3206
+ $APPEND_certain
3207
+ $REPLACE_beginners
3208
+ $REPLACE_worn
3209
+ $REPLACE_wont
3210
+ $APPEND_wearing
3211
+ $REPLACE_improves
3212
+ $REPLACE_electronics
3213
+ $REPLACE_realistic
3214
+ $REPLACE_annoying
3215
+ $REPLACE_dreamed
3216
+ $APPEND_results
3217
+ $REPLACE_certainly
3218
+ $REPLACE_classroom
3219
+ $REPLACE_argument
3220
+ $REPLACE_warmth
3221
+ $REPLACE_achieved
3222
+ $APPEND_meaning
3223
+ $REPLACE_photographs
3224
+ $REPLACE_animals
3225
+ $REPLACE_community
3226
+ $REPLACE_interests
3227
+ $REPLACE_medium
3228
+ $REPLACE_beer
3229
+ $REPLACE_engineer
3230
+ $REPLACE_Good
3231
+ $APPEND_looks
3232
+ $REPLACE_beauty
3233
+ $APPEND_speaker
3234
+ $REPLACE_connect
3235
+ $APPEND_driving
3236
+ $APPEND_Have
3237
+ $REPLACE_reminds
3238
+ $REPLACE_apologized
3239
+ $REPLACE_obtain
3240
+ $REPLACE_Probably
3241
+ $REPLACE_strangers
3242
+ $APPEND_bring
3243
+ $REPLACE_smile
3244
+ $REPLACE_exhibition
3245
+ $REPLACE_pot
3246
+ $REPLACE_encounter
3247
+ $APPEND_degrees
3248
+ $REPLACE_lights
3249
+ $APPEND_bus
3250
+ $REPLACE_movement
3251
+ $REPLACE_cancel
3252
+ $REPLACE_y
3253
+ $REPLACE_black
3254
+ $REPLACE_concentration
3255
+ $REPLACE_graduating
3256
+ $REPLACE_usage
3257
+ $REPLACE_handsome
3258
+ $APPEND_ride
3259
+ $REPLACE_degree
3260
+ $APPEND_point
3261
+ $APPEND_conversation
3262
+ $REPLACE_menu
3263
+ $REPLACE_assistance
3264
+ $REPLACE_Summer
3265
+ $APPEND_behind
3266
+ $REPLACE_police
3267
+ $REPLACE_15th
3268
+ $REPLACE_separated
3269
+ $REPLACE_regardless
3270
+ $REPLACE_significant
3271
+ $REPLACE_transfer
3272
+ $REPLACE_religious
3273
+ $REPLACE_tempura
3274
+ $REPLACE_academic
3275
+ $REPLACE_otherwise
3276
+ $REPLACE_useless
3277
+ $REPLACE_celebrities
3278
+ $REPLACE_dislike
3279
+ $REPLACE_11
3280
+ $APPEND_sound
3281
+ $APPEND_^
3282
+ $REPLACE_replaced
3283
+ $REPLACE_sushi
3284
+ $REPLACE_wind
3285
+ $REPLACE_web
3286
+ $REPLACE_Britain
3287
+ $REPLACE_complained
3288
+ $REPLACE_model
3289
+ $REPLACE_de
3290
+ $REPLACE_depends
3291
+ $REPLACE_pm
3292
+ $REPLACE_cafe
3293
+ $REPLACE_congratulate
3294
+ $REPLACE_ending
3295
+ $APPEND_building
3296
+ $REPLACE_presented
3297
+ $REPLACE_shut
3298
+ $APPEND_restaurant
3299
+ $APPEND_March
3300
+ $REPLACE_freedom
3301
+ $APPEND_story
3302
+ $REPLACE_creating
3303
+ $REPLACE_concept
3304
+ $REPLACE_conduct
3305
+ $REPLACE_France
3306
+ $REPLACE_paper
3307
+ $REPLACE_offers
3308
+ $REPLACE_Oh
3309
+ $REPLACE_occured
3310
+ $REPLACE_touched
3311
+ $REPLACE_travelled
3312
+ $APPEND_Thus
3313
+ $REPLACE_sickness
3314
+ $REPLACE_neighbor
3315
+ $REPLACE_paying
3316
+ $REPLACE_national
3317
+ $APPEND_needs
3318
+ $REPLACE_climb
3319
+ $REPLACE_Take
3320
+ $APPEND_Everyone
3321
+ $REPLACE_aftershocks
3322
+ $REPLACE_committed
3323
+ $REPLACE_textbooks
3324
+ $REPLACE_waited
3325
+ $REPLACE_round
3326
+ $REPLACE_Okay
3327
+ $REPLACE_eldest
3328
+ $APPEND_allow
3329
+ $REPLACE_Spanish
3330
+ $REPLACE_Spring
3331
+ $REPLACE_absence
3332
+ $REPLACE_actresses
3333
+ $REPLACE_majority
3334
+ $REPLACE_growth
3335
+ $APPEND_requires
3336
+ $REPLACE_About
3337
+ $REPLACE_intend
3338
+ $APPEND_deep
3339
+ $REPLACE_enjoyment
3340
+ $APPEND_raining
3341
+ $REPLACE_Am
3342
+ $REPLACE_eyes
3343
+ $REPLACE_Afterward
3344
+ $REPLACE_drugs
3345
+ $REPLACE_cram
3346
+ $REPLACE_dancing
3347
+ $APPEND_M
3348
+ $REPLACE_nationalities
3349
+ $REPLACE_throat
3350
+ $APPEND_shows
3351
+ $REPLACE_Facebook
3352
+ $APPEND_TO
3353
+ $REPLACE_brilliant
3354
+ $REPLACE_drop
3355
+ $REPLACE_owner
3356
+ $APPEND_side
3357
+ $REPLACE_struggling
3358
+ $REPLACE_100
3359
+ $REPLACE_surely
3360
+ $REPLACE_devices
3361
+ $APPEND_takes
3362
+ $REPLACE_TO
3363
+ $REPLACE_neighbors
3364
+ $REPLACE_youth
3365
+ $REPLACE_connected
3366
+ $REPLACE_passes
3367
+ $REPLACE_kilometers
3368
+ $APPEND_fun
3369
+ $REPLACE_viewing
3370
+ $REPLACE_behavior
3371
+ $REPLACE_chores
3372
+ $REPLACE_mystery
3373
+ $APPEND_shall
3374
+ $APPEND_taught
3375
+ $REPLACE_display
3376
+ $REPLACE_ensure
3377
+ $APPEND_online
3378
+ $REPLACE_assignment
3379
+ $REPLACE_compare
3380
+ $APPEND_Still
3381
+ $REPLACE_conditioning
3382
+ $REPLACE_suffered
3383
+ $REPLACE_haven't
3384
+ $REPLACE_muscles
3385
+ $APPEND_grammar
3386
+ $APPEND_Two
3387
+ $REPLACE_chemistry
3388
+ $REPLACE_consideration
3389
+ $REPLACE_smoking
3390
+ $REPLACE_Harry
3391
+ $APPEND_seemed
3392
+ $REPLACE_marry
3393
+ $REPLACE_hunting
3394
+ $REPLACE_recommendation
3395
+ $APPEND_previously
3396
+ $REPLACE_dramas
3397
+ $REPLACE_passionate
3398
+ $APPEND_ways
3399
+ $REPLACE_hurts
3400
+ $APPEND_sense
3401
+ $APPEND_drink
3402
+ $REPLACE_refrigerator
3403
+ $REPLACE_organised
3404
+ $REPLACE_cleaning
3405
+ $REPLACE_courage
3406
+ $APPEND_arrived
3407
+ $REPLACE_housework
3408
+ $REPLACE_charge
3409
+ $REPLACE_violin
3410
+ $APPEND_offer
3411
+ $APPEND_water
3412
+ $REPLACE_injuries
3413
+ $REPLACE_perspective
3414
+ $REPLACE_hoped
3415
+ $REPLACE_challenging
3416
+ $REPLACE_THE
3417
+ $APPEND_regarding
3418
+ $APPEND_Their
3419
+ $REPLACE_upload
3420
+ $REPLACE_luxurious
3421
+ $REPLACE_unnecessary
3422
+ $APPEND_harder
3423
+ $APPEND_twice
3424
+ $REPLACE_rules
3425
+ $APPEND_rest
3426
+ $REPLACE_afford
3427
+ $APPEND_says
3428
+ $REPLACE_project
3429
+ $REPLACE_bear
3430
+ $REPLACE_mainly
3431
+ $REPLACE_Yet
3432
+ $REPLACE_diligently
3433
+ $REPLACE_led
3434
+ $REPLACE_architecture
3435
+ $REPLACE_accurate
3436
+ $REPLACE_mindset
3437
+ $REPLACE_fought
3438
+ $REPLACE_mid
3439
+ $REPLACE_vocalist
3440
+ $REPLACE_flexible
3441
+ $APPEND_girl
3442
+ $REPLACE_tiring
3443
+ $REPLACE_broadcast
3444
+ $REPLACE_July
3445
+ $APPEND_version
3446
+ $REPLACE_seven
3447
+ $REPLACE_Nice
3448
+ $REPLACE_alarm
3449
+ $APPEND_dish
3450
+ $REPLACE_jewelry
3451
+ $REPLACE_studing
3452
+ $REPLACE_cuisine
3453
+ $APPEND_According
3454
+ $APPEND_delicious
3455
+ $REPLACE_ladies
3456
+ $REPLACE_hospital
3457
+ $REPLACE_sweating
3458
+ $REPLACE_obviously
3459
+ $APPEND_interested
3460
+ $REPLACE_College
3461
+ $REPLACE_Autumn
3462
+ $REPLACE_Hawaii
3463
+ $REPLACE_scheduled
3464
+ $REPLACE_crying
3465
+ $REPLACE_climbing
3466
+ $APPEND_giving
3467
+ $REPLACE_smoke
3468
+ $APPEND_9
3469
+ $REPLACE_limit
3470
+ $REPLACE_flying
3471
+ $APPEND_knowledge
3472
+ $REPLACE_4th
3473
+ $REPLACE_Francisco
3474
+ $REPLACE_tournament
3475
+ $APPEND_sleep
3476
+ $REPLACE_participants
3477
+ $REPLACE_snacks
3478
+ $REPLACE_energetic
3479
+ $REPLACE_allergic
3480
+ $APPEND_fast
3481
+ $APPEND_score
3482
+ $REPLACE_clearer
3483
+ $APPEND_source
3484
+ $REPLACE_lottery
3485
+ $APPEND_service
3486
+ $REPLACE_acquire
3487
+ $REPLACE_arrival
3488
+ $APPEND_situation
3489
+ $REPLACE_polite
3490
+ $REPLACE_laughter
3491
+ $REPLACE_Thirdly
3492
+ $APPEND_particular
3493
+ $REPLACE_standard
3494
+ $REPLACE_suppose
3495
+ $REPLACE_emails
3496
+ $REPLACE_Disneyland
3497
+ $REPLACE_nine
3498
+ $REPLACE_rising
3499
+ $REPLACE_cartoon
3500
+ $REPLACE_refreshing
3501
+ $REPLACE_factories
3502
+ $REPLACE_20th
3503
+ $APPEND_single
3504
+ $APPEND_sometime
3505
+ $REPLACE_cleaner
3506
+ $APPEND_Such
3507
+ $APPEND_particularly
3508
+ $REPLACE_fruit
3509
+ $REPLACE_beforehand
3510
+ $REPLACE_11th
3511
+ $REPLACE_Halloween
3512
+ $REPLACE_attract
3513
+ $APPEND_forms
3514
+ $APPEND_under
3515
+ $REPLACE_guests
3516
+ $REPLACE_classmate
3517
+ $APPEND_Yours
3518
+ $REPLACE_learners
3519
+ $REPLACE_red
3520
+ $REPLACE_critical
3521
+ $REPLACE_pitiful
3522
+ $REPLACE_groups
3523
+ $REPLACE_grandparents
3524
+ $REPLACE_primary
3525
+ $REPLACE_Both
3526
+ $REPLACE_aside
3527
+ $REPLACE_youngest
3528
+ $REPLACE_practising
3529
+ $APPEND_Am
3530
+ $REPLACE_summary
3531
+ $REPLACE_telephone
3532
+ $APPEND_nowadays
3533
+ $REPLACE_20
3534
+ $REPLACE_tons
3535
+ $REPLACE_Listening
3536
+ $REPLACE_guilt
3537
+ $REPLACE_occurs
3538
+ $REPLACE_Anyways
3539
+ $REPLACE_rush
3540
+ $REPLACE_intermediate
3541
+ $REPLACE_theirs
3542
+ $APPEND_business
3543
+ $REPLACE_neighboring
3544
+ $REPLACE_independence
3545
+ $APPEND_cost
3546
+ $APPEND_country
3547
+ $REPLACE_beef
3548
+ $REPLACE_formal
3549
+ $APPEND_worked
3550
+ $REPLACE_Hence
3551
+ $REPLACE_Mother
3552
+ $REPLACE_picked
3553
+ $REPLACE_star
3554
+ $REPLACE_fishing
3555
+ $REPLACE_planted
3556
+ $REPLACE_fear
3557
+ $APPEND_100
3558
+ $APPEND_onto
3559
+ $REPLACE_choir
3560
+ $REPLACE_spot
3561
+ $REPLACE_correction
3562
+ $REPLACE_suits
3563
+ $REPLACE_Day
3564
+ $REPLACE_supported
3565
+ $REPLACE_comfort
3566
+ $REPLACE_newspapers
3567
+ $REPLACE_friendship
3568
+ $REPLACE_May
3569
+ $REPLACE_freezing
3570
+ $REPLACE_discussed
3571
+ $APPEND_{
3572
+ $APPEND_whom
3573
+ $REPLACE_trust
3574
+ $REPLACE_industries
3575
+ $REPLACE_decisions
3576
+ $APPEND_poor
3577
+ $APPEND_correctly
3578
+ $REPLACE_hundred
3579
+ $REPLACE_recipe
3580
+ $REPLACE_competitive
3581
+ $REPLACE_burden
3582
+ $REPLACE_abandoned
3583
+ $APPEND_walking
3584
+ $REPLACE_individuals
3585
+ $APPEND_travelling
3586
+ $REPLACE_theme
3587
+ $REPLACE_runs
3588
+ $REPLACE_threw
3589
+ $REPLACE_rock
3590
+ $APPEND_thinking
3591
+ $REPLACE_Taking
3592
+ $REPLACE_ideal
3593
+ $REPLACE_practical
3594
+ $APPEND_re
3595
+ $APPEND_station
3596
+ $REPLACE_collect
3597
+ $REPLACE_perhaps
3598
+ $REPLACE_advanced
3599
+ $REPLACE_humans
3600
+ $APPEND_realized
3601
+ $REPLACE_remove
3602
+ $REPLACE_notebook
3603
+ $REPLACE_continuously
3604
+ $REPLACE_beach
3605
+ $REPLACE_ends
3606
+ $REPLACE_secret
3607
+ $REPLACE_skilled
3608
+ $REPLACE_jump
3609
+ $REPLACE_episodes
3610
+ $REPLACE_cup
3611
+ $REPLACE_consists
3612
+ $REPLACE_release
3613
+ $REPLACE_notes
3614
+ $REPLACE_22nd
3615
+ $REPLACE_fallen
3616
+ $APPEND_Which
3617
+ $APPEND_saw
3618
+ $REPLACE_libraries
3619
+ $REPLACE_consecutive
3620
+ $REPLACE_March
3621
+ $REPLACE_closely
3622
+ $REPLACE_century
3623
+ $APPEND_per
3624
+ $REPLACE_circumstances
3625
+ $REPLACE_whoever
3626
+ $REPLACE_rented
3627
+ $REPLACE_aging
3628
+ $APPEND_regularly
3629
+ $REPLACE_cycling
3630
+ $REPLACE_depression
3631
+ $REPLACE_row
3632
+ $APPEND_constantly
3633
+ $APPEND_feelings
3634
+ $REPLACE_Angeles
3635
+ $REPLACE_talented
3636
+ $REPLACE_00am
3637
+ $REPLACE_shower
3638
+ $REPLACE_functions
3639
+ $APPEND_love
3640
+ $APPEND_believe
3641
+ $REPLACE_basis
3642
+ $REPLACE_follows
3643
+ $APPEND_hardly
3644
+ $REPLACE_teenager
3645
+ $REPLACE_diverse
3646
+ $REPLACE_Sir
3647
+ $REPLACE_decrease
3648
+ $REPLACE_goodbye
3649
+ $REPLACE_behave
3650
+ $APPEND_everywhere
3651
+ $REPLACE_users
3652
+ $REPLACE_analysis
3653
+ $REPLACE_translating
3654
+ $REPLACE_relaxation
3655
+ $REPLACE_unexpectedly
3656
+ $REPLACE_Russia
3657
+ $REPLACE_championship
3658
+ $APPEND_lives
3659
+ $REPLACE_hate
3660
+ $APPEND_somehow
3661
+ $REPLACE_joining
3662
+ $APPEND_stop
3663
+ $APPEND_enjoyed
3664
+ $APPEND_cup
3665
+ $REPLACE_flies
3666
+ $REPLACE_Talking
3667
+ $REPLACE_painting
3668
+ $REPLACE_letters
3669
+ $REPLACE_master
3670
+ $REPLACE_stated
3671
+ $REPLACE_aggressive
3672
+ $REPLACE_shy
3673
+ $APPEND_care
3674
+ $APPEND_wear
3675
+ $REPLACE_served
3676
+ $REPLACE_stops
3677
+ $APPEND_house
3678
+ $REPLACE_diligent
3679
+ $REPLACE_IN
3680
+ $REPLACE_deciding
3681
+ $REPLACE_sweets
3682
+ $REPLACE_argued
3683
+ $REPLACE_bookstore
3684
+ $APPEND_pretty
3685
+ $REPLACE_range
3686
+ $REPLACE_vegetable
3687
+ $REPLACE_appreciation
3688
+ $REPLACE_pity
3689
+ $REPLACE_update
3690
+ $REPLACE_More
3691
+ $REPLACE_laughing
3692
+ $REPLACE_economics
3693
+ $REPLACE_cellphone
3694
+ $REPLACE_OK
3695
+ $REPLACE_pregnant
3696
+ $REPLACE_spite
3697
+ $REPLACE_karaoke
3698
+ $REPLACE_tutor
3699
+ $REPLACE_cockroaches
3700
+ $APPEND_Most
3701
+ $REPLACE_additional
3702
+ $APPEND_energy
3703
+ $REPLACE_contain
3704
+ $REPLACE_actual
3705
+ $REPLACE_shining
3706
+ $APPEND_feels
3707
+ $REPLACE_lesser
3708
+ $REPLACE_pages
3709
+ $REPLACE_cartoons
3710
+ $REPLACE_arise
3711
+ $REPLACE_f
3712
+ $REPLACE_luckily
3713
+ $REPLACE_airport
3714
+ $REPLACE_windy
3715
+ $REPLACE_instructor
3716
+ $APPEND_Why
3717
+ $REPLACE_weighed
3718
+ $REPLACE_river
3719
+ $APPEND_frequently
3720
+ $APPEND_method
3721
+ $REPLACE_shrine
3722
+ $APPEND_short
3723
+ $REPLACE_suffer
3724
+ $REPLACE_6th
3725
+ $REPLACE_fight
3726
+ $APPEND_worth
3727
+ $REPLACE_absent
3728
+ $REPLACE_United
3729
+ $REPLACE_chef
3730
+ $REPLACE_anytime
3731
+ $REPLACE_Three
3732
+ $REPLACE_noisy
3733
+ $APPEND_therefore
3734
+ $REPLACE_iPod
3735
+ $APPEND_French
3736
+ $REPLACE_wishes
3737
+ $REPLACE_Yours
3738
+ $APPEND_Being
3739
+ $APPEND_Its
3740
+ $APPEND_field
3741
+ $APPEND_photos
3742
+ $REPLACE_definition
3743
+ $APPEND_gives
3744
+ $REPLACE_scores
3745
+ $APPEND_Having
3746
+ $REPLACE_statement
3747
+ $APPEND_spoken
3748
+ $APPEND_price
3749
+ $REPLACE_cleaned
3750
+ $REPLACE_varied
3751
+ $APPEND_Oh
3752
+ $REPLACE_wash
3753
+ $REPLACE_satisfactory
3754
+ $REPLACE_ceiling
3755
+ $APPEND_including
3756
+ $APPEND_special
3757
+ $APPEND_popular
3758
+ $REPLACE_invention
3759
+ $REPLACE_materials
3760
+ $REPLACE_media
3761
+ $REPLACE_=
3762
+ $REPLACE_dialogue
3763
+ $REPLACE_designed
3764
+ $REPLACE_popularity
3765
+ $REPLACE_York
3766
+ $REPLACE_Getting
3767
+ $APPEND_shown
3768
+ $REPLACE_carrying
3769
+ $REPLACE_00pm
3770
+ $REPLACE_stations
3771
+ $REPLACE_puts
3772
+ $REPLACE_screen
3773
+ $REPLACE_appreciative
3774
+ $REPLACE_cruel
3775
+ $APPEND_main
3776
+ $REPLACE_action
3777
+ $REPLACE_unlucky
3778
+ $REPLACE_God
3779
+ $APPEND_basically
3780
+ $REPLACE_d
3781
+ $REPLACE_climbed
3782
+ $REPLACE_thoroughly
3783
+ $REPLACE_Canada
3784
+ $REPLACE_hesitate
3785
+ $APPEND_developed
3786
+ $APPEND_post
3787
+ $REPLACE_represent
3788
+ $REPLACE_comment
3789
+ $REPLACE_controlled
3790
+ $REPLACE_source
3791
+ $REPLACE_customs
3792
+ $REPLACE_drawn
3793
+ $REPLACE_mature
3794
+ $REPLACE_commute
3795
+ $APPEND_Once
3796
+ $APPEND_letter
3797
+ $REPLACE_attached
3798
+ $REPLACE_gift
3799
+ $REPLACE_nap
3800
+ $APPEND_asked
3801
+ $REPLACE_inspired
3802
+ $APPEND_event
3803
+ $REPLACE_seafood
3804
+ $APPEND_watched
3805
+ $REPLACE_errors
3806
+ $APPEND_passed
3807
+ $APPEND_english
3808
+ $REPLACE_complaining
3809
+ $REPLACE_roommate
3810
+ $REPLACE_Life
3811
+ $REPLACE_mental
3812
+ $REPLACE_grades
3813
+ $APPEND_parts
3814
+ $REPLACE_pronounciation
3815
+ $REPLACE_strengthen
3816
+ $REPLACE_priority
3817
+ $APPEND_abroad
3818
+ $APPEND_ticket
3819
+ $REPLACE_insurance
3820
+ $REPLACE_hesitation
3821
+ $REPLACE_researched
3822
+ $REPLACE_unlike
3823
+ $REPLACE_exercising
3824
+ $REPLACE_exchanged
3825
+ $REPLACE_knows
3826
+ $REPLACE_founded
3827
+ $REPLACE_messy
3828
+ $REPLACE_dying
3829
+ $APPEND_plans
3830
+ $APPEND_match
3831
+ $REPLACE_Fourth
3832
+ $REPLACE_answers
3833
+ $REPLACE_assignments
3834
+ $REPLACE_Whether
3835
+ $REPLACE_elder
3836
+ $REPLACE_gas
3837
+ $REPLACE_heading
3838
+ $REPLACE_laws
3839
+ $REPLACE_kindly
3840
+ $REPLACE_wine
3841
+ $REPLACE_household
3842
+ $REPLACE_dining
3843
+ $REPLACE_sensitive
3844
+ $REPLACE_wet
3845
+ $REPLACE_Personally
3846
+ $APPEND_middle
3847
+ $REPLACE_busier
3848
+ $REPLACE_dirty
3849
+ $REPLACE_religion
3850
+ $REPLACE_facing
3851
+ $APPEND_totally
3852
+ $REPLACE_repeatedly
3853
+ $REPLACE_tries
3854
+ $REPLACE_organising
3855
+ $REPLACE_operating
3856
+ $REPLACE_ex
3857
+ $APPEND_languages
3858
+ $REPLACE_services
3859
+ $REPLACE_remaining
3860
+ $REPLACE_killed
3861
+ $REPLACE_fair
3862
+ $REPLACE_bike
3863
+ $REPLACE_'t
3864
+ $APPEND_titled
3865
+ $REPLACE_exception
3866
+ $APPEND_carefully
3867
+ $REPLACE_salon
3868
+ $REPLACE_translated
3869
+ $REPLACE_welcome
3870
+ $REPLACE_gratitude
3871
+ $REPLACE_Watching
3872
+ $REPLACE_adults
3873
+ $APPEND_large
3874
+ $REPLACE_untill
3875
+ $REPLACE_coach
3876
+ $REPLACE_mountains
3877
+ $REPLACE_sandwich
3878
+ $REPLACE_examples
3879
+ $APPEND_gone
3880
+ $REPLACE_multiple
3881
+ $APPEND_meant
3882
+ $REPLACE_delivered
3883
+ $REPLACE_entering
3884
+ $APPEND_Hello
3885
+ $REPLACE_option
3886
+ $REPLACE_cigarette
3887
+ $REPLACE_acted
3888
+ $REPLACE_bathroom
3889
+ $REPLACE_accustomed
3890
+ $REPLACE_literature
3891
+ $REPLACE_bottom
3892
+ $APPEND_course
3893
+ $APPEND_choose
3894
+ $REPLACE_resume
3895
+ $APPEND_web
3896
+ $REPLACE_aloud
3897
+ $REPLACE_material
3898
+ $REPLACE_struggle
3899
+ $REPLACE_trains
3900
+ $REPLACE_dog
3901
+ $APPEND_Both
3902
+ $REPLACE_leisure
3903
+ $REPLACE_climate
3904
+ $REPLACE_japanese
3905
+ $REPLACE_reduced
3906
+ $APPEND_break
3907
+ $APPEND_grow
3908
+ $REPLACE_Thinking
3909
+ $REPLACE_dessert
3910
+ $REPLACE_Yeah
3911
+ $REPLACE_salt
3912
+ $REPLACE_rare
3913
+ $REPLACE_fairly
3914
+ $REPLACE_knowing
3915
+ $REPLACE_varieties
3916
+ $APPEND_festival
3917
+ $REPLACE_kitten
3918
+ $APPEND_changes
3919
+ $REPLACE_Introduction
3920
+ $REPLACE_viruses
3921
+ $APPEND_gotten
3922
+ $REPLACE_h
3923
+ $REPLACE_experiencing
3924
+ $APPEND_rain
3925
+ $APPEND_weight
3926
+ $REPLACE_brown
3927
+ $REPLACE_Everyday
3928
+ $APPEND_Tokyo
3929
+ $REPLACE_split
3930
+ $REPLACE_section
3931
+ $APPEND_dinner
3932
+ $REPLACE_Making
3933
+ $REPLACE_courses
3934
+ $REPLACE_remains
3935
+ $REPLACE_Dragon
3936
+ $REPLACE_soft
3937
+ $REPLACE_independent
3938
+ $REPLACE_conducted
3939
+ $APPEND_mode
3940
+ $APPEND_tickets
3941
+ $APPEND_leave
3942
+ $APPEND_culture
3943
+ $REPLACE_Iam
3944
+ $REPLACE_joy
3945
+ $REPLACE_violent
3946
+ $REPLACE_leaf
3947
+ $REPLACE_fortune
3948
+ $APPEND_reasons
3949
+ $REPLACE_Fukushima
3950
+ $APPEND_thus
3951
+ $REPLACE_boss
3952
+ $REPLACE_player
3953
+ $REPLACE_closest
3954
+ $REPLACE_lies
3955
+ $APPEND_consists
3956
+ $REPLACE_impolite
3957
+ $REPLACE_unpredictable
3958
+ $REPLACE_shared
3959
+ $REPLACE_7th
3960
+ $APPEND_Up
3961
+ $REPLACE_step
3962
+ $APPEND_football
3963
+ $REPLACE_central
3964
+ $REPLACE_symptoms
3965
+ $REPLACE_funds
3966
+ $REPLACE_resolve
3967
+ $REPLACE_Technology
3968
+ $REPLACE_solutions
3969
+ $REPLACE_adult
3970
+ $REPLACE_military
3971
+ $REPLACE_supermarkets
3972
+ $APPEND_sites
3973
+ $REPLACE_levels
3974
+ $REPLACE_broad
3975
+ $REPLACE_smiling
3976
+ $REPLACE_expecting
3977
+ $REPLACE_shorter
3978
+ $APPEND_Like
3979
+ $REPLACE_gloomy
3980
+ $REPLACE_weekdays
3981
+ $REPLACE_blew
3982
+ $REPLACE_determine
3983
+ $REPLACE_discount
3984
+ $APPEND_attend
3985
+ $REPLACE_treated
3986
+ $REPLACE_length
3987
+ $REPLACE_raw
3988
+ $REPLACE_promote
3989
+ $REPLACE_court
3990
+ $REPLACE_commercial
3991
+ $REPLACE_expectations
3992
+ $APPEND_exercise
3993
+ $REPLACE_tickets
3994
+ $REPLACE_status
3995
+ $REPLACE_retirement
3996
+ $REPLACE_crowd
3997
+ $REPLACE_requested
3998
+ $REPLACE_South
3999
+ $APPEND_corrected
4000
+ $REPLACE_aunt
4001
+ $REPLACE_Traveling
4002
+ $REPLACE_region
4003
+ $REPLACE_pulled
4004
+ $APPEND_14
4005
+ $REPLACE_impatient
4006
+ $REPLACE_roads
4007
+ $REPLACE_value
4008
+ $REPLACE_existence
4009
+ $REPLACE_applications
4010
+ $REPLACE_boiled
4011
+ $REPLACE_warming
4012
+ $REPLACE_15
4013
+ $REPLACE_Iwas
4014
+ $REPLACE_accomplish
4015
+ $APPEND_Sounds
4016
+ $APPEND_send
4017
+ $APPEND_programs
4018
+ $REPLACE_costume
4019
+ $APPEND_1st
4020
+ $REPLACE_ancient
4021
+ $REPLACE_physics
4022
+ $REPLACE_record
4023
+ $REPLACE_published
4024
+ $REPLACE_cross
4025
+ $REPLACE_harmful
4026
+ $REPLACE_description
4027
+ $APPEND_wrote
4028
+ $APPEND_pay
4029
+ $REPLACE_fond
4030
+ $APPEND_color
4031
+ $REPLACE_asks
4032
+ $APPEND_stuff
4033
+ $REPLACE_specially
4034
+ $REPLACE_uneasy
4035
+ $APPEND_riding
4036
+ $REPLACE_inthe
4037
+ $REPLACE_nose
4038
+ $REPLACE_scientific
4039
+ $REPLACE_Among
4040
+ $REPLACE_danger
4041
+ $REPLACE_commit
4042
+ $REPLACE_Particularly
4043
+ $REPLACE_troubles
4044
+ $REPLACE_button
4045
+ $REPLACE_delayed
4046
+ $REPLACE_Diego
4047
+ $REPLACE_daytime
4048
+ $REPLACE_phenomenon
4049
+ $APPEND_following
4050
+ $REPLACE_Consequently
4051
+ $REPLACE_saving
4052
+ $REPLACE_souvenir
4053
+ $REPLACE_missing
4054
+ $REPLACE_unless
4055
+ $APPEND_office
4056
+ $REPLACE_anniversary
4057
+ $REPLACE_anger
4058
+ $APPEND_himself
4059
+ $APPEND_happening
4060
+ $REPLACE_cheer
4061
+ $REPLACE_animal
4062
+ $APPEND_subject
4063
+ $REPLACE_nicer
4064
+ $REPLACE_sells
4065
+ $REPLACE_lenses
4066
+ $REPLACE_OF
4067
+ $REPLACE_possibilities
4068
+ $REPLACE_efforts
4069
+ $REPLACE_Years
4070
+ $REPLACE_merchandise
4071
+ $REPLACE_subsidies
4072
+ $REPLACE_forms
4073
+ $REPLACE_hotel
4074
+ $APPEND_non
4075
+ $REPLACE_appetite
4076
+ $REPLACE_sport
4077
+ $REPLACE_expand
4078
+ $REPLACE_rhythm
4079
+ $APPEND_Another
4080
+ $REPLACE_Language
4081
+ $APPEND_Each
4082
+ $REPLACE_window
4083
+ $REPLACE_increases
4084
+ $REPLACE_states
4085
+ $REPLACE_excitement
4086
+ $REPLACE_promise
4087
+ $APPEND_seen
4088
+ $REPLACE_luggage
4089
+ $APPEND_generally
4090
+ $REPLACE_frustrating
4091
+ $REPLACE_colors
4092
+ $REPLACE_mosquitoes
4093
+ $REPLACE_seats
4094
+ $REPLACE_woken
4095
+ $REPLACE_switched
4096
+ $REPLACE_grammatically
4097
+ $REPLACE_ON
4098
+ $REPLACE_kindness
4099
+ $REPLACE_thieves
4100
+ $REPLACE_spoiled
4101
+ $REPLACE_States
4102
+ $REPLACE_hamburgers
4103
+ $APPEND_nearly
4104
+ $REPLACE_situated
4105
+ $REPLACE_foods
4106
+ $REPLACE_collecting
4107
+ $REPLACE_unfortunate
4108
+ $REPLACE_camera
4109
+ $REPLACE_dramatic
4110
+ $REPLACE_noodle
4111
+ $APPEND_human
4112
+ $REPLACE_re
4113
+ $REPLACE_humidity
4114
+ $APPEND_strongly
4115
+ $REPLACE_kimchi
4116
+ $APPEND_difference
4117
+ $REPLACE_artists
4118
+ $REPLACE_medical
4119
+ $REPLACE_incredible
4120
+ $APPEND_helping
4121
+ $APPEND_ahead
4122
+ $REPLACE_lines
4123
+ $REPLACE_thinks
4124
+ $REPLACE_thousand
4125
+ $REPLACE_sixth
4126
+ $REPLACE_exposed
4127
+ $REPLACE_colours
4128
+ $REPLACE_widely
4129
+ $APPEND_nuclear
4130
+ $REPLACE_worldwide
4131
+ $REPLACE_comprehension
4132
+ $APPEND_hair
4133
+ $REPLACE_halfway
4134
+ $APPEND_cause
4135
+ $REPLACE_cast
4136
+ $APPEND_coffee
4137
+ $REPLACE_attractions
4138
+ $REPLACE_beautifully
4139
+ $REPLACE_handwritten
4140
+ $APPEND_band
4141
+ $APPEND_improving
4142
+ $APPEND_40
4143
+ $REPLACE_shops
4144
+ $REPLACE_basically
4145
+ $APPEND_studied
4146
+ $REPLACE_manufacturer
4147
+ $REPLACE_Western
4148
+ $APPEND_throughout
4149
+ $REPLACE_identify
4150
+ $APPEND_Would
4151
+ $REPLACE_Switzerland
4152
+ $APPEND_everybody
4153
+ $APPEND_grade
4154
+ $REPLACE_farewell
4155
+ $REPLACE_romantic
4156
+ $REPLACE_Celsius
4157
+ $REPLACE_bread
4158
+ $APPEND_favorite
4159
+ $APPEND_Despite
4160
+ $REPLACE_downloaded
4161
+ $REPLACE_balance
4162
+ $APPEND_carry
4163
+ $REPLACE_cure
4164
+ $REPLACE_programmer
4165
+ $APPEND_considered
4166
+ $APPEND_slowly
4167
+ $REPLACE_discovery
4168
+ $APPEND_stopped
4169
+ $REPLACE_standing
4170
+ $REPLACE_earned
4171
+ $REPLACE_skating
4172
+ $REPLACE_detail
4173
+ $REPLACE_apology
4174
+ $REPLACE_writer
4175
+ $REPLACE_highway
4176
+ $REPLACE_Goodbye
4177
+ $REPLACE_quote
4178
+ $REPLACE_maintenance
4179
+ $APPEND_taste
4180
+ $REPLACE_package
4181
+ $REPLACE_responded
4182
+ $REPLACE_criticize
4183
+ $APPEND_deeply
4184
+ $REPLACE_jogging
4185
+ $APPEND_waiting
4186
+ $REPLACE_fatter
4187
+ $REPLACE_cycle
4188
+ $APPEND_Only
4189
+ $REPLACE_afterward
4190
+ $REPLACE_specialty
4191
+ $REPLACE_goodness
4192
+ $REPLACE_groceries
4193
+ $APPEND_staff
4194
+ $REPLACE_somehow
4195
+ $APPEND_Moreover
4196
+ $APPEND_training
4197
+ $REPLACE_clever
4198
+ $REPLACE_camp
4199
+ $APPEND_traveling
4200
+ $APPEND_minutes
4201
+ $REPLACE_sandwiches
4202
+ $APPEND_run
4203
+ $REPLACE_options
4204
+ $REPLACE_calories
4205
+ $REPLACE_branch
4206
+ $REPLACE_barbecue
4207
+ $APPEND_entrance
4208
+ $REPLACE_noodles
4209
+ $APPEND_products
4210
+ $APPEND_helped
4211
+ $REPLACE_newly
4212
+ $APPEND_drank
4213
+ $REPLACE_precise
4214
+ $REPLACE_increasingly
4215
+ $APPEND_Dear
4216
+ $REPLACE_novels
4217
+ $REPLACE_mix
4218
+ $REPLACE_budget
4219
+ $REPLACE_petrol
4220
+ $REPLACE_trial
4221
+ $APPEND_Perhaps
4222
+ $REPLACE_occasions
4223
+ $APPEND_Actually
4224
+ $REPLACE_eastern
4225
+ $REPLACE_sights
4226
+ $REPLACE_industrial
4227
+ $APPEND_result
4228
+ $REPLACE_generally
4229
+ $REPLACE_Canadian
4230
+ $REPLACE_Surprisingly
4231
+ $APPEND_strong
4232
+ $REPLACE_memorizing
4233
+ $REPLACE_irritated
4234
+ $REPLACE_implemented
4235
+ $REPLACE_Welcome
4236
+ $REPLACE_coast
4237
+ $REPLACE_signs
4238
+ $REPLACE_leading
4239
+ $APPEND_PM
4240
+ $APPEND_access
4241
+ $REPLACE_fat
4242
+ $REPLACE_breeze
4243
+ $REPLACE_India
4244
+ $APPEND_slept
4245
+ $REPLACE_weigh
4246
+ $REPLACE_commonly
4247
+ $REPLACE_supervisor
4248
+ $REPLACE_tomato
4249
+ $REPLACE_agency
4250
+ $APPEND_till
4251
+ $REPLACE_couldn
4252
+ $REPLACE_strangely
4253
+ $APPEND_stayed
4254
+ $REPLACE_ni
4255
+ $APPEND_exams
4256
+ $REPLACE_School
4257
+ $REPLACE_blue
4258
+ $APPEND_allowed
4259
+ $REPLACE_30th
4260
+ $REPLACE_kittens
4261
+ $REPLACE_typing
4262
+ $REPLACE_headed
4263
+ $APPEND_present
4264
+ $REPLACE_Reading
4265
+ $REPLACE_injury
4266
+ $REPLACE_Dear
4267
+ $REPLACE_PM
4268
+ $REPLACE_minor
4269
+ $REPLACE_drinks
4270
+ $REPLACE_enthusiasm
4271
+ $REPLACE_dilemma
4272
+ $REPLACE_income
4273
+ $REPLACE_sadness
4274
+ $REPLACE_weaker
4275
+ $REPLACE_Thanksgiving
4276
+ $REPLACE_documents
4277
+ $REPLACE_fake
4278
+ $REPLACE_boy
4279
+ $REPLACE_regards
4280
+ $APPEND_Finally
4281
+ $REPLACE_obstacle
4282
+ $REPLACE_batteries
4283
+ $APPEND_talked
4284
+ $APPEND_becomes
4285
+ $REPLACE_numerous
4286
+ $REPLACE_cheese
4287
+ $REPLACE_judge
4288
+ $APPEND_busy
4289
+ $APPEND_reach
4290
+ $APPEND_Fuji
4291
+ $REPLACE_intelligent
4292
+ $REPLACE_reception
4293
+ $REPLACE_Chinatown
4294
+ $REPLACE_repeat
4295
+ $APPEND_June
4296
+ $REPLACE_reported
4297
+ $APPEND_required
4298
+ $REPLACE_cases
4299
+ $REPLACE_matters
4300
+ $REPLACE_prepositions
4301
+ $REPLACE_accidents
4302
+ $REPLACE_fields
4303
+ $APPEND_ask
4304
+ $APPEND_sad
4305
+ $REPLACE_selected
4306
+ $REPLACE_skipped
4307
+ $REPLACE_freshmen
4308
+ $REPLACE_mode
4309
+ $REPLACE_calendar
4310
+ $REPLACE_luxury
4311
+ $REPLACE_summertime
4312
+ $REPLACE_device
4313
+ $APPEND_lesson
4314
+ $APPEND_surely
4315
+ $REPLACE_loved
4316
+ $REPLACE_reflect
4317
+ $REPLACE_shoulder
4318
+ $REPLACE_muscular
4319
+ $APPEND_plenty
4320
+ $REPLACE_Indian
4321
+ $REPLACE_pork
4322
+ $REPLACE_double
4323
+ $REPLACE_loneliness
4324
+ $REPLACE_economies
4325
+ $REPLACE_meaningful
4326
+ $REPLACE_cooperate
4327
+ $REPLACE_land
4328
+ $APPEND_report
4329
+ $REPLACE_block
4330
+ $REPLACE_cheapest
4331
+ $REPLACE_mirror
4332
+ $REPLACE_wealthy
4333
+ $APPEND_application
4334
+ $REPLACE_quarter
4335
+ $REPLACE_babies
4336
+ $REPLACE_risk
4337
+ $REPLACE_discussions
4338
+ $REPLACE_lightning
4339
+ $REPLACE_briefly
4340
+ $REPLACE_congratulated
4341
+ $REPLACE_breathing
4342
+ $REPLACE_eagerly
4343
+ $REPLACE_resolved
4344
+ $APPEND_staying
4345
+ $APPEND_history
4346
+ $APPEND_phones
4347
+ $REPLACE_involving
4348
+ $REPLACE_enthusiastic
4349
+ $REPLACE_cookies
4350
+ $REPLACE_frightened
4351
+ $REPLACE_entirely
4352
+ $REPLACE_enormous
4353
+ $APPEND_aspects
4354
+ $REPLACE_stable
4355
+ $APPEND_section
4356
+ $APPEND_Thanks
4357
+ $APPEND_women
4358
+ $REPLACE_phase
4359
+ $REPLACE_16th
4360
+ $REPLACE_spicy
4361
+ $APPEND_produced
4362
+ $REPLACE_street
4363
+ $REPLACE_ignore
4364
+ $REPLACE_designer
4365
+ $APPEND_club
4366
+ $REPLACE_mum
4367
+ $REPLACE_sincere
4368
+ $REPLACE_offensive
4369
+ $REPLACE_memorized
4370
+ $APPEND_question
4371
+ $REPLACE_wa
4372
+ $REPLACE_garbage
4373
+ $REPLACE_Playing
4374
+ $REPLACE_castle
4375
+ $REPLACE_swam
4376
+ $REPLACE_leader
4377
+ $REPLACE_earthquakes
4378
+ $REPLACE_displayed
4379
+ $REPLACE_marathon
4380
+ $APPEND_songs
4381
+ $REPLACE_See
4382
+ $REPLACE_burn
4383
+ $APPEND_happily
4384
+ $REPLACE_salesman
4385
+ $REPLACE_unhealthy
4386
+ $REPLACE_base
4387
+ $REPLACE_crossing
4388
+ $REPLACE_Honestly
4389
+ $REPLACE_machines
4390
+ $REPLACE_freshman
4391
+ $REPLACE_dry
4392
+ $APPEND_exact
4393
+ $APPEND_January
4394
+ $APPEND_terms
4395
+ $REPLACE_happiest
4396
+ $APPEND_tastes
4397
+ $APPEND_design
4398
+ $REPLACE_champion
4399
+ $REPLACE_Diary
4400
+ $REPLACE_expressing
4401
+ $REPLACE_hardest
4402
+ $REPLACE_installed
4403
+ $REPLACE_Go
4404
+ $REPLACE_dollar
4405
+ $REPLACE_wooden
4406
+ $REPLACE_contrary
4407
+ $REPLACE_refers
4408
+ $REPLACE_employment
4409
+ $REPLACE_removed
4410
+ $REPLACE_opposing
4411
+ $REPLACE_actress
4412
+ $REPLACE_Ever
4413
+ $APPEND_beginning
4414
+ $REPLACE_approach
4415
+ $REPLACE_guide
4416
+ $REPLACE_blooming
4417
+ $REPLACE_necessarily
4418
+ $REPLACE_fed
4419
+ $REPLACE_stands
4420
+ $REPLACE_principal
4421
+ $REPLACE_faced
4422
+ $APPEND_local
4423
+ $APPEND_highly
4424
+ $REPLACE_fiction
4425
+ $APPEND_finding
4426
+ $REPLACE_attracts
4427
+ $REPLACE_2011
4428
+ $REPLACE_businessmen
4429
+ $REPLACE_Friends
4430
+ $REPLACE_repaired
4431
+ $REPLACE_bet
4432
+ $REPLACE_hunger
4433
+ $REPLACE_dealing
4434
+ $REPLACE_Except
4435
+ $APPEND_role
4436
+ $REPLACE_admitted
4437
+ $REPLACE_island
4438
+ $REPLACE_quietly
4439
+ $REPLACE_lets
4440
+ $REPLACE_fee
4441
+ $REPLACE_performances
4442
+ $REPLACE_bar
4443
+ $REPLACE_maximum
4444
+ $REPLACE_escaped
4445
+ $REPLACE_ours
4446
+ $APPEND_originally
4447
+ $REPLACE_surroundings
4448
+ $REPLACE_golden
4449
+ $APPEND_technology
4450
+ $APPEND_research
4451
+ $REPLACE_borrow
4452
+ $REPLACE_remind
4453
+ $REPLACE_Beginning
4454
+ $REPLACE_passage
4455
+ $APPEND_drive
4456
+ $APPEND_teaching
4457
+ $REPLACE_typhoons
4458
+ $REPLACE_grabbed
4459
+ $REPLACE_incidents
4460
+ $REPLACE_hid
4461
+ $REPLACE_operate
4462
+ $REPLACE_19th
4463
+ $APPEND_sure
4464
+ $REPLACE_permission
4465
+ $APPEND_previous
4466
+ $REPLACE_rental
4467
+ $REPLACE_tothe
4468
+ $APPEND_round
4469
+ $REPLACE_Oops
4470
+ $REPLACE_survival
4471
+ $REPLACE_shaped
4472
+ $APPEND_costs
4473
+ $REPLACE_conference
4474
+ $APPEND_move
4475
+ $REPLACE_dressed
4476
+ $REPLACE_smells
4477
+ $REPLACE_artistic
4478
+ $REPLACE_holds
4479
+ $REPLACE_introducing
4480
+ $REPLACE_nursery
4481
+ $APPEND_May
4482
+ $REPLACE_troubled
4483
+ $REPLACE_optimistic
4484
+ $REPLACE_guarantee
4485
+ $REPLACE_toothache
4486
+ $REPLACE_bother
4487
+ $REPLACE_Congratulations
4488
+ $REPLACE_purchased
4489
+ $APPEND_21
4490
+ $REPLACE_accurately
4491
+ $REPLACE_belief
4492
+ $REPLACE_numbers
4493
+ $REPLACE_switch
4494
+ $REPLACE_personally
4495
+ $REPLACE_negatively
4496
+ $REPLACE_fireflies
4497
+ $APPEND_receive
4498
+ $APPEND_shop
4499
+ $REPLACE_haircut
4500
+ $REPLACE_productive
4501
+ $REPLACE_crisis
4502
+ $REPLACE_relatively
4503
+ $REPLACE_celebration
4504
+ $REPLACE_controversial
4505
+ $REPLACE_AM
4506
+ $REPLACE_factors
4507
+ $REPLACE_snowing
4508
+ $REPLACE_amusing
4509
+ $REPLACE_sharing
4510
+ $REPLACE_Companies
4511
+ $REPLACE_NYC
4512
+ $REPLACE_moves
4513
+ $REPLACE_hanging
4514
+ $REPLACE_simpler
4515
+ $APPEND_apart
4516
+ $REPLACE_race
4517
+ $REPLACE_hip
4518
+ $REPLACE_underwear
4519
+ $REPLACE_official
4520
+ $REPLACE_shift
4521
+ $APPEND_week
4522
+ $REPLACE_analyse
4523
+ $REPLACE_25th
4524
+ $REPLACE_teenage
4525
+ $APPEND_recent
4526
+ $REPLACE_skin
4527
+ $REPLACE_enroll
4528
+ $REPLACE_nickname
4529
+ $APPEND_accidentally
4530
+ $REPLACE_inventions
4531
+ $REPLACE_boys
4532
+ $APPEND_Afterwards
4533
+ $REPLACE_gentle
4534
+ $REPLACE_overnight
4535
+ $APPEND_explain
4536
+ $REPLACE_wanting
4537
+ $REPLACE_encouraging
4538
+ $REPLACE_contribute
4539
+ $REPLACE_necessities
4540
+ $REPLACE_enrolled
4541
+ $REPLACE_Normally
4542
+ $REPLACE_balloon
4543
+ $REPLACE_applying
4544
+ $APPEND_uses
4545
+ $REPLACE_recall
4546
+ $REPLACE_nearest
4547
+ $REPLACE_cashier
4548
+ $REPLACE_corner
4549
+ $APPEND_space
4550
+ $REPLACE_thatI
4551
+ $REPLACE_treasure
4552
+ $REPLACE_International
4553
+ $REPLACE_forth
4554
+ $REPLACE_assigned
4555
+ $APPEND_education
4556
+ $APPEND_except
4557
+ $REPLACE_jewellery
4558
+ $REPLACE_manga
4559
+ $APPEND_participate
4560
+ $APPEND_increase
4561
+ $REPLACE_slippery
4562
+ $REPLACE_snowboard
4563
+ $REPLACE_novel
4564
+ $REPLACE_predict
4565
+ $REPLACE_remained
4566
+ $REPLACE_outcome
4567
+ $APPEND_whose
4568
+ $APPEND_slightly
4569
+ $APPEND_serious
4570
+ $REPLACE_Research
4571
+ $REPLACE_marvelous
4572
+ $APPEND_excited
4573
+ $REPLACE_organization
4574
+ $REPLACE_list
4575
+ $REPLACE_automatically
4576
+ $REPLACE_differ
4577
+ $REPLACE_Mount
4578
+ $REPLACE_arrangement
4579
+ $APPEND_spending
4580
+ $REPLACE_adopt
4581
+ $APPEND_Soon
4582
+ $APPEND_Mr
4583
+ $REPLACE_irritable
4584
+ $REPLACE_Wish
4585
+ $REPLACE_writting
4586
+ $REPLACE_Sincerely
4587
+ $APPEND_winter
4588
+ $REPLACE_rose
4589
+ $REPLACE_businessman
4590
+ $REPLACE_flavors
4591
+ $REPLACE_smell
4592
+ $REPLACE_fortunate
4593
+ $APPEND_TOEIC
4594
+ $APPEND_mentioned
4595
+ $APPEND_process
4596
+ $APPEND_amp
4597
+ $APPEND_neither
4598
+ $REPLACE_enemies
4599
+ $REPLACE_acceptance
4600
+ $REPLACE_drivers
4601
+ $REPLACE_murderer
4602
+ $REPLACE_Melbourne
4603
+ $REPLACE_Specifically
4604
+ $APPEND_complete
4605
+ $APPEND_focus
4606
+ $REPLACE_illegal
4607
+ $APPEND_hurts
4608
+ $REPLACE_groom
4609
+ $APPEND_preposition
4610
+ $APPEND_com
4611
+ $APPEND_beautiful
4612
+ $REPLACE_sightseeing
4613
+ $REPLACE_bringing
4614
+ $REPLACE_sources
4615
+ $APPEND_videos
4616
+ $APPEND_lunch
4617
+ $APPEND_11
4618
+ $REPLACE_suggestion
4619
+ $REPLACE_programmes
4620
+ $APPEND_jobs
4621
+ $REPLACE_scent
4622
+ $REPLACE_crime
4623
+ $REPLACE_desperate
4624
+ $REPLACE_deliver
4625
+ $APPEND_performed
4626
+ $REPLACE_cars
4627
+ $REPLACE_pet
4628
+ $REPLACE_dangers
4629
+ $APPEND_perform
4630
+ $REPLACE_vehicles
4631
+ $APPEND_figure
4632
+ $APPEND_Later
4633
+ $REPLACE_matches
4634
+ $REPLACE_spaghetti
4635
+ $APPEND_light
4636
+ $REPLACE_corrects
4637
+ $REPLACE_Unlike
4638
+ $APPEND_occasionally
4639
+ $APPEND_truly
4640
+ $REPLACE_silence
4641
+ $REPLACE_intense
4642
+ $REPLACE_substitute
4643
+ $APPEND_freely
4644
+ $APPEND_party
4645
+ $APPEND_His
4646
+ $REPLACE_bothersome
4647
+ $REPLACE_pursuing
4648
+ $REPLACE_Out
4649
+ $REPLACE_direction
4650
+ $APPEND_check
4651
+ $REPLACE_authorities
4652
+ $APPEND_sort
4653
+ $REPLACE_challenges
4654
+ $REPLACE_plural
4655
+ $REPLACE_refused
4656
+ $REPLACE_informed
4657
+ $REPLACE_demand
4658
+ $REPLACE_mess
4659
+ $REPLACE_force
4660
+ $REPLACE_paintings
4661
+ $APPEND_remember
4662
+ $REPLACE_sky
4663
+ $APPEND_practicing
4664
+ $REPLACE_understandable
4665
+ $REPLACE_crashed
4666
+ $APPEND_communicate
4667
+ $REPLACE_manner
4668
+ $REPLACE_payment
4669
+ $REPLACE_artist
4670
+ $APPEND_tend
4671
+ $REPLACE_recession
4672
+ $REPLACE_til
4673
+ $REPLACE_mixed
4674
+ $APPEND_bar
4675
+ $REPLACE_England
4676
+ $REPLACE_gathered
4677
+ $REPLACE_combined
4678
+ $REPLACE_Rome
4679
+ $APPEND_wet
4680
+ $REPLACE_network
4681
+ $REPLACE_steak
4682
+ $REPLACE_California
4683
+ $REPLACE_birth
4684
+ $APPEND_state
4685
+ $REPLACE_expressed
4686
+ $REPLACE_haven
4687
+ $REPLACE_seldom
4688
+ $APPEND_health
4689
+ $REPLACE_partners
4690
+ $REPLACE_finishing
4691
+ $REPLACE_Monday
4692
+ $REPLACE_liters
4693
+ $REPLACE_Hi
4694
+ $APPEND_August
4695
+ $REPLACE_gorgeous
4696
+ $APPEND_seven
4697
+ $APPEND_remaining
4698
+ $REPLACE_chances
4699
+ $APPEND_older
4700
+ $REPLACE_Eating
4701
+ $APPEND_Christmas
4702
+ $REPLACE_dentist
4703
+ $REPLACE_league
4704
+ $REPLACE_korean
4705
+ $APPEND_greatly
4706
+ $APPEND_return
4707
+ $REPLACE_genres
4708
+ $REPLACE_authors
4709
+ $APPEND_Thank
4710
+ $REPLACE_diseases
4711
+ $REPLACE_travels
4712
+ $REPLACE_sheet
4713
+ $REPLACE_fastest
4714
+ $APPEND_surprised
4715
+ $REPLACE_rushed
4716
+ $APPEND_attending
4717
+ $APPEND_Furthermore
4718
+ $REPLACE_Laden
4719
+ $REPLACE_creative
4720
+ $REPLACE_meantime
4721
+ $REPLACE_Turkey
4722
+ $REPLACE_presenting
4723
+ $REPLACE_Christian
4724
+ $REPLACE_nervousness
4725
+ $REPLACE_meaningless
4726
+ $APPEND_player
4727
+ $REPLACE_motivate
4728
+ $REPLACE_advertisements
4729
+ $REPLACE_artwork
4730
+ $REPLACE_encouragement
4731
+ $REPLACE_regard
4732
+ $REPLACE_slower
4733
+ $REPLACE_dolls
4734
+ $REPLACE_200
4735
+ $REPLACE_unconsciously
4736
+ $APPEND_happens
4737
+ $REPLACE_facility
4738
+ $APPEND_advice
4739
+ $REPLACE_North
4740
+ $REPLACE_awareness
4741
+ $APPEND_planned
4742
+ $REPLACE_genetic
4743
+ $REPLACE_management
4744
+ $REPLACE_refund
4745
+ $REPLACE_brighter
4746
+ $REPLACE_confirm
4747
+ $REPLACE_burning
4748
+ $REPLACE_composition
4749
+ $APPEND_answer
4750
+ $REPLACE_conserve
4751
+ $REPLACE_destruction
4752
+ $REPLACE_duties
4753
+ $REPLACE_creativity
4754
+ $APPEND_expressions
4755
+ $APPEND_commit
4756
+ $REPLACE_East
4757
+ $REPLACE_milk
4758
+ $REPLACE_30pm
4759
+ $REPLACE_belong
4760
+ $REPLACE_autograph
4761
+ $REPLACE_caring
4762
+ $REPLACE_download
4763
+ $APPEND_development
4764
+ $REPLACE_compete
4765
+ $REPLACE_qualities
4766
+ $APPEND_avoid
4767
+ $REPLACE_recieved
4768
+ $APPEND_Perfect
4769
+ $REPLACE_yours
4770
+ $REPLACE_breaks
4771
+ $REPLACE_amusement
4772
+ $REPLACE_models
4773
+ $REPLACE_persevere
4774
+ $REPLACE_emergency
4775
+ $REPLACE_empty
4776
+ $REPLACE_rescue
4777
+ $APPEND_term
4778
+ $REPLACE_requirements
4779
+ $REPLACE_sufficient
4780
+ $APPEND_cooking
4781
+ $REPLACE_fascinated
4782
+ $REPLACE_14th
4783
+ $REPLACE_relevant
4784
+ $REPLACE_listed
4785
+ $REPLACE_vision
4786
+ $REPLACE_g
4787
+ $REPLACE_leadership
4788
+ $REPLACE_butI
4789
+ $APPEND_provide
4790
+ $REPLACE_organize
4791
+ $APPEND_created
4792
+ $REPLACE_12th
4793
+ $REPLACE_collection
4794
+ $REPLACE_supply
4795
+ $APPEND_Besides
4796
+ $REPLACE_stranger
4797
+ $REPLACE_combination
4798
+ $REPLACE_farther
4799
+ $REPLACE_awaiting
4800
+ $APPEND_hand
4801
+ $REPLACE_unsure
4802
+ $REPLACE_profile
4803
+ $APPEND_moving
4804
+ $APPEND_street
4805
+ $REPLACE_delighted
4806
+ $REPLACE_pretended
4807
+ $REPLACE_driven
4808
+ $REPLACE_maintaining
4809
+ $REPLACE_liar
4810
+ $TRANSFORM_SPLIT_HYPHEN
4811
+ $REPLACE_glass
4812
+ $REPLACE_stick
4813
+ $REPLACE_itchy
4814
+ $REPLACE_ought
4815
+ $REPLACE_consumption
4816
+ $REPLACE_quicker
4817
+ $REPLACE_spare
4818
+ $REPLACE_governments
4819
+ $APPEND_view
4820
+ $REPLACE_P
4821
+ $REPLACE_colorful
4822
+ $REPLACE_guitarist
4823
+ $APPEND_wants
4824
+ $REPLACE_million
4825
+ $REPLACE_behalf
4826
+ $REPLACE_kilometres
4827
+ $REPLACE_bank
4828
+ $APPEND_morning
4829
+ $REPLACE_weekends
4830
+ $REPLACE_occasion
4831
+ $APPEND_tour
4832
+ $REPLACE_object
4833
+ $REPLACE_Others
4834
+ $REPLACE_Considering
4835
+ $REPLACE_species
4836
+ $REPLACE_session
4837
+ $APPEND_removed
4838
+ $REPLACE_hiking
4839
+ $REPLACE_resolutions
4840
+ $REPLACE_peak
4841
+ $REPLACE_consequences
4842
+ $REPLACE_soaked
4843
+ $REPLACE_presents
4844
+ $APPEND_25
4845
+ $REPLACE_salad
4846
+ $REPLACE_filling
4847
+ $REPLACE_attack
4848
+ $APPEND_foods
4849
+ $REPLACE_tendency
4850
+ $REPLACE_discoveries
4851
+ $REPLACE_immediate
4852
+ $REPLACE_submitted
4853
+ $REPLACE_THAT
4854
+ $APPEND_develop
4855
+ $REPLACE_battery
4856
+ $REPLACE_dont
4857
+ $REPLACE_feature
4858
+ $APPEND_opportunity
4859
+ $REPLACE_bodies
4860
+ $REPLACE_goldfish
4861
+ $REPLACE_adapt
4862
+ $REPLACE_views
4863
+ $REPLACE_forgetting
4864
+ $REPLACE_saved
4865
+ $REPLACE_doesn
4866
+ $REPLACE_thirst
4867
+ $APPEND_Me
4868
+ $REPLACE_distant
4869
+ $REPLACE_opposition
4870
+ $REPLACE_breed
4871
+ $REPLACE_practised
4872
+ $REPLACE_miserable
4873
+ $APPEND_sore
4874
+ $REPLACE_brain
4875
+ $REPLACE_sessions
4876
+ $REPLACE_policeman
4877
+ $REPLACE_favor
4878
+ $REPLACE_managing
4879
+ $REPLACE_rains
4880
+ $REPLACE_baths
4881
+ $REPLACE_surrounding
4882
+ $REPLACE_Seoul
4883
+ $APPEND_regardless
4884
+ $APPEND_Something
4885
+ $REPLACE_architectural
4886
+ $REPLACE_ok
4887
+ $REPLACE_welfare
4888
+ $APPEND_share
4889
+ $REPLACE_daughters
4890
+ $REPLACE_phones
4891
+ $REPLACE_downstairs
4892
+ $REPLACE_arriving
4893
+ $REPLACE_stepped
4894
+ $REPLACE_competing
4895
+ $REPLACE_catching
4896
+ $REPLACE_conversing
4897
+ $REPLACE_encourages
4898
+ $REPLACE_depressing
4899
+ $REPLACE_begining
4900
+ $REPLACE_admission
4901
+ $APPEND_voice
4902
+ $REPLACE_boredom
4903
+ $APPEND_alot
4904
+ $APPEND_familiar
4905
+ $REPLACE_breaking
4906
+ $REPLACE_fortunately
4907
+ $REPLACE_Over
4908
+ $APPEND_lost
4909
+ $REPLACE_intended
4910
+ $REPLACE_neighbourhood
4911
+ $REPLACE_mysteries
4912
+ $REPLACE_certificate
4913
+ $REPLACE_data
4914
+ $APPEND_personal
4915
+ $REPLACE_joyful
4916
+ $REPLACE_immigrants
4917
+ $REPLACE_emotions
4918
+ $REPLACE_checkup
4919
+ $REPLACE_licence
4920
+ $REPLACE_juice
4921
+ $APPEND_whenever
4922
+ $REPLACE_dogs
4923
+ $REPLACE_thereby
4924
+ $APPEND_department
4925
+ $APPEND_assignment
4926
+ $REPLACE_defend
4927
+ $REPLACE_approached
4928
+ $REPLACE_Fireworks
4929
+ $APPEND_activity
4930
+ $APPEND_quality
4931
+ $REPLACE_basics
4932
+ $REPLACE_costumes
4933
+ $REPLACE_key
4934
+ $REPLACE_outdoors
4935
+ $REPLACE_hay
4936
+ $APPEND_prepare
4937
+ $REPLACE_hiding
4938
+ $REPLACE_curiosity
4939
+ $APPEND_dealing
4940
+ $REPLACE_passion
4941
+ $REPLACE_costed
4942
+ $REPLACE_fries
4943
+ $REPLACE_HAVE
4944
+ $REPLACE_divorced
4945
+ $APPEND_display
4946
+ $REPLACE_baby
4947
+ $APPEND_cherry
4948
+ $REPLACE_Returning
4949
+ $APPEND_lack
4950
+ $APPEND_learnt
4951
+ $REPLACE_Im
4952
+ $APPEND_naturally
4953
+ $REPLACE_router
4954
+ $APPEND_goals
4955
+ $REPLACE_seaside
4956
+ $REPLACE_summarize
4957
+ $APPEND_appeared
4958
+ $REPLACE_claim
4959
+ $APPEND_ate
4960
+ $REPLACE_exchanging
4961
+ $APPEND_arrive
4962
+ $APPEND_art
4963
+ $REPLACE_participating
4964
+ $REPLACE_seek
4965
+ $REPLACE_innocent
4966
+ $APPEND_express
4967
+ $REPLACE_lunchtime
4968
+ $REPLACE_reaction
4969
+ $REPLACE_consisted
4970
+ $REPLACE_Eastern
4971
+ $APPEND_track
4972
+ $APPEND_baby
4973
+ $REPLACE_touching
4974
+ $REPLACE_lively
4975
+ $REPLACE_bridge
4976
+ $REPLACE_murderers
4977
+ $REPLACE_Brazil
4978
+ $REPLACE_feeding
4979
+ $REPLACE_honestly
4980
+ $REPLACE_Piece
4981
+ $REPLACE_springs
4982
+ $REPLACE_purchase
4983
+ $REPLACE_pray
4984
+ $REPLACE_washed
4985
+ $APPEND_sentence
4986
+ $REPLACE_Olympics
4987
+ $REPLACE_strongest
4988
+ $REPLACE_leads
4989
+ $REPLACE_stomachache
4990
+ $REPLACE_John
4991
+ $REPLACE_opponent
4992
+ $REPLACE_contents
4993
+ $REPLACE_plot
4994
+ $APPEND_Many
4995
+ $REPLACE_experiment
4996
+ $REPLACE_beings
4997
+ $REPLACE_owns
4998
+ $REPLACE_airline
4999
+ $REPLACE_severely
5000
+ $REPLACE_ages
5001
+ @@UNKNOWN@@
5002
+ @@PADDING@@
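The label file closes with the @@UNKNOWN@@ and @@PADDING@@ sentinels. Each $REPLACE_w tag rewrites the aligned source token to w, each $APPEND_w inserts w after it, and $TRANSFORM_* tags (such as $TRANSFORM_SPLIT_HYPHEN above) invoke the rule-based rewrites in utils/helpers.py further down. A minimal sketch of how a predicted tag sequence turns into a corrected sentence; the sentence and tags are invented for illustration, and the committed logic (including merges, deletions, and index shifting) lives in get_target_sent_by_edits:

# Illustration only -- not part of the committed code.
def apply_tags(tokens, tags):
    out = []
    for token, tag in zip(tokens, tags):
        if tag.startswith("$REPLACE_"):
            out.append(tag[len("$REPLACE_"):])   # swap the token for the tag suffix
        elif tag.startswith("$APPEND_"):
            out.append(token)
            out.append(tag[len("$APPEND_"):])    # keep the token, insert after it
        else:                                    # $KEEP and anything unhandled
            out.append(token)
    return out

tokens = ["She", "has", "recover", "from", "the", "illness"]
tags   = ["$KEEP", "$KEEP", "$REPLACE_recovered", "$KEEP", "$KEEP", "$KEEP"]
print(" ".join(apply_tags(tokens, tags)))  # She has recovered from the illness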
output_vocabulary/non_padded_namespaces.txt ADDED
@@ -0,0 +1,2 @@
1
+ *tags
2
+ *labels
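The two * entries are wildcard patterns: any vocabulary namespace whose name ends in tags or labels is treated as non-padded, so AllenNLP does not inject padding and OOV entries into it automatically; that is why @@UNKNOWN@@ and @@PADDING@@ are written out explicitly at the end of the labels file above. A hedged sketch of loading the directory with the pinned allennlp release (the directory name matches this commit; the call is the standard Vocabulary.from_files):

from allennlp.data.vocabulary import Vocabulary

# Reads the per-namespace .txt files plus non_padded_namespaces.txt.
vocab = Vocabulary.from_files("output_vocabulary")
print(vocab.get_vocab_size("labels"))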
requirements.txt CHANGED
@@ -1,10 +1,14 @@
1
  gradio
2
- transformers
3
- torch
4
  spacy
5
  nltk
6
  gensim
7
  pattern
8
  textblob
9
-
10
-
1
+ torch==1.10.0
2
+ allennlp==0.8.4
3
+ python-Levenshtein==0.12.1
4
+ transformers==4.11.3
5
+ scikit-learn==0.20.0
6
+ sentencepiece==0.1.95
7
+ overrides==4.1.2
8
+ numpy==1.19.5
9
  gradio
10
  spacy
11
  nltk
12
  gensim
13
  pattern
14
  textblob
utils/filter_brackets.py ADDED
@@ -0,0 +1,35 @@
1
+ import argparse
2
+ import re
3
+
4
+ from helpers import write_lines
5
+
6
+
7
+ def filter_line(line):
8
+ if "-LRB-" in line and "-RRB-" in line:
9
+ rep = re.sub(r'\-.*?LRB.*?\-.*?\-.*?RRB.*?\-', '', line)
10
+ line_cleaned = rep
11
+ elif ("-LRB-" in line and "-RRB-" not in line) or (
12
+ "-LRB-" not in line and "-RRB-" in line):
13
+ line_cleaned = line.replace("-LRB-", '"').replace("-RRB-", '"')
14
+ else:
15
+ line_cleaned = line
16
+ return line_cleaned
17
+
18
+
19
+ def main(args):
20
+ with open(args.source) as f:
21
+ data = [row.rstrip() for row in f]
22
+
23
+ write_lines(args.output, [filter_line(row) for row in data])
24
+
25
+
26
+ if __name__ == '__main__':
27
+ parser = argparse.ArgumentParser()
28
+ parser.add_argument('-s', '--source',
29
+ help='Path to the source file',
30
+ required=True)
31
+ parser.add_argument('-o', '--output',
32
+ help='Path to the output file',
33
+ required=True)
34
+ args = parser.parse_args()
35
+ main(args)
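For reference, filter_line removes a PTB-style bracketed span when both -LRB- and -RRB- occur in the line, and downgrades a lone bracket token to a plain double quote. A quick usage sketch; the sentences are invented, and the import assumes utils/ is on sys.path, since the module itself does a flat "from helpers import write_lines":

from filter_brackets import filter_line

print(filter_line("He left -LRB- quietly -RRB- at noon ."))  # He left  at noon .
print(filter_line("He said -LRB- maybe ."))                  # He said " maybe .

# From the command line (hypothetical file names):
#   python utils/filter_brackets.py -s corpus.src -o corpus.filtered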
utils/helpers.py ADDED
@@ -0,0 +1,233 @@
1
+ import os
2
+ from pathlib import Path
3
+
4
+
5
+ VOCAB_DIR = Path(__file__).resolve().parent.parent / "data"
6
+ PAD = "@@PADDING@@"
7
+ UNK = "@@UNKNOWN@@"
8
+ START_TOKEN = "$START"
9
+ SEQ_DELIMETERS = {"tokens": " ",
10
+ "labels": "SEPL|||SEPR",
11
+ "operations": "SEPL__SEPR"}
12
+ REPLACEMENTS = {
13
+ "''": '"',
14
+ '--': '—',
15
+ '`': "'",
16
+ "'ve": "' ve",
17
+ }
18
+
19
+
20
+ def get_verb_form_dicts():
21
+ path_to_dict = os.path.join(VOCAB_DIR, "verb-form-vocab.txt")
22
+ encode, decode = {}, {}
23
+ with open(path_to_dict, encoding="utf-8") as f:
24
+ for line in f:
25
+ words, tags = line.split(":")
26
+ word1, word2 = words.split("_")
27
+ tag1, tag2 = tags.split("_")
28
+ decode_key = f"{word1}_{tag1}_{tag2.strip()}"
29
+ if decode_key not in decode:
30
+ encode[words] = tags
31
+ decode[decode_key] = word2
32
+ return encode, decode
33
+
34
+
35
+ ENCODE_VERB_DICT, DECODE_VERB_DICT = get_verb_form_dicts()
36
+
37
+
38
+ def get_target_sent_by_edits(source_tokens, edits):
39
+ target_tokens = source_tokens[:]
40
+ shift_idx = 0
41
+ for edit in edits:
42
+ start, end, label, _ = edit
43
+ target_pos = start + shift_idx
44
+ source_token = target_tokens[target_pos] \
45
+ if len(target_tokens) > target_pos >= 0 else ''
46
+ if label == "":
47
+ del target_tokens[target_pos]
48
+ shift_idx -= 1
49
+ elif start == end:
50
+ word = label.replace("$APPEND_", "")
51
+ target_tokens[target_pos: target_pos] = [word]
52
+ shift_idx += 1
53
+ elif label.startswith("$TRANSFORM_"):
54
+ word = apply_reverse_transformation(source_token, label)
55
+ if word is None:
56
+ word = source_token
57
+ target_tokens[target_pos] = word
58
+ elif start == end - 1:
59
+ word = label.replace("$REPLACE_", "")
60
+ target_tokens[target_pos] = word
61
+ elif label.startswith("$MERGE_"):
62
+ target_tokens[target_pos + 1: target_pos + 1] = [label]
63
+ shift_idx += 1
64
+
65
+ return replace_merge_transforms(target_tokens)
66
+
67
+
68
+ def replace_merge_transforms(tokens):
+     if all(not x.startswith("$MERGE_") for x in tokens):
+         return tokens
+
+     target_line = " ".join(tokens)
+     target_line = target_line.replace(" $MERGE_HYPHEN ", "-")
+     target_line = target_line.replace(" $MERGE_SPACE ", "")
+     return target_line.split()
+
+
+ def convert_using_case(token, smart_action):
+     if not smart_action.startswith("$TRANSFORM_CASE_"):
+         return token
+     if smart_action.endswith("LOWER"):
+         return token.lower()
+     elif smart_action.endswith("UPPER"):
+         return token.upper()
+     elif smart_action.endswith("CAPITAL"):
+         return token.capitalize()
+     elif smart_action.endswith("CAPITAL_1"):
+         return token[0] + token[1:].capitalize()
+     elif smart_action.endswith("UPPER_-1"):
+         return token[:-1].upper() + token[-1]
+     else:
+         return token
+
+
+ def convert_using_verb(token, smart_action):
+     key_word = "$TRANSFORM_VERB_"
+     if not smart_action.startswith(key_word):
+         raise Exception(f"Unknown action type {smart_action}")
+     encoding_part = f"{token}_{smart_action[len(key_word):]}"
+     decoded_target_word = decode_verb_form(encoding_part)
+     return decoded_target_word
+
+
+ def convert_using_split(token, smart_action):
+     key_word = "$TRANSFORM_SPLIT"
+     if not smart_action.startswith(key_word):
+         raise Exception(f"Unknown action type {smart_action}")
+     target_words = token.split("-")
+     return " ".join(target_words)
+
+
+ def convert_using_plural(token, smart_action):
+     if smart_action.endswith("PLURAL"):
+         return token + "s"
+     elif smart_action.endswith("SINGULAR"):
+         return token[:-1]
+     else:
+         raise Exception(f"Unknown action type {smart_action}")
+
+
+ def apply_reverse_transformation(source_token, transform):
+     if transform.startswith("$TRANSFORM"):
+         # deal with equal
+         if transform == "$KEEP":
+             return source_token
+         # deal with case
+         if transform.startswith("$TRANSFORM_CASE"):
+             return convert_using_case(source_token, transform)
+         # deal with verb
+         if transform.startswith("$TRANSFORM_VERB"):
+             return convert_using_verb(source_token, transform)
+         # deal with split
+         if transform.startswith("$TRANSFORM_SPLIT"):
+             return convert_using_split(source_token, transform)
+         # deal with singular/plural agreement
+         if transform.startswith("$TRANSFORM_AGREEMENT"):
+             return convert_using_plural(source_token, transform)
+         # raise an exception if no known transform type matched
+         raise Exception(f"Unknown action type {transform}")
+     else:
+         return source_token
+
+
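These are the reverse g-transformations applied at inference time. Expected behaviour, traced from the branches above (inputs invented for illustration):

    apply_reverse_transformation("kitten", "$TRANSFORM_CASE_UPPER")     # -> "KITTEN"
    apply_reverse_transformation("e-mail", "$TRANSFORM_SPLIT_HYPHEN")   # -> "e mail"
    apply_reverse_transformation("cat", "$TRANSFORM_AGREEMENT_PLURAL")  # -> "cats"
    apply_reverse_transformation("cat", "$DELETE")                      # -> "cat" (non-$TRANSFORM labels pass through unchanged)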
+ def read_parallel_lines(fn1, fn2):
+     lines1 = read_lines(fn1, skip_strip=True)
+     lines2 = read_lines(fn2, skip_strip=True)
+     assert len(lines1) == len(lines2)
+     out_lines1, out_lines2 = [], []
+     for line1, line2 in zip(lines1, lines2):
+         if not line1.strip() or not line2.strip():
+             continue
+         else:
+             out_lines1.append(line1)
+             out_lines2.append(line2)
+     return out_lines1, out_lines2
+
+
+ def read_lines(fn, skip_strip=False):
+     if not os.path.exists(fn):
+         return []
+     with open(fn, 'r', encoding='utf-8') as f:
+         lines = f.readlines()
+     return [s.strip() for s in lines if s.strip() or skip_strip]
+
+
+ def write_lines(fn, lines, mode='w'):
+     if mode == 'w' and os.path.exists(fn):
+         os.remove(fn)
+     with open(fn, encoding='utf-8', mode=mode) as f:
+         f.writelines(['%s\n' % s for s in lines])
+
+
+ def decode_verb_form(original):
+     return DECODE_VERB_DICT.get(original)
+
+
+ def encode_verb_form(original_word, corrected_word):
+     decoding_request = original_word + "_" + corrected_word
+     decoding_response = ENCODE_VERB_DICT.get(decoding_request, "").strip()
+     if original_word and decoding_response:
+         answer = decoding_response
+     else:
+         answer = None
+     return answer
+
+
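encode_verb_form and decode_verb_form are inverse lookups over data/verb-form-vocab.txt. Judging from get_verb_form_dicts, each line has the shape word1_word2:tag1_tag2; assuming a hypothetical entry go_goes:VB_VBZ, the round trip would be:

    encode_verb_form("go", "goes")  # -> "VB_VBZ", emitted as the tag $TRANSFORM_VERB_VB_VBZ
    decode_verb_form("go_VB_VBZ")   # -> "goes"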
+ def get_weights_name(transformer_name, lowercase):
+     if transformer_name == 'bert' and lowercase:
+         return 'bert-base-uncased'
+     if transformer_name == 'bert' and not lowercase:
+         return 'bert-base-cased'
+     if transformer_name == 'bert-large' and not lowercase:
+         return 'bert-large-cased'
+     if transformer_name == 'distilbert':
+         if not lowercase:
+             print('Warning! This model was trained only on uncased sentences.')
+         return 'distilbert-base-uncased'
+     if transformer_name == 'albert':
+         if not lowercase:
+             print('Warning! This model was trained only on uncased sentences.')
+         return 'albert-base-v1'
+     if lowercase:
+         print('Warning! This model was trained only on cased sentences.')
+     if transformer_name == 'roberta':
+         return 'roberta-base'
+     if transformer_name == 'roberta-large':
+         return 'roberta-large'
+     if transformer_name == 'gpt2':
+         return 'gpt2'
+     if transformer_name == 'transformerxl':
+         return 'transfo-xl-wt103'
+     if transformer_name == 'xlnet':
+         return 'xlnet-base-cased'
+     if transformer_name == 'xlnet-large':
+         return 'xlnet-large-cased'
+
+
+ def remove_double_tokens(sent):
+     tokens = sent.split(' ')
+     deleted_idx = []
+     for i in range(len(tokens) - 1):
+         if tokens[i] == tokens[i + 1]:
+             deleted_idx.append(i + 1)
+     if deleted_idx:
+         tokens = [tokens[i] for i in range(len(tokens)) if i not in deleted_idx]
+     return ' '.join(tokens)
+
+
+ def normalize(sent):
+     sent = remove_double_tokens(sent)
+     for fr, to in REPLACEMENTS.items():
+         sent = sent.replace(fr, to)
+     return sent.lower()
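normalize is the light text cleanup used before tagging: collapse immediately repeated tokens, apply the REPLACEMENTS map, and lowercase. Traced from the code (input invented for illustration):

    normalize("This is is GOOD ''")  # -> 'this is good "'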
utils/prepare_clc_fce_data.py ADDED
@@ -0,0 +1,123 @@
+ #!/usr/bin/env python
+ """
+ Convert the CLC-FCE dataset (the Cambridge Learner Corpus) to the parallel-sentences format.
+ """
+
+ import argparse
+ import glob
+ import os
+ import re
+ # xml.etree.cElementTree was removed in Python 3.9; ElementTree is the supported module
+ from xml.etree import ElementTree as cElementTree
+
+ from nltk.tokenize import sent_tokenize, word_tokenize
+ from tqdm import tqdm
+
+
+ def annotate_fce_doc(xml):
+     """Takes an FCE xml document and yields sentences with annotated errors."""
+     result = []
+     doc = cElementTree.fromstring(xml)
+     paragraphs = doc.findall('head/text/*/coded_answer/p')
+     for p in paragraphs:
+         text = _get_formatted_text(p)
+         result.append(text)
+
+     return '\n'.join(result)
+
+
+ def _get_formatted_text(elem, ignore_tags=None):
+     text = elem.text or ''
+     ignore_tags = [tag.upper() for tag in (ignore_tags or [])]
+     correct = None
+     mistake = None
+
+     # Element.getchildren() was removed in Python 3.9; iterating the element is equivalent
+     for child in elem:
+         tag = child.tag.upper()
+         if tag == 'NS':
+             text += _get_formatted_text(child)
+
+         elif tag == 'UNKNOWN':
+             text += ' UNKNOWN '
+
+         elif tag == 'C':
+             assert correct is None
+             correct = _get_formatted_text(child)
+
+         elif tag == 'I':
+             assert mistake is None
+             mistake = _get_formatted_text(child)
+
+         elif tag in ignore_tags:
+             pass
+
+         else:
+             raise ValueError(f"Unknown tag `{child.tag}`", text)
+
+     if correct or mistake:
+         correct = correct or ''
+         mistake = mistake or ''
+         if '=>' not in mistake:
+             text += f'{{{mistake}=>{correct}}}'
+         else:
+             text += mistake
+
+     text += elem.tail or ''
+     return text
+
+
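_get_formatted_text flattens every FCE error span into an inline {mistake=>correct} marker; main later splits the two sides apart with the regex {([^{}]*?)=>([^{}]*?)}. For a hypothetical annotated sentence:

    annotated: I {goed=>went} home .
    original:  I goed home .   (re.sub keeps group 1)
    applied:   I went home .   (re.sub keeps group 2)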
+ def convert_fce(fce_dir):
+     """Processes the whole FCE directory. Yields annotated documents (strings)."""
+
+     # Ensure we got a valid dataset path
+     if not os.path.isdir(fce_dir):
+         raise UserWarning(
+             f"{fce_dir} is not a valid path")
+
+     dataset_dir = os.path.join(fce_dir, 'dataset')
+     if not os.path.exists(dataset_dir):
+         raise UserWarning(
+             f"{fce_dir} doesn't point to a dataset's root dir")
+
+     # Convert XML docs to the corpora format
+     filenames = sorted(glob.glob(os.path.join(dataset_dir, '*/*.xml')))
+
+     docs = []
+     for filename in filenames:
+         with open(filename, encoding='utf-8') as f:
+             doc = annotate_fce_doc(f.read())
+             docs.append(doc)
+     return docs
+
+
+ def main():
+     fce = convert_fce(args.fce_dataset_path)
+     with open(args.output + "/fce-original.txt", 'w', encoding='utf-8') as out_original, \
+             open(args.output + "/fce-applied.txt", 'w', encoding='utf-8') as out_applied:
+         for doc in tqdm(fce, unit='doc'):
+             sents = re.split(r"\n +\n", doc)
+             for sent in sents:
+                 tokenized_sents = sent_tokenize(sent)
+                 for i in range(len(tokenized_sents)):
+                     # guard against i being the last sentence before peeking at i + 1
+                     if re.search(r"[{>][.?!]$", tokenized_sents[i]) \
+                             and i + 1 < len(tokenized_sents):
+                         tokenized_sents[i + 1] = tokenized_sents[i] + " " + tokenized_sents[i + 1]
+                         tokenized_sents[i] = ""
+                     regexp = r'{([^{}]*?)=>([^{}]*?)}'
+                     original = re.sub(regexp, r"\1", tokenized_sents[i])
+                     applied = re.sub(regexp, r"\2", tokenized_sents[i])
+                     # filter out nested alerts
+                     if original != "" and applied != "" and not re.search(r"[{}=]", original) \
+                             and not re.search(r"[{}=]", applied):
+                         out_original.write(" ".join(word_tokenize(original)) + "\n")
+                         out_applied.write(" ".join(word_tokenize(applied)) + "\n")
+
+
+ if __name__ == '__main__':
+     parser = argparse.ArgumentParser(description=(
+         "Convert CLC-FCE dataset to the parallel sentences format."))
+     parser.add_argument('fce_dataset_path',
+                         help='Path to the folder with the FCE dataset')
+     parser.add_argument('--output',
+                         help='Path to the output folder')
+     args = parser.parse_args()
+
+     main()
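Run against an unpacked copy of the corpus (paths are placeholders):

    python utils/prepare_clc_fce_data.py /path/to/fce --output /path/to/out

which writes fce-original.txt and fce-applied.txt, one tokenized sentence per line, into the --output folder.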
utils/preprocess_data.py ADDED
@@ -0,0 +1,488 @@
+ import argparse
+ import os
+ from difflib import SequenceMatcher
+
+ import Levenshtein
+ import numpy as np
+ from tqdm import tqdm
+
+ from helpers import write_lines, read_parallel_lines, encode_verb_form, \
+     apply_reverse_transformation, SEQ_DELIMETERS, START_TOKEN
+
+
+ def perfect_align(t, T, insertions_allowed=0,
+                   cost_function=Levenshtein.distance):
+     # dp[i, j, k] is the minimal cost of matching the first `i` tokens of `t`
+     # with the first `j` tokens of `T`, after making `k` insertions since the
+     # last match of a token from `t`. In other words, t[:i] aligned with T[:j].
+
+     # Initialize with INFINITY (unknown)
+     shape = (len(t) + 1, len(T) + 1, insertions_allowed + 1)
+     dp = np.ones(shape, dtype=int) * int(1e9)
+     come_from = np.ones(shape, dtype=int) * int(1e9)
+     come_from_ins = np.ones(shape, dtype=int) * int(1e9)
+
+     dp[0, 0, 0] = 0  # The only known starting point. Nothing matched to nothing.
+     for i in range(len(t) + 1):  # Go inclusive
+         for j in range(len(T) + 1):  # Go inclusive
+             for q in range(insertions_allowed + 1):  # Go inclusive
+                 if i < len(t):
+                     # Given the matched sequence of t[:i] and T[:j], match
+                     # token t[i] with the following tokens T[j:k].
+                     for k in range(j, len(T) + 1):
+                         transform = \
+                             apply_transformation(t[i], ' '.join(T[j:k]))
+                         if transform:
+                             cost = 0
+                         else:
+                             cost = cost_function(t[i], ' '.join(T[j:k]))
+                         current = dp[i, j, q] + cost
+                         if dp[i + 1, k, 0] > current:
+                             dp[i + 1, k, 0] = current
+                             come_from[i + 1, k, 0] = j
+                             come_from_ins[i + 1, k, 0] = q
+                 if q < insertions_allowed:
+                     # Given the matched sequence of t[:i] and T[:j], create
+                     # an insertion with the following tokens T[j:k].
+                     for k in range(j, len(T) + 1):
+                         cost = len(' '.join(T[j:k]))
+                         current = dp[i, j, q] + cost
+                         if dp[i, k, q + 1] > current:
+                             dp[i, k, q + 1] = current
+                             come_from[i, k, q + 1] = j
+                             come_from_ins[i, k, q + 1] = q
+
+     # The solution is in dp[len(t), len(T), *]. Backtrack from there.
+     alignment = []
+     i = len(t)
+     j = len(T)
+     q = dp[i, j, :].argmin()
+     while i > 0 or q > 0:
+         is_insert = (come_from_ins[i, j, q] != q) and (q != 0)
+         j, k, q = come_from[i, j, q], j, come_from_ins[i, j, q]
+         if not is_insert:
+             i -= 1
+
+         if is_insert:
+             alignment.append(['INSERT', T[j:k], (i, i)])
+         else:
+             alignment.append([f'REPLACE_{t[i]}', T[j:k], (i, i + 1)])
+
+     assert j == 0
+
+     return dp[len(t), len(T)].min(), list(reversed(alignment))
+
+
+ def _split(token):
+     if not token:
+         return []
+     parts = token.split()
+     return parts or [token]
+
+
+ def apply_merge_transformation(source_tokens, target_words, shift_idx):
+     edits = []
+     if len(source_tokens) > 1 and len(target_words) == 1:
+         # check merge
+         transform = check_merge(source_tokens, target_words)
+         if transform:
+             for i in range(len(source_tokens) - 1):
+                 edits.append([(shift_idx + i, shift_idx + i + 1), transform])
+             return edits
+
+     if len(source_tokens) == len(target_words) == 2:
+         # check swap
+         transform = check_swap(source_tokens, target_words)
+         if transform:
+             edits.append([(shift_idx, shift_idx + 1), transform])
+     return edits
+
+
+ def is_sent_ok(sent, delimeters=SEQ_DELIMETERS):
+     for del_val in delimeters.values():
+         if del_val in sent and del_val != delimeters["tokens"]:
+             return False
+     return True
+
+
+ def check_casetype(source_token, target_token):
+     if source_token.lower() != target_token.lower():
+         return None
+     if source_token.lower() == target_token:
+         return "$TRANSFORM_CASE_LOWER"
+     elif source_token.capitalize() == target_token:
+         return "$TRANSFORM_CASE_CAPITAL"
+     elif source_token.upper() == target_token:
+         return "$TRANSFORM_CASE_UPPER"
+     elif source_token[1:].capitalize() == target_token[1:] and source_token[0] == target_token[0]:
+         return "$TRANSFORM_CASE_CAPITAL_1"
+     elif source_token[:-1].upper() == target_token[:-1] and source_token[-1] == target_token[-1]:
+         return "$TRANSFORM_CASE_UPPER_-1"
+     else:
+         return None
+
+
+ def check_equal(source_token, target_token):
+     if source_token == target_token:
+         return "$KEEP"
+     else:
+         return None
+
+
+ def check_split(source_token, target_tokens):
+     if source_token.split("-") == target_tokens:
+         return "$TRANSFORM_SPLIT_HYPHEN"
+     else:
+         return None
+
+
+ def check_merge(source_tokens, target_tokens):
+     if "".join(source_tokens) == "".join(target_tokens):
+         return "$MERGE_SPACE"
+     elif "-".join(source_tokens) == "-".join(target_tokens):
+         return "$MERGE_HYPHEN"
+     else:
+         return None
+
+
+ def check_swap(source_tokens, target_tokens):
+     if source_tokens == [x for x in reversed(target_tokens)]:
+         return "$MERGE_SWAP"
+     else:
+         return None
+
+
+ def check_plural(source_token, target_token):
+     if source_token.endswith("s") and source_token[:-1] == target_token:
+         return "$TRANSFORM_AGREEMENT_SINGULAR"
+     elif target_token.endswith("s") and source_token == target_token[:-1]:
+         return "$TRANSFORM_AGREEMENT_PLURAL"
+     else:
+         return None
+
+
+ def check_verb(source_token, target_token):
+     encoding = encode_verb_form(source_token, target_token)
+     if encoding:
+         return f"$TRANSFORM_VERB_{encoding}"
+     else:
+         return None
+
+
+ def apply_transformation(source_token, target_token):
+     target_tokens = target_token.split()
+     if len(target_tokens) > 1:
+         # check split
+         transform = check_split(source_token, target_tokens)
+         if transform:
+             return transform
+     checks = [check_equal, check_casetype, check_verb, check_plural]
+     for check in checks:
+         transform = check(source_token, target_token)
+         if transform:
+             return transform
+     return None
+
+
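apply_transformation is the tag-induction counterpart of apply_reverse_transformation: given an aligned (source, target) pair it returns the cheapest g-transformation that maps one onto the other, or None so the caller falls back to a plain $REPLACE_ tag. Hand-traced expectations (inputs invented for illustration):

    check_casetype("mr", "Mr")                   # -> "$TRANSFORM_CASE_CAPITAL"
    check_plural("dogs", "dog")                  # -> "$TRANSFORM_AGREEMENT_SINGULAR"
    check_merge(["foot", "ball"], ["football"])  # -> "$MERGE_SPACE"
    apply_transformation("e-mail", "e mail")     # -> "$TRANSFORM_SPLIT_HYPHEN"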
+ def align_sequences(source_sent, target_sent):
+     # check that the sentence contains no delimiter sequences
+     if not is_sent_ok(source_sent) or not is_sent_ok(target_sent):
+         return None
+     source_tokens = source_sent.split()
+     target_tokens = target_sent.split()
+     matcher = SequenceMatcher(None, source_tokens, target_tokens)
+     diffs = list(matcher.get_opcodes())
+     all_edits = []
+     for diff in diffs:
+         tag, i1, i2, j1, j2 = diff
+         source_part = _split(" ".join(source_tokens[i1:i2]))
+         target_part = _split(" ".join(target_tokens[j1:j2]))
+         if tag == 'equal':
+             continue
+         elif tag == 'delete':
+             # delete all words separately
+             for j in range(i2 - i1):
+                 edit = [(i1 + j, i1 + j + 1), '$DELETE']
+                 all_edits.append(edit)
+         elif tag == 'insert':
+             # append to the previous word
+             for target_token in target_part:
+                 edit = ((i1 - 1, i1), f"$APPEND_{target_token}")
+                 all_edits.append(edit)
+         else:
+             # check merge first of all
+             edits = apply_merge_transformation(source_part, target_part,
+                                                shift_idx=i1)
+             if edits:
+                 all_edits.extend(edits)
+                 continue
+
+             # normalize alignments if needed (make them singletons)
+             _, alignments = perfect_align(source_part, target_part,
+                                           insertions_allowed=0)
+             for alignment in alignments:
+                 new_shift = alignment[2][0]
+                 edits = convert_alignments_into_edits(alignment,
+                                                       shift_idx=i1 + new_shift)
+                 all_edits.extend(edits)
+
+     # get labels
+     labels = convert_edits_into_labels(source_tokens, all_edits)
+     # match tags to source tokens
+     sent_with_tags = add_labels_to_the_tokens(source_tokens, labels)
+     return sent_with_tags
+
+
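The string align_sequences returns interleaves each source token (plus the $START sentinel) with its labels via the SEQ_DELIMETERS separators. For the pair 'the big cat' -> 'the cat', SequenceMatcher yields a single delete opcode on 'big', so the tagged line is:

    $STARTSEPL|||SEPR$KEEP theSEPL|||SEPR$KEEP bigSEPL|||SEPR$DELETE catSEPL|||SEPR$KEEP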
+ def convert_edits_into_labels(source_tokens, all_edits):
+     # make sure that edits are flat
+     flat_edits = []
+     for edit in all_edits:
+         (start, end), edit_operations = edit
+         if isinstance(edit_operations, list):
+             for operation in edit_operations:
+                 new_edit = [(start, end), operation]
+                 flat_edits.append(new_edit)
+         elif isinstance(edit_operations, str):
+             flat_edits.append(edit)
+         else:
+             raise Exception("Unknown operation type")
+     all_edits = flat_edits[:]
+     labels = []
+     total_labels = len(source_tokens) + 1
+     if not all_edits:
+         labels = [["$KEEP"] for x in range(total_labels)]
+     else:
+         for i in range(total_labels):
+             edit_operations = [x[1] for x in all_edits if x[0][0] == i - 1
+                                and x[0][1] == i]
+             if not edit_operations:
+                 labels.append(["$KEEP"])
+             else:
+                 labels.append(edit_operations)
+     return labels
+
+
+ def convert_alignments_into_edits(alignment, shift_idx):
+     edits = []
+     action, target_tokens, new_idx = alignment
+     source_token = action.replace("REPLACE_", "")
+
+     # check if delete
+     if not target_tokens:
+         edit = [(shift_idx, 1 + shift_idx), "$DELETE"]
+         return [edit]
+
+     # check splits
+     for i in range(1, len(target_tokens)):
+         target_token = " ".join(target_tokens[:i + 1])
+         transform = apply_transformation(source_token, target_token)
+         if transform:
+             edit = [(shift_idx, shift_idx + 1), transform]
+             edits.append(edit)
+             target_tokens = target_tokens[i + 1:]
+             for target in target_tokens:
+                 edits.append([(shift_idx, shift_idx + 1), f"$APPEND_{target}"])
+             return edits
+
+     transform_costs = []
+     transforms = []
+     for target_token in target_tokens:
+         transform = apply_transformation(source_token, target_token)
+         if transform:
+             cost = 0
+             transforms.append(transform)
+         else:
+             cost = Levenshtein.distance(source_token, target_token)
+             transforms.append(None)
+         transform_costs.append(cost)
+     min_cost_idx = transform_costs.index(min(transform_costs))
+     # append to the previous word
+     for i in range(0, min_cost_idx):
+         target = target_tokens[i]
+         edit = [(shift_idx - 1, shift_idx), f"$APPEND_{target}"]
+         edits.append(edit)
+     # replace/transform the target word
+     transform = transforms[min_cost_idx]
+     target = transform if transform is not None \
+         else f"$REPLACE_{target_tokens[min_cost_idx]}"
+     edit = [(shift_idx, 1 + shift_idx), target]
+     edits.append(edit)
+     # append to this word
+     for i in range(min_cost_idx + 1, len(target_tokens)):
+         target = target_tokens[i]
+         edit = [(shift_idx, 1 + shift_idx), f"$APPEND_{target}"]
+         edits.append(edit)
+     return edits
+
+
+ def add_labels_to_the_tokens(source_tokens, labels, delimeters=SEQ_DELIMETERS):
+     tokens_with_all_tags = []
+     source_tokens_with_start = [START_TOKEN] + source_tokens
+     for token, label_list in zip(source_tokens_with_start, labels):
+         all_tags = delimeters['operations'].join(label_list)
+         comb_record = token + delimeters['labels'] + all_tags
+         tokens_with_all_tags.append(comb_record)
+     return delimeters['tokens'].join(tokens_with_all_tags)
+
+
+ def convert_data_from_raw_files(source_file, target_file, output_file, chunk_size):
+     tagged = []
+     source_data, target_data = read_parallel_lines(source_file, target_file)
+     print(f"The size of the raw dataset is {len(source_data)}")
+     cnt_total, cnt_all, cnt_tp = 0, 0, 0
+     for source_sent, target_sent in tqdm(zip(source_data, target_data)):
+         try:
+             aligned_sent = align_sequences(source_sent, target_sent)
+         except Exception:
+             aligned_sent = align_sequences(source_sent, target_sent)
+         if source_sent != target_sent:
+             cnt_tp += 1
+         alignments = [aligned_sent]
+         cnt_all += len(alignments)
+         try:
+             check_sent = convert_tagged_line(aligned_sent)
+         except Exception:
+             # debug mode
+             aligned_sent = align_sequences(source_sent, target_sent)
+             check_sent = convert_tagged_line(aligned_sent)
+
+         if "".join(check_sent.split()) != "".join(
+                 target_sent.split()):
+             # do it again for debugging
+             aligned_sent = align_sequences(source_sent, target_sent)
+             check_sent = convert_tagged_line(aligned_sent)
+             print(f"Incorrect pair: \n{target_sent}\n{check_sent}")
+             continue
+         if alignments:
+             cnt_total += len(alignments)
+             tagged.extend(alignments)
+             if len(tagged) > chunk_size:
+                 write_lines(output_file, tagged, mode='a')
+                 tagged = []
+
+     print(f"Overall extracted {cnt_total}. "
+           f"Original TP {cnt_tp}."
+           f" Original TN {cnt_all - cnt_tp}")
+     if tagged:
+         write_lines(output_file, tagged, 'a')
+
+
+ def convert_labels_into_edits(labels):
+     all_edits = []
+     for i, label_list in enumerate(labels):
+         if label_list == ["$KEEP"]:
+             continue
+         else:
+             edit = [(i - 1, i), label_list]
+             all_edits.append(edit)
+     return all_edits
+
+
+ def get_target_sent_by_levels(source_tokens, labels):
+     relevant_edits = convert_labels_into_edits(labels)
+     target_tokens = source_tokens[:]
+     leveled_target_tokens = {}
+     if not relevant_edits:
+         target_sentence = " ".join(target_tokens)
+         return leveled_target_tokens, target_sentence
+     max_level = max([len(x[1]) for x in relevant_edits])
+     for level in range(max_level):
+         rest_edits = []
+         shift_idx = 0
+         for edits in relevant_edits:
+             (start, end), label_list = edits
+             label = label_list[0]
+             target_pos = start + shift_idx
+             source_token = target_tokens[target_pos] if target_pos >= 0 else START_TOKEN
+             if label == "$DELETE":
+                 del target_tokens[target_pos]
+                 shift_idx -= 1
+             elif label.startswith("$APPEND_"):
+                 word = label.replace("$APPEND_", "")
+                 target_tokens[target_pos + 1: target_pos + 1] = [word]
+                 shift_idx += 1
+             elif label.startswith("$REPLACE_"):
+                 word = label.replace("$REPLACE_", "")
+                 target_tokens[target_pos] = word
+             elif label.startswith("$TRANSFORM"):
+                 word = apply_reverse_transformation(source_token, label)
+                 if word is None:
+                     word = source_token
+                 target_tokens[target_pos] = word
+             elif label.startswith("$MERGE_"):
+                 # apply merge only on the last stage
+                 if level == (max_level - 1):
+                     target_tokens[target_pos + 1: target_pos + 1] = [label]
+                     shift_idx += 1
+                 else:
+                     rest_edit = [(start + shift_idx, end + shift_idx), [label]]
+                     rest_edits.append(rest_edit)
+             rest_labels = label_list[1:]
+             if rest_labels:
+                 rest_edit = [(start + shift_idx, end + shift_idx), rest_labels]
+                 rest_edits.append(rest_edit)
+
+         leveled_tokens = target_tokens[:]
+         # update the next step
+         relevant_edits = rest_edits[:]
+         if level == (max_level - 1):
+             leveled_tokens = replace_merge_transforms(leveled_tokens)
+         leveled_labels = convert_edits_into_labels(leveled_tokens,
+                                                    relevant_edits)
+         leveled_target_tokens[level + 1] = {"tokens": leveled_tokens,
+                                             "labels": leveled_labels}
+
+     target_sentence = " ".join(leveled_target_tokens[max_level]["tokens"])
+     return leveled_target_tokens, target_sentence
+
+
+ def replace_merge_transforms(tokens):
+     if all(not x.startswith("$MERGE_") for x in tokens):
+         return tokens
+     target_tokens = tokens[:]
+     allowed_range = (1, len(tokens) - 1)
+     for i in range(len(tokens)):
+         target_token = tokens[i]
+         if target_token.startswith("$MERGE"):
+             if target_token.startswith("$MERGE_SWAP") and i in allowed_range:
+                 target_tokens[i - 1] = tokens[i + 1]
+                 target_tokens[i + 1] = tokens[i - 1]
+                 target_tokens[i: i + 1] = []
+     target_line = " ".join(target_tokens)
+     target_line = target_line.replace(" $MERGE_HYPHEN ", "-")
+     target_line = target_line.replace(" $MERGE_SPACE ", "")
+     return target_line.split()
+
+
+ def convert_tagged_line(line, delimeters=SEQ_DELIMETERS):
+     label_del = delimeters['labels']
+     source_tokens = [x.split(label_del)[0]
+                      for x in line.split(delimeters['tokens'])][1:]
+     labels = [x.split(label_del)[1].split(delimeters['operations'])
+               for x in line.split(delimeters['tokens'])]
+     assert len(source_tokens) + 1 == len(labels)
+     levels_dict, target_line = get_target_sent_by_levels(source_tokens, labels)
+     return target_line
+
+
+ def main(args):
+     convert_data_from_raw_files(args.source, args.target, args.output_file, args.chunk_size)
+
+
+ if __name__ == '__main__':
+     parser = argparse.ArgumentParser()
+     parser.add_argument('-s', '--source',
+                         help='Path to the source file',
+                         required=True)
+     parser.add_argument('-t', '--target',
+                         help='Path to the target file',
+                         required=True)
+     parser.add_argument('-o', '--output_file',
+                         help='Path to the output file',
+                         required=True)
+     parser.add_argument('--chunk_size',
+                         type=int,
+                         help='Dump the tagged lines to the output file every '
+                              'chunk_size sentences.',
+                         default=1000000)
+     args = parser.parse_args()
+     main(args)
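With parallel source/target files in place (paths are placeholders), the whole preprocessing step is:

    python utils/preprocess_data.py -s data/src.txt -t data/tgt.txt -o data/train.tagged

Each line of the output is a source sentence in the tagged format shown above; chunk_size only controls how often the buffer is flushed to disk.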